mm/swap: free swap slots in batch
author Tim Chen <tim.c.chen@linux.intel.com>
Wed, 22 Feb 2017 23:45:36 +0000 (15:45 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2017 00:41:30 +0000 (16:41 -0800)
Add new functions that free unused swap slots in batches without the
need to reacquire the swap info lock.  This improves scalability and
reduces lock contention.

Link: http://lkml.kernel.org/r/c25e0fcdfd237ec4ca7db91631d3b9f6ed23824e.1484082593.git.tim.c.chen@linux.intel.com
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Signed-off-by: "Huang, Ying" <ying.huang@intel.com>
Cc: Aaron Lu <aaron.lu@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Christian Borntraeger <borntraeger@de.ibm.com>
Cc: Dave Hansen <dave.hansen@intel.com>
Cc: Hillf Danton <hillf.zj@alibaba-inc.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Rik van Riel <riel@redhat.com>
Cc: Shaohua Li <shli@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
include/linux/swap.h
mm/swapfile.c

index 956eae8a8edf9769571b3f6723e273d3d82572e7..bcc0b18f96d2cd5aa6935d026eeb4418f9efcc66 100644 (file)
@@ -394,6 +394,7 @@ extern int swap_duplicate(swp_entry_t);
 extern int swapcache_prepare(swp_entry_t);
 extern void swap_free(swp_entry_t);
 extern void swapcache_free(swp_entry_t);
+extern void swapcache_free_entries(swp_entry_t *entries, int n);
 extern int free_swap_and_cache(swp_entry_t);
 extern int swap_type_of(dev_t, sector_t, struct block_device **);
 extern unsigned int count_swap_pages(int, int);
index e73b5441055b3f24b89f2e36fbf0e6171b81c940..8b5bd34b1a0026c055cd392248ec9866be62759c 100644 (file)
@@ -942,35 +942,34 @@ static struct swap_info_struct *swap_info_get(swp_entry_t entry)
        return p;
 }
 
-static unsigned char swap_entry_free(struct swap_info_struct *p,
-                                    swp_entry_t entry, unsigned char usage,
-                                    bool swap_info_locked)
+static struct swap_info_struct *swap_info_get_cont(swp_entry_t entry,
+                                       struct swap_info_struct *q)
+{
+       struct swap_info_struct *p;
+
+       p = _swap_info_get(entry);
+
+       if (p != q) {
+               if (q != NULL)
+                       spin_unlock(&q->lock);
+               if (p != NULL)
+                       spin_lock(&p->lock);
+       }
+       return p;
+}
+
+static unsigned char __swap_entry_free(struct swap_info_struct *p,
+                                      swp_entry_t entry, unsigned char usage)
 {
        struct swap_cluster_info *ci;
        unsigned long offset = swp_offset(entry);
        unsigned char count;
        unsigned char has_cache;
-       bool lock_swap_info = false;
-
-       if (!swap_info_locked) {
-               count = p->swap_map[offset];
-               if (!p->cluster_info || count == usage || count == SWAP_MAP_SHMEM) {
-lock_swap_info:
-                       swap_info_locked = true;
-                       lock_swap_info = true;
-                       spin_lock(&p->lock);
-               }
-       }
 
-       ci = lock_cluster(p, offset);
+       ci = lock_cluster_or_swap_info(p, offset);
 
        count = p->swap_map[offset];
 
-       if (!swap_info_locked && (count == usage || count == SWAP_MAP_SHMEM)) {
-               unlock_cluster(ci);
-               goto lock_swap_info;
-       }
-
        has_cache = count & SWAP_HAS_CACHE;
        count &= ~SWAP_HAS_CACHE;
 
@@ -994,46 +993,52 @@ lock_swap_info:
        }
 
        usage = count | has_cache;
-       p->swap_map[offset] = usage;
+       p->swap_map[offset] = usage ? : SWAP_HAS_CACHE;
+
+       unlock_cluster_or_swap_info(p, ci);
+
+       return usage;
+}
 
+static void swap_entry_free(struct swap_info_struct *p, swp_entry_t entry)
+{
+       struct swap_cluster_info *ci;
+       unsigned long offset = swp_offset(entry);
+       unsigned char count;
+
+       ci = lock_cluster(p, offset);
+       count = p->swap_map[offset];
+       VM_BUG_ON(count != SWAP_HAS_CACHE);
+       p->swap_map[offset] = 0;
+       dec_cluster_info_page(p, p->cluster_info, offset);
        unlock_cluster(ci);
 
-       /* free if no reference */
-       if (!usage) {
-               VM_BUG_ON(!swap_info_locked);
-               mem_cgroup_uncharge_swap(entry);
-               ci = lock_cluster(p, offset);
-               dec_cluster_info_page(p, p->cluster_info, offset);
-               unlock_cluster(ci);
-               if (offset < p->lowest_bit)
-                       p->lowest_bit = offset;
-               if (offset > p->highest_bit) {
-                       bool was_full = !p->highest_bit;
-                       p->highest_bit = offset;
-                       if (was_full && (p->flags & SWP_WRITEOK)) {
-                               spin_lock(&swap_avail_lock);
-                               WARN_ON(!plist_node_empty(&p->avail_list));
-                               if (plist_node_empty(&p->avail_list))
-                                       plist_add(&p->avail_list,
-                                                 &swap_avail_head);
-                               spin_unlock(&swap_avail_lock);
-                       }
-               }
-               atomic_long_inc(&nr_swap_pages);
-               p->inuse_pages--;
-               frontswap_invalidate_page(p->type, offset);
-               if (p->flags & SWP_BLKDEV) {
-                       struct gendisk *disk = p->bdev->bd_disk;
-                       if (disk->fops->swap_slot_free_notify)
-                               disk->fops->swap_slot_free_notify(p->bdev,
-                                                                 offset);
+       mem_cgroup_uncharge_swap(entry);
+       if (offset < p->lowest_bit)
+               p->lowest_bit = offset;
+       if (offset > p->highest_bit) {
+               bool was_full = !p->highest_bit;
+
+               p->highest_bit = offset;
+               if (was_full && (p->flags & SWP_WRITEOK)) {
+                       spin_lock(&swap_avail_lock);
+                       WARN_ON(!plist_node_empty(&p->avail_list));
+                       if (plist_node_empty(&p->avail_list))
+                               plist_add(&p->avail_list,
+                                         &swap_avail_head);
+                       spin_unlock(&swap_avail_lock);
                }
        }
+       atomic_long_inc(&nr_swap_pages);
+       p->inuse_pages--;
+       frontswap_invalidate_page(p->type, offset);
+       if (p->flags & SWP_BLKDEV) {
+               struct gendisk *disk = p->bdev->bd_disk;
 
-       if (lock_swap_info)
-               spin_unlock(&p->lock);
-
-       return usage;
+               if (disk->fops->swap_slot_free_notify)
+                       disk->fops->swap_slot_free_notify(p->bdev,
+                                                         offset);
+       }
 }
 
 /*
@@ -1045,8 +1050,10 @@ void swap_free(swp_entry_t entry)
        struct swap_info_struct *p;
 
        p = _swap_info_get(entry);
-       if (p)
-               swap_entry_free(p, entry, 1, false);
+       if (p) {
+               if (!__swap_entry_free(p, entry, 1))
+                       swapcache_free_entries(&entry, 1);
+       }
 }
 
 /*
@@ -1057,8 +1064,32 @@ void swapcache_free(swp_entry_t entry)
        struct swap_info_struct *p;
 
        p = _swap_info_get(entry);
+       if (p) {
+               if (!__swap_entry_free(p, entry, SWAP_HAS_CACHE))
+                       swapcache_free_entries(&entry, 1);
+       }
+}
+
+void swapcache_free_entries(swp_entry_t *entries, int n)
+{
+       struct swap_info_struct *p, *prev;
+       int i;
+
+       if (n <= 0)
+               return;
+
+       prev = NULL;
+       p = NULL;
+       for (i = 0; i < n; ++i) {
+               p = swap_info_get_cont(entries[i], prev);
+               if (p)
+                       swap_entry_free(p, entries[i]);
+               else
+                       break;
+               prev = p;
+       }
        if (p)
-               swap_entry_free(p, entry, SWAP_HAS_CACHE, false);
+               spin_unlock(&p->lock);
 }
 
 /*
@@ -1241,21 +1272,23 @@ int free_swap_and_cache(swp_entry_t entry)
 {
        struct swap_info_struct *p;
        struct page *page = NULL;
+       unsigned char count;
 
        if (non_swap_entry(entry))
                return 1;
 
-       p = swap_info_get(entry);
+       p = _swap_info_get(entry);
        if (p) {
-               if (swap_entry_free(p, entry, 1, true) == SWAP_HAS_CACHE) {
+               count = __swap_entry_free(p, entry, 1);
+               if (count == SWAP_HAS_CACHE) {
                        page = find_get_page(swap_address_space(entry),
                                             swp_offset(entry));
                        if (page && !trylock_page(page)) {
                                put_page(page);
                                page = NULL;
                        }
-               }
-               spin_unlock(&p->lock);
+               } else if (!count)
+                       swapcache_free_entries(&entry, 1);
        }
        if (page) {
                /*