mm/z3fold.c: fix race between migration and destruction

author Henry Burns <henryburns@google.com>

Sun, 25 Aug 2019 00:54:37 +0000 (17:54 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Sun, 25 Aug 2019 02:48:42 +0000 (19:48 -0700)
author Henry Burns <henryburns@google.com>
Sun, 25 Aug 2019 00:54:37 +0000 (17:54 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sun, 25 Aug 2019 02:48:42 +0000 (19:48 -0700)
diff --git a/mm/z3fold.c b/mm/z3fold.c

index ed19d98c9dcd1641c5e66d27d586e93f43fadce1..e31cd9bd4ed561bcf4f6788da18f4759c7f61b96 100644 (file)
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -41,6 +41,7 @@
  #include <linux/workqueue.h>
  #include <linux/slab.h>
  #include <linux/spinlock.h>
+#include <linux/wait.h>
  #include <linux/zpool.h>
  #include <linux/magic.h>
  
@@ -145,6 +146,8 @@ struct z3fold_header {
   * @release_wq:        workqueue for safe page release
   * @work:      work_struct for safe page release
   * @inode:     inode for z3fold pseudo filesystem
+ * @destroying: bool to stop migration once we start destruction
+ * @isolated: int to count the number of pages currently in isolation
   *
   * This structure is allocated at pool creation time and maintains metadata
   * pertaining to a particular z3fold pool.
@@ -163,8 +166,11 @@ struct z3fold_pool {
         const struct zpool_ops *zpool_ops;
         struct workqueue_struct *compact_wq;
         struct workqueue_struct *release_wq;
+       struct wait_queue_head isolate_wait;
         struct work_struct work;
         struct inode *inode;
+       bool destroying;
+       int isolated;
  };
  
  /*
@@ -769,6 +775,7 @@ static struct z3fold_pool *z3fold_create_pool(const char *name, gfp_t gfp,
                 goto out_c;
         spin_lock_init(&pool->lock);
         spin_lock_init(&pool->stale_lock);
+       init_waitqueue_head(&pool->isolate_wait);
         pool->unbuddied = __alloc_percpu(sizeof(struct list_head)*NCHUNKS, 2);
         if (!pool->unbuddied)
                 goto out_pool;
@@ -808,6 +815,15 @@ out:
         return NULL;
  }
  
+/*
+ * Report whether the pool currently has no isolated pages.
+ *
+ * pool->isolated is sampled while holding pool->lock. The lock is taken
+ * here, so the caller must not already hold it. Returns true when the
+ * sampled count is zero.
+ */
+static bool pool_isolated_are_drained(struct z3fold_pool *pool)
+{
+       int nr_isolated;
+
+       spin_lock(&pool->lock);
+       nr_isolated = pool->isolated;
+       spin_unlock(&pool->lock);
+
+       return nr_isolated == 0;
+}
  /**
   * z3fold_destroy_pool() - destroys an existing z3fold pool
   * @pool:      the z3fold pool to be destroyed
@@ -817,6 +833,22 @@ out:
  static void z3fold_destroy_pool(struct z3fold_pool *pool)
  {
         kmem_cache_destroy(pool->c_handle);
+       /*
+        * We set pool->destroying under lock to ensure that
+        * z3fold_page_isolate() sees any changes to destroying. This way we
+        * avoid the need for any memory barriers.
+        */
+
+       spin_lock(&pool->lock);
+       pool->destroying = true;
+       spin_unlock(&pool->lock);
+
+       /*
+        * We need to ensure that no pages are being migrated while we destroy
+        * these workqueues, as migration can queue work on either of the
+        * workqueues. Sleep until pool->isolated has dropped to zero;
+        * wait_event() returns once its condition becomes true, so the
+        * condition must be "drained" rather than its negation.
+        */
+       wait_event(pool->isolate_wait, pool_isolated_are_drained(pool));
  
         /*
          * We need to destroy pool->compact_wq before pool->release_wq,
@@ -1307,6 +1339,28 @@ static u64 z3fold_get_pool_size(struct z3fold_pool *pool)
         return atomic64_read(&pool->pages_nr);
  }
  
+/*
+ * Drop one page from the pool's isolated-page count.
+ *
+ * Must be called with pool->lock held; this is asserted on entry. The
+ * count must be positive on entry.
+ */
+static void z3fold_dec_isolated(struct z3fold_pool *pool)
+{
+       assert_spin_locked(&pool->lock);
+       VM_BUG_ON(pool->isolated <= 0);
+
+       /* Nothing more to do while other pages remain isolated. */
+       if (--pool->isolated != 0)
+               return;
+
+       /*
+        * That was the last isolated page: z3fold_destroy_pool() may be
+        * sleeping on isolate_wait, so wake it if anybody is queued there.
+        */
+       if (waitqueue_active(&pool->isolate_wait))
+               wake_up_all(&pool->isolate_wait);
+}
+
+/*
+ * z3fold_inc_isolated() expects to be called while pool->lock is held.
+ *
+ * pool->isolated is read and decremented under pool->lock elsewhere in
+ * this file, so the increment has to happen under the same lock. Assert
+ * it, mirroring z3fold_dec_isolated().
+ */
+static void z3fold_inc_isolated(struct z3fold_pool *pool)
+{
+       assert_spin_locked(&pool->lock);
+       pool->isolated++;
+}
+
  static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
  {
         struct z3fold_header *zhdr;
@@ -1333,6 +1387,33 @@ static bool z3fold_page_isolate(struct page *page, isolate_mode_t mode)
                 spin_lock(&pool->lock);
                 if (!list_empty(&page->lru))
                         list_del(&page->lru);
+               /*
+                * We need to check for destruction while holding pool->lock, as
+                * otherwise destruction could see 0 isolated pages, and
+                * proceed.
+                */
+               if (unlikely(pool->destroying)) {
+                       spin_unlock(&pool->lock);
+                       /*
+                        * If this page isn't stale, somebody else holds a
+                        * reference to it. Let's drop our refcount so that they
+                        * can call the release logic.
+                        */
+                       if (unlikely(kref_put(&zhdr->refcount,
+                                             release_z3fold_page_locked))) {
+                               /*
+                                * If we get here we have kref problems, so we
+                                * should freak out.
+                                */
+                               WARN(1, "Z3fold is experiencing kref problems\n");
+                               return false;
+                       }
+                       z3fold_page_unlock(zhdr);
+                       return false;
+               }
+
+
+               z3fold_inc_isolated(pool);
                 spin_unlock(&pool->lock);
                 z3fold_page_unlock(zhdr);
                 return true;
@@ -1401,6 +1482,10 @@ static int z3fold_page_migrate(struct address_space *mapping, struct page *newpa
  
         queue_work_on(new_zhdr->cpu, pool->compact_wq, &new_zhdr->work);
  
+       spin_lock(&pool->lock);
+       z3fold_dec_isolated(pool);
+       spin_unlock(&pool->lock);
+
         page_mapcount_reset(page);
         put_page(page);
         return 0;
@@ -1420,10 +1505,14 @@ static void z3fold_page_putback(struct page *page)
         INIT_LIST_HEAD(&page->lru);
         if (kref_put(&zhdr->refcount, release_z3fold_page_locked)) {
                 atomic64_dec(&pool->pages_nr);
+               spin_lock(&pool->lock);
+               z3fold_dec_isolated(pool);
+               spin_unlock(&pool->lock);
                 return;
         }
         spin_lock(&pool->lock);
         list_add(&page->lru, &pool->lru);
+       z3fold_dec_isolated(pool);
         spin_unlock(&pool->lock);
         z3fold_page_unlock(zhdr);
  }
author	Henry Burns <henryburns@google.com>
	Sun, 25 Aug 2019 00:54:37 +0000 (17:54 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Sun, 25 Aug 2019 02:48:42 +0000 (19:48 -0700)