mm/zsmalloc.c: fix race condition in zs_destroy_pool
[sfrench/cifs-2.6.git] / mm / zsmalloc.c
index 57fbb7ced69f3a47111db0f665ec05b26e4186db..08def3a0d2007c3030384e4cd4d04f83b5ac7c74 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/mount.h>
 #include <linux/pseudo_fs.h>
 #include <linux/migrate.h>
+#include <linux/wait.h>
 #include <linux/pagemap.h>
 #include <linux/fs.h>
 
@@ -268,6 +269,10 @@ struct zs_pool {
 #ifdef CONFIG_COMPACTION
        struct inode *inode;
        struct work_struct free_work;
+       /* A wait queue for when migration races with async_free_zspage() */
+       struct wait_queue_head migration_wait;
+       atomic_long_t isolated_pages;
+       bool destroying;
 #endif
 };
 
@@ -1862,6 +1867,31 @@ static void dec_zspage_isolation(struct zspage *zspage)
        zspage->isolated--;
 }
 
+/*
+ * Return @zspage to @class via putback_zspage(). When the zspage comes
+ * back in the ZS_EMPTY fullness group it is not freed here; instead
+ * pool->free_work is scheduled so that the freeing is deferred.
+ */
+static void putback_zspage_deferred(struct zs_pool *pool,
+                                   struct size_class *class,
+                                   struct zspage *zspage)
+{
+       if (putback_zspage(class, zspage) == ZS_EMPTY)
+               schedule_work(&pool->free_work);
+}
+
+/*
+ * Decrement pool->isolated_pages and, if the count has reached zero while
+ * the pool is marked as being destroyed, wake every waiter sleeping on
+ * pool->migration_wait.
+ */
+static inline void zs_pool_dec_isolated(struct zs_pool *pool)
+{
+       VM_BUG_ON(atomic_long_read(&pool->isolated_pages) <= 0);
+       atomic_long_dec(&pool->isolated_pages);
+       /*
+        * atomic_long_dec() does not imply any memory ordering, so without
+        * a barrier the load of pool->destroying below could be satisfied
+        * before the decrement becomes visible. zs_unregister_migration()
+        * could then still see a non-zero count and go to sleep while we
+        * see destroying == false and skip the wake-up. Checking
+        * pool->destroying must therefore happen after the decrement;
+        * this pairs with the smp_mb() in zs_unregister_migration().
+        */
+       smp_mb__after_atomic();
+       if (atomic_long_read(&pool->isolated_pages) == 0 && pool->destroying)
+               wake_up_all(&pool->migration_wait);
+}
+
 static void replace_sub_page(struct size_class *class, struct zspage *zspage,
                                struct page *newpage, struct page *oldpage)
 {
@@ -1931,6 +1961,7 @@ static bool zs_page_isolate(struct page *page, isolate_mode_t mode)
         */
        if (!list_empty(&zspage->list) && !is_zspage_isolated(zspage)) {
                get_zspage_mapping(zspage, &class_idx, &fullness);
+               atomic_long_inc(&pool->isolated_pages);
                remove_zspage(class, zspage, fullness);
        }
 
@@ -2030,8 +2061,16 @@ static int zs_page_migrate(struct address_space *mapping, struct page *newpage,
         * Page migration is done so let's putback isolated zspage to
         * the list if @page is final isolated subpage in the zspage.
         */
-       if (!is_zspage_isolated(zspage))
-               putback_zspage(class, zspage);
+       if (!is_zspage_isolated(zspage)) {
+               /*
+                * We cannot race with zs_destroy_pool() here because we wait
+                * for isolation to hit zero before we start destroying.
+                * Also, we ensure that everyone can see pool->destroying before
+                * we start waiting.
+                */
+               putback_zspage_deferred(pool, class, zspage);
+               zs_pool_dec_isolated(pool);
+       }
 
        reset_page(page);
        put_page(page);
@@ -2077,13 +2116,12 @@ static void zs_page_putback(struct page *page)
        spin_lock(&class->lock);
        dec_zspage_isolation(zspage);
        if (!is_zspage_isolated(zspage)) {
-               fg = putback_zspage(class, zspage);
                /*
                 * Due to page_lock, we cannot free zspage immediately
                 * so let's defer.
                 */
-               if (fg == ZS_EMPTY)
-                       schedule_work(&pool->free_work);
+               putback_zspage_deferred(pool, class, zspage);
+               zs_pool_dec_isolated(pool);
        }
        spin_unlock(&class->lock);
 }
@@ -2107,8 +2145,36 @@ static int zs_register_migration(struct zs_pool *pool)
        return 0;
 }
 
+/* True once pool->isolated_pages reads as zero. */
+static bool pool_isolated_are_drained(struct zs_pool *pool)
+{
+       return !atomic_long_read(&pool->isolated_pages);
+}
+
+/*
+ * Sleep on pool->migration_wait until pool_isolated_are_drained() reports
+ * that no zspage of @pool is isolated any more.
+ */
+static void wait_for_isolated_drain(struct zs_pool *pool)
+{
+       /*
+        * The pool is being destroyed, so no allocations are active, and
+        * zs_page_isolate() fails for completely free zspages. Waiting
+        * for the pool's isolated count to reach zero is therefore all
+        * that is needed.
+        */
+       wait_event(pool->migration_wait, pool_isolated_are_drained(pool));
+}
+
 static void zs_unregister_migration(struct zs_pool *pool)
 {
+       pool->destroying = true;
+       /*
+        * Full barrier: make the store to pool->destroying globally visible
+        * before wait_for_isolated_drain() reads pool->isolated_pages.
+        * Either pool->isolated_pages is already 0, in which case there is
+        * nothing to wait for, or it is > 0 and whoever drops it to 0 is
+        * expected to observe pool->destroying and wake us up.
+        */
+       smp_mb();
+       wait_for_isolated_drain(pool); /* May block until the count is 0 */
        flush_work(&pool->free_work);
        iput(pool->inode);
 }
@@ -2346,6 +2412,8 @@ struct zs_pool *zs_create_pool(const char *name)
        if (!pool->name)
                goto err;
 
+       init_waitqueue_head(&pool->migration_wait);
+
        if (create_cache(pool))
                goto err;