mm/zsmalloc: adjust order of functions
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index 839a48c3ca27b2bf008496c89867f7210aeb6610..b72403927aa4b06610cb08e9506d677c98a1a5cd 100644
  *  (reason above)
  */
 #define ZS_SIZE_CLASS_DELTA    (PAGE_SIZE >> 8)
-#define ZS_SIZE_CLASSES                ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / \
-                                       ZS_SIZE_CLASS_DELTA + 1)
 
 /*
  * We do not maintain any list for completely empty or full pages
@@ -170,6 +168,11 @@ enum fullness_group {
        ZS_FULL
 };
 
+/*
+ * number of size_classes
+ */
+static int zs_size_classes;
+
 /*
  * We assign a page to ZS_ALMOST_EMPTY fullness group when:
  *     n <= N / f, where
@@ -214,7 +217,7 @@ struct link_free {
 };
 
 struct zs_pool {
-       struct size_class size_class[ZS_SIZE_CLASSES];
+       struct size_class **size_class;
 
        gfp_t flags;    /* allocation flags used when growing pool */
        atomic_long_t pages_allocated;
@@ -468,7 +471,7 @@ static enum fullness_group fix_fullness_group(struct zs_pool *pool,
        if (newfg == currfg)
                goto out;
 
-       class = &pool->size_class[class_idx];
+       class = pool->size_class[class_idx];
        remove_zspage(page, class, currfg);
        insert_zspage(page, class, newfg);
        set_zspage_mapping(page, class_idx, newfg);
@@ -629,6 +632,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
                struct page *next_page;
                struct link_free *link;
                unsigned int i = 1;
+               void *vaddr;
 
                /*
                 * page->index stores offset of first object starting
@@ -639,8 +643,8 @@ static void init_zspage(struct page *first_page, struct size_class *class)
                if (page != first_page)
                        page->index = off;
 
-               link = (struct link_free *)kmap_atomic(page) +
-                                               off / sizeof(*link);
+               vaddr = kmap_atomic(page);
+               link = (struct link_free *)vaddr + off / sizeof(*link);
 
                while ((off += class->size) < PAGE_SIZE) {
                        link->next = obj_location_to_handle(page, i++);
@@ -654,7 +658,7 @@ static void init_zspage(struct page *first_page, struct size_class *class)
                 */
                next_page = get_next_page(page);
                link->next = obj_location_to_handle(next_page, 0);
-               kunmap_atomic(link);
+               kunmap_atomic(vaddr);
                page = next_page;
                off %= PAGE_SIZE;
        }
@@ -784,7 +788,7 @@ static inline int __zs_cpu_up(struct mapping_area *area)
         */
        if (area->vm_buf)
                return 0;
-       area->vm_buf = (char *)__get_free_page(GFP_KERNEL);
+       area->vm_buf = kmalloc(ZS_MAX_ALLOC_SIZE, GFP_KERNEL);
        if (!area->vm_buf)
                return -ENOMEM;
        return 0;
@@ -792,8 +796,7 @@ static inline int __zs_cpu_up(struct mapping_area *area)
 
 static inline void __zs_cpu_down(struct mapping_area *area)
 {
-       if (area->vm_buf)
-               free_page((unsigned long)area->vm_buf);
+       kfree(area->vm_buf);
        area->vm_buf = NULL;
 }
 
@@ -881,13 +884,26 @@ static struct notifier_block zs_cpu_nb = {
        .notifier_call = zs_cpu_notifier
 };
 
-static void zs_exit(void)
+static int zs_register_cpu_notifier(void)
 {
-       int cpu;
+       int cpu, uninitialized_var(ret);
 
-#ifdef CONFIG_ZPOOL
-       zpool_unregister_driver(&zs_zpool_driver);
-#endif
+       cpu_notifier_register_begin();
+
+       __register_cpu_notifier(&zs_cpu_nb);
+       for_each_online_cpu(cpu) {
+               ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
+               if (notifier_to_errno(ret))
+                       break;
+       }
+
+       cpu_notifier_register_done();
+       return notifier_to_errno(ret);
+}
+
+static void zs_unregister_cpu_notifier(void)
+{
+       int cpu;
 
        cpu_notifier_register_begin();
 
@@ -898,93 +914,129 @@ static void zs_exit(void)
        cpu_notifier_register_done();
 }
 
-static int zs_init(void)
+static void init_zs_size_classes(void)
 {
-       int cpu, ret;
+       int nr;
 
-       cpu_notifier_register_begin();
+       nr = (ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) / ZS_SIZE_CLASS_DELTA + 1;
+       if ((ZS_MAX_ALLOC_SIZE - ZS_MIN_ALLOC_SIZE) % ZS_SIZE_CLASS_DELTA)
+               nr += 1;
 
-       __register_cpu_notifier(&zs_cpu_nb);
-       for_each_online_cpu(cpu) {
-               ret = zs_cpu_notifier(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
-               if (notifier_to_errno(ret)) {
-                       cpu_notifier_register_done();
-                       goto fail;
-               }
-       }
+       zs_size_classes = nr;
+}
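
For a sense of scale, assuming the common configuration (4 KiB pages, ZS_MIN_ALLOC_SIZE of 32 and ZS_MAX_ALLOC_SIZE equal to PAGE_SIZE), the computation above works out as:

    ZS_SIZE_CLASS_DELTA = 4096 >> 8            = 16
    nr                  = (4096 - 32) / 16 + 1 = 255   (no remainder, so no extra class)

so zs_size_classes becomes 255, which matches what the removed ZS_SIZE_CLASSES macro used to produce at compile time.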
 
-       cpu_notifier_register_done();
+static unsigned int get_maxobj_per_zspage(int size, int pages_per_zspage)
+{
+       return pages_per_zspage * PAGE_SIZE / size;
+}
 
-#ifdef CONFIG_ZPOOL
-       zpool_register_driver(&zs_zpool_driver);
-#endif
+static bool can_merge(struct size_class *prev, int size, int pages_per_zspage)
+{
+       if (prev->pages_per_zspage != pages_per_zspage)
+               return false;
 
-       return 0;
-fail:
-       zs_exit();
-       return notifier_to_errno(ret);
+       if (get_maxobj_per_zspage(prev->size, prev->pages_per_zspage)
+               != get_maxobj_per_zspage(size, pages_per_zspage))
+               return false;
+
+       return true;
 }
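
A purely illustrative example of the merge check (the real pages_per_zspage values come from get_pages_per_zspage(), not assumed here): if the classes for sizes 3248 and 3264 both happened to use 4 pages per zspage, get_maxobj_per_zspage() would return 16384 / 3248 = 5 and 16384 / 3264 = 5, so can_merge() would let them share one size_class; a neighbour such as size 3280 yields only 16384 / 3280 = 4 objects per zspage and would therefore keep its own class.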
 
+unsigned long zs_get_total_pages(struct zs_pool *pool)
+{
+       return atomic_long_read(&pool->pages_allocated);
+}
+EXPORT_SYMBOL_GPL(zs_get_total_pages);
+
 /**
- * zs_create_pool - Creates an allocation pool to work from.
- * @flags: allocation flags used to allocate pool metadata
+ * zs_map_object - get address of allocated object from handle.
+ * @pool: pool from which the object was allocated
+ * @handle: handle returned from zs_malloc
  *
- * This function must be called before anything when using
- * the zsmalloc allocator.
+ * Before using an object allocated from zs_malloc, it must be mapped using
+ * this function. When done with the object, it must be unmapped using
+ * zs_unmap_object.
  *
- * On success, a pointer to the newly created pool is returned,
- * otherwise NULL.
+ * Only one object can be mapped per cpu at a time. There is no protection
+ * against nested mappings.
+ *
+ * This function returns with preemption and page faults disabled.
  */
-struct zs_pool *zs_create_pool(gfp_t flags)
+void *zs_map_object(struct zs_pool *pool, unsigned long handle,
+                       enum zs_mapmode mm)
 {
-       int i, ovhd_size;
-       struct zs_pool *pool;
+       struct page *page;
+       unsigned long obj_idx, off;
 
-       ovhd_size = roundup(sizeof(*pool), PAGE_SIZE);
-       pool = kzalloc(ovhd_size, GFP_KERNEL);
-       if (!pool)
-               return NULL;
+       unsigned int class_idx;
+       enum fullness_group fg;
+       struct size_class *class;
+       struct mapping_area *area;
+       struct page *pages[2];
 
-       for (i = 0; i < ZS_SIZE_CLASSES; i++) {
-               int size;
-               struct size_class *class;
+       BUG_ON(!handle);
 
-               size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
-               if (size > ZS_MAX_ALLOC_SIZE)
-                       size = ZS_MAX_ALLOC_SIZE;
+       /*
+        * Because we use per-cpu mapping areas shared among the
+        * pools/users, we can't allow mapping in interrupt context
+        * because it can corrupt another user's mappings.
+        */
+       BUG_ON(in_interrupt());
 
-               class = &pool->size_class[i];
-               class->size = size;
-               class->index = i;
-               spin_lock_init(&class->lock);
-               class->pages_per_zspage = get_pages_per_zspage(size);
+       obj_handle_to_location(handle, &page, &obj_idx);
+       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+       class = pool->size_class[class_idx];
+       off = obj_idx_to_offset(page, obj_idx, class->size);
 
+       area = &get_cpu_var(zs_map_area);
+       area->vm_mm = mm;
+       if (off + class->size <= PAGE_SIZE) {
+               /* this object is contained entirely within a page */
+               area->vm_addr = kmap_atomic(page);
+               return area->vm_addr + off;
        }
 
-       pool->flags = flags;
+       /* this object spans two pages */
+       pages[0] = page;
+       pages[1] = get_next_page(page);
+       BUG_ON(!pages[1]);
 
-       return pool;
+       return __zs_map_object(area, pages, off, class->size);
 }
-EXPORT_SYMBOL_GPL(zs_create_pool);
+EXPORT_SYMBOL_GPL(zs_map_object);
 
-void zs_destroy_pool(struct zs_pool *pool)
+void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
 {
-       int i;
+       struct page *page;
+       unsigned long obj_idx, off;
 
-       for (i = 0; i < ZS_SIZE_CLASSES; i++) {
-               int fg;
-               struct size_class *class = &pool->size_class[i];
+       unsigned int class_idx;
+       enum fullness_group fg;
+       struct size_class *class;
+       struct mapping_area *area;
 
-               for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
-                       if (class->fullness_list[fg]) {
-                               pr_info("Freeing non-empty class with size %db, fullness group %d\n",
-                                       class->size, fg);
-                       }
-               }
+       BUG_ON(!handle);
+
+       obj_handle_to_location(handle, &page, &obj_idx);
+       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
+       class = pool->size_class[class_idx];
+       off = obj_idx_to_offset(page, obj_idx, class->size);
+
+       area = this_cpu_ptr(&zs_map_area);
+       if (off + class->size <= PAGE_SIZE)
+               kunmap_atomic(area->vm_addr);
+       else {
+               struct page *pages[2];
+
+               pages[0] = page;
+               pages[1] = get_next_page(page);
+               BUG_ON(!pages[1]);
+
+               __zs_unmap_object(area, pages, off, class->size);
        }
-       kfree(pool);
+       put_cpu_var(zs_map_area);
 }
-EXPORT_SYMBOL_GPL(zs_destroy_pool);
+EXPORT_SYMBOL_GPL(zs_unmap_object);
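
To make the map/unmap contract above concrete, here is a minimal caller sketch (hypothetical: 'pool', 'src' and 'len' are assumed to exist elsewhere, with declarations coming from <linux/zsmalloc.h>). No sleeping is allowed between the two calls, since zs_map_object() returns with preemption and page faults disabled:

	unsigned long handle = zs_malloc(pool, len);

	if (handle) {
		void *dst = zs_map_object(pool, handle, ZS_MM_WO);

		memcpy(dst, src, len);		/* atomic section: no sleeping */
		zs_unmap_object(pool, handle);
	}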
 
 /**
  * zs_malloc - Allocate block of given size from pool.
@@ -999,8 +1051,8 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
 {
        unsigned long obj;
        struct link_free *link;
-       int class_idx;
        struct size_class *class;
+       void *vaddr;
 
        struct page *first_page, *m_page;
        unsigned long m_objidx, m_offset;
@@ -1008,9 +1060,7 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
        if (unlikely(!size || size > ZS_MAX_ALLOC_SIZE))
                return 0;
 
-       class_idx = get_size_class_index(size);
-       class = &pool->size_class[class_idx];
-       BUG_ON(class_idx != class->index);
+       class = pool->size_class[get_size_class_index(size)];
 
        spin_lock(&class->lock);
        first_page = find_get_zspage(class);
@@ -1031,11 +1081,11 @@ unsigned long zs_malloc(struct zs_pool *pool, size_t size)
        obj_handle_to_location(obj, &m_page, &m_objidx);
        m_offset = obj_idx_to_offset(m_page, m_objidx, class->size);
 
-       link = (struct link_free *)kmap_atomic(m_page) +
-                                       m_offset / sizeof(*link);
+       vaddr = kmap_atomic(m_page);
+       link = (struct link_free *)vaddr + m_offset / sizeof(*link);
        first_page->freelist = link->next;
        memset(link, POISON_INUSE, sizeof(*link));
-       kunmap_atomic(link);
+       kunmap_atomic(vaddr);
 
        first_page->inuse++;
        /* Now move the zspage to another fullness group, if required */
@@ -1051,6 +1101,7 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
        struct link_free *link;
        struct page *first_page, *f_page;
        unsigned long f_objidx, f_offset;
+       void *vaddr;
 
        int class_idx;
        struct size_class *class;
@@ -1063,16 +1114,16 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
        first_page = get_first_page(f_page);
 
        get_zspage_mapping(first_page, &class_idx, &fullness);
-       class = &pool->size_class[class_idx];
+       class = pool->size_class[class_idx];
        f_offset = obj_idx_to_offset(f_page, f_objidx, class->size);
 
        spin_lock(&class->lock);
 
        /* Insert this object in containing zspage's freelist */
-       link = (struct link_free *)((unsigned char *)kmap_atomic(f_page)
-                                                       + f_offset);
+       vaddr = kmap_atomic(f_page);
+       link = (struct link_free *)(vaddr + f_offset);
        link->next = first_page->freelist;
-       kunmap_atomic(link);
+       kunmap_atomic(vaddr);
        first_page->freelist = (void *)obj;
 
        first_page->inuse--;
@@ -1088,100 +1139,137 @@ void zs_free(struct zs_pool *pool, unsigned long obj)
 EXPORT_SYMBOL_GPL(zs_free);
 
 /**
- * zs_map_object - get address of allocated object from handle.
- * @pool: pool from which the object was allocated
- * @handle: handle returned from zs_malloc
- *
- * Before using an object allocated from zs_malloc, it must be mapped using
- * this function. When done with the object, it must be unmapped using
- * zs_unmap_object.
+ * zs_create_pool - Creates an allocation pool to work from.
+ * @flags: allocation flags used to allocate pool metadata
  *
- * Only one object can be mapped per cpu at a time. There is no protection
- * against nested mappings.
+ * This function must be called before anything else when using
+ * the zsmalloc allocator.
  *
- * This function returns with preemption and page faults disabled.
+ * On success, a pointer to the newly created pool is returned,
+ * otherwise NULL.
  */
-void *zs_map_object(struct zs_pool *pool, unsigned long handle,
-                       enum zs_mapmode mm)
+struct zs_pool *zs_create_pool(gfp_t flags)
 {
-       struct page *page;
-       unsigned long obj_idx, off;
+       int i;
+       struct zs_pool *pool;
+       struct size_class *prev_class = NULL;
 
-       unsigned int class_idx;
-       enum fullness_group fg;
-       struct size_class *class;
-       struct mapping_area *area;
-       struct page *pages[2];
+       pool = kzalloc(sizeof(*pool), GFP_KERNEL);
+       if (!pool)
+               return NULL;
 
-       BUG_ON(!handle);
+       pool->size_class = kcalloc(zs_size_classes, sizeof(struct size_class *),
+                       GFP_KERNEL);
+       if (!pool->size_class) {
+               kfree(pool);
+               return NULL;
+       }
 
        /*
-        * Because we use per-cpu mapping areas shared among the
-        * pools/users, we can't allow mapping in interrupt context
-        * because it can corrupt another users mappings.
+        * Iterate in reverse order, because the size of the size_class we
+        * want to use for merging should be larger than or equal to the
+        * current size.
         */
-       BUG_ON(in_interrupt());
+       for (i = zs_size_classes - 1; i >= 0; i--) {
+               int size;
+               int pages_per_zspage;
+               struct size_class *class;
 
-       obj_handle_to_location(handle, &page, &obj_idx);
-       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-       class = &pool->size_class[class_idx];
-       off = obj_idx_to_offset(page, obj_idx, class->size);
+               size = ZS_MIN_ALLOC_SIZE + i * ZS_SIZE_CLASS_DELTA;
+               if (size > ZS_MAX_ALLOC_SIZE)
+                       size = ZS_MAX_ALLOC_SIZE;
+               pages_per_zspage = get_pages_per_zspage(size);
 
-       area = &get_cpu_var(zs_map_area);
-       area->vm_mm = mm;
-       if (off + class->size <= PAGE_SIZE) {
-               /* this object is contained entirely within a page */
-               area->vm_addr = kmap_atomic(page);
-               return area->vm_addr + off;
+               /*
+                * A size_class is used for normal zsmalloc operations such
+                * as alloc/free for that size. Although it is natural to
+                * have one size_class for each size, there is a chance that
+                * we can get better memory utilization if we use one
+                * size_class for many different sizes whose size_classes
+                * share the same characteristics. So, we make size_class
+                * point to the previous size_class if possible.
+                */
+               if (prev_class) {
+                       if (can_merge(prev_class, size, pages_per_zspage)) {
+                               pool->size_class[i] = prev_class;
+                               continue;
+                       }
+               }
+
+               class = kzalloc(sizeof(struct size_class), GFP_KERNEL);
+               if (!class)
+                       goto err;
+
+               class->size = size;
+               class->index = i;
+               class->pages_per_zspage = pages_per_zspage;
+               spin_lock_init(&class->lock);
+               pool->size_class[i] = class;
+
+               prev_class = class;
        }
 
-       /* this object spans two pages */
-       pages[0] = page;
-       pages[1] = get_next_page(page);
-       BUG_ON(!pages[1]);
+       pool->flags = flags;
 
-       return __zs_map_object(area, pages, off, class->size);
+       return pool;
+
+err:
+       zs_destroy_pool(pool);
+       return NULL;
 }
-EXPORT_SYMBOL_GPL(zs_map_object);
+EXPORT_SYMBOL_GPL(zs_create_pool);
 
-void zs_unmap_object(struct zs_pool *pool, unsigned long handle)
+void zs_destroy_pool(struct zs_pool *pool)
 {
-       struct page *page;
-       unsigned long obj_idx, off;
+       int i;
 
-       unsigned int class_idx;
-       enum fullness_group fg;
-       struct size_class *class;
-       struct mapping_area *area;
+       for (i = 0; i < zs_size_classes; i++) {
+               int fg;
+               struct size_class *class = pool->size_class[i];
 
-       BUG_ON(!handle);
+               if (!class)
+                       continue;
 
-       obj_handle_to_location(handle, &page, &obj_idx);
-       get_zspage_mapping(get_first_page(page), &class_idx, &fg);
-       class = &pool->size_class[class_idx];
-       off = obj_idx_to_offset(page, obj_idx, class->size);
+               if (class->index != i)
+                       continue;
 
-       area = this_cpu_ptr(&zs_map_area);
-       if (off + class->size <= PAGE_SIZE)
-               kunmap_atomic(area->vm_addr);
-       else {
-               struct page *pages[2];
+               for (fg = 0; fg < _ZS_NR_FULLNESS_GROUPS; fg++) {
+                       if (class->fullness_list[fg]) {
+                               pr_info("Freeing non-empty class with size %db, fullness group %d\n",
+                                       class->size, fg);
+                       }
+               }
+               kfree(class);
+       }
 
-               pages[0] = page;
-               pages[1] = get_next_page(page);
-               BUG_ON(!pages[1]);
+       kfree(pool->size_class);
+       kfree(pool);
+}
+EXPORT_SYMBOL_GPL(zs_destroy_pool);
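
Putting the two ends together, a hedged pool-lifecycle sketch (error paths trimmed, GFP_KERNEL chosen only as an example flag) would be:

	struct zs_pool *pool = zs_create_pool(GFP_KERNEL);

	if (!pool)
		return -ENOMEM;

	/* zs_malloc()/zs_map_object()/zs_free() as sketched earlier */

	zs_destroy_pool(pool);

Because merged size classes share one struct size_class, zs_destroy_pool() frees a class only when class->index matches its slot in the array, so each shared class is freed exactly once.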
 
-               __zs_unmap_object(area, pages, off, class->size);
+static int __init zs_init(void)
+{
+       int ret = zs_register_cpu_notifier();
+
+       if (ret) {
+               zs_unregister_cpu_notifier();
+               return ret;
        }
-       put_cpu_var(zs_map_area);
+
+       init_zs_size_classes();
+
+#ifdef CONFIG_ZPOOL
+       zpool_register_driver(&zs_zpool_driver);
+#endif
+       return 0;
 }
-EXPORT_SYMBOL_GPL(zs_unmap_object);
 
-unsigned long zs_get_total_pages(struct zs_pool *pool)
+static void __exit zs_exit(void)
 {
-       return atomic_long_read(&pool->pages_allocated);
+#ifdef CONFIG_ZPOOL
+       zpool_unregister_driver(&zs_zpool_driver);
+#endif
+       zs_unregister_cpu_notifier();
 }
-EXPORT_SYMBOL_GPL(zs_get_total_pages);
 
 module_init(zs_init);
 module_exit(zs_exit);