drivers/block/zram/zram_drv.c
1 /*
2  * Compressed RAM block device
3  *
4  * Copyright (C) 2008, 2009, 2010  Nitin Gupta
5  *               2012, 2013 Minchan Kim
6  *
7  * This code is released using a dual license strategy: BSD/GPL
8  * You can choose the licence that better fits your requirements.
9  *
10  * Released under the terms of 3-clause BSD License
11  * Released under the terms of GNU General Public License Version 2.0
12  *
13  */
14
15 #define KMSG_COMPONENT "zram"
16 #define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
17
18 #include <linux/module.h>
19 #include <linux/kernel.h>
20 #include <linux/bio.h>
21 #include <linux/bitops.h>
22 #include <linux/blkdev.h>
23 #include <linux/buffer_head.h>
24 #include <linux/device.h>
25 #include <linux/genhd.h>
26 #include <linux/highmem.h>
27 #include <linux/slab.h>
28 #include <linux/backing-dev.h>
29 #include <linux/string.h>
30 #include <linux/vmalloc.h>
31 #include <linux/err.h>
32 #include <linux/idr.h>
33 #include <linux/sysfs.h>
34 #include <linux/debugfs.h>
35 #include <linux/cpuhotplug.h>
36
37 #include "zram_drv.h"
38
39 static DEFINE_IDR(zram_index_idr);
40 /* idr index must be protected */
41 static DEFINE_MUTEX(zram_index_mutex);
42
43 static int zram_major;
44 static const char *default_compressor = "lzo";
45
46 /* Module params (documentation at end) */
47 static unsigned int num_devices = 1;
48 /*
49  * Pages that compress to a size equal to or greater than this are stored
50  * uncompressed in memory.
51  */
52 static size_t huge_class_size;
53
54 static void zram_free_page(struct zram *zram, size_t index);
55
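/*
 * Each table entry carries its own lock: the ZRAM_LOCK bit inside the
 * entry's 'value' word is used as a bit spinlock, so slots can be locked
 * and unlocked independently without taking a device-wide lock.
 */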
56 static void zram_slot_lock(struct zram *zram, u32 index)
57 {
58         bit_spin_lock(ZRAM_LOCK, &zram->table[index].value);
59 }
60
61 static void zram_slot_unlock(struct zram *zram, u32 index)
62 {
63         bit_spin_unlock(ZRAM_LOCK, &zram->table[index].value);
64 }
65
66 static inline bool init_done(struct zram *zram)
67 {
68         return zram->disksize;
69 }
70
71 static inline bool zram_allocated(struct zram *zram, u32 index)
72 {
73
74         return (zram->table[index].value >> (ZRAM_FLAG_SHIFT + 1)) ||
75                                         zram->table[index].handle;
76 }
77
78 static inline struct zram *dev_to_zram(struct device *dev)
79 {
80         return (struct zram *)dev_to_disk(dev)->private_data;
81 }
82
83 static unsigned long zram_get_handle(struct zram *zram, u32 index)
84 {
85         return zram->table[index].handle;
86 }
87
88 static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
89 {
90         zram->table[index].handle = handle;
91 }
92
93 /* flag operations require the table entry's bit_spin_lock() to be held */
94 static bool zram_test_flag(struct zram *zram, u32 index,
95                         enum zram_pageflags flag)
96 {
97         return zram->table[index].value & BIT(flag);
98 }
99
100 static void zram_set_flag(struct zram *zram, u32 index,
101                         enum zram_pageflags flag)
102 {
103         zram->table[index].value |= BIT(flag);
104 }
105
106 static void zram_clear_flag(struct zram *zram, u32 index,
107                         enum zram_pageflags flag)
108 {
109         zram->table[index].value &= ~BIT(flag);
110 }
111
112 static inline void zram_set_element(struct zram *zram, u32 index,
113                         unsigned long element)
114 {
115         zram->table[index].element = element;
116 }
117
118 static unsigned long zram_get_element(struct zram *zram, u32 index)
119 {
120         return zram->table[index].element;
121 }
122
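/*
 * The 'value' word of a table entry packs two fields: the low
 * ZRAM_FLAG_SHIFT bits hold the compressed object size, while the bits at
 * and above ZRAM_FLAG_SHIFT hold the zram_pageflags (including ZRAM_LOCK).
 */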
123 static size_t zram_get_obj_size(struct zram *zram, u32 index)
124 {
125         return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
126 }
127
128 static void zram_set_obj_size(struct zram *zram,
129                                         u32 index, size_t size)
130 {
131         unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;
132
133         zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
134 }
135
136 #if PAGE_SIZE != 4096
137 static inline bool is_partial_io(struct bio_vec *bvec)
138 {
139         return bvec->bv_len != PAGE_SIZE;
140 }
141 #else
142 static inline bool is_partial_io(struct bio_vec *bvec)
143 {
144         return false;
145 }
146 #endif
147
148 /*
149  * Check if request is within bounds and aligned on zram logical blocks.
150  * Check if the request is within bounds and aligned to zram logical blocks.
151 static inline bool valid_io_request(struct zram *zram,
152                 sector_t start, unsigned int size)
153 {
154         u64 end, bound;
155
156         /* unaligned request */
157         if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
158                 return false;
159         if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
160                 return false;
161
162         end = start + (size >> SECTOR_SHIFT);
163         bound = zram->disksize >> SECTOR_SHIFT;
164         /* out of range */
165         if (unlikely(start >= bound || end > bound || start > end))
166                 return false;
167
168         /* I/O request is valid */
169         return true;
170 }
171
172 static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
173 {
174         *index  += (*offset + bvec->bv_len) / PAGE_SIZE;
175         *offset = (*offset + bvec->bv_len) % PAGE_SIZE;
176 }
177
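/*
 * Raise the max_used_pages watermark to 'pages' if it grew. The cmpxchg
 * loop retries when another CPU updates the counter concurrently, so no
 * lock is needed.
 */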
178 static inline void update_used_max(struct zram *zram,
179                                         const unsigned long pages)
180 {
181         unsigned long old_max, cur_max;
182
183         old_max = atomic_long_read(&zram->stats.max_used_pages);
184
185         do {
186                 cur_max = old_max;
187                 if (pages > cur_max)
188                         old_max = atomic_long_cmpxchg(
189                                 &zram->stats.max_used_pages, cur_max, pages);
190         } while (old_max != cur_max);
191 }
192
193 static inline void zram_fill_page(void *ptr, unsigned long len,
194                                         unsigned long value)
195 {
196         WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));
197         memset_l(ptr, value, len / sizeof(unsigned long));
198 }
199
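/*
 * Return true if every word in the page equals the first one. The repeated
 * value is passed back through @element so the page can be recorded as a
 * single value instead of being compressed and stored.
 */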
200 static bool page_same_filled(void *ptr, unsigned long *element)
201 {
202         unsigned int pos;
203         unsigned long *page;
204         unsigned long val;
205
206         page = (unsigned long *)ptr;
207         val = page[0];
208
209         for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
210                 if (val != page[pos])
211                         return false;
212         }
213
214         *element = val;
215
216         return true;
217 }
218
219 static ssize_t initstate_show(struct device *dev,
220                 struct device_attribute *attr, char *buf)
221 {
222         u32 val;
223         struct zram *zram = dev_to_zram(dev);
224
225         down_read(&zram->init_lock);
226         val = init_done(zram);
227         up_read(&zram->init_lock);
228
229         return scnprintf(buf, PAGE_SIZE, "%u\n", val);
230 }
231
232 static ssize_t disksize_show(struct device *dev,
233                 struct device_attribute *attr, char *buf)
234 {
235         struct zram *zram = dev_to_zram(dev);
236
237         return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
238 }
239
240 static ssize_t mem_limit_store(struct device *dev,
241                 struct device_attribute *attr, const char *buf, size_t len)
242 {
243         u64 limit;
244         char *tmp;
245         struct zram *zram = dev_to_zram(dev);
246
247         limit = memparse(buf, &tmp);
248         if (buf == tmp) /* no chars parsed, invalid input */
249                 return -EINVAL;
250
251         down_write(&zram->init_lock);
252         zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
253         up_write(&zram->init_lock);
254
255         return len;
256 }
257
258 static ssize_t mem_used_max_store(struct device *dev,
259                 struct device_attribute *attr, const char *buf, size_t len)
260 {
261         int err;
262         unsigned long val;
263         struct zram *zram = dev_to_zram(dev);
264
265         err = kstrtoul(buf, 10, &val);
266         if (err || val != 0)
267                 return -EINVAL;
268
269         down_read(&zram->init_lock);
270         if (init_done(zram)) {
271                 atomic_long_set(&zram->stats.max_used_pages,
272                                 zs_get_total_pages(zram->mem_pool));
273         }
274         up_read(&zram->init_lock);
275
276         return len;
277 }
278
279 #ifdef CONFIG_ZRAM_WRITEBACK
280 static bool zram_wb_enabled(struct zram *zram)
281 {
282         return zram->backing_dev;
283 }
284
285 static void reset_bdev(struct zram *zram)
286 {
287         struct block_device *bdev;
288
289         if (!zram_wb_enabled(zram))
290                 return;
291
292         bdev = zram->bdev;
293         if (zram->old_block_size)
294                 set_blocksize(bdev, zram->old_block_size);
295         blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
296         /* hope filp_close flushes all outstanding IO */
297         filp_close(zram->backing_dev, NULL);
298         zram->backing_dev = NULL;
299         zram->old_block_size = 0;
300         zram->bdev = NULL;
301         zram->disk->queue->backing_dev_info->capabilities |=
302                                 BDI_CAP_SYNCHRONOUS_IO;
303         kvfree(zram->bitmap);
304         zram->bitmap = NULL;
305 }
306
307 static ssize_t backing_dev_show(struct device *dev,
308                 struct device_attribute *attr, char *buf)
309 {
310         struct zram *zram = dev_to_zram(dev);
311         struct file *file = zram->backing_dev;
312         char *p;
313         ssize_t ret;
314
315         down_read(&zram->init_lock);
316         if (!zram_wb_enabled(zram)) {
317                 memcpy(buf, "none\n", 5);
318                 up_read(&zram->init_lock);
319                 return 5;
320         }
321
322         p = file_path(file, buf, PAGE_SIZE - 1);
323         if (IS_ERR(p)) {
324                 ret = PTR_ERR(p);
325                 goto out;
326         }
327
328         ret = strlen(p);
329         memmove(buf, p, ret);
330         buf[ret++] = '\n';
331 out:
332         up_read(&zram->init_lock);
333         return ret;
334 }
335
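/*
 * Writeback setup is done through sysfs while the device is still
 * uninitialized, e.g. (illustrative):
 *
 *	echo /dev/sdb1 > /sys/block/zram0/backing_dev
 *
 * Only block devices are accepted; the store path opens the device
 * exclusively, sizes a bitmap of page-sized slots over it and clears
 * BDI_CAP_SYNCHRONOUS_IO so upper layers treat zram as asynchronous.
 */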
336 static ssize_t backing_dev_store(struct device *dev,
337                 struct device_attribute *attr, const char *buf, size_t len)
338 {
339         char *file_name;
340         struct file *backing_dev = NULL;
341         struct inode *inode;
342         struct address_space *mapping;
343         unsigned int bitmap_sz, old_block_size = 0;
344         unsigned long nr_pages, *bitmap = NULL;
345         struct block_device *bdev = NULL;
346         int err;
347         struct zram *zram = dev_to_zram(dev);
348
349         file_name = kmalloc(PATH_MAX, GFP_KERNEL);
350         if (!file_name)
351                 return -ENOMEM;
352
353         down_write(&zram->init_lock);
354         if (init_done(zram)) {
355                 pr_info("Can't set up backing device for an initialized device\n");
356                 err = -EBUSY;
357                 goto out;
358         }
359
360         strlcpy(file_name, buf, len);
361
362         backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0);
363         if (IS_ERR(backing_dev)) {
364                 err = PTR_ERR(backing_dev);
365                 backing_dev = NULL;
366                 goto out;
367         }
368
369         mapping = backing_dev->f_mapping;
370         inode = mapping->host;
371
372         /* Only block devices are supported at the moment */
373         if (!S_ISBLK(inode->i_mode)) {
374                 err = -ENOTBLK;
375                 goto out;
376         }
377
378         bdev = bdgrab(I_BDEV(inode));
379         err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram);
380         if (err < 0)
381                 goto out;
382
383         nr_pages = i_size_read(inode) >> PAGE_SHIFT;
384         bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long);
385         bitmap = kvzalloc(bitmap_sz, GFP_KERNEL);
386         if (!bitmap) {
387                 err = -ENOMEM;
388                 goto out;
389         }
390
391         old_block_size = block_size(bdev);
392         err = set_blocksize(bdev, PAGE_SIZE);
393         if (err)
394                 goto out;
395
396         reset_bdev(zram);
397         spin_lock_init(&zram->bitmap_lock);
398
399         zram->old_block_size = old_block_size;
400         zram->bdev = bdev;
401         zram->backing_dev = backing_dev;
402         zram->bitmap = bitmap;
403         zram->nr_pages = nr_pages;
404         /*
405          * With the writeback feature, zram does asynchronous IO, so it is no
406          * longer a synchronous device; remove the synchronous io flag. Otherwise,
407          * the upper layer (e.g., swap) could wait for IO completion rather than
408          * (submit and return), which would make the system sluggish.
409          * Furthermore, when the IO function returns (e.g., swap_readpage), the
410          * upper layer expects the IO to be done and may free the page, while the
411          * IO is in fact still in flight, which could eventually lead to a
412          * use-after-free once the IO really completes.
413          */
414         zram->disk->queue->backing_dev_info->capabilities &=
415                         ~BDI_CAP_SYNCHRONOUS_IO;
416         up_write(&zram->init_lock);
417
418         pr_info("setup backing device %s\n", file_name);
419         kfree(file_name);
420
421         return len;
422 out:
423         if (bitmap)
424                 kvfree(bitmap);
425
426         if (bdev)
427                 blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
428
429         if (backing_dev)
430                 filp_close(backing_dev, NULL);
431
432         up_write(&zram->init_lock);
433
434         kfree(file_name);
435
436         return err;
437 }
438
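/*
 * Slot allocator for the backing device: each bit in zram->bitmap tracks
 * one page-sized slot. get_entry_bdev() claims a free slot (returning 0 on
 * exhaustion) and put_entry_bdev() releases one.
 */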
439 static unsigned long get_entry_bdev(struct zram *zram)
440 {
441         unsigned long entry;
442
443         spin_lock(&zram->bitmap_lock);
444         /* skip bit 0 to avoid confusion with zram.handle == 0 */
445         entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1);
446         if (entry == zram->nr_pages) {
447                 spin_unlock(&zram->bitmap_lock);
448                 return 0;
449         }
450
451         set_bit(entry, zram->bitmap);
452         spin_unlock(&zram->bitmap_lock);
453
454         return entry;
455 }
456
457 static void put_entry_bdev(struct zram *zram, unsigned long entry)
458 {
459         int was_set;
460
461         spin_lock(&zram->bitmap_lock);
462         was_set = test_and_clear_bit(entry, zram->bitmap);
463         spin_unlock(&zram->bitmap_lock);
464         WARN_ON_ONCE(!was_set);
465 }
466
467 static void zram_page_end_io(struct bio *bio)
468 {
469         struct page *page = bio_first_page_all(bio);
470
471         page_endio(page, op_is_write(bio_op(bio)),
472                         blk_status_to_errno(bio->bi_status));
473         bio_put(bio);
474 }
475
476 /*
477  * Returns 1 if the submission is successful.
478  */
479 static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec,
480                         unsigned long entry, struct bio *parent)
481 {
482         struct bio *bio;
483
484         bio = bio_alloc(GFP_ATOMIC, 1);
485         if (!bio)
486                 return -ENOMEM;
487
488         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
489         bio_set_dev(bio, zram->bdev);
490         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) {
491                 bio_put(bio);
492                 return -EIO;
493         }
494
495         if (!parent) {
496                 bio->bi_opf = REQ_OP_READ;
497                 bio->bi_end_io = zram_page_end_io;
498         } else {
499                 bio->bi_opf = parent->bi_opf;
500                 bio_chain(bio, parent);
501         }
502
503         submit_bio(bio);
504         return 1;
505 }
506
507 struct zram_work {
508         struct work_struct work;
509         struct zram *zram;
510         unsigned long entry;
511         struct bio *bio;
512 };
513
514 #if PAGE_SIZE != 4096
515 static void zram_sync_read(struct work_struct *work)
516 {
517         struct bio_vec bvec;
518         struct zram_work *zw = container_of(work, struct zram_work, work);
519         struct zram *zram = zw->zram;
520         unsigned long entry = zw->entry;
521         struct bio *bio = zw->bio;
522
523         read_from_bdev_async(zram, &bvec, entry, bio);
524 }
525
526 /*
527  * The block layer wants one ->make_request_fn to be active at a time,
528  * so if we use chained IO with the parent IO in the same context,
529  * we get a deadlock. To avoid it, the read is done from a worker thread.
530  */
531 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
532                                 unsigned long entry, struct bio *bio)
533 {
534         struct zram_work work;
535
536         work.zram = zram;
537         work.entry = entry;
538         work.bio = bio;
539
540         INIT_WORK_ONSTACK(&work.work, zram_sync_read);
541         queue_work(system_unbound_wq, &work.work);
542         flush_work(&work.work);
543         destroy_work_on_stack(&work.work);
544
545         return 1;
546 }
547 #else
548 static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec,
549                                 unsigned long entry, struct bio *bio)
550 {
551         WARN_ON(1);
552         return -EIO;
553 }
554 #endif
555
556 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
557                         unsigned long entry, struct bio *parent, bool sync)
558 {
559         if (sync)
560                 return read_from_bdev_sync(zram, bvec, entry, parent);
561         else
562                 return read_from_bdev_async(zram, bvec, entry, parent);
563 }
564
565 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
566                                         u32 index, struct bio *parent,
567                                         unsigned long *pentry)
568 {
569         struct bio *bio;
570         unsigned long entry;
571
572         bio = bio_alloc(GFP_ATOMIC, 1);
573         if (!bio)
574                 return -ENOMEM;
575
576         entry = get_entry_bdev(zram);
577         if (!entry) {
578                 bio_put(bio);
579                 return -ENOSPC;
580         }
581
582         bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9);
583         bio_set_dev(bio, zram->bdev);
584         if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len,
585                                         bvec->bv_offset)) {
586                 bio_put(bio);
587                 put_entry_bdev(zram, entry);
588                 return -EIO;
589         }
590
591         if (!parent) {
592                 bio->bi_opf = REQ_OP_WRITE | REQ_SYNC;
593                 bio->bi_end_io = zram_page_end_io;
594         } else {
595                 bio->bi_opf = parent->bi_opf;
596                 bio_chain(bio, parent);
597         }
598
599         submit_bio(bio);
600         *pentry = entry;
601
602         return 0;
603 }
604
605 static void zram_wb_clear(struct zram *zram, u32 index)
606 {
607         unsigned long entry;
608
609         zram_clear_flag(zram, index, ZRAM_WB);
610         entry = zram_get_element(zram, index);
611         zram_set_element(zram, index, 0);
612         put_entry_bdev(zram, entry);
613 }
614
615 #else
616 static bool zram_wb_enabled(struct zram *zram) { return false; }
617 static inline void reset_bdev(struct zram *zram) {};
618 static int write_to_bdev(struct zram *zram, struct bio_vec *bvec,
619                                         u32 index, struct bio *parent,
620                                         unsigned long *pentry)
621
622 {
623         return -EIO;
624 }
625
626 static int read_from_bdev(struct zram *zram, struct bio_vec *bvec,
627                         unsigned long entry, struct bio *parent, bool sync)
628 {
629         return -EIO;
630 }
631 static void zram_wb_clear(struct zram *zram, u32 index) {}
632 #endif
633
634 #ifdef CONFIG_ZRAM_MEMORY_TRACKING
635
636 static struct dentry *zram_debugfs_root;
637
638 static void zram_debugfs_create(void)
639 {
640         zram_debugfs_root = debugfs_create_dir("zram", NULL);
641 }
642
643 static void zram_debugfs_destroy(void)
644 {
645         debugfs_remove_recursive(zram_debugfs_root);
646 }
647
648 static void zram_accessed(struct zram *zram, u32 index)
649 {
650         zram->table[index].ac_time = ktime_get_boottime();
651 }
652
653 static void zram_reset_access(struct zram *zram, u32 index)
654 {
655         zram->table[index].ac_time = 0;
656 }
657
658 static ssize_t read_block_state(struct file *file, char __user *buf,
659                                 size_t count, loff_t *ppos)
660 {
661         char *kbuf;
662         ssize_t index, written = 0;
663         struct zram *zram = file->private_data;
664         unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
665         struct timespec64 ts;
666
667         kbuf = kvmalloc(count, GFP_KERNEL);
668         if (!kbuf)
669                 return -ENOMEM;
670
671         down_read(&zram->init_lock);
672         if (!init_done(zram)) {
673                 up_read(&zram->init_lock);
674                 kvfree(kbuf);
675                 return -EINVAL;
676         }
677
678         for (index = *ppos; index < nr_pages; index++) {
679                 int copied;
680
681                 zram_slot_lock(zram, index);
682                 if (!zram_allocated(zram, index))
683                         goto next;
684
685                 ts = ktime_to_timespec64(zram->table[index].ac_time);
686                 copied = snprintf(kbuf + written, count,
687                         "%12zd %12lld.%06lu %c%c%c\n",
688                         index, (s64)ts.tv_sec,
689                         ts.tv_nsec / NSEC_PER_USEC,
690                         zram_test_flag(zram, index, ZRAM_SAME) ? 's' : '.',
691                         zram_test_flag(zram, index, ZRAM_WB) ? 'w' : '.',
692                         zram_test_flag(zram, index, ZRAM_HUGE) ? 'h' : '.');
693
694                 if (count < copied) {
695                         zram_slot_unlock(zram, index);
696                         break;
697                 }
698                 written += copied;
699                 count -= copied;
700 next:
701                 zram_slot_unlock(zram, index);
702                 *ppos += 1;
703         }
704
705         up_read(&zram->init_lock);
706         if (copy_to_user(buf, kbuf, written))
707                 written = -EFAULT;
708         kvfree(kbuf);
709
710         return written;
711 }
712
713 static const struct file_operations proc_zram_block_state_op = {
714         .open = simple_open,
715         .read = read_block_state,
716         .llseek = default_llseek,
717 };
718
719 static void zram_debugfs_register(struct zram *zram)
720 {
721         if (!zram_debugfs_root)
722                 return;
723
724         zram->debugfs_dir = debugfs_create_dir(zram->disk->disk_name,
725                                                 zram_debugfs_root);
726         debugfs_create_file("block_state", 0400, zram->debugfs_dir,
727                                 zram, &proc_zram_block_state_op);
728 }
729
730 static void zram_debugfs_unregister(struct zram *zram)
731 {
732         debugfs_remove_recursive(zram->debugfs_dir);
733 }
734 #else
735 static void zram_debugfs_create(void) {};
736 static void zram_debugfs_destroy(void) {};
737 static void zram_accessed(struct zram *zram, u32 index) {};
738 static void zram_reset_access(struct zram *zram, u32 index) {};
739 static void zram_debugfs_register(struct zram *zram) {};
740 static void zram_debugfs_unregister(struct zram *zram) {};
741 #endif
742
743 /*
744  * We switched to per-cpu streams and this attr is not needed anymore.
745  * However, we will keep it around for some time, because:
746  * a) we may revert per-cpu streams in the future
747  * b) it's visible to user space and we need to follow our 2-year
748  *    retirement rule; but we already have a number of 'soon to be
749  *    altered' attrs, so max_comp_streams needs to wait for the next
750  *    layoff cycle.
751  */
752 static ssize_t max_comp_streams_show(struct device *dev,
753                 struct device_attribute *attr, char *buf)
754 {
755         return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
756 }
757
758 static ssize_t max_comp_streams_store(struct device *dev,
759                 struct device_attribute *attr, const char *buf, size_t len)
760 {
761         return len;
762 }
763
764 static ssize_t comp_algorithm_show(struct device *dev,
765                 struct device_attribute *attr, char *buf)
766 {
767         size_t sz;
768         struct zram *zram = dev_to_zram(dev);
769
770         down_read(&zram->init_lock);
771         sz = zcomp_available_show(zram->compressor, buf);
772         up_read(&zram->init_lock);
773
774         return sz;
775 }
776
777 static ssize_t comp_algorithm_store(struct device *dev,
778                 struct device_attribute *attr, const char *buf, size_t len)
779 {
780         struct zram *zram = dev_to_zram(dev);
781         char compressor[ARRAY_SIZE(zram->compressor)];
782         size_t sz;
783
784         strlcpy(compressor, buf, sizeof(compressor));
785         /* ignore trailing newline */
786         sz = strlen(compressor);
787         if (sz > 0 && compressor[sz - 1] == '\n')
788                 compressor[sz - 1] = 0x00;
789
790         if (!zcomp_available_algorithm(compressor))
791                 return -EINVAL;
792
793         down_write(&zram->init_lock);
794         if (init_done(zram)) {
795                 up_write(&zram->init_lock);
796                 pr_info("Can't change algorithm for initialized device\n");
797                 return -EBUSY;
798         }
799
800         strcpy(zram->compressor, compressor);
801         up_write(&zram->init_lock);
802         return len;
803 }
804
805 static ssize_t compact_store(struct device *dev,
806                 struct device_attribute *attr, const char *buf, size_t len)
807 {
808         struct zram *zram = dev_to_zram(dev);
809
810         down_read(&zram->init_lock);
811         if (!init_done(zram)) {
812                 up_read(&zram->init_lock);
813                 return -EINVAL;
814         }
815
816         zs_compact(zram->mem_pool);
817         up_read(&zram->init_lock);
818
819         return len;
820 }
821
822 static ssize_t io_stat_show(struct device *dev,
823                 struct device_attribute *attr, char *buf)
824 {
825         struct zram *zram = dev_to_zram(dev);
826         ssize_t ret;
827
828         down_read(&zram->init_lock);
829         ret = scnprintf(buf, PAGE_SIZE,
830                         "%8llu %8llu %8llu %8llu\n",
831                         (u64)atomic64_read(&zram->stats.failed_reads),
832                         (u64)atomic64_read(&zram->stats.failed_writes),
833                         (u64)atomic64_read(&zram->stats.invalid_io),
834                         (u64)atomic64_read(&zram->stats.notify_free));
835         up_read(&zram->init_lock);
836
837         return ret;
838 }
839
840 static ssize_t mm_stat_show(struct device *dev,
841                 struct device_attribute *attr, char *buf)
842 {
843         struct zram *zram = dev_to_zram(dev);
844         struct zs_pool_stats pool_stats;
845         u64 orig_size, mem_used = 0;
846         long max_used;
847         ssize_t ret;
848
849         memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));
850
851         down_read(&zram->init_lock);
852         if (init_done(zram)) {
853                 mem_used = zs_get_total_pages(zram->mem_pool);
854                 zs_pool_stats(zram->mem_pool, &pool_stats);
855         }
856
857         orig_size = atomic64_read(&zram->stats.pages_stored);
858         max_used = atomic_long_read(&zram->stats.max_used_pages);
859
860         ret = scnprintf(buf, PAGE_SIZE,
861                         "%8llu %8llu %8llu %8lu %8ld %8llu %8lu %8llu\n",
862                         orig_size << PAGE_SHIFT,
863                         (u64)atomic64_read(&zram->stats.compr_data_size),
864                         mem_used << PAGE_SHIFT,
865                         zram->limit_pages << PAGE_SHIFT,
866                         max_used << PAGE_SHIFT,
867                         (u64)atomic64_read(&zram->stats.same_pages),
868                         pool_stats.pages_compacted,
869                         (u64)atomic64_read(&zram->stats.huge_pages));
870         up_read(&zram->init_lock);
871
872         return ret;
873 }
874
875 static ssize_t debug_stat_show(struct device *dev,
876                 struct device_attribute *attr, char *buf)
877 {
878         int version = 1;
879         struct zram *zram = dev_to_zram(dev);
880         ssize_t ret;
881
882         down_read(&zram->init_lock);
883         ret = scnprintf(buf, PAGE_SIZE,
884                         "version: %d\n%8llu\n",
885                         version,
886                         (u64)atomic64_read(&zram->stats.writestall));
887         up_read(&zram->init_lock);
888
889         return ret;
890 }
891
892 static DEVICE_ATTR_RO(io_stat);
893 static DEVICE_ATTR_RO(mm_stat);
894 static DEVICE_ATTR_RO(debug_stat);
895
896 static void zram_meta_free(struct zram *zram, u64 disksize)
897 {
898         size_t num_pages = disksize >> PAGE_SHIFT;
899         size_t index;
900
901         /* Free all pages that are still in this zram device */
902         for (index = 0; index < num_pages; index++)
903                 zram_free_page(zram, index);
904
905         zs_destroy_pool(zram->mem_pool);
906         vfree(zram->table);
907 }
908
909 static bool zram_meta_alloc(struct zram *zram, u64 disksize)
910 {
911         size_t num_pages;
912
913         num_pages = disksize >> PAGE_SHIFT;
914         zram->table = vzalloc(array_size(num_pages, sizeof(*zram->table)));
915         if (!zram->table)
916                 return false;
917
918         zram->mem_pool = zs_create_pool(zram->disk->disk_name);
919         if (!zram->mem_pool) {
920                 vfree(zram->table);
921                 return false;
922         }
923
924         if (!huge_class_size)
925                 huge_class_size = zs_huge_class_size(zram->mem_pool);
926         return true;
927 }
928
929 /*
930  * To protect concurrent access to the same index entry,
931  * the caller should hold this table index entry's bit_spinlock to
932  * indicate that the index entry is being accessed.
933  */
934 static void zram_free_page(struct zram *zram, size_t index)
935 {
936         unsigned long handle;
937
938         zram_reset_access(zram, index);
939
940         if (zram_test_flag(zram, index, ZRAM_HUGE)) {
941                 zram_clear_flag(zram, index, ZRAM_HUGE);
942                 atomic64_dec(&zram->stats.huge_pages);
943         }
944
945         if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) {
946                 zram_wb_clear(zram, index);
947                 atomic64_dec(&zram->stats.pages_stored);
948                 return;
949         }
950
951         /*
952          * No memory is allocated for same-element-filled pages.
953          * Simply clear the same-page flag.
954          */
955         if (zram_test_flag(zram, index, ZRAM_SAME)) {
956                 zram_clear_flag(zram, index, ZRAM_SAME);
957                 zram_set_element(zram, index, 0);
958                 atomic64_dec(&zram->stats.same_pages);
959                 atomic64_dec(&zram->stats.pages_stored);
960                 return;
961         }
962
963         handle = zram_get_handle(zram, index);
964         if (!handle)
965                 return;
966
967         zs_free(zram->mem_pool, handle);
968
969         atomic64_sub(zram_get_obj_size(zram, index),
970                         &zram->stats.compr_data_size);
971         atomic64_dec(&zram->stats.pages_stored);
972
973         zram_set_handle(zram, index, 0);
974         zram_set_obj_size(zram, index, 0);
975 }
976
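/*
 * Read the page at @index into @page. Slots that were written back go
 * through read_from_bdev(), same-filled slots are reconstructed with
 * zram_fill_page(), and everything else is copied (for incompressible
 * pages stored as-is) or decompressed from the zsmalloc object.
 */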
977 static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index,
978                                 struct bio *bio, bool partial_io)
979 {
980         int ret;
981         unsigned long handle;
982         unsigned int size;
983         void *src, *dst;
984
985         if (zram_wb_enabled(zram)) {
986                 zram_slot_lock(zram, index);
987                 if (zram_test_flag(zram, index, ZRAM_WB)) {
988                         struct bio_vec bvec;
989
990                         zram_slot_unlock(zram, index);
991
992                         bvec.bv_page = page;
993                         bvec.bv_len = PAGE_SIZE;
994                         bvec.bv_offset = 0;
995                         return read_from_bdev(zram, &bvec,
996                                         zram_get_element(zram, index),
997                                         bio, partial_io);
998                 }
999                 zram_slot_unlock(zram, index);
1000         }
1001
1002         zram_slot_lock(zram, index);
1003         handle = zram_get_handle(zram, index);
1004         if (!handle || zram_test_flag(zram, index, ZRAM_SAME)) {
1005                 unsigned long value;
1006                 void *mem;
1007
1008                 value = handle ? zram_get_element(zram, index) : 0;
1009                 mem = kmap_atomic(page);
1010                 zram_fill_page(mem, PAGE_SIZE, value);
1011                 kunmap_atomic(mem);
1012                 zram_slot_unlock(zram, index);
1013                 return 0;
1014         }
1015
1016         size = zram_get_obj_size(zram, index);
1017
1018         src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
1019         if (size == PAGE_SIZE) {
1020                 dst = kmap_atomic(page);
1021                 memcpy(dst, src, PAGE_SIZE);
1022                 kunmap_atomic(dst);
1023                 ret = 0;
1024         } else {
1025                 struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);
1026
1027                 dst = kmap_atomic(page);
1028                 ret = zcomp_decompress(zstrm, src, size, dst);
1029                 kunmap_atomic(dst);
1030                 zcomp_stream_put(zram->comp);
1031         }
1032         zs_unmap_object(zram->mem_pool, handle);
1033         zram_slot_unlock(zram, index);
1034
1035         /* Should NEVER happen. Return bio error if it does. */
1036         if (unlikely(ret))
1037                 pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
1038
1039         return ret;
1040 }
1041
1042 static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
1043                                 u32 index, int offset, struct bio *bio)
1044 {
1045         int ret;
1046         struct page *page;
1047
1048         page = bvec->bv_page;
1049         if (is_partial_io(bvec)) {
1050                 /* Use a temporary buffer to decompress the page */
1051                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1052                 if (!page)
1053                         return -ENOMEM;
1054         }
1055
1056         ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec));
1057         if (unlikely(ret))
1058                 goto out;
1059
1060         if (is_partial_io(bvec)) {
1061                 void *dst = kmap_atomic(bvec->bv_page);
1062                 void *src = kmap_atomic(page);
1063
1064                 memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
1065                 kunmap_atomic(src);
1066                 kunmap_atomic(dst);
1067         }
1068 out:
1069         if (is_partial_io(bvec))
1070                 __free_page(page);
1071
1072         return ret;
1073 }
1074
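/*
 * Store one full page at @index: same-filled pages are recorded as a single
 * value, incompressible pages may be written back to the backing device when
 * writeback is enabled, and everything else is compressed into a zsmalloc
 * object (see the comment below on the two-phase handle allocation).
 */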
1075 static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1076                                 u32 index, struct bio *bio)
1077 {
1078         int ret = 0;
1079         unsigned long alloced_pages;
1080         unsigned long handle = 0;
1081         unsigned int comp_len = 0;
1082         void *src, *dst, *mem;
1083         struct zcomp_strm *zstrm;
1084         struct page *page = bvec->bv_page;
1085         unsigned long element = 0;
1086         enum zram_pageflags flags = 0;
1087         bool allow_wb = true;
1088
1089         mem = kmap_atomic(page);
1090         if (page_same_filled(mem, &element)) {
1091                 kunmap_atomic(mem);
1092                 /* Free memory associated with this sector now. */
1093                 flags = ZRAM_SAME;
1094                 atomic64_inc(&zram->stats.same_pages);
1095                 goto out;
1096         }
1097         kunmap_atomic(mem);
1098
1099 compress_again:
1100         zstrm = zcomp_stream_get(zram->comp);
1101         src = kmap_atomic(page);
1102         ret = zcomp_compress(zstrm, src, &comp_len);
1103         kunmap_atomic(src);
1104
1105         if (unlikely(ret)) {
1106                 zcomp_stream_put(zram->comp);
1107                 pr_err("Compression failed! err=%d\n", ret);
1108                 zs_free(zram->mem_pool, handle);
1109                 return ret;
1110         }
1111
1112         if (unlikely(comp_len >= huge_class_size)) {
1113                 comp_len = PAGE_SIZE;
1114                 if (zram_wb_enabled(zram) && allow_wb) {
1115                         zcomp_stream_put(zram->comp);
1116                         ret = write_to_bdev(zram, bvec, index, bio, &element);
1117                         if (!ret) {
1118                                 flags = ZRAM_WB;
1119                                 ret = 1;
1120                                 goto out;
1121                         }
1122                         allow_wb = false;
1123                         goto compress_again;
1124                 }
1125         }
1126
1127         /*
1128          * handle allocation has 2 paths:
1129          * a) fast path is executed with preemption disabled (for
1130          *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
1131          *  since we can't sleep;
1132          * b) slow path enables preemption and attempts to allocate
1133          *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
1134          *  put the per-cpu compression stream and, thus, re-do
1135          *  the compression once the handle is allocated.
1136          *
1137          * if we have a 'non-null' handle here then we are coming
1138          * from the slow path and the handle has already been allocated.
1139          */
1140         if (!handle)
1141                 handle = zs_malloc(zram->mem_pool, comp_len,
1142                                 __GFP_KSWAPD_RECLAIM |
1143                                 __GFP_NOWARN |
1144                                 __GFP_HIGHMEM |
1145                                 __GFP_MOVABLE);
1146         if (!handle) {
1147                 zcomp_stream_put(zram->comp);
1148                 atomic64_inc(&zram->stats.writestall);
1149                 handle = zs_malloc(zram->mem_pool, comp_len,
1150                                 GFP_NOIO | __GFP_HIGHMEM |
1151                                 __GFP_MOVABLE);
1152                 if (handle)
1153                         goto compress_again;
1154                 return -ENOMEM;
1155         }
1156
1157         alloced_pages = zs_get_total_pages(zram->mem_pool);
1158         update_used_max(zram, alloced_pages);
1159
1160         if (zram->limit_pages && alloced_pages > zram->limit_pages) {
1161                 zcomp_stream_put(zram->comp);
1162                 zs_free(zram->mem_pool, handle);
1163                 return -ENOMEM;
1164         }
1165
1166         dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);
1167
1168         src = zstrm->buffer;
1169         if (comp_len == PAGE_SIZE)
1170                 src = kmap_atomic(page);
1171         memcpy(dst, src, comp_len);
1172         if (comp_len == PAGE_SIZE)
1173                 kunmap_atomic(src);
1174
1175         zcomp_stream_put(zram->comp);
1176         zs_unmap_object(zram->mem_pool, handle);
1177         atomic64_add(comp_len, &zram->stats.compr_data_size);
1178 out:
1179         /*
1180          * Free memory associated with this sector
1181          * before overwriting unused sectors.
1182          */
1183         zram_slot_lock(zram, index);
1184         zram_free_page(zram, index);
1185
1186         if (comp_len == PAGE_SIZE) {
1187                 zram_set_flag(zram, index, ZRAM_HUGE);
1188                 atomic64_inc(&zram->stats.huge_pages);
1189         }
1190
1191         if (flags) {
1192                 zram_set_flag(zram, index, flags);
1193                 zram_set_element(zram, index, element);
1194         }  else {
1195                 zram_set_handle(zram, index, handle);
1196                 zram_set_obj_size(zram, index, comp_len);
1197         }
1198         zram_slot_unlock(zram, index);
1199
1200         /* Update stats */
1201         atomic64_inc(&zram->stats.pages_stored);
1202         return ret;
1203 }
1204
1205 static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
1206                                 u32 index, int offset, struct bio *bio)
1207 {
1208         int ret;
1209         struct page *page = NULL;
1210         void *src;
1211         struct bio_vec vec;
1212
1213         vec = *bvec;
1214         if (is_partial_io(bvec)) {
1215                 void *dst;
1216                 /*
1217                  * This is a partial IO. We need to read the full page
1218                  * before writing the changes.
1219                  */
1220                 page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
1221                 if (!page)
1222                         return -ENOMEM;
1223
1224                 ret = __zram_bvec_read(zram, page, index, bio, true);
1225                 if (ret)
1226                         goto out;
1227
1228                 src = kmap_atomic(bvec->bv_page);
1229                 dst = kmap_atomic(page);
1230                 memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
1231                 kunmap_atomic(dst);
1232                 kunmap_atomic(src);
1233
1234                 vec.bv_page = page;
1235                 vec.bv_len = PAGE_SIZE;
1236                 vec.bv_offset = 0;
1237         }
1238
1239         ret = __zram_bvec_write(zram, &vec, index, bio);
1240 out:
1241         if (is_partial_io(bvec))
1242                 __free_page(page);
1243         return ret;
1244 }
1245
1246 /*
1247  * zram_bio_discard - handler for discard requests
1248  * @index: physical block index in PAGE_SIZE units
1249  * @offset: byte offset within physical block
1250  */
1251 static void zram_bio_discard(struct zram *zram, u32 index,
1252                              int offset, struct bio *bio)
1253 {
1254         size_t n = bio->bi_iter.bi_size;
1255
1256         /*
1257          * zram manages data in physical block size units. Because logical block
1258          * size isn't identical to the physical block size on some architectures, we
1259          * could get a discard request pointing to a specific offset within a
1260          * certain physical block.  Although we can handle this request by
1261          * reading that physical block and decompressing and partially zeroing
1262          * and re-compressing and then re-storing it, this isn't reasonable
1263          * because our intent with a discard request is to save memory.  So
1264          * skipping this logical block is appropriate here.
1265          */
1266         if (offset) {
1267                 if (n <= (PAGE_SIZE - offset))
1268                         return;
1269
1270                 n -= (PAGE_SIZE - offset);
1271                 index++;
1272         }
1273
1274         while (n >= PAGE_SIZE) {
1275                 zram_slot_lock(zram, index);
1276                 zram_free_page(zram, index);
1277                 zram_slot_unlock(zram, index);
1278                 atomic64_inc(&zram->stats.notify_free);
1279                 index++;
1280                 n -= PAGE_SIZE;
1281         }
1282 }
1283
1284 /*
1285  * Returns a negative errno if there is a problem. Otherwise returns 0 or 1.
1286  * Returns 0 if the IO request was done synchronously.
1287  * Returns 1 if the IO request was successfully submitted.
1288  */
1289 static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
1290                         int offset, bool is_write, struct bio *bio)
1291 {
1292         unsigned long start_time = jiffies;
1293         int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
1294         struct request_queue *q = zram->disk->queue;
1295         int ret;
1296
1297         generic_start_io_acct(q, rw_acct, bvec->bv_len >> SECTOR_SHIFT,
1298                         &zram->disk->part0);
1299
1300         if (!is_write) {
1301                 atomic64_inc(&zram->stats.num_reads);
1302                 ret = zram_bvec_read(zram, bvec, index, offset, bio);
1303                 flush_dcache_page(bvec->bv_page);
1304         } else {
1305                 atomic64_inc(&zram->stats.num_writes);
1306                 ret = zram_bvec_write(zram, bvec, index, offset, bio);
1307         }
1308
1309         generic_end_io_acct(q, rw_acct, &zram->disk->part0, start_time);
1310
1311         zram_slot_lock(zram, index);
1312         zram_accessed(zram, index);
1313         zram_slot_unlock(zram, index);
1314
1315         if (unlikely(ret < 0)) {
1316                 if (!is_write)
1317                         atomic64_inc(&zram->stats.failed_reads);
1318                 else
1319                         atomic64_inc(&zram->stats.failed_writes);
1320         }
1321
1322         return ret;
1323 }
1324
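/*
 * Walk the bio segment by segment, splitting each bvec into chunks that do
 * not cross a PAGE_SIZE boundary, and hand every chunk to zram_bvec_rw().
 * Discard and write-zeroes requests are routed to zram_bio_discard().
 */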
1325 static void __zram_make_request(struct zram *zram, struct bio *bio)
1326 {
1327         int offset;
1328         u32 index;
1329         struct bio_vec bvec;
1330         struct bvec_iter iter;
1331
1332         index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
1333         offset = (bio->bi_iter.bi_sector &
1334                   (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1335
1336         switch (bio_op(bio)) {
1337         case REQ_OP_DISCARD:
1338         case REQ_OP_WRITE_ZEROES:
1339                 zram_bio_discard(zram, index, offset, bio);
1340                 bio_endio(bio);
1341                 return;
1342         default:
1343                 break;
1344         }
1345
1346         bio_for_each_segment(bvec, bio, iter) {
1347                 struct bio_vec bv = bvec;
1348                 unsigned int unwritten = bvec.bv_len;
1349
1350                 do {
1351                         bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
1352                                                         unwritten);
1353                         if (zram_bvec_rw(zram, &bv, index, offset,
1354                                         op_is_write(bio_op(bio)), bio) < 0)
1355                                 goto out;
1356
1357                         bv.bv_offset += bv.bv_len;
1358                         unwritten -= bv.bv_len;
1359
1360                         update_position(&index, &offset, &bv);
1361                 } while (unwritten);
1362         }
1363
1364         bio_endio(bio);
1365         return;
1366
1367 out:
1368         bio_io_error(bio);
1369 }
1370
1371 /*
1372  * Handler function for all zram I/O requests.
1373  */
1374 static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
1375 {
1376         struct zram *zram = queue->queuedata;
1377
1378         if (!valid_io_request(zram, bio->bi_iter.bi_sector,
1379                                         bio->bi_iter.bi_size)) {
1380                 atomic64_inc(&zram->stats.invalid_io);
1381                 goto error;
1382         }
1383
1384         __zram_make_request(zram, bio);
1385         return BLK_QC_T_NONE;
1386
1387 error:
1388         bio_io_error(bio);
1389         return BLK_QC_T_NONE;
1390 }
1391
1392 static void zram_slot_free_notify(struct block_device *bdev,
1393                                 unsigned long index)
1394 {
1395         struct zram *zram;
1396
1397         zram = bdev->bd_disk->private_data;
1398
1399         zram_slot_lock(zram, index);
1400         zram_free_page(zram, index);
1401         zram_slot_unlock(zram, index);
1402         atomic64_inc(&zram->stats.notify_free);
1403 }
1404
1405 static int zram_rw_page(struct block_device *bdev, sector_t sector,
1406                        struct page *page, bool is_write)
1407 {
1408         int offset, ret;
1409         u32 index;
1410         struct zram *zram;
1411         struct bio_vec bv;
1412
1413         if (PageTransHuge(page))
1414                 return -ENOTSUPP;
1415         zram = bdev->bd_disk->private_data;
1416
1417         if (!valid_io_request(zram, sector, PAGE_SIZE)) {
1418                 atomic64_inc(&zram->stats.invalid_io);
1419                 ret = -EINVAL;
1420                 goto out;
1421         }
1422
1423         index = sector >> SECTORS_PER_PAGE_SHIFT;
1424         offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
1425
1426         bv.bv_page = page;
1427         bv.bv_len = PAGE_SIZE;
1428         bv.bv_offset = 0;
1429
1430         ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL);
1431 out:
1432         /*
1433          * If the I/O fails, just return an error (i.e., non-zero) without
1434          * calling page_endio.
1435          * This makes the upper callers of rw_page (e.g., swap_readpage,
1436          * __swap_writepage) resubmit the I/O as a bio request, and
1437          * bio->bi_end_io then handles the error
1438          * (e.g., SetPageError, set_page_dirty and extra work).
1439          */
1440         if (unlikely(ret < 0))
1441                 return ret;
1442
1443         switch (ret) {
1444         case 0:
1445                 page_endio(page, is_write, 0);
1446                 break;
1447         case 1:
1448                 ret = 0;
1449                 break;
1450         default:
1451                 WARN_ON(1);
1452         }
1453         return ret;
1454 }
1455
1456 static void zram_reset_device(struct zram *zram)
1457 {
1458         struct zcomp *comp;
1459         u64 disksize;
1460
1461         down_write(&zram->init_lock);
1462
1463         zram->limit_pages = 0;
1464
1465         if (!init_done(zram)) {
1466                 up_write(&zram->init_lock);
1467                 return;
1468         }
1469
1470         comp = zram->comp;
1471         disksize = zram->disksize;
1472         zram->disksize = 0;
1473
1474         set_capacity(zram->disk, 0);
1475         part_stat_set_all(&zram->disk->part0, 0);
1476
1477         up_write(&zram->init_lock);
1478         /* I/O operations on all CPUs are done, so it is safe to free */
1479         zram_meta_free(zram, disksize);
1480         memset(&zram->stats, 0, sizeof(zram->stats));
1481         zcomp_destroy(comp);
1482         reset_bdev(zram);
1483 }
1484
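/*
 * Typical setup from user space (illustrative):
 *
 *	echo lzo > /sys/block/zram0/comp_algorithm
 *	echo 512M > /sys/block/zram0/disksize
 *	mkswap /dev/zram0 && swapon /dev/zram0
 *
 * disksize can be set only once; the device must be reset (via the 'reset'
 * attribute) before it can be resized.
 */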
1485 static ssize_t disksize_store(struct device *dev,
1486                 struct device_attribute *attr, const char *buf, size_t len)
1487 {
1488         u64 disksize;
1489         struct zcomp *comp;
1490         struct zram *zram = dev_to_zram(dev);
1491         int err;
1492
1493         disksize = memparse(buf, NULL);
1494         if (!disksize)
1495                 return -EINVAL;
1496
1497         down_write(&zram->init_lock);
1498         if (init_done(zram)) {
1499                 pr_info("Cannot change disksize for initialized device\n");
1500                 err = -EBUSY;
1501                 goto out_unlock;
1502         }
1503
1504         disksize = PAGE_ALIGN(disksize);
1505         if (!zram_meta_alloc(zram, disksize)) {
1506                 err = -ENOMEM;
1507                 goto out_unlock;
1508         }
1509
1510         comp = zcomp_create(zram->compressor);
1511         if (IS_ERR(comp)) {
1512                 pr_err("Cannot initialise %s compressing backend\n",
1513                                 zram->compressor);
1514                 err = PTR_ERR(comp);
1515                 goto out_free_meta;
1516         }
1517
1518         zram->comp = comp;
1519         zram->disksize = disksize;
1520         set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
1521
1522         revalidate_disk(zram->disk);
1523         up_write(&zram->init_lock);
1524
1525         return len;
1526
1527 out_free_meta:
1528         zram_meta_free(zram, disksize);
1529 out_unlock:
1530         up_write(&zram->init_lock);
1531         return err;
1532 }
1533
1534 static ssize_t reset_store(struct device *dev,
1535                 struct device_attribute *attr, const char *buf, size_t len)
1536 {
1537         int ret;
1538         unsigned short do_reset;
1539         struct zram *zram;
1540         struct block_device *bdev;
1541
1542         ret = kstrtou16(buf, 10, &do_reset);
1543         if (ret)
1544                 return ret;
1545
1546         if (!do_reset)
1547                 return -EINVAL;
1548
1549         zram = dev_to_zram(dev);
1550         bdev = bdget_disk(zram->disk, 0);
1551         if (!bdev)
1552                 return -ENOMEM;
1553
1554         mutex_lock(&bdev->bd_mutex);
1555         /* Do not reset an active device or claimed device */
1556         if (bdev->bd_openers || zram->claim) {
1557                 mutex_unlock(&bdev->bd_mutex);
1558                 bdput(bdev);
1559                 return -EBUSY;
1560         }
1561
1562         /* From now on, no one can open /dev/zram[0-9] */
1563         zram->claim = true;
1564         mutex_unlock(&bdev->bd_mutex);
1565
1566         /* Make sure all pending I/O is finished */
1567         fsync_bdev(bdev);
1568         zram_reset_device(zram);
1569         revalidate_disk(zram->disk);
1570         bdput(bdev);
1571
1572         mutex_lock(&bdev->bd_mutex);
1573         zram->claim = false;
1574         mutex_unlock(&bdev->bd_mutex);
1575
1576         return len;
1577 }
1578
1579 static int zram_open(struct block_device *bdev, fmode_t mode)
1580 {
1581         int ret = 0;
1582         struct zram *zram;
1583
1584         WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
1585
1586         zram = bdev->bd_disk->private_data;
1587         /* zram was claimed for reset, so the open request fails */
1588         if (zram->claim)
1589                 ret = -EBUSY;
1590
1591         return ret;
1592 }
1593
1594 static const struct block_device_operations zram_devops = {
1595         .open = zram_open,
1596         .swap_slot_free_notify = zram_slot_free_notify,
1597         .rw_page = zram_rw_page,
1598         .owner = THIS_MODULE
1599 };
1600
1601 static DEVICE_ATTR_WO(compact);
1602 static DEVICE_ATTR_RW(disksize);
1603 static DEVICE_ATTR_RO(initstate);
1604 static DEVICE_ATTR_WO(reset);
1605 static DEVICE_ATTR_WO(mem_limit);
1606 static DEVICE_ATTR_WO(mem_used_max);
1607 static DEVICE_ATTR_RW(max_comp_streams);
1608 static DEVICE_ATTR_RW(comp_algorithm);
1609 #ifdef CONFIG_ZRAM_WRITEBACK
1610 static DEVICE_ATTR_RW(backing_dev);
1611 #endif
1612
1613 static struct attribute *zram_disk_attrs[] = {
1614         &dev_attr_disksize.attr,
1615         &dev_attr_initstate.attr,
1616         &dev_attr_reset.attr,
1617         &dev_attr_compact.attr,
1618         &dev_attr_mem_limit.attr,
1619         &dev_attr_mem_used_max.attr,
1620         &dev_attr_max_comp_streams.attr,
1621         &dev_attr_comp_algorithm.attr,
1622 #ifdef CONFIG_ZRAM_WRITEBACK
1623         &dev_attr_backing_dev.attr,
1624 #endif
1625         &dev_attr_io_stat.attr,
1626         &dev_attr_mm_stat.attr,
1627         &dev_attr_debug_stat.attr,
1628         NULL,
1629 };
1630
1631 static const struct attribute_group zram_disk_attr_group = {
1632         .attrs = zram_disk_attrs,
1633 };
1634
1635 /*
1636  * Allocate and initialize a new zram device. The function returns
1637  * a '>= 0' device_id upon success, and a negative value otherwise.
1638  */
1639 static int zram_add(void)
1640 {
1641         struct zram *zram;
1642         struct request_queue *queue;
1643         int ret, device_id;
1644
1645         zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
1646         if (!zram)
1647                 return -ENOMEM;
1648
1649         ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
1650         if (ret < 0)
1651                 goto out_free_dev;
1652         device_id = ret;
1653
1654         init_rwsem(&zram->init_lock);
1655
1656         queue = blk_alloc_queue(GFP_KERNEL);
1657         if (!queue) {
1658                 pr_err("Error allocating disk queue for device %d\n",
1659                         device_id);
1660                 ret = -ENOMEM;
1661                 goto out_free_idr;
1662         }
1663
1664         blk_queue_make_request(queue, zram_make_request);
1665
1666         /* gendisk structure */
1667         zram->disk = alloc_disk(1);
1668         if (!zram->disk) {
1669                 pr_err("Error allocating disk structure for device %d\n",
1670                         device_id);
1671                 ret = -ENOMEM;
1672                 goto out_free_queue;
1673         }
1674
1675         zram->disk->major = zram_major;
1676         zram->disk->first_minor = device_id;
1677         zram->disk->fops = &zram_devops;
1678         zram->disk->queue = queue;
1679         zram->disk->queue->queuedata = zram;
1680         zram->disk->private_data = zram;
1681         snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
1682
1683         /* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
1684         set_capacity(zram->disk, 0);
1685         /* zram devices sort of resemble non-rotational disks */
1686         blk_queue_flag_set(QUEUE_FLAG_NONROT, zram->disk->queue);
1687         blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
1688
1689         /*
1690          * Ensure that we always get PAGE_SIZE-aligned
1691          * and n*PAGE_SIZE-sized I/O requests.
1692          */
1693         blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
1694         blk_queue_logical_block_size(zram->disk->queue,
1695                                         ZRAM_LOGICAL_BLOCK_SIZE);
1696         blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
1697         blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
1698         zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
1699         blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
1700         blk_queue_flag_set(QUEUE_FLAG_DISCARD, zram->disk->queue);
1701
1702         /*
1703          * zram_bio_discard() will clear all logical blocks if the logical block
1704          * size is identical to the physical block size (PAGE_SIZE). But if it is
1705          * different, we skip discarding the parts of logical blocks that fall in
1706          * the portion of the request range which isn't aligned to the physical
1707          * block size. So we can't ensure that all discarded logical blocks are
1708          * zeroed.
1709          */
1710         if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
1711                 blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);
1712
1713         zram->disk->queue->backing_dev_info->capabilities |=
1714                         (BDI_CAP_STABLE_WRITES | BDI_CAP_SYNCHRONOUS_IO);
1715         add_disk(zram->disk);
1716
1717         ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
1718                                 &zram_disk_attr_group);
1719         if (ret < 0) {
1720                 pr_err("Error creating sysfs group for device %d\n",
1721                                 device_id);
1722                 goto out_free_disk;
1723         }
1724         strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
1725
1726         zram_debugfs_register(zram);
1727         pr_info("Added device: %s\n", zram->disk->disk_name);
1728         return device_id;
1729
1730 out_free_disk:
1731         del_gendisk(zram->disk);
1732         put_disk(zram->disk);
1733 out_free_queue:
1734         blk_cleanup_queue(queue);
1735 out_free_idr:
1736         idr_remove(&zram_index_idr, device_id);
1737 out_free_dev:
1738         kfree(zram);
1739         return ret;
1740 }
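
/*
 * Once a device exists, it is typically configured from userspace before
 * first use (a sketch; the values below are only examples):
 *
 *     echo lzo > /sys/block/zram0/comp_algorithm
 *     echo 512M > /sys/block/zram0/disksize
 *     mkswap /dev/zram0 && swapon /dev/zram0
 *
 * Note that comp_algorithm must be set before disksize initializes the device.
 */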
1741
1742 static int zram_remove(struct zram *zram)
1743 {
1744         struct block_device *bdev;
1745
1746         bdev = bdget_disk(zram->disk, 0);
1747         if (!bdev)
1748                 return -ENOMEM;
1749
1750         mutex_lock(&bdev->bd_mutex);
1751         if (bdev->bd_openers || zram->claim) {
1752                 mutex_unlock(&bdev->bd_mutex);
1753                 bdput(bdev);
1754                 return -EBUSY;
1755         }
1756
1757         zram->claim = true;
1758         mutex_unlock(&bdev->bd_mutex);
1759
1760         zram_debugfs_unregister(zram);
1761         /*
1762          * Remove sysfs first, so no one can perform a disksize
1763          * store while we destroy the device. This also helps during
1764          * hot_remove -- zram_reset_device() is the last holder of
1765          * ->init_lock, so no later/concurrent disksize_store() or any
1766          * other sysfs handlers are possible.
1767          */
1768         sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
1769                         &zram_disk_attr_group);
1770
1771         /* Make sure all pending I/O is finished */
1772         fsync_bdev(bdev);
1773         zram_reset_device(zram);
1774         bdput(bdev);
1775
1776         pr_info("Removed device: %s\n", zram->disk->disk_name);
1777
1778         del_gendisk(zram->disk);
1779         blk_cleanup_queue(zram->disk->queue);
1780         put_disk(zram->disk);
1781         kfree(zram);
1782         return 0;
1783 }
1784
1785 /* zram-control sysfs attributes */
1786
1787 /*
1788  * NOTE: hot_add attribute is not the usual read-only sysfs attribute, in
1789  * the sense that reading from this file does alter the state of your
1790  * system -- it creates a new uninitialized zram device and returns that
1791  * device's device_id (or an error code if it fails to create a new device).
1792  */
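/* e.g. "cat /sys/class/zram-control/hot_add" prints the id of the new device */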
1793 static ssize_t hot_add_show(struct class *class,
1794                         struct class_attribute *attr,
1795                         char *buf)
1796 {
1797         int ret;
1798
1799         mutex_lock(&zram_index_mutex);
1800         ret = zram_add();
1801         mutex_unlock(&zram_index_mutex);
1802
1803         if (ret < 0)
1804                 return ret;
1805         return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
1806 }
1807 static CLASS_ATTR_RO(hot_add);
1808
1809 static ssize_t hot_remove_store(struct class *class,
1810                         struct class_attribute *attr,
1811                         const char *buf,
1812                         size_t count)
1813 {
1814         struct zram *zram;
1815         int ret, dev_id;
1816
1817         /* dev_id is gendisk->first_minor, which is `int' */
1818         ret = kstrtoint(buf, 10, &dev_id);
1819         if (ret)
1820                 return ret;
1821         if (dev_id < 0)
1822                 return -EINVAL;
1823
1824         mutex_lock(&zram_index_mutex);
1825
1826         zram = idr_find(&zram_index_idr, dev_id);
1827         if (zram) {
1828                 ret = zram_remove(zram);
1829                 if (!ret)
1830                         idr_remove(&zram_index_idr, dev_id);
1831         } else {
1832                 ret = -ENODEV;
1833         }
1834
1835         mutex_unlock(&zram_index_mutex);
1836         return ret ? ret : count;
1837 }
1838 static CLASS_ATTR_WO(hot_remove);
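
/*
 * Removal mirrors hot_add: write the device id back, e.g.
 * "echo 0 > /sys/class/zram-control/hot_remove"; the store fails with
 * -EBUSY if the device is still open.
 */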
1839
1840 static struct attribute *zram_control_class_attrs[] = {
1841         &class_attr_hot_add.attr,
1842         &class_attr_hot_remove.attr,
1843         NULL,
1844 };
1845 ATTRIBUTE_GROUPS(zram_control_class);
1846
1847 static struct class zram_control_class = {
1848         .name           = "zram-control",
1849         .owner          = THIS_MODULE,
1850         .class_groups   = zram_control_class_groups,
1851 };
1852
1853 static int zram_remove_cb(int id, void *ptr, void *data)
1854 {
1855         zram_remove(ptr);
1856         return 0;
1857 }
1858
1859 static void destroy_devices(void)
1860 {
1861         class_unregister(&zram_control_class);
1862         idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
1863         zram_debugfs_destroy();
1864         idr_destroy(&zram_index_idr);
1865         unregister_blkdev(zram_major, "zram");
1866         cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1867 }
1868
1869 static int __init zram_init(void)
1870 {
1871         int ret;
1872
1873         ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
1874                                       zcomp_cpu_up_prepare, zcomp_cpu_dead);
1875         if (ret < 0)
1876                 return ret;
1877
1878         ret = class_register(&zram_control_class);
1879         if (ret) {
1880                 pr_err("Unable to register zram-control class\n");
1881                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1882                 return ret;
1883         }
1884
1885         zram_debugfs_create();
1886         zram_major = register_blkdev(0, "zram");
1887         if (zram_major <= 0) {
1888                 pr_err("Unable to get major number\n");
1889                 class_unregister(&zram_control_class);
1890                 cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
1891                 return -EBUSY;
1892         }
1893
1894         while (num_devices != 0) {
1895                 mutex_lock(&zram_index_mutex);
1896                 ret = zram_add();
1897                 mutex_unlock(&zram_index_mutex);
1898                 if (ret < 0)
1899                         goto out_error;
1900                 num_devices--;
1901         }
1902
1903         return 0;
1904
1905 out_error:
1906         destroy_devices();
1907         return ret;
1908 }
1909
1910 static void __exit zram_exit(void)
1911 {
1912         destroy_devices();
1913 }
1914
1915 module_init(zram_init);
1916 module_exit(zram_exit);
1917
1918 module_param(num_devices, uint, 0);
1919 MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");
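
/*
 * e.g. "modprobe zram num_devices=4" pre-creates zram0..zram3; further
 * devices can still be added later through the zram-control class.
 */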
1920
1921 MODULE_LICENSE("Dual BSD/GPL");
1922 MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
1923 MODULE_DESCRIPTION("Compressed RAM Block Device");