logfs: handle powerfail on NAND flash
[sfrench/cifs-2.6.git] / fs / logfs / super.c
1 /*
2  * fs/logfs/super.c
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Generally contains mount/umount code and also serves as a dump area for
9  * any functions that don't fit elsewhere and neither justify a file of their
10  * own.
11  */
12 #include "logfs.h"
13 #include <linux/bio.h>
14 #include <linux/slab.h>
15 #include <linux/blkdev.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/statfs.h>
18 #include <linux/buffer_head.h>
19
20 static DEFINE_MUTEX(emergency_mutex);
21 static struct page *emergency_page;
22
23 struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
24 {
25         filler_t *filler = (filler_t *)mapping->a_ops->readpage;
26         struct page *page;
27         int err;
28
29         page = read_cache_page(mapping, index, filler, NULL);
30         if (page)
31                 return page;
32
33         /* No more pages available, switch to emergency page */
34         printk(KERN_INFO"Logfs: Using emergency page\n");
35         mutex_lock(&emergency_mutex);
36         err = filler(NULL, emergency_page);
37         if (err) {
38                 mutex_unlock(&emergency_mutex);
39                 printk(KERN_EMERG"Logfs: Error reading emergency page\n");
40                 return ERR_PTR(err);
41         }
42         return emergency_page;
43 }
44
45 void emergency_read_end(struct page *page)
46 {
47         if (page == emergency_page)
48                 mutex_unlock(&emergency_mutex);
49         else
50                 page_cache_release(page);
51 }
52
53 static void dump_segfile(struct super_block *sb)
54 {
55         struct logfs_super *super = logfs_super(sb);
56         struct logfs_segment_entry se;
57         u32 segno;
58
59         for (segno = 0; segno < super->s_no_segs; segno++) {
60                 logfs_get_segment_entry(sb, segno, &se);
61                 printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
62                                 be32_to_cpu(se.valid));
63                 if (++segno < super->s_no_segs) {
64                         logfs_get_segment_entry(sb, segno, &se);
65                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
66                                         be32_to_cpu(se.valid));
67                 }
68                 if (++segno < super->s_no_segs) {
69                         logfs_get_segment_entry(sb, segno, &se);
70                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
71                                         be32_to_cpu(se.valid));
72                 }
73                 if (++segno < super->s_no_segs) {
74                         logfs_get_segment_entry(sb, segno, &se);
75                         printk(" %6x %8x", be32_to_cpu(se.ec_level),
76                                         be32_to_cpu(se.valid));
77                 }
78                 printk("\n");
79         }
80 }
81
/*
 * logfs_crash_dump - dump debug information
 *
 * The LogFS superblock only occupies part of a segment, and the stated
 * intent is to write as much debug information as can be gathered into the
 * spare space.  As implemented here, the function only prints the segment
 * file through dump_segfile().
 */
void logfs_crash_dump(struct super_block *sb)
{
	dump_segfile(sb);
}
92
93 /*
94  * TODO: move to lib/string.c
95  */
/**
 * memchr_inv - Find the first byte in a memory area that differs from @c.
 * @s: The memory area
 * @c: The byte value the area is expected to be filled with; only the
 *     low eight bits are compared
 * @n: The size of the area.
 *
 * returns the address of the first byte other than @c, or %NULL
 * if the whole buffer contains just @c (which includes the case @n == 0).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *bytes = s;
	const unsigned char fill = (unsigned char)c;
	size_t i;

	for (i = 0; i < n; i++) {
		if (bytes[i] != fill)
			return (void *)&bytes[i];
	}

	return NULL;
}
114
115 /*
116  * FIXME: There should be a reserve for root, similar to ext2.
117  */
118 int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
119 {
120         struct super_block *sb = dentry->d_sb;
121         struct logfs_super *super = logfs_super(sb);
122
123         stats->f_type           = LOGFS_MAGIC_U32;
124         stats->f_bsize          = sb->s_blocksize;
125         stats->f_blocks         = super->s_size >> LOGFS_BLOCK_BITS >> 3;
126         stats->f_bfree          = super->s_free_bytes >> sb->s_blocksize_bits;
127         stats->f_bavail         = super->s_free_bytes >> sb->s_blocksize_bits;
128         stats->f_files          = 0;
129         stats->f_ffree          = 0;
130         stats->f_namelen        = LOGFS_MAX_NAMELEN;
131         return 0;
132 }
133
134 static int logfs_sb_set(struct super_block *sb, void *_super)
135 {
136         struct logfs_super *super = _super;
137
138         sb->s_fs_info = super;
139         sb->s_mtd = super->s_mtd;
140         sb->s_bdev = super->s_bdev;
141 #ifdef CONFIG_BLOCK
142         if (sb->s_bdev)
143                 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
144 #endif
145 #ifdef CONFIG_MTD
146         if (sb->s_mtd)
147                 sb->s_bdi = sb->s_mtd->backing_dev_info;
148 #endif
149         return 0;
150 }
151
152 static int logfs_sb_test(struct super_block *sb, void *_super)
153 {
154         struct logfs_super *super = _super;
155         struct mtd_info *mtd = super->s_mtd;
156
157         if (mtd && sb->s_mtd == mtd)
158                 return 1;
159         if (super->s_bdev && sb->s_bdev == super->s_bdev)
160                 return 1;
161         return 0;
162 }
163
164 static void set_segment_header(struct logfs_segment_header *sh, u8 type,
165                 u8 level, u32 segno, u32 ec)
166 {
167         sh->pad = 0;
168         sh->type = type;
169         sh->level = level;
170         sh->segno = cpu_to_be32(segno);
171         sh->ec = cpu_to_be32(ec);
172         sh->gec = cpu_to_be64(segno);
173         sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
174 }
175
176 static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
177                 u32 segno, u32 ec)
178 {
179         struct logfs_super *super = logfs_super(sb);
180         struct logfs_segment_header *sh = &ds->ds_sh;
181         int i;
182
183         memset(ds, 0, sizeof(*ds));
184         set_segment_header(sh, SEG_SUPER, 0, segno, ec);
185
186         ds->ds_ifile_levels     = super->s_ifile_levels;
187         ds->ds_iblock_levels    = super->s_iblock_levels;
188         ds->ds_data_levels      = super->s_data_levels; /* XXX: Remove */
189         ds->ds_segment_shift    = super->s_segshift;
190         ds->ds_block_shift      = sb->s_blocksize_bits;
191         ds->ds_write_shift      = super->s_writeshift;
192         ds->ds_filesystem_size  = cpu_to_be64(super->s_size);
193         ds->ds_segment_size     = cpu_to_be32(super->s_segsize);
194         ds->ds_bad_seg_reserve  = cpu_to_be32(super->s_bad_seg_reserve);
195         ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
196         ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
197         ds->ds_feature_compat   = cpu_to_be64(super->s_feature_compat);
198         ds->ds_feature_flags    = cpu_to_be64(super->s_feature_flags);
199         ds->ds_root_reserve     = cpu_to_be64(super->s_root_reserve);
200         ds->ds_speed_reserve    = cpu_to_be64(super->s_speed_reserve);
201         journal_for_each(i)
202                 ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
203         ds->ds_magic            = cpu_to_be64(LOGFS_MAGIC);
204         ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
205                         LOGFS_SEGMENT_HEADERSIZE + 12);
206 }
207
208 static int write_one_sb(struct super_block *sb,
209                 struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
210 {
211         struct logfs_super *super = logfs_super(sb);
212         struct logfs_disk_super *ds;
213         struct logfs_segment_entry se;
214         struct page *page;
215         u64 ofs;
216         u32 ec, segno;
217         int err;
218
219         page = find_sb(sb, &ofs);
220         if (!page)
221                 return -EIO;
222         ds = page_address(page);
223         segno = seg_no(sb, ofs);
224         logfs_get_segment_entry(sb, segno, &se);
225         ec = be32_to_cpu(se.ec_level) >> 4;
226         ec++;
227         logfs_set_segment_erased(sb, segno, ec, 0);
228         logfs_write_ds(sb, ds, segno, ec);
229         err = super->s_devops->write_sb(sb, page);
230         page_cache_release(page);
231         return err;
232 }
233
234 int logfs_write_sb(struct super_block *sb)
235 {
236         struct logfs_super *super = logfs_super(sb);
237         int err;
238
239         /* First superblock */
240         err = write_one_sb(sb, super->s_devops->find_first_sb);
241         if (err)
242                 return err;
243
244         /* Last superblock */
245         err = write_one_sb(sb, super->s_devops->find_last_sb);
246         if (err)
247                 return err;
248         return 0;
249 }
250
251 static int ds_cmp(const void *ds0, const void *ds1)
252 {
253         size_t len = sizeof(struct logfs_disk_super);
254
255         /* We know the segment headers differ, so ignore them */
256         len -= LOGFS_SEGMENT_HEADERSIZE;
257         ds0 += LOGFS_SEGMENT_HEADERSIZE;
258         ds1 += LOGFS_SEGMENT_HEADERSIZE;
259         return memcmp(ds0, ds1, len);
260 }
261
262 static int logfs_recover_sb(struct super_block *sb)
263 {
264         struct logfs_super *super = logfs_super(sb);
265         struct logfs_disk_super _ds0, *ds0 = &_ds0;
266         struct logfs_disk_super _ds1, *ds1 = &_ds1;
267         int err, valid0, valid1;
268
269         /* read first superblock */
270         err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
271         if (err)
272                 return err;
273         /* read last superblock */
274         err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
275         if (err)
276                 return err;
277         valid0 = logfs_check_ds(ds0) == 0;
278         valid1 = logfs_check_ds(ds1) == 0;
279
280         if (!valid0 && valid1) {
281                 printk(KERN_INFO"First superblock is invalid - fixing.\n");
282                 return write_one_sb(sb, super->s_devops->find_first_sb);
283         }
284         if (valid0 && !valid1) {
285                 printk(KERN_INFO"Last superblock is invalid - fixing.\n");
286                 return write_one_sb(sb, super->s_devops->find_last_sb);
287         }
288         if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
289                 printk(KERN_INFO"Superblocks don't match - fixing.\n");
290                 return logfs_write_sb(sb);
291         }
292         /* If neither is valid now, something's wrong.  Didn't we properly
293          * check them before?!? */
294         BUG_ON(!valid0 && !valid1);
295         return 0;
296 }
297
/*
 * logfs_make_writeable - bring a freshly read filesystem into writable state
 *
 * Runs these steps in order and stops at the first failure: open the
 * segment file, repair broken superblock copies, check areas for trailing
 * unaccounted data, run one GC pass, replay the journal.
 *
 * Returns 0 on success or the error of the failing step.  The GC pass has
 * no return value that is checked.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int ret;

	ret = logfs_open_segfile(sb);
	if (ret != 0)
		return ret;

	/* Repair any broken superblock copies */
	ret = logfs_recover_sb(sb);
	if (ret != 0)
		return ret;

	/* Check areas for trailing unaccounted data */
	ret = logfs_check_areas(sb);
	if (ret != 0)
		return ret;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	return logfs_replay_journal(sb);
}
327
328 static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
329 {
330         struct logfs_super *super = logfs_super(sb);
331         struct inode *rootdir;
332         int err;
333
334         /* root dir */
335         rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
336         if (IS_ERR(rootdir))
337                 goto fail;
338
339         sb->s_root = d_alloc_root(rootdir);
340         if (!sb->s_root)
341                 goto fail2;
342
343         super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
344         if (!super->s_erase_page)
345                 goto fail2;
346         memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
347
348         /* FIXME: check for read-only mounts */
349         err = logfs_make_writeable(sb);
350         if (err)
351                 goto fail3;
352
353         log_super("LogFS: Finished mounting\n");
354         simple_set_mnt(mnt, sb);
355         return 0;
356
357 fail3:
358         __free_page(super->s_erase_page);
359 fail2:
360         iput(rootdir);
361 fail:
362         iput(logfs_super(sb)->s_master_inode);
363         return -EIO;
364 }
365
366 int logfs_check_ds(struct logfs_disk_super *ds)
367 {
368         struct logfs_segment_header *sh = &ds->ds_sh;
369
370         if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
371                 return -EINVAL;
372         if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
373                 return -EINVAL;
374         if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
375                                 LOGFS_SEGMENT_HEADERSIZE + 12))
376                 return -EINVAL;
377         return 0;
378 }
379
380 static struct page *find_super_block(struct super_block *sb)
381 {
382         struct logfs_super *super = logfs_super(sb);
383         struct page *first, *last;
384
385         first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
386         if (!first || IS_ERR(first))
387                 return NULL;
388         last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
389         if (!last || IS_ERR(last)) {
390                 page_cache_release(first);
391                 return NULL;
392         }
393
394         if (!logfs_check_ds(page_address(first))) {
395                 page_cache_release(last);
396                 return first;
397         }
398
399         /* First one didn't work, try the second superblock */
400         if (!logfs_check_ds(page_address(last))) {
401                 page_cache_release(first);
402                 return last;
403         }
404
405         /* Neither worked, sorry folks */
406         page_cache_release(first);
407         page_cache_release(last);
408         return NULL;
409 }
410
411 static int __logfs_read_sb(struct super_block *sb)
412 {
413         struct logfs_super *super = logfs_super(sb);
414         struct page *page;
415         struct logfs_disk_super *ds;
416         int i;
417
418         page = find_super_block(sb);
419         if (!page)
420                 return -EINVAL;
421
422         ds = page_address(page);
423         super->s_size = be64_to_cpu(ds->ds_filesystem_size);
424         super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
425         super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
426         super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
427         super->s_segsize = 1 << ds->ds_segment_shift;
428         super->s_segmask = (1 << ds->ds_segment_shift) - 1;
429         super->s_segshift = ds->ds_segment_shift;
430         sb->s_blocksize = 1 << ds->ds_block_shift;
431         sb->s_blocksize_bits = ds->ds_block_shift;
432         super->s_writesize = 1 << ds->ds_write_shift;
433         super->s_writeshift = ds->ds_write_shift;
434         super->s_no_segs = super->s_size >> super->s_segshift;
435         super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
436         super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
437         super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
438         super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
439         super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
440
441         journal_for_each(i)
442                 super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
443
444         super->s_ifile_levels = ds->ds_ifile_levels;
445         super->s_iblock_levels = ds->ds_iblock_levels;
446         super->s_data_levels = ds->ds_data_levels;
447         super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
448                 + super->s_data_levels;
449         page_cache_release(page);
450         return 0;
451 }
452
453 static int logfs_read_sb(struct super_block *sb, int read_only)
454 {
455         struct logfs_super *super = logfs_super(sb);
456         int ret;
457
458         super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
459         if (!super->s_btree_pool)
460                 return -ENOMEM;
461
462         btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
463         btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
464         btree_init_mempool32(&super->s_shadow_tree.segment_map,
465                         super->s_btree_pool);
466
467         ret = logfs_init_mapping(sb);
468         if (ret)
469                 return ret;
470
471         ret = __logfs_read_sb(sb);
472         if (ret)
473                 return ret;
474
475         if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
476                 return -EIO;
477         if ((super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT) &&
478                         !read_only)
479                 return -EIO;
480
481         mutex_init(&super->s_dirop_mutex);
482         mutex_init(&super->s_object_alias_mutex);
483         INIT_LIST_HEAD(&super->s_freeing_list);
484
485         ret = logfs_init_rw(sb);
486         if (ret)
487                 return ret;
488
489         ret = logfs_init_areas(sb);
490         if (ret)
491                 return ret;
492
493         ret = logfs_init_gc(sb);
494         if (ret)
495                 return ret;
496
497         ret = logfs_init_journal(sb);
498         if (ret)
499                 return ret;
500
501         return 0;
502 }
503
504 static void logfs_kill_sb(struct super_block *sb)
505 {
506         struct logfs_super *super = logfs_super(sb);
507
508         log_super("LogFS: Start unmounting\n");
509         /* Alias entries slow down mount, so evict as many as possible */
510         sync_filesystem(sb);
511         logfs_write_anchor(sb);
512
513         /*
514          * From this point on alias entries are simply dropped - and any
515          * writes to the object store are considered bugs.
516          */
517         super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
518         log_super("LogFS: Now in shutdown\n");
519         generic_shutdown_super(sb);
520
521         BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
522
523         logfs_cleanup_gc(sb);
524         logfs_cleanup_journal(sb);
525         logfs_cleanup_areas(sb);
526         logfs_cleanup_rw(sb);
527         if (super->s_erase_page)
528                 __free_page(super->s_erase_page);
529         super->s_devops->put_device(sb);
530         logfs_mempool_destroy(super->s_btree_pool);
531         logfs_mempool_destroy(super->s_alias_pool);
532         kfree(super);
533         log_super("LogFS: Finished unmounting\n");
534 }
535
536 int logfs_get_sb_device(struct file_system_type *type, int flags,
537                 struct mtd_info *mtd, struct block_device *bdev,
538                 const struct logfs_device_ops *devops, struct vfsmount *mnt)
539 {
540         struct logfs_super *super;
541         struct super_block *sb;
542         int err = -ENOMEM;
543         static int mount_count;
544
545         log_super("LogFS: Start mount %x\n", mount_count++);
546         super = kzalloc(sizeof(*super), GFP_KERNEL);
547         if (!super)
548                 goto err0;
549
550         super->s_mtd    = mtd;
551         super->s_bdev   = bdev;
552         err = -EINVAL;
553         sb = sget(type, logfs_sb_test, logfs_sb_set, super);
554         if (IS_ERR(sb))
555                 goto err0;
556
557         if (sb->s_root) {
558                 /* Device is already in use */
559                 err = 0;
560                 simple_set_mnt(mnt, sb);
561                 goto err0;
562         }
563
564         super->s_devops = devops;
565
566         /*
567          * sb->s_maxbytes is limited to 8TB.  On 32bit systems, the page cache
568          * only covers 16TB and the upper 8TB are used for indirect blocks.
569          * On 64bit system we could bump up the limit, but that would make
570          * the filesystem incompatible with 32bit systems.
571          */
572         sb->s_maxbytes  = (1ull << 43) - 1;
573         sb->s_op        = &logfs_super_operations;
574         sb->s_flags     = flags | MS_NOATIME;
575
576         err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
577         if (err)
578                 goto err1;
579
580         sb->s_flags |= MS_ACTIVE;
581         err = logfs_get_sb_final(sb, mnt);
582         if (err)
583                 goto err1;
584         return 0;
585
586 err1:
587         deactivate_locked_super(sb);
588         return err;
589 err0:
590         kfree(super);
591         //devops->put_device(sb);
592         return err;
593 }
594
595 static int logfs_get_sb(struct file_system_type *type, int flags,
596                 const char *devname, void *data, struct vfsmount *mnt)
597 {
598         ulong mtdnr;
599
600         if (!devname)
601                 return logfs_get_sb_bdev(type, flags, devname, mnt);
602         if (strncmp(devname, "mtd", 3))
603                 return logfs_get_sb_bdev(type, flags, devname, mnt);
604
605         {
606                 char *garbage;
607                 mtdnr = simple_strtoul(devname+3, &garbage, 0);
608                 if (*garbage)
609                         return -EINVAL;
610         }
611
612         return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
613 }
614
615 static struct file_system_type logfs_fs_type = {
616         .owner          = THIS_MODULE,
617         .name           = "logfs",
618         .get_sb         = logfs_get_sb,
619         .kill_sb        = logfs_kill_sb,
620         .fs_flags       = FS_REQUIRES_DEV,
621
622 };
623
624 static int __init logfs_init(void)
625 {
626         int ret;
627
628         emergency_page = alloc_pages(GFP_KERNEL, 0);
629         if (!emergency_page)
630                 return -ENOMEM;
631
632         ret = logfs_compr_init();
633         if (ret)
634                 goto out1;
635
636         ret = logfs_init_inode_cache();
637         if (ret)
638                 goto out2;
639
640         return register_filesystem(&logfs_fs_type);
641 out2:
642         logfs_compr_exit();
643 out1:
644         __free_pages(emergency_page, 0);
645         return ret;
646 }
647
648 static void __exit logfs_exit(void)
649 {
650         unregister_filesystem(&logfs_fs_type);
651         logfs_destroy_inode_cache();
652         logfs_compr_exit();
653         __free_pages(emergency_page, 0);
654 }
655
656 module_init(logfs_init);
657 module_exit(logfs_exit);
658
659 MODULE_LICENSE("GPL v2");
660 MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
661 MODULE_DESCRIPTION("scalable flash filesystem");