Merge branch 'core-iommu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[sfrench/cifs-2.6.git] / fs / logfs / segment.c
1 /*
2  * fs/logfs/segment.c   - Handling the Object Store
3  *
4  * As should be obvious for Linux kernel code, license is GPLv2
5  *
6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7  *
8  * Object store or ostore makes up the complete device with exception of
9  * the superblock and journal areas.  Apart from its own metadata it stores
10  * three kinds of objects: inodes, dentries and blocks, both data and indirect.
11  */
12 #include "logfs.h"
13 #include <linux/slab.h>
14
15 static int logfs_mark_segment_bad(struct super_block *sb, u32 segno)
16 {
17         struct logfs_super *super = logfs_super(sb);
18         struct btree_head32 *head = &super->s_reserved_segments;
19         int err;
20
21         err = btree_insert32(head, segno, (void *)1, GFP_NOFS);
22         if (err)
23                 return err;
24         logfs_super(sb)->s_bad_segments++;
25         /* FIXME: write to journal */
26         return 0;
27 }
28
29 int logfs_erase_segment(struct super_block *sb, u32 segno, int ensure_erase)
30 {
31         struct logfs_super *super = logfs_super(sb);
32
33         super->s_gec++;
34
35         return super->s_devops->erase(sb, (u64)segno << super->s_segshift,
36                         super->s_segsize, ensure_erase);
37 }
38
39 static s64 logfs_get_free_bytes(struct logfs_area *area, size_t bytes)
40 {
41         s32 ofs;
42
43         logfs_open_area(area, bytes);
44
45         ofs = area->a_used_bytes;
46         area->a_used_bytes += bytes;
47         BUG_ON(area->a_used_bytes >= logfs_super(area->a_sb)->s_segsize);
48
49         return dev_ofs(area->a_sb, area->a_segno, ofs);
50 }
51
52 static struct page *get_mapping_page(struct super_block *sb, pgoff_t index,
53                 int use_filler)
54 {
55         struct logfs_super *super = logfs_super(sb);
56         struct address_space *mapping = super->s_mapping_inode->i_mapping;
57         filler_t *filler = super->s_devops->readpage;
58         struct page *page;
59
60         BUG_ON(mapping_gfp_mask(mapping) & __GFP_FS);
61         if (use_filler)
62                 page = read_cache_page(mapping, index, filler, sb);
63         else {
64                 page = find_or_create_page(mapping, index, GFP_NOFS);
65                 unlock_page(page);
66         }
67         return page;
68 }
69
70 int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
71                 int use_filler)
72 {
73         pgoff_t index = ofs >> PAGE_SHIFT;
74         struct page *page;
75         long offset = ofs & (PAGE_SIZE-1);
76         long copylen;
77
78         /* Only logfs_wbuf_recover may use len==0 */
79         BUG_ON(!len && !use_filler);
80         do {
81                 copylen = min((ulong)len, PAGE_SIZE - offset);
82
83                 page = get_mapping_page(area->a_sb, index, use_filler);
84                 if (IS_ERR(page))
85                         return PTR_ERR(page);
86                 BUG_ON(!page); /* FIXME: reserve a pool */
87                 SetPageUptodate(page);
88                 memcpy(page_address(page) + offset, buf, copylen);
89                 SetPagePrivate(page);
90                 page_cache_release(page);
91
92                 buf += copylen;
93                 len -= copylen;
94                 offset = 0;
95                 index++;
96         } while (len);
97         return 0;
98 }
99
100 static void pad_partial_page(struct logfs_area *area)
101 {
102         struct super_block *sb = area->a_sb;
103         struct page *page;
104         u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
105         pgoff_t index = ofs >> PAGE_SHIFT;
106         long offset = ofs & (PAGE_SIZE-1);
107         u32 len = PAGE_SIZE - offset;
108
109         if (len % PAGE_SIZE) {
110                 page = get_mapping_page(sb, index, 0);
111                 BUG_ON(!page); /* FIXME: reserve a pool */
112                 memset(page_address(page) + offset, 0xff, len);
113                 SetPagePrivate(page);
114                 page_cache_release(page);
115         }
116 }
117
118 static void pad_full_pages(struct logfs_area *area)
119 {
120         struct super_block *sb = area->a_sb;
121         struct logfs_super *super = logfs_super(sb);
122         u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
123         u32 len = super->s_segsize - area->a_used_bytes;
124         pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
125         pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
126         struct page *page;
127
128         while (no_indizes) {
129                 page = get_mapping_page(sb, index, 0);
130                 BUG_ON(!page); /* FIXME: reserve a pool */
131                 SetPageUptodate(page);
132                 memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
133                 SetPagePrivate(page);
134                 page_cache_release(page);
135                 index++;
136                 no_indizes--;
137         }
138 }
139
/*
 * bdev_writeseg will write full pages, so the unused tail of the last
 * partially filled page is set to 0xff to prevent data leaks.  For the
 * final writeout of a segment (@final != 0) all remaining pages up to the
 * end of the segment are allocated and filled as well.
 */
static void pad_wbuf(struct logfs_area *area, int final)
{
        pad_partial_page(area);
        if (!final)
                return;
        pad_full_pages(area);
}
150
151 /*
152  * We have to be careful with the alias tree.  Since lookup is done by bix,
153  * it needs to be normalized, so 14, 15, 16, etc. all match when dealing with
154  * indirect blocks.  So always use it through accessor functions.
155  */
156 static void *alias_tree_lookup(struct super_block *sb, u64 ino, u64 bix,
157                 level_t level)
158 {
159         struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
160         pgoff_t index = logfs_pack_index(bix, level);
161
162         return btree_lookup128(head, ino, index);
163 }
164
165 static int alias_tree_insert(struct super_block *sb, u64 ino, u64 bix,
166                 level_t level, void *val)
167 {
168         struct btree_head128 *head = &logfs_super(sb)->s_object_alias_tree;
169         pgoff_t index = logfs_pack_index(bix, level);
170
171         return btree_insert128(head, ino, index, val, GFP_NOFS);
172 }
173
174 static int btree_write_alias(struct super_block *sb, struct logfs_block *block,
175                 write_alias_t *write_one_alias)
176 {
177         struct object_alias_item *item;
178         int err;
179
180         list_for_each_entry(item, &block->item_list, list) {
181                 err = write_alias_journal(sb, block->ino, block->bix,
182                                 block->level, item->child_no, item->val);
183                 if (err)
184                         return err;
185         }
186         return 0;
187 }
188
/*
 * Block operations installed on blocks that are tracked in the alias btree:
 * logfs_load_object_aliases() and move_page_to_btree() both assign this
 * table to block->ops.
 */
static struct logfs_block_ops btree_block_ops = {
        .write_block    = btree_write_block,
        .free_block     = __free_block,
        .write_alias    = btree_write_alias,
};
194
195 int logfs_load_object_aliases(struct super_block *sb,
196                 struct logfs_obj_alias *oa, int count)
197 {
198         struct logfs_super *super = logfs_super(sb);
199         struct logfs_block *block;
200         struct object_alias_item *item;
201         u64 ino, bix;
202         level_t level;
203         int i, err;
204
205         super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;
206         count /= sizeof(*oa);
207         for (i = 0; i < count; i++) {
208                 item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
209                 if (!item)
210                         return -ENOMEM;
211                 memset(item, 0, sizeof(*item));
212
213                 super->s_no_object_aliases++;
214                 item->val = oa[i].val;
215                 item->child_no = be16_to_cpu(oa[i].child_no);
216
217                 ino = be64_to_cpu(oa[i].ino);
218                 bix = be64_to_cpu(oa[i].bix);
219                 level = LEVEL(oa[i].level);
220
221                 log_aliases("logfs_load_object_aliases(%llx, %llx, %x, %x) %llx\n",
222                                 ino, bix, level, item->child_no,
223                                 be64_to_cpu(item->val));
224                 block = alias_tree_lookup(sb, ino, bix, level);
225                 if (!block) {
226                         block = __alloc_block(sb, ino, bix, level);
227                         block->ops = &btree_block_ops;
228                         err = alias_tree_insert(sb, ino, bix, level, block);
229                         BUG_ON(err); /* mempool empty */
230                 }
231                 if (test_and_set_bit(item->child_no, block->alias_map)) {
232                         printk(KERN_ERR"LogFS: Alias collision detected\n");
233                         return -EIO;
234                 }
235                 list_move_tail(&block->alias_list, &super->s_object_alias);
236                 list_add(&item->list, &block->item_list);
237         }
238         return 0;
239 }
240
241 static void kill_alias(void *_block, unsigned long ignore0,
242                 u64 ignore1, u64 ignore2, size_t ignore3)
243 {
244         struct logfs_block *block = _block;
245         struct super_block *sb = block->sb;
246         struct logfs_super *super = logfs_super(sb);
247         struct object_alias_item *item;
248
249         while (!list_empty(&block->item_list)) {
250                 item = list_entry(block->item_list.next, typeof(*item), list);
251                 list_del(&item->list);
252                 mempool_free(item, super->s_alias_pool);
253         }
254         block->ops->free_block(sb, block);
255 }
256
257 static int obj_type(struct inode *inode, level_t level)
258 {
259         if (level == 0) {
260                 if (S_ISDIR(inode->i_mode))
261                         return OBJ_DENTRY;
262                 if (inode->i_ino == LOGFS_INO_MASTER)
263                         return OBJ_INODE;
264         }
265         return OBJ_BLOCK;
266 }
267
268 static int obj_len(struct super_block *sb, int obj_type)
269 {
270         switch (obj_type) {
271         case OBJ_DENTRY:
272                 return sizeof(struct logfs_disk_dentry);
273         case OBJ_INODE:
274                 return sizeof(struct logfs_disk_inode);
275         case OBJ_BLOCK:
276                 return sb->s_blocksize;
277         default:
278                 BUG();
279         }
280 }
281
/*
 * Append one object - header followed by @len bytes of @buf - to the area
 * selected by shadow->gc_level.
 *
 * @type and @compr are stored in the object header as given.  On return
 * shadow->new_ofs holds the device offset of the object header and
 * shadow->new_len the accounted length including the header.
 *
 * Always returns 0.
 * NOTE(review): the return values of the two logfs_buf_write() calls are
 * not checked.
 */
static int __logfs_segment_write(struct inode *inode, void *buf,
                struct logfs_shadow *shadow, int type, int len, int compr)
{
        struct logfs_area *area;
        struct super_block *sb = inode->i_sb;
        s64 ofs;
        struct logfs_object_header h;
        int acc_len;

        /*
         * Accounted length: the actual payload length for gc_level 0, the
         * nominal object length for @type on every other level.
         */
        if (shadow->gc_level == 0)
                acc_len = len;
        else
                acc_len = obj_len(sb, type);

        area = get_area(sb, shadow->gc_level);
        ofs = logfs_get_free_bytes(area, len + LOGFS_OBJECT_HEADERSIZE);
        LOGFS_BUG_ON(ofs <= 0, sb);
        /*
         * Order is important.  logfs_get_free_bytes(), by modifying the
         * segment file, may modify the content of the very page we're about
         * to write now.  Which is fine, as long as the calculated crc and
         * written data still match.  So do the modifications _before_
         * calculating the crc.
         */

        h.len   = cpu_to_be16(len);
        h.type  = type;
        h.compr = compr;
        h.ino   = cpu_to_be64(inode->i_ino);
        h.bix   = cpu_to_be64(shadow->bix);
        h.crc   = logfs_crc32(&h, sizeof(h) - 4, 4);
        h.data_crc = logfs_crc32(buf, len, 0);

        /* header first, payload directly behind it */
        logfs_buf_write(area, ofs, &h, sizeof(h));
        logfs_buf_write(area, ofs + LOGFS_OBJECT_HEADERSIZE, buf, len);

        shadow->new_ofs = ofs;
        shadow->new_len = acc_len + LOGFS_OBJECT_HEADERSIZE;

        return 0;
}
323
324 static s64 logfs_segment_write_compress(struct inode *inode, void *buf,
325                 struct logfs_shadow *shadow, int type, int len)
326 {
327         struct super_block *sb = inode->i_sb;
328         void *compressor_buf = logfs_super(sb)->s_compressed_je;
329         ssize_t compr_len;
330         int ret;
331
332         mutex_lock(&logfs_super(sb)->s_journal_mutex);
333         compr_len = logfs_compress(buf, compressor_buf, len, len);
334
335         if (compr_len >= 0) {
336                 ret = __logfs_segment_write(inode, compressor_buf, shadow,
337                                 type, compr_len, COMPR_ZLIB);
338         } else {
339                 ret = __logfs_segment_write(inode, buf, shadow, type, len,
340                                 COMPR_NONE);
341         }
342         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
343         return ret;
344 }
345
346 /**
347  * logfs_segment_write - write data block to object store
348  * @inode:              inode containing data
349  *
350  * Returns an errno or zero.
351  */
352 int logfs_segment_write(struct inode *inode, struct page *page,
353                 struct logfs_shadow *shadow)
354 {
355         struct super_block *sb = inode->i_sb;
356         struct logfs_super *super = logfs_super(sb);
357         int do_compress, type, len;
358         int ret;
359         void *buf;
360
361         super->s_flags |= LOGFS_SB_FLAG_DIRTY;
362         BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
363         do_compress = logfs_inode(inode)->li_flags & LOGFS_IF_COMPRESSED;
364         if (shadow->gc_level != 0) {
365                 /* temporarily disable compression for indirect blocks */
366                 do_compress = 0;
367         }
368
369         type = obj_type(inode, shrink_level(shadow->gc_level));
370         len = obj_len(sb, type);
371         buf = kmap(page);
372         if (do_compress)
373                 ret = logfs_segment_write_compress(inode, buf, shadow, type,
374                                 len);
375         else
376                 ret = __logfs_segment_write(inode, buf, shadow, type, len,
377                                 COMPR_NONE);
378         kunmap(page);
379
380         log_segment("logfs_segment_write(%llx, %llx, %x) %llx->%llx %x->%x\n",
381                         shadow->ino, shadow->bix, shadow->gc_level,
382                         shadow->old_ofs, shadow->new_ofs,
383                         shadow->old_len, shadow->new_len);
384         /* this BUG_ON did catch a locking bug.  useful */
385         BUG_ON(!(shadow->new_ofs & (super->s_segsize - 1)));
386         return ret;
387 }
388
389 int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
390 {
391         pgoff_t index = ofs >> PAGE_SHIFT;
392         struct page *page;
393         long offset = ofs & (PAGE_SIZE-1);
394         long copylen;
395
396         while (len) {
397                 copylen = min((ulong)len, PAGE_SIZE - offset);
398
399                 page = get_mapping_page(sb, index, 1);
400                 if (IS_ERR(page))
401                         return PTR_ERR(page);
402                 memcpy(buf, page_address(page) + offset, copylen);
403                 page_cache_release(page);
404
405                 buf += copylen;
406                 len -= copylen;
407                 offset = 0;
408                 index++;
409         }
410         return 0;
411 }
412
413 /*
414  * The "position" of indirect blocks is ambiguous.  It can be the position
415  * of any data block somewhere behind this indirect block.  So we need to
416  * normalize the positions through logfs_block_mask() before comparing.
417  */
418 static int check_pos(struct super_block *sb, u64 pos1, u64 pos2, level_t level)
419 {
420         return  (pos1 & logfs_block_mask(sb, level)) !=
421                 (pos2 & logfs_block_mask(sb, level));
422 }
423
/*
 * Compiled out.  Reads a segment header from device offset @ofs into @sh
 * and compares its stored crc against one computed over the header.
 * Returns 0, the wbuf_read() error, or -EIO on a crc mismatch.
 */
#if 0
static int read_seg_header(struct super_block *sb, u64 ofs,
                struct logfs_segment_header *sh)
{
        __be32 crc;
        int err;

        err = wbuf_read(sb, ofs, sizeof(*sh), sh);
        if (err)
                return err;
        crc = logfs_crc32(sh, sizeof(*sh), 4);
        if (crc != sh->crc) {
                printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
                                "got %x\n", ofs, be32_to_cpu(sh->crc),
                                be32_to_cpu(crc));
                return -EIO;
        }
        return 0;
}
#endif
444
445 static int read_obj_header(struct super_block *sb, u64 ofs,
446                 struct logfs_object_header *oh)
447 {
448         __be32 crc;
449         int err;
450
451         err = wbuf_read(sb, ofs, sizeof(*oh), oh);
452         if (err)
453                 return err;
454         crc = logfs_crc32(oh, sizeof(*oh) - 4, 4);
455         if (crc != oh->crc) {
456                 printk(KERN_ERR"LOGFS: header crc error at %llx: expected %x, "
457                                 "got %x\n", ofs, be32_to_cpu(oh->crc),
458                                 be32_to_cpu(crc));
459                 return -EIO;
460         }
461         return 0;
462 }
463
464 static void move_btree_to_page(struct inode *inode, struct page *page,
465                 __be64 *data)
466 {
467         struct super_block *sb = inode->i_sb;
468         struct logfs_super *super = logfs_super(sb);
469         struct btree_head128 *head = &super->s_object_alias_tree;
470         struct logfs_block *block;
471         struct object_alias_item *item, *next;
472
473         if (!(super->s_flags & LOGFS_SB_FLAG_OBJ_ALIAS))
474                 return;
475
476         block = btree_remove128(head, inode->i_ino, page->index);
477         if (!block)
478                 return;
479
480         log_blockmove("move_btree_to_page(%llx, %llx, %x)\n",
481                         block->ino, block->bix, block->level);
482         list_for_each_entry_safe(item, next, &block->item_list, list) {
483                 data[item->child_no] = item->val;
484                 list_del(&item->list);
485                 mempool_free(item, super->s_alias_pool);
486         }
487         block->page = page;
488         SetPagePrivate(page);
489         page->private = (unsigned long)block;
490         block->ops = &indirect_block_ops;
491         initialize_block_counters(page, block, data, 0);
492 }
493
/*
 * Thin wrapper around find_next_bit().  It exists only to silence a false,
 * yet annoying gcc warning that otherwise makes the editor jump into
 * bitops.h on every recompile of this file.
 * TODO: Complain to gcc folks about this and upgrade compiler.
 */
static unsigned long fnb(const unsigned long *addr,
                unsigned long size, unsigned long offset)
{
        unsigned long next = find_next_bit(addr, size, offset);

        return next;
}
504
/*
 * Detach the logfs_block from @page and keep it in the alias btree instead.
 *
 * For every bit set in the block's alias_map an alias item is allocated,
 * filled with the corresponding child pointer read from the page, and put
 * on the block's item list.  The block is then unhooked from the page,
 * switched to the btree block operations and inserted into the alias tree.
 *
 * When LOGFS_SB_FLAG_SHUTDOWN is set the block is simply freed.
 */
void move_page_to_btree(struct page *page)
{
        struct logfs_block *block = logfs_block(page);
        struct super_block *sb = block->sb;
        struct logfs_super *super = logfs_super(sb);
        struct object_alias_item *item;
        unsigned long pos;
        __be64 *child;
        int err;

        if (super->s_flags & LOGFS_SB_FLAG_SHUTDOWN) {
                block->ops->free_block(sb, block);
                return;
        }
        log_blockmove("move_page_to_btree(%llx, %llx, %x)\n",
                        block->ino, block->bix, block->level);
        super->s_flags |= LOGFS_SB_FLAG_OBJ_ALIAS;

        /* walk all set bits of alias_map; pos++ steps past the last hit */
        for (pos = 0; ; pos++) {
                pos = fnb(block->alias_map, LOGFS_BLOCK_FACTOR, pos);
                if (pos >= LOGFS_BLOCK_FACTOR)
                        break;

                item = mempool_alloc(super->s_alias_pool, GFP_NOFS);
                BUG_ON(!item); /* mempool empty */
                memset(item, 0, sizeof(*item));

                /* copy the child pointer out of the page as stored */
                child = kmap_atomic(page, KM_USER0);
                item->val = child[pos];
                kunmap_atomic(child, KM_USER0);
                item->child_no = pos;
                list_add(&item->list, &block->item_list);
        }
        /* cut the link between page and block in both directions */
        block->page = NULL;
        ClearPagePrivate(page);
        page->private = 0;
        block->ops = &btree_block_ops;
        err = alias_tree_insert(block->sb, block->ino, block->bix, block->level,
                        block);
        BUG_ON(err); /* mempool empty */
        ClearPageUptodate(page);
}
547
548 static int __logfs_segment_read(struct inode *inode, void *buf,
549                 u64 ofs, u64 bix, level_t level)
550 {
551         struct super_block *sb = inode->i_sb;
552         void *compressor_buf = logfs_super(sb)->s_compressed_je;
553         struct logfs_object_header oh;
554         __be32 crc;
555         u16 len;
556         int err, block_len;
557
558         block_len = obj_len(sb, obj_type(inode, level));
559         err = read_obj_header(sb, ofs, &oh);
560         if (err)
561                 goto out_err;
562
563         err = -EIO;
564         if (be64_to_cpu(oh.ino) != inode->i_ino
565                         || check_pos(sb, be64_to_cpu(oh.bix), bix, level)) {
566                 printk(KERN_ERR"LOGFS: (ino, bix) don't match at %llx: "
567                                 "expected (%lx, %llx), got (%llx, %llx)\n",
568                                 ofs, inode->i_ino, bix,
569                                 be64_to_cpu(oh.ino), be64_to_cpu(oh.bix));
570                 goto out_err;
571         }
572
573         len = be16_to_cpu(oh.len);
574
575         switch (oh.compr) {
576         case COMPR_NONE:
577                 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len, buf);
578                 if (err)
579                         goto out_err;
580                 crc = logfs_crc32(buf, len, 0);
581                 if (crc != oh.data_crc) {
582                         printk(KERN_ERR"LOGFS: uncompressed data crc error at "
583                                         "%llx: expected %x, got %x\n", ofs,
584                                         be32_to_cpu(oh.data_crc),
585                                         be32_to_cpu(crc));
586                         goto out_err;
587                 }
588                 break;
589         case COMPR_ZLIB:
590                 mutex_lock(&logfs_super(sb)->s_journal_mutex);
591                 err = wbuf_read(sb, ofs + LOGFS_OBJECT_HEADERSIZE, len,
592                                 compressor_buf);
593                 if (err) {
594                         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
595                         goto out_err;
596                 }
597                 crc = logfs_crc32(compressor_buf, len, 0);
598                 if (crc != oh.data_crc) {
599                         printk(KERN_ERR"LOGFS: compressed data crc error at "
600                                         "%llx: expected %x, got %x\n", ofs,
601                                         be32_to_cpu(oh.data_crc),
602                                         be32_to_cpu(crc));
603                         mutex_unlock(&logfs_super(sb)->s_journal_mutex);
604                         goto out_err;
605                 }
606                 err = logfs_uncompress(compressor_buf, buf, len, block_len);
607                 mutex_unlock(&logfs_super(sb)->s_journal_mutex);
608                 if (err) {
609                         printk(KERN_ERR"LOGFS: uncompress error at %llx\n", ofs);
610                         goto out_err;
611                 }
612                 break;
613         default:
614                 LOGFS_BUG(sb);
615                 err = -EIO;
616                 goto out_err;
617         }
618         return 0;
619
620 out_err:
621         logfs_set_ro(sb);
622         printk(KERN_ERR"LOGFS: device is read-only now\n");
623         LOGFS_BUG(sb);
624         return err;
625 }
626
627 /**
628  * logfs_segment_read - read data block from object store
629  * @inode:              inode containing data
630  * @buf:                data buffer
631  * @ofs:                physical data offset
632  * @bix:                block index
633  * @level:              block level
634  *
635  * Returns 0 on success or a negative errno.
636  */
637 int logfs_segment_read(struct inode *inode, struct page *page,
638                 u64 ofs, u64 bix, level_t level)
639 {
640         int err;
641         void *buf;
642
643         if (PageUptodate(page))
644                 return 0;
645
646         ofs &= ~LOGFS_FULLY_POPULATED;
647
648         buf = kmap(page);
649         err = __logfs_segment_read(inode, buf, ofs, bix, level);
650         if (!err) {
651                 move_btree_to_page(inode, page, buf);
652                 SetPageUptodate(page);
653         }
654         kunmap(page);
655         log_segment("logfs_segment_read(%lx, %llx, %x) %llx (%d)\n",
656                         inode->i_ino, bix, level, ofs, err);
657         return err;
658 }
659
660 int logfs_segment_delete(struct inode *inode, struct logfs_shadow *shadow)
661 {
662         struct super_block *sb = inode->i_sb;
663         struct logfs_super *super = logfs_super(sb);
664         struct logfs_object_header h;
665         u16 len;
666         int err;
667
668         super->s_flags |= LOGFS_SB_FLAG_DIRTY;
669         BUG_ON(super->s_flags & LOGFS_SB_FLAG_SHUTDOWN);
670         BUG_ON(shadow->old_ofs & LOGFS_FULLY_POPULATED);
671         if (!shadow->old_ofs)
672                 return 0;
673
674         log_segment("logfs_segment_delete(%llx, %llx, %x) %llx->%llx %x->%x\n",
675                         shadow->ino, shadow->bix, shadow->gc_level,
676                         shadow->old_ofs, shadow->new_ofs,
677                         shadow->old_len, shadow->new_len);
678         err = read_obj_header(sb, shadow->old_ofs, &h);
679         LOGFS_BUG_ON(err, sb);
680         LOGFS_BUG_ON(be64_to_cpu(h.ino) != inode->i_ino, sb);
681         LOGFS_BUG_ON(check_pos(sb, shadow->bix, be64_to_cpu(h.bix),
682                                 shrink_level(shadow->gc_level)), sb);
683
684         if (shadow->gc_level == 0)
685                 len = be16_to_cpu(h.len);
686         else
687                 len = obj_len(sb, h.type);
688         shadow->old_len = len + sizeof(h);
689         return 0;
690 }
691
692 void freeseg(struct super_block *sb, u32 segno)
693 {
694         struct logfs_super *super = logfs_super(sb);
695         struct address_space *mapping = super->s_mapping_inode->i_mapping;
696         struct page *page;
697         u64 ofs, start, end;
698
699         start = dev_ofs(sb, segno, 0);
700         end = dev_ofs(sb, segno + 1, 0);
701         for (ofs = start; ofs < end; ofs += PAGE_SIZE) {
702                 page = find_get_page(mapping, ofs >> PAGE_SHIFT);
703                 if (!page)
704                         continue;
705                 ClearPagePrivate(page);
706                 page_cache_release(page);
707         }
708 }
709
710 int logfs_open_area(struct logfs_area *area, size_t bytes)
711 {
712         struct super_block *sb = area->a_sb;
713         struct logfs_super *super = logfs_super(sb);
714         int err, closed = 0;
715
716         if (area->a_is_open && area->a_used_bytes + bytes <= super->s_segsize)
717                 return 0;
718
719         if (area->a_is_open) {
720                 u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
721                 u32 len = super->s_segsize - area->a_written_bytes;
722
723                 log_gc("logfs_close_area(%x)\n", area->a_segno);
724                 pad_wbuf(area, 1);
725                 super->s_devops->writeseg(area->a_sb, ofs, len);
726                 freeseg(sb, area->a_segno);
727                 closed = 1;
728         }
729
730         area->a_used_bytes = 0;
731         area->a_written_bytes = 0;
732 again:
733         area->a_ops->get_free_segment(area);
734         area->a_ops->get_erase_count(area);
735
736         log_gc("logfs_open_area(%x, %x)\n", area->a_segno, area->a_level);
737         err = area->a_ops->erase_segment(area);
738         if (err) {
739                 printk(KERN_WARNING "LogFS: Error erasing segment %x\n",
740                                 area->a_segno);
741                 logfs_mark_segment_bad(sb, area->a_segno);
742                 goto again;
743         }
744         area->a_is_open = 1;
745         return closed;
746 }
747
748 void logfs_sync_area(struct logfs_area *area)
749 {
750         struct super_block *sb = area->a_sb;
751         struct logfs_super *super = logfs_super(sb);
752         u64 ofs = dev_ofs(sb, area->a_segno, area->a_written_bytes);
753         u32 len = (area->a_used_bytes - area->a_written_bytes);
754
755         if (super->s_writesize)
756                 len &= ~(super->s_writesize - 1);
757         if (len == 0)
758                 return;
759         pad_wbuf(area, 0);
760         super->s_devops->writeseg(sb, ofs, len);
761         area->a_written_bytes += len;
762 }
763
764 void logfs_sync_segments(struct super_block *sb)
765 {
766         struct logfs_super *super = logfs_super(sb);
767         int i;
768
769         for_each_area(i)
770                 logfs_sync_area(super->s_area[i]);
771 }
772
773 /*
774  * Pick a free segment to be used for this area.  Effectively takes a
775  * candidate from the free list (not really a candidate anymore).
776  */
777 static void ostore_get_free_segment(struct logfs_area *area)
778 {
779         struct super_block *sb = area->a_sb;
780         struct logfs_super *super = logfs_super(sb);
781
782         if (super->s_free_list.count == 0) {
783                 printk(KERN_ERR"LOGFS: ran out of free segments\n");
784                 LOGFS_BUG(sb);
785         }
786
787         area->a_segno = get_best_cand(sb, &super->s_free_list, NULL);
788 }
789
790 static void ostore_get_erase_count(struct logfs_area *area)
791 {
792         struct logfs_segment_entry se;
793         u32 ec_level;
794
795         logfs_get_segment_entry(area->a_sb, area->a_segno, &se);
796         BUG_ON(se.ec_level == cpu_to_be32(BADSEG) ||
797                         se.valid == cpu_to_be32(RESERVED));
798
799         ec_level = be32_to_cpu(se.ec_level);
800         area->a_erase_count = (ec_level >> 4) + 1;
801 }
802
803 static int ostore_erase_segment(struct logfs_area *area)
804 {
805         struct super_block *sb = area->a_sb;
806         struct logfs_segment_header sh;
807         u64 ofs;
808         int err;
809
810         err = logfs_erase_segment(sb, area->a_segno, 0);
811         if (err)
812                 return err;
813
814         sh.pad = 0;
815         sh.type = SEG_OSTORE;
816         sh.level = (__force u8)area->a_level;
817         sh.segno = cpu_to_be32(area->a_segno);
818         sh.ec = cpu_to_be32(area->a_erase_count);
819         sh.gec = cpu_to_be64(logfs_super(sb)->s_gec);
820         sh.crc = logfs_crc32(&sh, sizeof(sh), 4);
821
822         logfs_set_segment_erased(sb, area->a_segno, area->a_erase_count,
823                         area->a_level);
824
825         ofs = dev_ofs(sb, area->a_segno, 0);
826         area->a_used_bytes = sizeof(sh);
827         logfs_buf_write(area, ofs, &sh, sizeof(sh));
828         return 0;
829 }
830
/*
 * Area operations for object store areas; installed on every s_area[]
 * entry by logfs_init_areas() and invoked from logfs_open_area().
 */
static const struct logfs_area_ops ostore_area_ops = {
        .get_free_segment       = ostore_get_free_segment,
        .get_erase_count        = ostore_get_erase_count,
        .erase_segment          = ostore_erase_segment,
};
836
837 static void free_area(struct logfs_area *area)
838 {
839         if (area)
840                 freeseg(area->a_sb, area->a_segno);
841         kfree(area);
842 }
843
844 static struct logfs_area *alloc_area(struct super_block *sb)
845 {
846         struct logfs_area *area;
847
848         area = kzalloc(sizeof(*area), GFP_KERNEL);
849         if (!area)
850                 return NULL;
851
852         area->a_sb = sb;
853         return area;
854 }
855
/*
 * invalidatepage for the mapping inode.  Reaching this is a bug.
 */
static void map_invalidatepage(struct page *page, unsigned long offset)
{
        BUG();
}
860
861 static int map_releasepage(struct page *page, gfp_t g)
862 {
863         /* Don't release these pages */
864         return 0;
865 }
866
/*
 * Address space operations of the mapping inode, installed by
 * logfs_init_mapping().  Invalidation is a bug and release is refused.
 */
static const struct address_space_operations mapping_aops = {
        .invalidatepage = map_invalidatepage,
        .releasepage    = map_releasepage,
        .set_page_dirty = __set_page_dirty_nobuffers,
};
872
873 int logfs_init_mapping(struct super_block *sb)
874 {
875         struct logfs_super *super = logfs_super(sb);
876         struct address_space *mapping;
877         struct inode *inode;
878
879         inode = logfs_new_meta_inode(sb, LOGFS_INO_MAPPING);
880         if (IS_ERR(inode))
881                 return PTR_ERR(inode);
882         super->s_mapping_inode = inode;
883         mapping = inode->i_mapping;
884         mapping->a_ops = &mapping_aops;
885         /* Would it be possible to use __GFP_HIGHMEM as well? */
886         mapping_set_gfp_mask(mapping, GFP_NOFS);
887         return 0;
888 }
889
/*
 * Allocate the alias item mempool, the journal area and all object store
 * areas, and initialize the object alias tree.
 *
 * Returns 0 on success or -ENOMEM.  On failure everything allocated here
 * is released again.
 */
int logfs_init_areas(struct super_block *sb)
{
        struct logfs_super *super = logfs_super(sb);
        /* -1 so that the error path frees no s_area[] entry if we fail early */
        int i = -1;

        super->s_alias_pool = mempool_create_kmalloc_pool(600,
                        sizeof(struct object_alias_item));
        if (!super->s_alias_pool)
                return -ENOMEM;

        super->s_journal_area = alloc_area(sb);
        if (!super->s_journal_area)
                goto err;

        for_each_area(i) {
                super->s_area[i] = alloc_area(sb);
                if (!super->s_area[i])
                        goto err;
                super->s_area[i]->a_level = GC_LEVEL(i);
                super->s_area[i]->a_ops = &ostore_area_ops;
        }
        btree_init_mempool128(&super->s_object_alias_tree,
                        super->s_btree_pool);
        return 0;

err:
        /* i indexes the entry that failed; free only the ones before it */
        for (i--; i >= 0; i--)
                free_area(super->s_area[i]);
        free_area(super->s_journal_area);
        logfs_mempool_destroy(super->s_alias_pool);
        return -ENOMEM;
}
922
923 void logfs_cleanup_areas(struct super_block *sb)
924 {
925         struct logfs_super *super = logfs_super(sb);
926         int i;
927
928         btree_grim_visitor128(&super->s_object_alias_tree, 0, kill_alias);
929         for_each_area(i)
930                 free_area(super->s_area[i]);
931         free_area(super->s_journal_area);
932         destroy_meta_inode(super->s_mapping_inode);
933 }