fs/logfs/gc.c

   1 /*
   2  * fs/logfs/gc.c        - garbage collection code
   3  *
   4  * As should be obvious for Linux kernel code, license is GPLv2
   5  *
   6  * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
   7  */
   8 #include "logfs.h"
   9 #include <linux/sched.h>
  10 #include <linux/slab.h>
  11
  12 /*
  13  * Wear leveling needs to kick in when the difference between low erase
  14  * counts and high erase counts gets too big.  A good value for "too big"
  15  * may be somewhat below 10% of maximum erase count for the device.
  16  * Why not 397, to pick a nice round number with no specific meaning? :)
  17  *
  18  * WL_RATELIMIT is the minimum time between two wear level events.  A huge
  19  * number of segments may fulfil the requirements for wear leveling at the
  20  * same time.  If that happens we don't want to cause a latency from hell,
  21  * but just gently pick one segment every so often and minimize overhead.
  22  */
  23 #define WL_DELTA 397
  24 #define WL_RATELIMIT 100
  25 #define MAX_OBJ_ALIASES 2600
  26 #define SCAN_RATIO 512  /* number of scanned segments per gc'd segment */
  27 #define LIST_SIZE 64    /* base size of candidate lists */
  28 #define SCAN_ROUNDS 128 /* maximum number of complete medium scans */
  29 #define SCAN_ROUNDS_HIGH 4 /* maximum number of higher-level scans */
  30
  31 static int no_free_segments(struct super_block *sb)
  32 {
  33         struct logfs_super *super = logfs_super(sb);
  34
  35         return super->s_free_list.count;
  36 }
  37
  38 /* journal has distance -1, top-most ifile layer distance 0 */
  39 static u8 root_distance(struct super_block *sb, gc_level_t __gc_level)
  40 {
  41         struct logfs_super *super = logfs_super(sb);
  42         u8 gc_level = (__force u8)__gc_level;
  43
  44         switch (gc_level) {
  45         case 0: /* fall through */
  46         case 1: /* fall through */
  47         case 2: /* fall through */
  48         case 3:
  49                 /* file data or indirect blocks */
  50                 return super->s_ifile_levels + super->s_iblock_levels - gc_level;
  51         case 6: /* fall through */
  52         case 7: /* fall through */
  53         case 8: /* fall through */
  54         case 9:
  55                 /* inode file data or indirect blocks */
  56                 return super->s_ifile_levels - (gc_level - 6);
  57         default:
  58                 printk(KERN_ERR"LOGFS: segment of unknown level %x found\n",
  59                                 gc_level);
  60                 WARN_ON(1);
  61                 return super->s_ifile_levels + super->s_iblock_levels;
  62         }
  63 }
  64
  65 static int segment_is_reserved(struct super_block *sb, u32 segno)
  66 {
  67         struct logfs_super *super = logfs_super(sb);
  68         struct logfs_area *area;
  69         void *reserved;
  70         int i;
  71
  72         /* Some segments are reserved.  Just pretend they were all valid */
  73         reserved = btree_lookup32(&super->s_reserved_segments, segno);
  74         if (reserved)
  75                 return 1;
  76
  77         /* Currently open segments */
  78         for_each_area(i) {
  79                 area = super->s_area[i];
  80                 if (area->a_is_open && area->a_segno == segno)
  81                         return 1;
  82         }
  83
  84         return 0;
  85 }
  86
  87 static void logfs_mark_segment_bad(struct super_block *sb, u32 segno)
  88 {
  89         BUG();
  90 }
  91
  92 /*
  93  * Returns the bytes consumed by valid objects in this segment.  Object headers
  94  * are counted, the segment header is not.
  95  */
  96 static u32 logfs_valid_bytes(struct super_block *sb, u32 segno, u32 *ec,
  97                 gc_level_t *gc_level)
  98 {
  99         struct logfs_segment_entry se;
 100         u32 ec_level;
 101
 102         logfs_get_segment_entry(sb, segno, &se);
 103         if (se.ec_level == cpu_to_be32(BADSEG) ||
 104                         se.valid == cpu_to_be32(RESERVED))
 105                 return RESERVED;
 106
 107         ec_level = be32_to_cpu(se.ec_level);
 108         *ec = ec_level >> 4;
 109         *gc_level = GC_LEVEL(ec_level & 0xf);
 110         return be32_to_cpu(se.valid);
 111 }
 112
 113 static void logfs_cleanse_block(struct super_block *sb, u64 ofs, u64 ino,
 114                 u64 bix, gc_level_t gc_level)
 115 {
 116         struct inode *inode;
 117         int err, cookie;
 118
 119         inode = logfs_safe_iget(sb, ino, &cookie);
 120         err = logfs_rewrite_block(inode, bix, ofs, gc_level, 0);
 121         BUG_ON(err);
 122         logfs_safe_iput(inode, cookie);
 123 }
 124
 125 static u32 logfs_gc_segment(struct super_block *sb, u32 segno, u8 dist)
 126 {
 127         struct logfs_super *super = logfs_super(sb);
 128         struct logfs_segment_header sh;
 129         struct logfs_object_header oh;
 130         u64 ofs, ino, bix;
 131         u32 seg_ofs, logical_segno, cleaned = 0;
 132         int err, len, valid;
 133         gc_level_t gc_level;
 134
 135         LOGFS_BUG_ON(segment_is_reserved(sb, segno), sb);
 136
 137         btree_insert32(&super->s_reserved_segments, segno, (void *)1, GFP_NOFS);
 138         err = wbuf_read(sb, dev_ofs(sb, segno, 0), sizeof(sh), &sh);
 139         BUG_ON(err);
 140         gc_level = GC_LEVEL(sh.level);
 141         logical_segno = be32_to_cpu(sh.segno);
 142         if (sh.crc != logfs_crc32(&sh, sizeof(sh), 4)) {
 143                 logfs_mark_segment_bad(sb, segno);
 144                 cleaned = -1;
 145                 goto out;
 146         }
 147
 148         for (seg_ofs = LOGFS_SEGMENT_HEADERSIZE;
 149                         seg_ofs + sizeof(oh) < super->s_segsize; ) {
 150                 ofs = dev_ofs(sb, logical_segno, seg_ofs);
 151                 err = wbuf_read(sb, dev_ofs(sb, segno, seg_ofs), sizeof(oh),
 152                                 &oh);
 153                 BUG_ON(err);
 154
 155                 if (!memchr_inv(&oh, 0xff, sizeof(oh)))
 156                         break;
 157
 158                 if (oh.crc != logfs_crc32(&oh, sizeof(oh) - 4, 4)) {
 159                         logfs_mark_segment_bad(sb, segno);
 160                         cleaned = super->s_segsize - 1;
 161                         goto out;
 162                 }
 163
 164                 ino = be64_to_cpu(oh.ino);
 165                 bix = be64_to_cpu(oh.bix);
 166                 len = sizeof(oh) + be16_to_cpu(oh.len);
 167                 valid = logfs_is_valid_block(sb, ofs, ino, bix, gc_level);
 168                 if (valid == 1) {
 169                         logfs_cleanse_block(sb, ofs, ino, bix, gc_level);
 170                         cleaned += len;
 171                 } else if (valid == 2) {
 172                         /* Will be invalid upon journal commit */
 173                         cleaned += len;
 174                 }
 175                 seg_ofs += len;
 176         }
 177 out:
 178         btree_remove32(&super->s_reserved_segments, segno);
 179         return cleaned;
 180 }
 181
 182 static struct gc_candidate *add_list(struct gc_candidate *cand,
 183                 struct candidate_list *list)
 184 {
 185         struct rb_node **p = &list->rb_tree.rb_node;
 186         struct rb_node *parent = NULL;
 187         struct gc_candidate *cur;
 188         int comp;
 189
 190         cand->list = list;
 191         while (*p) {
 192                 parent = *p;
 193                 cur = rb_entry(parent, struct gc_candidate, rb_node);
 194
 195                 if (list->sort_by_ec)
 196                         comp = cand->erase_count < cur->erase_count;
 197                 else
 198                         comp = cand->valid < cur->valid;
 199
 200                 if (comp)
 201                         p = &parent->rb_left;
 202                 else
 203                         p = &parent->rb_right;
 204         }
 205         rb_link_node(&cand->rb_node, parent, p);
 206         rb_insert_color(&cand->rb_node, &list->rb_tree);
 207
 208         if (list->count <= list->maxcount) {
 209                 list->count++;
 210                 return NULL;
 211         }
 212         cand = rb_entry(rb_last(&list->rb_tree), struct gc_candidate, rb_node);
 213         rb_erase(&cand->rb_node, &list->rb_tree);
 214         cand->list = NULL;
 215         return cand;
 216 }
 217
 218 static void remove_from_list(struct gc_candidate *cand)
 219 {
 220         struct candidate_list *list = cand->list;
 221
 222         rb_erase(&cand->rb_node, &list->rb_tree);
 223         list->count--;
 224 }
 225
 226 static void free_candidate(struct super_block *sb, struct gc_candidate *cand)
 227 {
 228         struct logfs_super *super = logfs_super(sb);
 229
 230         btree_remove32(&super->s_cand_tree, cand->segno);
 231         kfree(cand);
 232 }
 233
 234 u32 get_best_cand(struct super_block *sb, struct candidate_list *list, u32 *ec)
 235 {
 236         struct gc_candidate *cand;
 237         u32 segno;
 238
 239         BUG_ON(list->count == 0);
 240
 241         cand = rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
 242         remove_from_list(cand);
 243         segno = cand->segno;
 244         if (ec)
 245                 *ec = cand->erase_count;
 246         free_candidate(sb, cand);
 247         return segno;
 248 }
 249
 250 /*
 251  * We have several lists to manage segments with.  The reserve_list is used to
 252  * deal with bad blocks.  We try to keep the best (lowest ec) segments on this
 253  * list.
 254  * The free_list contains free segments for normal usage.  It usually gets the
 255  * second pick after the reserve_list.  But when the free_list is running short
 256  * it is more important to keep the free_list full than to keep a reserve.
 257  *
 258  * Segments that are not free are put onto a per-level low_list.  If we have
 259  * to run garbage collection, we pick a candidate from there.  All segments on
 260  * those lists should have at least some free space so GC will make progress.
 261  *
 262  * And last we have the ec_list, which is used to pick segments for wear
 263  * leveling.
 264  *
 265  * If all appropriate lists are full, we simply free the candidate and forget
 266  * about that segment for a while.  We have better candidates for each purpose.
 267  */
 268 static void __add_candidate(struct super_block *sb, struct gc_candidate *cand)
 269 {
 270         struct logfs_super *super = logfs_super(sb);
 271         u32 full = super->s_segsize - LOGFS_SEGMENT_RESERVE;
 272
 273         if (cand->valid == 0) {
 274                 /* 100% free segments */
 275                 log_gc_noisy("add reserve segment %x (ec %x) at %llx\n",
 276                                 cand->segno, cand->erase_count,
 277                                 dev_ofs(sb, cand->segno, 0));
 278                 cand = add_list(cand, &super->s_reserve_list);
 279                 if (cand) {
 280                         log_gc_noisy("add free segment %x (ec %x) at %llx\n",
 281                                         cand->segno, cand->erase_count,
 282                                         dev_ofs(sb, cand->segno, 0));
 283                         cand = add_list(cand, &super->s_free_list);
 284                 }
 285         } else {
 286                 /* good candidates for Garbage Collection */
 287                 if (cand->valid < full)
 288                         cand = add_list(cand, &super->s_low_list[cand->dist]);
 289                 /* good candidates for wear leveling,
 290                  * segments that were recently written get ignored */
 291                 if (cand)
 292                         cand = add_list(cand, &super->s_ec_list);
 293         }
 294         if (cand)
 295                 free_candidate(sb, cand);
 296 }
 297
 298 static int add_candidate(struct super_block *sb, u32 segno, u32 valid, u32 ec,
 299                 u8 dist)
 300 {
 301         struct logfs_super *super = logfs_super(sb);
 302         struct gc_candidate *cand;
 303
 304         cand = kmalloc(sizeof(*cand), GFP_NOFS);
 305         if (!cand)
 306                 return -ENOMEM;
 307
 308         cand->segno = segno;
 309         cand->valid = valid;
 310         cand->erase_count = ec;
 311         cand->dist = dist;
 312
 313         btree_insert32(&super->s_cand_tree, segno, cand, GFP_NOFS);
 314         __add_candidate(sb, cand);
 315         return 0;
 316 }
 317
 318 static void remove_segment_from_lists(struct super_block *sb, u32 segno)
 319 {
 320         struct logfs_super *super = logfs_super(sb);
 321         struct gc_candidate *cand;
 322
 323         cand = btree_lookup32(&super->s_cand_tree, segno);
 324         if (cand) {
 325                 remove_from_list(cand);
 326                 free_candidate(sb, cand);
 327         }
 328 }
 329
 330 static void scan_segment(struct super_block *sb, u32 segno)
 331 {
 332         u32 valid, ec = 0;
 333         gc_level_t gc_level = 0;
 334         u8 dist;
 335
 336         if (segment_is_reserved(sb, segno))
 337                 return;
 338
 339         remove_segment_from_lists(sb, segno);
 340         valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
 341         if (valid == RESERVED)
 342                 return;
 343
 344         dist = root_distance(sb, gc_level);
 345         add_candidate(sb, segno, valid, ec, dist);
 346 }
 347
 348 static struct gc_candidate *first_in_list(struct candidate_list *list)
 349 {
 350         if (list->count == 0)
 351                 return NULL;
 352         return rb_entry(rb_first(&list->rb_tree), struct gc_candidate, rb_node);
 353 }
 354
 355 /*
 356  * Find the best segment for garbage collection.  Main criterion is
 357  * the segment requiring the least effort to clean.  Secondary
 358  * criterion is to GC on the lowest level available.
 359  *
 360  * So we search the least effort segment on the lowest level first,
 361  * then move up and pick another segment iff is requires significantly
 362  * less effort.  Hence the LOGFS_MAX_OBJECTSIZE in the comparison.
 363  */
 364 static struct gc_candidate *get_candidate(struct super_block *sb)
 365 {
 366         struct logfs_super *super = logfs_super(sb);
 367         int i, max_dist;
 368         struct gc_candidate *cand = NULL, *this;
 369
 370         max_dist = min(no_free_segments(sb), LOGFS_NO_AREAS);
 371
 372         for (i = max_dist; i >= 0; i--) {
 373                 this = first_in_list(&super->s_low_list[i]);
 374                 if (!this)
 375                         continue;
 376                 if (!cand)
 377                         cand = this;
 378                 if (this->valid + LOGFS_MAX_OBJECTSIZE <= cand->valid)
 379                         cand = this;
 380         }
 381         return cand;
 382 }
 383
 384 static int __logfs_gc_once(struct super_block *sb, struct gc_candidate *cand)
 385 {
 386         struct logfs_super *super = logfs_super(sb);
 387         gc_level_t gc_level;
 388         u32 cleaned, valid, segno, ec;
 389         u8 dist;
 390
 391         if (!cand) {
 392                 log_gc("GC attempted, but no candidate found\n");
 393                 return 0;
 394         }
 395
 396         segno = cand->segno;
 397         dist = cand->dist;
 398         valid = logfs_valid_bytes(sb, segno, &ec, &gc_level);
 399         free_candidate(sb, cand);
 400         log_gc("GC segment #%02x at %llx, %x required, %x free, %x valid, %llx free\n",
 401                         segno, (u64)segno << super->s_segshift,
 402                         dist, no_free_segments(sb), valid,
 403                         super->s_free_bytes);
 404         cleaned = logfs_gc_segment(sb, segno, dist);
 405         log_gc("GC segment #%02x complete - now %x valid\n", segno,
 406                         valid - cleaned);
 407         BUG_ON(cleaned != valid);
 408         return 1;
 409 }
 410
 411 static int logfs_gc_once(struct super_block *sb)
 412 {
 413         struct gc_candidate *cand;
 414
 415         cand = get_candidate(sb);
 416         if (cand)
 417                 remove_from_list(cand);
 418         return __logfs_gc_once(sb, cand);
 419 }
 420
 421 /* returns 1 if a wrap occurs, 0 otherwise */
 422 static int logfs_scan_some(struct super_block *sb)
 423 {
 424         struct logfs_super *super = logfs_super(sb);
 425         u32 segno;
 426         int i, ret = 0;
 427
 428         segno = super->s_sweeper;
 429         for (i = SCAN_RATIO; i > 0; i--) {
 430                 segno++;
 431                 if (segno >= super->s_no_segs) {
 432                         segno = 0;
 433                         ret = 1;
 434                         /* Break out of the loop.  We want to read a single
 435                          * block from the segment size on next invocation if
 436                          * SCAN_RATIO is set to match block size
 437                          */
 438                         break;
 439                 }
 440
 441                 scan_segment(sb, segno);
 442         }
 443         super->s_sweeper = segno;
 444         return ret;
 445 }
 446
 447 /*
 448  * In principle, this function should loop forever, looking for GC candidates
 449  * and moving data.  LogFS is designed in such a way that this loop is
 450  * guaranteed to terminate.
 451  *
 452  * Limiting the loop to some iterations serves purely to catch cases when
 453  * these guarantees have failed.  An actual endless loop is an obvious bug
 454  * and should be reported as such.
 455  */
 456 static void __logfs_gc_pass(struct super_block *sb, int target)
 457 {
 458         struct logfs_super *super = logfs_super(sb);
 459         struct logfs_block *block;
 460         int round, progress, last_progress = 0;
 461
 462         if (no_free_segments(sb) >= target &&
 463                         super->s_no_object_aliases < MAX_OBJ_ALIASES)
 464                 return;
 465
 466         log_gc("__logfs_gc_pass(%x)\n", target);
 467         for (round = 0; round < SCAN_ROUNDS; ) {
 468                 if (no_free_segments(sb) >= target)
 469                         goto write_alias;
 470
 471                 /* Sync in-memory state with on-medium state in case they
 472                  * diverged */
 473                 logfs_write_anchor(sb);
 474                 round += logfs_scan_some(sb);
 475                 if (no_free_segments(sb) >= target)
 476                         goto write_alias;
 477                 progress = logfs_gc_once(sb);
 478                 if (progress)
 479                         last_progress = round;
 480                 else if (round - last_progress > 2)
 481                         break;
 482                 continue;
 483
 484                 /*
 485                  * The goto logic is nasty, I just don't know a better way to
 486                  * code it.  GC is supposed to ensure two things:
 487                  * 1. Enough free segments are available.
 488                  * 2. The number of aliases is bounded.
 489                  * When 1. is achieved, we take a look at 2. and write back
 490                  * some alias-containing blocks, if necessary.  However, after
 491                  * each such write we need to go back to 1., as writes can
 492                  * consume free segments.
 493                  */
 494 write_alias:
 495                 if (super->s_no_object_aliases < MAX_OBJ_ALIASES)
 496                         return;
 497                 if (list_empty(&super->s_object_alias)) {
 498                         /* All aliases are still in btree */
 499                         return;
 500                 }
 501                 log_gc("Write back one alias\n");
 502                 block = list_entry(super->s_object_alias.next,
 503                                 struct logfs_block, alias_list);
 504                 block->ops->write_block(block);
 505                 /*
 506                  * To round off the nasty goto logic, we reset round here.  It
 507                  * is a safety-net for GC not making any progress and limited
 508                  * to something reasonably small.  If incremented it for every
 509                  * single alias, the loop could terminate rather quickly.
 510                  */
 511                 round = 0;
 512         }
 513         LOGFS_BUG(sb);
 514 }
 515
 516 static int wl_ratelimit(struct super_block *sb, u64 *next_event)
 517 {
 518         struct logfs_super *super = logfs_super(sb);
 519
 520         if (*next_event < super->s_gec) {
 521                 *next_event = super->s_gec + WL_RATELIMIT;
 522                 return 0;
 523         }
 524         return 1;
 525 }
 526
 527 static void logfs_wl_pass(struct super_block *sb)
 528 {
 529         struct logfs_super *super = logfs_super(sb);
 530         struct gc_candidate *wl_cand, *free_cand;
 531
 532         if (wl_ratelimit(sb, &super->s_wl_gec_ostore))
 533                 return;
 534
 535         wl_cand = first_in_list(&super->s_ec_list);
 536         if (!wl_cand)
 537                 return;
 538         free_cand = first_in_list(&super->s_free_list);
 539         if (!free_cand)
 540                 return;
 541
 542         if (wl_cand->erase_count < free_cand->erase_count + WL_DELTA) {
 543                 remove_from_list(wl_cand);
 544                 __logfs_gc_once(sb, wl_cand);
 545         }
 546 }
 547
 548 /*
 549  * The journal needs wear leveling as well.  But moving the journal is an
 550  * expensive operation so we try to avoid it as much as possible.  And if we
 551  * have to do it, we move the whole journal, not individual segments.
 552  *
 553  * Ratelimiting is not strictly necessary here, it mainly serves to avoid the
 554  * calculations.  First we check whether moving the journal would be a
 555  * significant improvement.  That means that a) the current journal segments
 556  * have more wear than the future journal segments and b) the current journal
 557  * segments have more wear than normal ostore segments.
 558  * Rationale for b) is that we don't have to move the journal if it is aging
 559  * less than the ostore, even if the reserve segments age even less (they are
 560  * excluded from wear leveling, after all).
 561  * Next we check that the superblocks have less wear than the journal.  Since
 562  * moving the journal requires writing the superblocks, we have to protect the
 563  * superblocks even more than the journal.
 564  *
 565  * Also we double the acceptable wear difference, compared to ostore wear
 566  * leveling.  Journal data is read and rewritten rapidly, comparatively.  So
 567  * soft errors have much less time to accumulate and we allow the journal to
 568  * be a bit worse than the ostore.
 569  */
 570 static void logfs_journal_wl_pass(struct super_block *sb)
 571 {
 572         struct logfs_super *super = logfs_super(sb);
 573         struct gc_candidate *cand;
 574         u32 min_journal_ec = -1, max_reserve_ec = 0;
 575         int i;
 576
 577         if (wl_ratelimit(sb, &super->s_wl_gec_journal))
 578                 return;
 579
 580         if (super->s_reserve_list.count < super->s_no_journal_segs) {
 581                 /* Reserve is not full enough to move complete journal */
 582                 return;
 583         }
 584
 585         journal_for_each(i)
 586                 if (super->s_journal_seg[i])
 587                         min_journal_ec = min(min_journal_ec,
 588                                         super->s_journal_ec[i]);
 589         cand = rb_entry(rb_first(&super->s_free_list.rb_tree),
 590                         struct gc_candidate, rb_node);
 591         max_reserve_ec = cand->erase_count;
 592         for (i = 0; i < 2; i++) {
 593                 struct logfs_segment_entry se;
 594                 u32 segno = seg_no(sb, super->s_sb_ofs[i]);
 595                 u32 ec;
 596
 597                 logfs_get_segment_entry(sb, segno, &se);
 598                 ec = be32_to_cpu(se.ec_level) >> 4;
 599                 max_reserve_ec = max(max_reserve_ec, ec);
 600         }
 601
 602         if (min_journal_ec > max_reserve_ec + 2 * WL_DELTA) {
 603                 do_logfs_journal_wl_pass(sb);
 604         }
 605 }
 606
 607 void logfs_gc_pass(struct super_block *sb)
 608 {
 609         struct logfs_super *super = logfs_super(sb);
 610
 611         //BUG_ON(mutex_trylock(&logfs_super(sb)->s_w_mutex));
 612         /* Write journal before free space is getting saturated with dirty
 613          * objects.
 614          */
 615         if (super->s_dirty_used_bytes + super->s_dirty_free_bytes
 616                         + LOGFS_MAX_OBJECTSIZE >= super->s_free_bytes)
 617                 logfs_write_anchor(sb);
 618         __logfs_gc_pass(sb, super->s_total_levels);
 619         logfs_wl_pass(sb);
 620         logfs_journal_wl_pass(sb);
 621 }
 622
 623 static int check_area(struct super_block *sb, int i)
 624 {
 625         struct logfs_super *super = logfs_super(sb);
 626         struct logfs_area *area = super->s_area[i];
 627         struct logfs_object_header oh;
 628         u32 segno = area->a_segno;
 629         u32 ofs = area->a_used_bytes;
 630         __be32 crc;
 631         int err;
 632
 633         if (!area->a_is_open)
 634                 return 0;
 635
 636         for (ofs = area->a_used_bytes;
 637              ofs <= super->s_segsize - sizeof(oh);
 638              ofs += (u32)be16_to_cpu(oh.len) + sizeof(oh)) {
 639                 err = wbuf_read(sb, dev_ofs(sb, segno, ofs), sizeof(oh), &oh);
 640                 if (err)
 641                         return err;
 642
 643                 if (!memchr_inv(&oh, 0xff, sizeof(oh)))
 644                         break;
 645
 646                 crc = logfs_crc32(&oh, sizeof(oh) - 4, 4);
 647                 if (crc != oh.crc) {
 648                         printk(KERN_INFO "interrupted header at %llx\n",
 649                                         dev_ofs(sb, segno, ofs));
 650                         return 0;
 651                 }
 652         }
 653         if (ofs != area->a_used_bytes) {
 654                 printk(KERN_INFO "%x bytes unaccounted data found at %llx\n",
 655                                 ofs - area->a_used_bytes,
 656                                 dev_ofs(sb, segno, area->a_used_bytes));
 657                 area->a_used_bytes = ofs;
 658         }
 659         return 0;
 660 }
 661
 662 int logfs_check_areas(struct super_block *sb)
 663 {
 664         int i, err;
 665
 666         for_each_area(i) {
 667                 err = check_area(sb, i);
 668                 if (err)
 669                         return err;
 670         }
 671         return 0;
 672 }
 673
 674 static void logfs_init_candlist(struct candidate_list *list, int maxcount,
 675                 int sort_by_ec)
 676 {
 677         list->count = 0;
 678         list->maxcount = maxcount;
 679         list->sort_by_ec = sort_by_ec;
 680         list->rb_tree = RB_ROOT;
 681 }
 682
 683 int logfs_init_gc(struct super_block *sb)
 684 {
 685         struct logfs_super *super = logfs_super(sb);
 686         int i;
 687
 688         btree_init_mempool32(&super->s_cand_tree, super->s_btree_pool);
 689         logfs_init_candlist(&super->s_free_list, LIST_SIZE + SCAN_RATIO, 1);
 690         logfs_init_candlist(&super->s_reserve_list,
 691                         super->s_bad_seg_reserve, 1);
 692         for_each_area(i)
 693                 logfs_init_candlist(&super->s_low_list[i], LIST_SIZE, 0);
 694         logfs_init_candlist(&super->s_ec_list, LIST_SIZE, 1);
 695         return 0;
 696 }
 697
 698 static void logfs_cleanup_list(struct super_block *sb,
 699                 struct candidate_list *list)
 700 {
 701         struct gc_candidate *cand;
 702
 703         while (list->count) {
 704                 cand = rb_entry(list->rb_tree.rb_node, struct gc_candidate,
 705                                 rb_node);
 706                 remove_from_list(cand);
 707                 free_candidate(sb, cand);
 708         }
 709         BUG_ON(list->rb_tree.rb_node);
 710 }
 711
 712 void logfs_cleanup_gc(struct super_block *sb)
 713 {
 714         struct logfs_super *super = logfs_super(sb);
 715         int i;
 716
 717         if (!super->s_free_list.count)
 718                 return;
 719
 720         /*
 721          * FIXME: The btree may still contain a single empty node.  So we
 722          * call the grim visitor to clean up that mess.  Btree code should
 723          * do it for us, really.
 724          */
 725         btree_grim_visitor32(&super->s_cand_tree, 0, NULL);
 726         logfs_cleanup_list(sb, &super->s_free_list);
 727         logfs_cleanup_list(sb, &super->s_reserve_list);
 728         for_each_area(i)
 729                 logfs_cleanup_list(sb, &super->s_low_list[i]);
 730         logfs_cleanup_list(sb, &super->s_ec_list);
 731 }