Merge branch 'for-linus' of git://neil.brown.name/md
[sfrench/cifs-2.6.git] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12
13 #include <linux/kernel.h>
14 #include <linux/mtd/mtd.h>
15 #include <linux/slab.h>
16 #include <linux/pagemap.h>
17 #include <linux/crc32.h>
18 #include <linux/compiler.h>
19 #include <linux/stat.h>
20 #include "nodelist.h"
21 #include "compr.h"
22
/*
 * Forward declarations of the file-local helpers that do the actual work
 * of moving a node.  jffs2_garbage_collect_pass() calls the _pristine and
 * _live variants; jffs2_garbage_collect_live() dispatches to the others
 * according to what kind of node it was handed.
 */

/* Copy a REF_PRISTINE node to new space unchanged. */
static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
                struct jffs2_inode_cache *ic,
                struct jffs2_raw_node_ref *raw);

/* Helpers operating on a node that belongs to the inode info 'f'. */
static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_inode_info *f,
                struct jffs2_full_dnode *fd);
static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_inode_info *f,
                struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_inode_info *f,
                struct jffs2_full_dirent *fd);
static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_inode_info *f,
                struct jffs2_full_dnode *fn,
                uint32_t start, uint32_t end);
static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_inode_info *f,
                struct jffs2_full_dnode *fn,
                uint32_t start, uint32_t end);

/* Work out what kind of node 'raw' is within 'f' and GC it accordingly. */
static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,
                struct jffs2_eraseblock *jeb,
                struct jffs2_raw_node_ref *raw,
                struct jffs2_inode_info *f);
40
41 /* Called with erase_completion_lock held */
42 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
43 {
44         struct jffs2_eraseblock *ret;
45         struct list_head *nextlist = NULL;
46         int n = jiffies % 128;
47
48         /* Pick an eraseblock to garbage collect next. This is where we'll
49            put the clever wear-levelling algorithms. Eventually.  */
50         /* We possibly want to favour the dirtier blocks more when the
51            number of free blocks is low. */
52 again:
53         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
54                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
55                 nextlist = &c->bad_used_list;
56         } else if (n < 50 && !list_empty(&c->erasable_list)) {
57                 /* Note that most of them will have gone directly to be erased.
58                    So don't favour the erasable_list _too_ much. */
59                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
60                 nextlist = &c->erasable_list;
61         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
62                 /* Most of the time, pick one off the very_dirty list */
63                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
64                 nextlist = &c->very_dirty_list;
65         } else if (n < 126 && !list_empty(&c->dirty_list)) {
66                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
67                 nextlist = &c->dirty_list;
68         } else if (!list_empty(&c->clean_list)) {
69                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
70                 nextlist = &c->clean_list;
71         } else if (!list_empty(&c->dirty_list)) {
72                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
73
74                 nextlist = &c->dirty_list;
75         } else if (!list_empty(&c->very_dirty_list)) {
76                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
77                 nextlist = &c->very_dirty_list;
78         } else if (!list_empty(&c->erasable_list)) {
79                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
80
81                 nextlist = &c->erasable_list;
82         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
83                 /* There are blocks are wating for the wbuf sync */
84                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
85                 spin_unlock(&c->erase_completion_lock);
86                 jffs2_flush_wbuf_pad(c);
87                 spin_lock(&c->erase_completion_lock);
88                 goto again;
89         } else {
90                 /* Eep. All were empty */
91                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
92                 return NULL;
93         }
94
95         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
96         list_del(&ret->list);
97         c->gcblock = ret;
98         ret->gc_node = ret->first_node;
99         if (!ret->gc_node) {
100                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
101                 BUG();
102         }
103
104         /* Have we accidentally picked a clean block with wasted space ? */
105         if (ret->wasted_size) {
106                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
107                 ret->dirty_size += ret->wasted_size;
108                 c->wasted_size -= ret->wasted_size;
109                 c->dirty_size += ret->wasted_size;
110                 ret->wasted_size = 0;
111         }
112
113         return ret;
114 }
115
116 /* jffs2_garbage_collect_pass
117  * Make a single attempt to progress GC. Move one node, and possibly
118  * start erasing one eraseblock.
     *
     * Locking: takes c->alloc_sem (interruptibly) on entry and releases it
     * on every return path. erase_completion_lock and inocache_lock are
     * taken and dropped internally; neither is held on return.
     *
     * Return values, as established by the code below:
     *   -EINTR   the wait for alloc_sem was interrupted.
     *   While c->unchecked_size is non-zero no GC is done at all; instead
     *   one inode cache entry is CRC-checked per call and the result of
     *   jffs2_do_crccheck_inode() is returned. In that phase:
     *     -ENOSPC  checked_ino has passed highest_ino and
     *              jffs2_verify_xattr() has returned non-zero, yet
     *              unchecked space remains.
     *     0        we slept waiting for an inode in INO_STATE_READING.
     *   0        jffs2_erase_pending_blocks(c, 1) returned non-zero.
     *   -EAGAIN  no block could be picked but blocks are being erased.
     *   -EIO     no block could be picked and nothing is being erased.
     *   -ENOSPC  the node was processed but is still not obsolete and the
     *            block's dirty_size did not change.
     *   Otherwise the value from the per-node helper, or the error from
     *   jffs2_gc_fetch_inode() (0 if it returned NULL).
119  */
120 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
121 {
122         struct jffs2_inode_info *f;
123         struct jffs2_inode_cache *ic;
124         struct jffs2_eraseblock *jeb;
125         struct jffs2_raw_node_ref *raw;
126         uint32_t gcblock_dirty;
127         int ret = 0, inum, nlink;
128         int xattr = 0;
129
130         if (mutex_lock_interruptible(&c->alloc_sem))
131                 return -EINTR;
132
            /* CRC-check phase. The loop is left via 'break' with
               erase_completion_lock held once there is no unchecked
               space; every other exit from the loop body either
               'continue's to the next inode number or returns. */
133         for (;;) {
134                 spin_lock(&c->erase_completion_lock);
135                 if (!c->unchecked_size)
136                         break;
137
138                 /* We can't start doing GC yet. We haven't finished checking
139                    the node CRCs etc. Do it now. */
140
141                 /* checked_ino is protected by the alloc_sem */
142                 if (c->checked_ino > c->highest_ino && xattr) {
143                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144                                c->unchecked_size);
145                         jffs2_dbg_dump_block_lists_nolock(c);
146                         spin_unlock(&c->erase_completion_lock);
147                         mutex_unlock(&c->alloc_sem);
148                         return -ENOSPC;
149                 }
150
151                 spin_unlock(&c->erase_completion_lock);
152
                    /* Keep calling the xattr verifier on each iteration
                       until it reports a non-zero result. */
153                 if (!xattr)
154                         xattr = jffs2_verify_xattr(c);
155
156                 spin_lock(&c->inocache_lock);
157
                    /* Look up the next inode number; the counter advances
                       whether or not an inocache exists for it. */
158                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
159
160                 if (!ic) {
161                         spin_unlock(&c->inocache_lock);
162                         continue;
163                 }
164
165                 if (!ic->pino_nlink) {
166                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
167                                   ic->ino));
168                         spin_unlock(&c->inocache_lock);
169                         jffs2_xattr_delete_inode(c, ic);
170                         continue;
171                 }
172                 switch(ic->state) {
173                 case INO_STATE_CHECKEDABSENT:
174                 case INO_STATE_PRESENT:
175                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
176                         spin_unlock(&c->inocache_lock);
177                         continue;
178
179                 case INO_STATE_GC:
180                 case INO_STATE_CHECKING:
181                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
182                         spin_unlock(&c->inocache_lock);
183                         BUG();
184
185                 case INO_STATE_READING:
186                         /* We need to wait for it to finish, lest we move on
187                            and trigger the BUG() above while we haven't yet
188                            finished checking all its nodes */
189                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
190                         /* We need to come back again for the _same_ inode. We've
191                          made no progress in this case, but that should be OK */
192                         c->checked_ino--;
193
194                         mutex_unlock(&c->alloc_sem);
195                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
196                         return 0;
197
198                 default:
199                         BUG();
200
201                 case INO_STATE_UNCHECKED:
202                         ;
203                 }
                    /* Only INO_STATE_UNCHECKED reaches this point. Claim
                       the inode before dropping the lock. */
204                 ic->state = INO_STATE_CHECKING;
205                 spin_unlock(&c->inocache_lock);
206
207                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
208
209                 ret = jffs2_do_crccheck_inode(c, ic);
210                 if (ret)
211                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
212
                    /* The state is set to CHECKEDABSENT whether or not the
                       check succeeded; one inode per call, so return. */
213                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
214                 mutex_unlock(&c->alloc_sem);
215                 return ret;
216         }
217
            /* erase_completion_lock is held from here on. */
218         /* If there are any blocks which need erasing, erase them now */
219         if (!list_empty(&c->erase_complete_list) ||
220             !list_empty(&c->erase_pending_list)) {
221                 spin_unlock(&c->erase_completion_lock);
222                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
223                 if (jffs2_erase_pending_blocks(c, 1)) {
224                         mutex_unlock(&c->alloc_sem);
225                         return 0;
226                 }
227                 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
228                 spin_lock(&c->erase_completion_lock);
229         }
230
231         /* First, work out which block we're garbage-collecting */
232         jeb = c->gcblock;
233
234         if (!jeb)
235                 jeb = jffs2_find_gc_block(c);
236
237         if (!jeb) {
238                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
239                 if (c->nr_erasing_blocks) {
240                         spin_unlock(&c->erase_completion_lock);
241                         mutex_unlock(&c->alloc_sem);
242                         return -EAGAIN;
243                 }
244                 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
245                 spin_unlock(&c->erase_completion_lock);
246                 mutex_unlock(&c->alloc_sem);
247                 return -EIO;
248         }
249
250         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
251         D1(if (c->nextblock)
252            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
253
            /* Nothing live in the block: skip straight to queueing it for
               erase. The jump targets 'eraseit' rather than 'eraseit_lock'
               because erase_completion_lock is still held here. */
254         if (!jeb->used_size) {
255                 mutex_unlock(&c->alloc_sem);
256                 goto eraseit;
257         }
258
259         raw = jeb->gc_node;
            /* Remembered so that test_gcnode can tell whether this pass
               changed the block's dirty_size. */
260         gcblock_dirty = jeb->dirty_size;
261
            /* Advance past refs that are already obsolete. */
262         while(ref_obsolete(raw)) {
263                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
264                 raw = ref_next(raw);
265                 if (unlikely(!raw)) {
266                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
267                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
268                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
269                         jeb->gc_node = raw;
270                         spin_unlock(&c->erase_completion_lock);
271                         mutex_unlock(&c->alloc_sem);
272                         BUG();
273                 }
274         }
275         jeb->gc_node = raw;
276
277         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
278
279         if (!raw->next_in_ino) {
280                 /* Inode-less node. Clean marker, snapshot or something like that */
281                 spin_unlock(&c->erase_completion_lock);
282                 if (ref_flags(raw) == REF_PRISTINE) {
283                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
                            /* NOTE(review): the return value is discarded, so a
                               failed copy still returns 0 from this function. */
284                         jffs2_garbage_collect_pristine(c, NULL, raw);
285                 } else {
286                         /* Just mark it obsolete */
287                         jffs2_mark_node_obsolete(c, raw);
288                 }
289                 mutex_unlock(&c->alloc_sem);
290                 goto eraseit_lock;
291         }
292
293         ic = jffs2_raw_ref_to_ic(raw);
294
295 #ifdef CONFIG_JFFS2_FS_XATTR
296         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
297          * We can decide whether this node is inode or xattr by ic->class.     */
298         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
299             || ic->class == RAWNODE_CLASS_XATTR_REF) {
300                 spin_unlock(&c->erase_completion_lock);
301
302                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
303                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
304                 } else {
305                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
306                 }
307                 goto test_gcnode;
308         }
309 #endif
310
311         /* We need to hold the inocache. Either the erase_completion_lock or
312            the inocache_lock are sufficient; we trade down since the inocache_lock
313            causes less contention. */
314         spin_lock(&c->inocache_lock);
315
316         spin_unlock(&c->erase_completion_lock);
317
318         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
319
320         /* Three possibilities:
321            1. Inode is already in-core. We must iget it and do proper
322               updating to its fragtree, etc.
323            2. Inode is not in-core, node is REF_PRISTINE. We lock the
324               inocache to prevent a read_inode(), copy the node intact.
325            3. Inode is not in-core, node is not pristine. We must iget()
326               and take the slow path.
327         */
328
329         switch(ic->state) {
330         case INO_STATE_CHECKEDABSENT:
331                 /* It's been checked, but it's not currently in-core.
332                    We can just copy any pristine nodes, but have
333                    to prevent anyone else from doing read_inode() while
334                    we're at it, so we set the state accordingly */
335                 if (ref_flags(raw) == REF_PRISTINE)
336                         ic->state = INO_STATE_GC;
337                 else {
338                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
339                                   ic->ino));
340                 }
341                 break;
342
343         case INO_STATE_PRESENT:
344                 /* It's in-core. GC must iget() it. */
345                 break;
346
347         case INO_STATE_UNCHECKED:
348         case INO_STATE_CHECKING:
349         case INO_STATE_GC:
350                 /* Should never happen. We should have finished checking
351                    by the time we actually start doing any GC, and since
352                    we're holding the alloc_sem, no other garbage collection
353                    can happen.
354                 */
355                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
356                        ic->ino, ic->state);
357                 mutex_unlock(&c->alloc_sem);
358                 spin_unlock(&c->inocache_lock);
359                 BUG();
360
361         case INO_STATE_READING:
362                 /* Someone's currently trying to read it. We must wait for
363                    them to finish and then go through the full iget() route
364                    to do the GC. However, sometimes read_inode() needs to get
365                    the alloc_sem() (for marking nodes invalid) so we must
366                    drop the alloc_sem before sleeping. */
367
368                 mutex_unlock(&c->alloc_sem);
369                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
370                           ic->ino, ic->state));
371                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
372                 /* And because we dropped the alloc_sem we must start again from the
373                    beginning. Ponder chance of livelock here -- we're returning success
374                    without actually making any progress.
375
376                    Q: What are the chances that the inode is back in INO_STATE_READING
377                    again by the time we next enter this function? And that this happens
378                    enough times to cause a real delay?
379
380                    A: Small enough that I don't care :)
381                 */
382                 return 0;
383         }
384
385         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
386            node intact, and we don't have to muck about with the fragtree etc.
387            because we know it's not in-core. If it _was_ in-core, we go through
388            all the iget() crap anyway */
389
390         if (ic->state == INO_STATE_GC) {
391                 spin_unlock(&c->inocache_lock);
392
393                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
394
                    /* Put the state back and wake anyone sleeping on
                       inocache_wq, whatever the outcome of the copy. */
395                 spin_lock(&c->inocache_lock);
396                 ic->state = INO_STATE_CHECKEDABSENT;
397                 wake_up(&c->inocache_wq);
398
                    /* -EBADFD is the "take the slow path instead" result;
                       anything else, success included, ends this pass. */
399                 if (ret != -EBADFD) {
400                         spin_unlock(&c->inocache_lock);
401                         goto test_gcnode;
402                 }
403
404                 /* Fall through if it wanted us to, with inocache_lock held */
405         }
406
407         /* Prevent the fairly unlikely race where the gcblock is
408            entirely obsoleted by the final close of a file which had
409            the only valid nodes in the block, followed by erasure,
410            followed by freeing of the ic because the erased block(s)
411            held _all_ the nodes of that inode.... never been seen but
412            it's vaguely possible. */
413
            /* Copy what is needed out of 'ic' while inocache_lock is still
               held; 'ic' is not dereferenced again after the unlock. */
414         inum = ic->ino;
415         nlink = ic->pino_nlink;
416         spin_unlock(&c->inocache_lock);
417
418         f = jffs2_gc_fetch_inode(c, inum, !nlink);
419         if (IS_ERR(f)) {
420                 ret = PTR_ERR(f);
421                 goto release_sem;
422         }
423         if (!f) {
424                 ret = 0;
425                 goto release_sem;
426         }
427
428         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
429
430         jffs2_gc_release_inode(c, f);
431
432  test_gcnode:
            /* Progress check: this pass must either have changed the
               block's dirty_size or left gc_node obsolete. */
433         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
434                 /* Eep. This really should never happen. GC is broken */
435                 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
436                 ret = -ENOSPC;
437         }
438  release_sem:
439         mutex_unlock(&c->alloc_sem);
440
441  eraseit_lock:
442         /* If we've finished this block, start it erasing */
443         spin_lock(&c->erase_completion_lock);
444
445  eraseit:
            /* Reached with erase_completion_lock held and alloc_sem
               already released. */
446         if (c->gcblock && !c->gcblock->used_size) {
447                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
448                 /* We're GC'ing an empty block? */
449                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
450                 c->gcblock = NULL;
451                 c->nr_erasing_blocks++;
452                 jffs2_garbage_collect_trigger(c);
453         }
454         spin_unlock(&c->erase_completion_lock);
455
456         return ret;
457 }
458
459 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
460                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
461 {
462         struct jffs2_node_frag *frag;
463         struct jffs2_full_dnode *fn = NULL;
464         struct jffs2_full_dirent *fd;
465         uint32_t start = 0, end = 0, nrfrags = 0;
466         int ret = 0;
467
468         mutex_lock(&f->sem);
469
470         /* Now we have the lock for this inode. Check that it's still the one at the head
471            of the list. */
472
473         spin_lock(&c->erase_completion_lock);
474
475         if (c->gcblock != jeb) {
476                 spin_unlock(&c->erase_completion_lock);
477                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
478                 goto upnout;
479         }
480         if (ref_obsolete(raw)) {
481                 spin_unlock(&c->erase_completion_lock);
482                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
483                 /* They'll call again */
484                 goto upnout;
485         }
486         spin_unlock(&c->erase_completion_lock);
487
488         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
489         if (f->metadata && f->metadata->raw == raw) {
490                 fn = f->metadata;
491                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
492                 goto upnout;
493         }
494
495         /* FIXME. Read node and do lookup? */
496         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
497                 if (frag->node && frag->node->raw == raw) {
498                         fn = frag->node;
499                         end = frag->ofs + frag->size;
500                         if (!nrfrags++)
501                                 start = frag->ofs;
502                         if (nrfrags == frag->node->frags)
503                                 break; /* We've found them all */
504                 }
505         }
506         if (fn) {
507                 if (ref_flags(raw) == REF_PRISTINE) {
508                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
509                         if (!ret) {
510                                 /* Urgh. Return it sensibly. */
511                                 frag->node->raw = f->inocache->nodes;
512                         }
513                         if (ret != -EBADFD)
514                                 goto upnout;
515                 }
516                 /* We found a datanode. Do the GC */
517                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
518                         /* It crosses a page boundary. Therefore, it must be a hole. */
519                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
520                 } else {
521                         /* It could still be a hole. But we GC the page this way anyway */
522                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
523                 }
524                 goto upnout;
525         }
526
527         /* Wasn't a dnode. Try dirent */
528         for (fd = f->dents; fd; fd=fd->next) {
529                 if (fd->raw == raw)
530                         break;
531         }
532
533         if (fd && fd->ino) {
534                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
535         } else if (fd) {
536                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
537         } else {
538                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
539                        ref_offset(raw), f->inocache->ino);
540                 if (ref_obsolete(raw)) {
541                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
542                 } else {
543                         jffs2_dbg_dump_node(c, ref_offset(raw));
544                         BUG();
545                 }
546         }
547  upnout:
548         mutex_unlock(&f->sem);
549
550         return ret;
551 }
552
553 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
554                                           struct jffs2_inode_cache *ic,
555                                           struct jffs2_raw_node_ref *raw)
556 {
557         union jffs2_node_union *node;
558         size_t retlen;
559         int ret;
560         uint32_t phys_ofs, alloclen;
561         uint32_t crc, rawlen;
562         int retried = 0;
563
564         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
565
566         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
567
568         /* Ask for a small amount of space (or the totlen if smaller) because we
569            don't want to force wastage of the end of a block if splitting would
570            work. */
571         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
572                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
573
574         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
575         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
576
577         if (ret)
578                 return ret;
579
580         if (alloclen < rawlen) {
581                 /* Doesn't fit untouched. We'll go the old route and split it */
582                 return -EBADFD;
583         }
584
585         node = kmalloc(rawlen, GFP_KERNEL);
586         if (!node)
587                 return -ENOMEM;
588
589         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
590         if (!ret && retlen != rawlen)
591                 ret = -EIO;
592         if (ret)
593                 goto out_node;
594
595         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
596         if (je32_to_cpu(node->u.hdr_crc) != crc) {
597                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
598                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
599                 goto bail;
600         }
601
602         switch(je16_to_cpu(node->u.nodetype)) {
603         case JFFS2_NODETYPE_INODE:
604                 crc = crc32(0, node, sizeof(node->i)-8);
605                 if (je32_to_cpu(node->i.node_crc) != crc) {
606                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
607                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
608                         goto bail;
609                 }
610
611                 if (je32_to_cpu(node->i.dsize)) {
612                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
613                         if (je32_to_cpu(node->i.data_crc) != crc) {
614                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
615                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
616                                 goto bail;
617                         }
618                 }
619                 break;
620
621         case JFFS2_NODETYPE_DIRENT:
622                 crc = crc32(0, node, sizeof(node->d)-8);
623                 if (je32_to_cpu(node->d.node_crc) != crc) {
624                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
625                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
626                         goto bail;
627                 }
628
629                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
630                         printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
631                         goto bail;
632                 }
633
634                 if (node->d.nsize) {
635                         crc = crc32(0, node->d.name, node->d.nsize);
636                         if (je32_to_cpu(node->d.name_crc) != crc) {
637                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
638                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
639                                 goto bail;
640                         }
641                 }
642                 break;
643         default:
644                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
645                 if (ic) {
646                         printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
647                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
648                         goto bail;
649                 }
650         }
651
652         /* OK, all the CRCs are good; this node can just be copied as-is. */
653  retry:
654         phys_ofs = write_ofs(c);
655
656         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
657
658         if (ret || (retlen != rawlen)) {
659                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
660                        rawlen, phys_ofs, ret, retlen);
661                 if (retlen) {
662                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
663                 } else {
664                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
665                 }
666                 if (!retried) {
667                         /* Try to reallocate space and retry */
668                         uint32_t dummy;
669                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
670
671                         retried = 1;
672
673                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
674
675                         jffs2_dbg_acct_sanity_check(c,jeb);
676                         jffs2_dbg_acct_paranoia_check(c, jeb);
677
678                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
679                                                 /* this is not the exact summary size of it,
680                                                         it is only an upper estimation */
681
682                         if (!ret) {
683                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
684
685                                 jffs2_dbg_acct_sanity_check(c,jeb);
686                                 jffs2_dbg_acct_paranoia_check(c, jeb);
687
688                                 goto retry;
689                         }
690                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
691                 }
692
693                 if (!ret)
694                         ret = -EIO;
695                 goto out_node;
696         }
697         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
698
699         jffs2_mark_node_obsolete(c, raw);
700         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
701
702  out_node:
703         kfree(node);
704         return ret;
705  bail:
706         ret = -EBADFD;
707         goto out_node;
708 }
709
710 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
711                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
712 {
713         struct jffs2_full_dnode *new_fn;
714         struct jffs2_raw_inode ri;
715         struct jffs2_node_frag *last_frag;
716         union jffs2_device_node dev;
717         char *mdata = NULL;
718         int mdatalen = 0;
719         uint32_t alloclen, ilen;
720         int ret;
721
722         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
723             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
724                 /* For these, we don't actually need to read the old node */
725                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
726                 mdata = (char *)&dev;
727                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
728         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
729                 mdatalen = fn->size;
730                 mdata = kmalloc(fn->size, GFP_KERNEL);
731                 if (!mdata) {
732                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
733                         return -ENOMEM;
734                 }
735                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
736                 if (ret) {
737                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
738                         kfree(mdata);
739                         return ret;
740                 }
741                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
742
743         }
744
745         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
746                                 JFFS2_SUMMARY_INODE_SIZE);
747         if (ret) {
748                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
749                        sizeof(ri)+ mdatalen, ret);
750                 goto out;
751         }
752
753         last_frag = frag_last(&f->fragtree);
754         if (last_frag)
755                 /* Fetch the inode length from the fragtree rather then
756                  * from i_size since i_size may have not been updated yet */
757                 ilen = last_frag->ofs + last_frag->size;
758         else
759                 ilen = JFFS2_F_I_SIZE(f);
760
761         memset(&ri, 0, sizeof(ri));
762         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
763         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
764         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
765         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
766
767         ri.ino = cpu_to_je32(f->inocache->ino);
768         ri.version = cpu_to_je32(++f->highest_version);
769         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
770         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
771         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
772         ri.isize = cpu_to_je32(ilen);
773         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
774         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
775         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
776         ri.offset = cpu_to_je32(0);
777         ri.csize = cpu_to_je32(mdatalen);
778         ri.dsize = cpu_to_je32(mdatalen);
779         ri.compr = JFFS2_COMPR_NONE;
780         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
781         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
782
783         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
784
785         if (IS_ERR(new_fn)) {
786                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
787                 ret = PTR_ERR(new_fn);
788                 goto out;
789         }
790         jffs2_mark_node_obsolete(c, fn->raw);
791         jffs2_free_full_dnode(fn);
792         f->metadata = new_fn;
793  out:
794         if (S_ISLNK(JFFS2_F_I_MODE(f)))
795                 kfree(mdata);
796         return ret;
797 }
798
799 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
800                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
801 {
802         struct jffs2_full_dirent *new_fd;
803         struct jffs2_raw_dirent rd;
804         uint32_t alloclen;
805         int ret;
806
807         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
808         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
809         rd.nsize = strlen(fd->name);
810         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
811         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
812
813         rd.pino = cpu_to_je32(f->inocache->ino);
814         rd.version = cpu_to_je32(++f->highest_version);
815         rd.ino = cpu_to_je32(fd->ino);
816         /* If the times on this inode were set by explicit utime() they can be different,
817            so refrain from splatting them. */
818         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
819                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
820         else
821                 rd.mctime = cpu_to_je32(0);
822         rd.type = fd->type;
823         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
824         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
825
826         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
827                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
828         if (ret) {
829                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
830                        sizeof(rd)+rd.nsize, ret);
831                 return ret;
832         }
833         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
834
835         if (IS_ERR(new_fd)) {
836                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
837                 return PTR_ERR(new_fd);
838         }
839         jffs2_add_fd_to_list(c, new_fd, &f->dents);
840         return 0;
841 }
842
843 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
844                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
845 {
846         struct jffs2_full_dirent **fdp = &f->dents;
847         int found = 0;
848
849         /* On a medium where we can't actually mark nodes obsolete
850            pernamently, such as NAND flash, we need to work out
851            whether this deletion dirent is still needed to actively
852            delete a 'real' dirent with the same name that's still
853            somewhere else on the flash. */
854         if (!jffs2_can_mark_obsolete(c)) {
855                 struct jffs2_raw_dirent *rd;
856                 struct jffs2_raw_node_ref *raw;
857                 int ret;
858                 size_t retlen;
859                 int name_len = strlen(fd->name);
860                 uint32_t name_crc = crc32(0, fd->name, name_len);
861                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
862
863                 rd = kmalloc(rawlen, GFP_KERNEL);
864                 if (!rd)
865                         return -ENOMEM;
866
867                 /* Prevent the erase code from nicking the obsolete node refs while
868                    we're looking at them. I really don't like this extra lock but
869                    can't see any alternative. Suggestions on a postcard to... */
870                 mutex_lock(&c->erase_free_sem);
871
872                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
873
874                         cond_resched();
875
876                         /* We only care about obsolete ones */
877                         if (!(ref_obsolete(raw)))
878                                 continue;
879
880                         /* Any dirent with the same name is going to have the same length... */
881                         if (ref_totlen(c, NULL, raw) != rawlen)
882                                 continue;
883
884                         /* Doesn't matter if there's one in the same erase block. We're going to
885                            delete it too at the same time. */
886                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
887                                 continue;
888
889                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
890
891                         /* This is an obsolete node belonging to the same directory, and it's of the right
892                            length. We need to take a closer look...*/
893                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
894                         if (ret) {
895                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
896                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
897                                 continue;
898                         }
899                         if (retlen != rawlen) {
900                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
901                                        retlen, rawlen, ref_offset(raw));
902                                 continue;
903                         }
904
905                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
906                                 continue;
907
908                         /* If the name CRC doesn't match, skip */
909                         if (je32_to_cpu(rd->name_crc) != name_crc)
910                                 continue;
911
912                         /* If the name length doesn't match, or it's another deletion dirent, skip */
913                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
914                                 continue;
915
916                         /* OK, check the actual name now */
917                         if (memcmp(rd->name, fd->name, name_len))
918                                 continue;
919
920                         /* OK. The name really does match. There really is still an older node on
921                            the flash which our deletion dirent obsoletes. So we have to write out
922                            a new deletion dirent to replace it */
923                         mutex_unlock(&c->erase_free_sem);
924
925                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
926                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
927                         kfree(rd);
928
929                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
930                 }
931
932                 mutex_unlock(&c->erase_free_sem);
933                 kfree(rd);
934         }
935
936         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
937            we should update the metadata node with those times accordingly */
938
939         /* No need for it any more. Just mark it obsolete and remove it from the list */
940         while (*fdp) {
941                 if ((*fdp) == fd) {
942                         found = 1;
943                         *fdp = fd->next;
944                         break;
945                 }
946                 fdp = &(*fdp)->next;
947         }
948         if (!found) {
949                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
950         }
951         jffs2_mark_node_obsolete(c, fd->raw);
952         jffs2_free_full_dirent(fd);
953         return 0;
954 }
955
956 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
957                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
958                                       uint32_t start, uint32_t end)
959 {
960         struct jffs2_raw_inode ri;
961         struct jffs2_node_frag *frag;
962         struct jffs2_full_dnode *new_fn;
963         uint32_t alloclen, ilen;
964         int ret;
965
966         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
967                   f->inocache->ino, start, end));
968
969         memset(&ri, 0, sizeof(ri));
970
971         if(fn->frags > 1) {
972                 size_t readlen;
973                 uint32_t crc;
974                 /* It's partially obsoleted by a later write. So we have to
975                    write it out again with the _same_ version as before */
976                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
977                 if (readlen != sizeof(ri) || ret) {
978                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
979                         goto fill;
980                 }
981                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
982                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
983                                ref_offset(fn->raw),
984                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
985                         return -EIO;
986                 }
987                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
988                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
989                                ref_offset(fn->raw),
990                                je32_to_cpu(ri.totlen), sizeof(ri));
991                         return -EIO;
992                 }
993                 crc = crc32(0, &ri, sizeof(ri)-8);
994                 if (crc != je32_to_cpu(ri.node_crc)) {
995                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
996                                ref_offset(fn->raw),
997                                je32_to_cpu(ri.node_crc), crc);
998                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
999                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1000                                start, end, f->inocache->ino);
1001                         goto fill;
1002                 }
1003                 if (ri.compr != JFFS2_COMPR_ZERO) {
1004                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1005                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1006                                start, end, f->inocache->ino);
1007                         goto fill;
1008                 }
1009         } else {
1010         fill:
1011                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1012                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1013                 ri.totlen = cpu_to_je32(sizeof(ri));
1014                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1015
1016                 ri.ino = cpu_to_je32(f->inocache->ino);
1017                 ri.version = cpu_to_je32(++f->highest_version);
1018                 ri.offset = cpu_to_je32(start);
1019                 ri.dsize = cpu_to_je32(end - start);
1020                 ri.csize = cpu_to_je32(0);
1021                 ri.compr = JFFS2_COMPR_ZERO;
1022         }
1023
1024         frag = frag_last(&f->fragtree);
1025         if (frag)
1026                 /* Fetch the inode length from the fragtree rather then
1027                  * from i_size since i_size may have not been updated yet */
1028                 ilen = frag->ofs + frag->size;
1029         else
1030                 ilen = JFFS2_F_I_SIZE(f);
1031
1032         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1033         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1034         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1035         ri.isize = cpu_to_je32(ilen);
1036         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1037         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1038         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1039         ri.data_crc = cpu_to_je32(0);
1040         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1041
1042         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1043                                      JFFS2_SUMMARY_INODE_SIZE);
1044         if (ret) {
1045                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1046                        sizeof(ri), ret);
1047                 return ret;
1048         }
1049         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1050
1051         if (IS_ERR(new_fn)) {
1052                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1053                 return PTR_ERR(new_fn);
1054         }
1055         if (je32_to_cpu(ri.version) == f->highest_version) {
1056                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1057                 if (f->metadata) {
1058                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1059                         jffs2_free_full_dnode(f->metadata);
1060                         f->metadata = NULL;
1061                 }
1062                 return 0;
1063         }
1064
1065         /*
1066          * We should only get here in the case where the node we are
1067          * replacing had more than one frag, so we kept the same version
1068          * number as before. (Except in case of error -- see 'goto fill;'
1069          * above.)
1070          */
1071         D1(if(unlikely(fn->frags <= 1)) {
1072                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1073                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1074                        je32_to_cpu(ri.ino));
1075         });
1076
1077         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1078         mark_ref_normal(new_fn->raw);
1079
1080         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1081              frag; frag = frag_next(frag)) {
1082                 if (frag->ofs > fn->size + fn->ofs)
1083                         break;
1084                 if (frag->node == fn) {
1085                         frag->node = new_fn;
1086                         new_fn->frags++;
1087                         fn->frags--;
1088                 }
1089         }
1090         if (fn->frags) {
1091                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1092                 BUG();
1093         }
1094         if (!new_fn->frags) {
1095                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1096                 BUG();
1097         }
1098
1099         jffs2_mark_node_obsolete(c, fn->raw);
1100         jffs2_free_full_dnode(fn);
1101
1102         return 0;
1103 }
1104
/*
 * Garbage-collect the data of inode f in the range [start, end) by writing
 * it out again as new inode data nodes (ALLOC_GC).
 *
 * When c->nr_free_blocks + c->nr_erasing_blocks exceeds
 * c->resv_blocks_gcmerge, the range is first widened, within the page that
 * contains start, to take in neighbouring frags (see the comments in the
 * merge block for the exact rules).  The page is then obtained through
 * jffs2_gc_fetch_page() and written out in as many nodes as it takes,
 * each one getting the next version number of f.  Any metadata node
 * hanging off f->metadata is marked obsolete and freed once a data node
 * has been added.
 *
 * orig_jeb and fn are not referenced in the body of this function.
 *
 * Returns PTR_ERR() of the page fetch on failure there; otherwise the
 * value left in ret by the last loop iteration (error from
 * jffs2_reserve_space_gc() / jffs2_write_dnode(), or the result of the
 * final jffs2_add_full_dnode_to_inode()).
 */
1105 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1106                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1107                                        uint32_t start, uint32_t end)
1108 {
1109         struct jffs2_full_dnode *new_fn;
1110         struct jffs2_raw_inode ri;
1111         uint32_t alloclen, offset, orig_end, orig_start;
1112         int ret = 0;
1113         unsigned char *comprbuf = NULL, *writebuf;
1114         unsigned long pg;
1115         unsigned char *pg_ptr;
1116
1117         memset(&ri, 0, sizeof(ri));
1118
1119         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1120                   f->inocache->ino, start, end));
1121
             /* Remember the requested range; start/end may be widened below. */
1122         orig_end = end;
1123         orig_start = start;
1124
1125         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1126                 /* Attempt to do some merging. But only expand to cover logically
1127                    adjacent frags if the block containing them is already considered
1128                    to be dirty. Otherwise we end up with GC just going round in
1129                    circles dirtying the nodes it already wrote out, especially
1130                    on NAND where we have small eraseblocks and hence a much higher
1131                    chance of nodes having to be split to cross boundaries. */
1132
1133                 struct jffs2_node_frag *frag;
1134                 uint32_t min, max;
1135
                     /* [min, max) is the page that contains 'start'. */
1136                 min = start & ~(PAGE_CACHE_SIZE-1);
1137                 max = min + PAGE_CACHE_SIZE;
1138
1139                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1140
1141                 /* BUG_ON(!frag) but that'll happen anyway... */
1142
1143                 BUG_ON(frag->ofs != start);
1144
1145                 /* First grow down... */
1146                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1147
1148                         /* If the previous frag doesn't even reach the beginning, there's
1149                            excessive fragmentation. Just merge. */
1150                         if (frag->ofs > min) {
1151                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1152                                           frag->ofs, frag->ofs+frag->size));
1153                                 start = frag->ofs;
1154                                 continue;
1155                         }
1156                         /* OK. This frag holds the first byte of the page. */
1157                         if (!frag->node || !frag->node->raw) {
1158                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1159                                           frag->ofs, frag->ofs+frag->size));
1160                                 break;
1161                         } else {
1162
1163                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1164                                    in a block which is still considered clean? If so, don't obsolete it.
1165                                    If not, cover it anyway. */
1166
1167                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1168                                 struct jffs2_eraseblock *jeb;
1169
1170                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1171
1172                                 if (jeb == c->gcblock) {
1173                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1174                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1175                                         start = frag->ofs;
1176                                         break;
1177                                 }
1178                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1179                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1180                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1181                                         break;
1182                                 }
1183
1184                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1185                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1186                                 start = frag->ofs;
1187                                 break;
1188                         }
1189                 }
1190
1191                 /* ... then up */
1192
1193                 /* Find last frag which is actually part of the node we're to GC. */
1194                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1195
1196                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1197
1198                         /* If the next frag doesn't even reach the end of the page, there's lots
1199                            of fragmentation. Just merge. */
1200                         if (frag->ofs+frag->size < max) {
1201                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1202                                           frag->ofs, frag->ofs+frag->size));
1203                                 end = frag->ofs + frag->size;
1204                                 continue;
1205                         }
1206
                             /* From here on this frag ends exactly at 'max'. */
1207                         if (!frag->node || !frag->node->raw) {
1208                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1209                                           frag->ofs, frag->ofs+frag->size));
1210                                 break;
1211                         } else {
1212
1213                                 /* OK, it's a frag which extends to the end of the page. Does it live
1214                                    in a block which is still considered clean? If so, don't obsolete it.
1215                                    If not, cover it anyway. */
1216
1217                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1218                                 struct jffs2_eraseblock *jeb;
1219
1220                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1221
1222                                 if (jeb == c->gcblock) {
1223                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1224                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1225                                         end = frag->ofs + frag->size;
1226                                         break;
1227                                 }
1228                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1229                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1230                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1231                                         break;
1232                                 }
1233
1234                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1235                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1236                                 end = frag->ofs + frag->size;
1237                                 break;
1238                         }
1239                 }
1240                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1241                           orig_start, orig_end, start, end));
1242
                     /* Merging may only ever widen the range, never shrink it. */
1243                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1244                 BUG_ON(end < orig_end);
1245                 BUG_ON(start > orig_start);
1246         }
1247
1248         /* First, use readpage() to read the appropriate page into the page cache */
1249         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1250          *    triggered garbage collection in the first place?
1251          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1252          *    page OK. We'll actually write it out again in commit_write, which is a little
1253          *    suboptimal, but at least we're correct.
1254          */
1255         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1256
1257         if (IS_ERR(pg_ptr)) {
1258                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1259                 return PTR_ERR(pg_ptr);
1260         }
1261
             /* Keep writing nodes until the originally requested range is
                covered; each pass offers everything up to the (possibly
                widened) 'end' and advances by however much was consumed. */
1262         offset = start;
1263         while(offset < orig_end) {
1264                 uint32_t datalen;
1265                 uint32_t cdatalen;
1266                 uint16_t comprtype = JFFS2_COMPR_NONE;
1267
1268                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1269                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1270
1271                 if (ret) {
1272                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1273                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1274                         break;
1275                 }
                     /* Output is capped by the space just reserved; input by
                        what is left of the range. */
1276                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1277                 datalen = end - offset;
1278
1279                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1280
                     /* datalen/cdatalen are passed by address; presumably
                        jffs2_compress() updates them to the amounts actually
                        consumed and produced -- confirm in compr.c. */
1281                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1282
1283                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1284                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1285                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1286                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1287
1288                 ri.ino = cpu_to_je32(f->inocache->ino);
1289                 ri.version = cpu_to_je32(++f->highest_version);
1290                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1291                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1292                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1293                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1294                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1295                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1296                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1297                 ri.offset = cpu_to_je32(offset);
1298                 ri.csize = cpu_to_je32(cdatalen);
1299                 ri.dsize = cpu_to_je32(datalen);
                     /* comprtype carries two bytes: low -> compr, high -> usercompr. */
1300                 ri.compr = comprtype & 0xff;
1301                 ri.usercompr = (comprtype >> 8) & 0xff;
1302                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1303                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1304
1305                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1306
1307                 jffs2_free_comprbuf(comprbuf, writebuf);
1308
1309                 if (IS_ERR(new_fn)) {
1310                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1311                         ret = PTR_ERR(new_fn);
1312                         break;
1313                 }
                     /* NOTE(review): ret is not checked here.  A failure is only
                        reported to the caller if this was the last iteration,
                        because the next pass overwrites ret. */
1314                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1315                 offset += datalen;
1316                 if (f->metadata) {
1317                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1318                         jffs2_free_full_dnode(f->metadata);
1319                         f->metadata = NULL;
1320                 }
1321         }
1322
1323         jffs2_gc_release_page(c, pg_ptr, &pg);
1324         return ret;
1325 }