jffs2: Require jffs2_garbage_collect_trigger() to be called with lock held
[linux-2.6.git] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  *
6  * Created by David Woodhouse <dwmw2@infradead.org>
7  *
8  * For licensing information, see the file 'LICENCE' in this directory.
9  *
10  */
11
12 #include <linux/kernel.h>
13 #include <linux/mtd/mtd.h>
14 #include <linux/slab.h>
15 #include <linux/pagemap.h>
16 #include <linux/crc32.h>
17 #include <linux/compiler.h>
18 #include <linux/stat.h>
19 #include "nodelist.h"
20 #include "compr.h"
21
22 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
23                                           struct jffs2_inode_cache *ic,
24                                           struct jffs2_raw_node_ref *raw);
25 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
26                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
27 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
28                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
29 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
30                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
31 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
32                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
33                                       uint32_t start, uint32_t end);
34 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36                                        uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
38                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
39
40 /* Called with erase_completion_lock held */
41 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
42 {
43         struct jffs2_eraseblock *ret;
44         struct list_head *nextlist = NULL;
45         int n = jiffies % 128;
46
47         /* Pick an eraseblock to garbage collect next. This is where we'll
48            put the clever wear-levelling algorithms. Eventually.  */
49         /* We possibly want to favour the dirtier blocks more when the
50            number of free blocks is low. */
51 again:
52         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
53                 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
54                 nextlist = &c->bad_used_list;
55         } else if (n < 50 && !list_empty(&c->erasable_list)) {
56                 /* Note that most of them will have gone directly to be erased.
57                    So don't favour the erasable_list _too_ much. */
58                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
59                 nextlist = &c->erasable_list;
60         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
61                 /* Most of the time, pick one off the very_dirty list */
62                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
63                 nextlist = &c->very_dirty_list;
64         } else if (n < 126 && !list_empty(&c->dirty_list)) {
65                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
66                 nextlist = &c->dirty_list;
67         } else if (!list_empty(&c->clean_list)) {
68                 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
69                 nextlist = &c->clean_list;
70         } else if (!list_empty(&c->dirty_list)) {
71                 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
72
73                 nextlist = &c->dirty_list;
74         } else if (!list_empty(&c->very_dirty_list)) {
75                 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
76                 nextlist = &c->very_dirty_list;
77         } else if (!list_empty(&c->erasable_list)) {
78                 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
79
80                 nextlist = &c->erasable_list;
81         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
82                 /* There are blocks are wating for the wbuf sync */
83                 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
84                 spin_unlock(&c->erase_completion_lock);
85                 jffs2_flush_wbuf_pad(c);
86                 spin_lock(&c->erase_completion_lock);
87                 goto again;
88         } else {
89                 /* Eep. All were empty */
90                 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
91                 return NULL;
92         }
93
94         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
95         list_del(&ret->list);
96         c->gcblock = ret;
97         ret->gc_node = ret->first_node;
98         if (!ret->gc_node) {
99                 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
100                 BUG();
101         }
102
103         /* Have we accidentally picked a clean block with wasted space ? */
104         if (ret->wasted_size) {
105                 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
106                 ret->dirty_size += ret->wasted_size;
107                 c->wasted_size -= ret->wasted_size;
108                 c->dirty_size += ret->wasted_size;
109                 ret->wasted_size = 0;
110         }
111
112         return ret;
113 }
114
115 /* jffs2_garbage_collect_pass
116  * Make a single attempt to progress GC. Move one node, and possibly
117  * start erasing one eraseblock.
118  */
119 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
120 {
121         struct jffs2_inode_info *f;
122         struct jffs2_inode_cache *ic;
123         struct jffs2_eraseblock *jeb;
124         struct jffs2_raw_node_ref *raw;
125         uint32_t gcblock_dirty;
126         int ret = 0, inum, nlink;
127         int xattr = 0;
128
129         if (mutex_lock_interruptible(&c->alloc_sem))
130                 return -EINTR;
131
132         for (;;) {
133                 spin_lock(&c->erase_completion_lock);
134                 if (!c->unchecked_size)
135                         break;
136
137                 /* We can't start doing GC yet. We haven't finished checking
138                    the node CRCs etc. Do it now. */
139
140                 /* checked_ino is protected by the alloc_sem */
141                 if (c->checked_ino > c->highest_ino && xattr) {
142                         printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
143                                c->unchecked_size);
144                         jffs2_dbg_dump_block_lists_nolock(c);
145                         spin_unlock(&c->erase_completion_lock);
146                         mutex_unlock(&c->alloc_sem);
147                         return -ENOSPC;
148                 }
149
150                 spin_unlock(&c->erase_completion_lock);
151
152                 if (!xattr)
153                         xattr = jffs2_verify_xattr(c);
154
155                 spin_lock(&c->inocache_lock);
156
157                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
158
159                 if (!ic) {
160                         spin_unlock(&c->inocache_lock);
161                         continue;
162                 }
163
164                 if (!ic->pino_nlink) {
165                         D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink/pino zero\n",
166                                   ic->ino));
167                         spin_unlock(&c->inocache_lock);
168                         jffs2_xattr_delete_inode(c, ic);
169                         continue;
170                 }
171                 switch(ic->state) {
172                 case INO_STATE_CHECKEDABSENT:
173                 case INO_STATE_PRESENT:
174                         D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
175                         spin_unlock(&c->inocache_lock);
176                         continue;
177
178                 case INO_STATE_GC:
179                 case INO_STATE_CHECKING:
180                         printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
181                         spin_unlock(&c->inocache_lock);
182                         BUG();
183
184                 case INO_STATE_READING:
185                         /* We need to wait for it to finish, lest we move on
186                            and trigger the BUG() above while we haven't yet
187                            finished checking all its nodes */
188                         D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
189                         /* We need to come back again for the _same_ inode. We've
190                          made no progress in this case, but that should be OK */
191                         c->checked_ino--;
192
193                         mutex_unlock(&c->alloc_sem);
194                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
195                         return 0;
196
197                 default:
198                         BUG();
199
200                 case INO_STATE_UNCHECKED:
201                         ;
202                 }
203                 ic->state = INO_STATE_CHECKING;
204                 spin_unlock(&c->inocache_lock);
205
206                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
207
208                 ret = jffs2_do_crccheck_inode(c, ic);
209                 if (ret)
210                         printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
211
212                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
213                 mutex_unlock(&c->alloc_sem);
214                 return ret;
215         }
216
217         /* If there are any blocks which need erasing, erase them now */
218         if (!list_empty(&c->erase_complete_list) ||
219             !list_empty(&c->erase_pending_list)) {
220                 spin_unlock(&c->erase_completion_lock);
221                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() erasing pending blocks\n"));
222                 if (jffs2_erase_pending_blocks(c, 1)) {
223                         mutex_unlock(&c->alloc_sem);
224                         return 0;
225                 }
226                 D1(printk(KERN_DEBUG "No progress from erasing blocks; doing GC anyway\n"));
227                 spin_lock(&c->erase_completion_lock);
228         }
229
230         /* First, work out which block we're garbage-collecting */
231         jeb = c->gcblock;
232
233         if (!jeb)
234                 jeb = jffs2_find_gc_block(c);
235
236         if (!jeb) {
237                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
238                 if (c->nr_erasing_blocks) {
239                         spin_unlock(&c->erase_completion_lock);
240                         mutex_unlock(&c->alloc_sem);
241                         return -EAGAIN;
242                 }
243                 D1(printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
244                 spin_unlock(&c->erase_completion_lock);
245                 mutex_unlock(&c->alloc_sem);
246                 return -EIO;
247         }
248
249         D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
250         D1(if (c->nextblock)
251            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
252
253         if (!jeb->used_size) {
254                 mutex_unlock(&c->alloc_sem);
255                 goto eraseit;
256         }
257
258         raw = jeb->gc_node;
259         gcblock_dirty = jeb->dirty_size;
260
261         while(ref_obsolete(raw)) {
262                 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
263                 raw = ref_next(raw);
264                 if (unlikely(!raw)) {
265                         printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
266                         printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
267                                jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
268                         jeb->gc_node = raw;
269                         spin_unlock(&c->erase_completion_lock);
270                         mutex_unlock(&c->alloc_sem);
271                         BUG();
272                 }
273         }
274         jeb->gc_node = raw;
275
276         D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
277
278         if (!raw->next_in_ino) {
279                 /* Inode-less node. Clean marker, snapshot or something like that */
280                 spin_unlock(&c->erase_completion_lock);
281                 if (ref_flags(raw) == REF_PRISTINE) {
282                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
283                         jffs2_garbage_collect_pristine(c, NULL, raw);
284                 } else {
285                         /* Just mark it obsolete */
286                         jffs2_mark_node_obsolete(c, raw);
287                 }
288                 mutex_unlock(&c->alloc_sem);
289                 goto eraseit_lock;
290         }
291
292         ic = jffs2_raw_ref_to_ic(raw);
293
294 #ifdef CONFIG_JFFS2_FS_XATTR
295         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
296          * We can decide whether this node is inode or xattr by ic->class.     */
297         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
298             || ic->class == RAWNODE_CLASS_XATTR_REF) {
299                 spin_unlock(&c->erase_completion_lock);
300
301                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
302                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
303                 } else {
304                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
305                 }
306                 goto test_gcnode;
307         }
308 #endif
309
310         /* We need to hold the inocache. Either the erase_completion_lock or
311            the inocache_lock are sufficient; we trade down since the inocache_lock
312            causes less contention. */
313         spin_lock(&c->inocache_lock);
314
315         spin_unlock(&c->erase_completion_lock);
316
317         D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
318
319         /* Three possibilities:
320            1. Inode is already in-core. We must iget it and do proper
321               updating to its fragtree, etc.
322            2. Inode is not in-core, node is REF_PRISTINE. We lock the
323               inocache to prevent a read_inode(), copy the node intact.
324            3. Inode is not in-core, node is not pristine. We must iget()
325               and take the slow path.
326         */
327
328         switch(ic->state) {
329         case INO_STATE_CHECKEDABSENT:
330                 /* It's been checked, but it's not currently in-core.
331                    We can just copy any pristine nodes, but have
332                    to prevent anyone else from doing read_inode() while
333                    we're at it, so we set the state accordingly */
334                 if (ref_flags(raw) == REF_PRISTINE)
335                         ic->state = INO_STATE_GC;
336                 else {
337                         D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
338                                   ic->ino));
339                 }
340                 break;
341
342         case INO_STATE_PRESENT:
343                 /* It's in-core. GC must iget() it. */
344                 break;
345
346         case INO_STATE_UNCHECKED:
347         case INO_STATE_CHECKING:
348         case INO_STATE_GC:
349                 /* Should never happen. We should have finished checking
350                    by the time we actually start doing any GC, and since
351                    we're holding the alloc_sem, no other garbage collection
352                    can happen.
353                 */
354                 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
355                        ic->ino, ic->state);
356                 mutex_unlock(&c->alloc_sem);
357                 spin_unlock(&c->inocache_lock);
358                 BUG();
359
360         case INO_STATE_READING:
361                 /* Someone's currently trying to read it. We must wait for
362                    them to finish and then go through the full iget() route
363                    to do the GC. However, sometimes read_inode() needs to get
364                    the alloc_sem() (for marking nodes invalid) so we must
365                    drop the alloc_sem before sleeping. */
366
367                 mutex_unlock(&c->alloc_sem);
368                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
369                           ic->ino, ic->state));
370                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
371                 /* And because we dropped the alloc_sem we must start again from the
372                    beginning. Ponder chance of livelock here -- we're returning success
373                    without actually making any progress.
374
375                    Q: What are the chances that the inode is back in INO_STATE_READING
376                    again by the time we next enter this function? And that this happens
377                    enough times to cause a real delay?
378
379                    A: Small enough that I don't care :)
380                 */
381                 return 0;
382         }
383
384         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
385            node intact, and we don't have to muck about with the fragtree etc.
386            because we know it's not in-core. If it _was_ in-core, we go through
387            all the iget() crap anyway */
388
389         if (ic->state == INO_STATE_GC) {
390                 spin_unlock(&c->inocache_lock);
391
392                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
393
394                 spin_lock(&c->inocache_lock);
395                 ic->state = INO_STATE_CHECKEDABSENT;
396                 wake_up(&c->inocache_wq);
397
398                 if (ret != -EBADFD) {
399                         spin_unlock(&c->inocache_lock);
400                         goto test_gcnode;
401                 }
402
403                 /* Fall through if it wanted us to, with inocache_lock held */
404         }
405
406         /* Prevent the fairly unlikely race where the gcblock is
407            entirely obsoleted by the final close of a file which had
408            the only valid nodes in the block, followed by erasure,
409            followed by freeing of the ic because the erased block(s)
410            held _all_ the nodes of that inode.... never been seen but
411            it's vaguely possible. */
412
413         inum = ic->ino;
414         nlink = ic->pino_nlink;
415         spin_unlock(&c->inocache_lock);
416
417         f = jffs2_gc_fetch_inode(c, inum, !nlink);
418         if (IS_ERR(f)) {
419                 ret = PTR_ERR(f);
420                 goto release_sem;
421         }
422         if (!f) {
423                 ret = 0;
424                 goto release_sem;
425         }
426
427         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
428
429         jffs2_gc_release_inode(c, f);
430
431  test_gcnode:
432         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
433                 /* Eep. This really should never happen. GC is broken */
434                 printk(KERN_ERR "Error garbage collecting node at %08x!\n", ref_offset(jeb->gc_node));
435                 ret = -ENOSPC;
436         }
437  release_sem:
438         mutex_unlock(&c->alloc_sem);
439
440  eraseit_lock:
441         /* If we've finished this block, start it erasing */
442         spin_lock(&c->erase_completion_lock);
443
444  eraseit:
445         if (c->gcblock && !c->gcblock->used_size) {
446                 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
447                 /* We're GC'ing an empty block? */
448                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
449                 c->gcblock = NULL;
450                 c->nr_erasing_blocks++;
451                 jffs2_erase_pending_trigger(c);
452         }
453         spin_unlock(&c->erase_completion_lock);
454
455         return ret;
456 }
457
458 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
459                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
460 {
461         struct jffs2_node_frag *frag;
462         struct jffs2_full_dnode *fn = NULL;
463         struct jffs2_full_dirent *fd;
464         uint32_t start = 0, end = 0, nrfrags = 0;
465         int ret = 0;
466
467         mutex_lock(&f->sem);
468
469         /* Now we have the lock for this inode. Check that it's still the one at the head
470            of the list. */
471
472         spin_lock(&c->erase_completion_lock);
473
474         if (c->gcblock != jeb) {
475                 spin_unlock(&c->erase_completion_lock);
476                 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
477                 goto upnout;
478         }
479         if (ref_obsolete(raw)) {
480                 spin_unlock(&c->erase_completion_lock);
481                 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
482                 /* They'll call again */
483                 goto upnout;
484         }
485         spin_unlock(&c->erase_completion_lock);
486
487         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
488         if (f->metadata && f->metadata->raw == raw) {
489                 fn = f->metadata;
490                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
491                 goto upnout;
492         }
493
494         /* FIXME. Read node and do lookup? */
495         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
496                 if (frag->node && frag->node->raw == raw) {
497                         fn = frag->node;
498                         end = frag->ofs + frag->size;
499                         if (!nrfrags++)
500                                 start = frag->ofs;
501                         if (nrfrags == frag->node->frags)
502                                 break; /* We've found them all */
503                 }
504         }
505         if (fn) {
506                 if (ref_flags(raw) == REF_PRISTINE) {
507                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
508                         if (!ret) {
509                                 /* Urgh. Return it sensibly. */
510                                 frag->node->raw = f->inocache->nodes;
511                         }
512                         if (ret != -EBADFD)
513                                 goto upnout;
514                 }
515                 /* We found a datanode. Do the GC */
516                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
517                         /* It crosses a page boundary. Therefore, it must be a hole. */
518                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
519                 } else {
520                         /* It could still be a hole. But we GC the page this way anyway */
521                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
522                 }
523                 goto upnout;
524         }
525
526         /* Wasn't a dnode. Try dirent */
527         for (fd = f->dents; fd; fd=fd->next) {
528                 if (fd->raw == raw)
529                         break;
530         }
531
532         if (fd && fd->ino) {
533                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
534         } else if (fd) {
535                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
536         } else {
537                 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
538                        ref_offset(raw), f->inocache->ino);
539                 if (ref_obsolete(raw)) {
540                         printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
541                 } else {
542                         jffs2_dbg_dump_node(c, ref_offset(raw));
543                         BUG();
544                 }
545         }
546  upnout:
547         mutex_unlock(&f->sem);
548
549         return ret;
550 }
551
552 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
553                                           struct jffs2_inode_cache *ic,
554                                           struct jffs2_raw_node_ref *raw)
555 {
556         union jffs2_node_union *node;
557         size_t retlen;
558         int ret;
559         uint32_t phys_ofs, alloclen;
560         uint32_t crc, rawlen;
561         int retried = 0;
562
563         D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
564
565         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
566
567         /* Ask for a small amount of space (or the totlen if smaller) because we
568            don't want to force wastage of the end of a block if splitting would
569            work. */
570         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
571                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
572
573         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
574         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
575
576         if (ret)
577                 return ret;
578
579         if (alloclen < rawlen) {
580                 /* Doesn't fit untouched. We'll go the old route and split it */
581                 return -EBADFD;
582         }
583
584         node = kmalloc(rawlen, GFP_KERNEL);
585         if (!node)
586                 return -ENOMEM;
587
588         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
589         if (!ret && retlen != rawlen)
590                 ret = -EIO;
591         if (ret)
592                 goto out_node;
593
594         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
595         if (je32_to_cpu(node->u.hdr_crc) != crc) {
596                 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
597                        ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
598                 goto bail;
599         }
600
601         switch(je16_to_cpu(node->u.nodetype)) {
602         case JFFS2_NODETYPE_INODE:
603                 crc = crc32(0, node, sizeof(node->i)-8);
604                 if (je32_to_cpu(node->i.node_crc) != crc) {
605                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
606                                ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
607                         goto bail;
608                 }
609
610                 if (je32_to_cpu(node->i.dsize)) {
611                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
612                         if (je32_to_cpu(node->i.data_crc) != crc) {
613                                 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
614                                        ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
615                                 goto bail;
616                         }
617                 }
618                 break;
619
620         case JFFS2_NODETYPE_DIRENT:
621                 crc = crc32(0, node, sizeof(node->d)-8);
622                 if (je32_to_cpu(node->d.node_crc) != crc) {
623                         printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
624                                ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
625                         goto bail;
626                 }
627
628                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
629                         printk(KERN_WARNING "Name in dirent node at 0x%08x contains zeroes\n", ref_offset(raw));
630                         goto bail;
631                 }
632
633                 if (node->d.nsize) {
634                         crc = crc32(0, node->d.name, node->d.nsize);
635                         if (je32_to_cpu(node->d.name_crc) != crc) {
636                                 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
637                                        ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
638                                 goto bail;
639                         }
640                 }
641                 break;
642         default:
643                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
644                 if (ic) {
645                         printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
646                                ref_offset(raw), je16_to_cpu(node->u.nodetype));
647                         goto bail;
648                 }
649         }
650
651         /* OK, all the CRCs are good; this node can just be copied as-is. */
652  retry:
653         phys_ofs = write_ofs(c);
654
655         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
656
657         if (ret || (retlen != rawlen)) {
658                 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
659                        rawlen, phys_ofs, ret, retlen);
660                 if (retlen) {
661                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
662                 } else {
663                         printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", phys_ofs);
664                 }
665                 if (!retried) {
666                         /* Try to reallocate space and retry */
667                         uint32_t dummy;
668                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
669
670                         retried = 1;
671
672                         D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
673
674                         jffs2_dbg_acct_sanity_check(c,jeb);
675                         jffs2_dbg_acct_paranoia_check(c, jeb);
676
677                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
678                                                 /* this is not the exact summary size of it,
679                                                         it is only an upper estimation */
680
681                         if (!ret) {
682                                 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
683
684                                 jffs2_dbg_acct_sanity_check(c,jeb);
685                                 jffs2_dbg_acct_paranoia_check(c, jeb);
686
687                                 goto retry;
688                         }
689                         D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
690                 }
691
692                 if (!ret)
693                         ret = -EIO;
694                 goto out_node;
695         }
696         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
697
698         jffs2_mark_node_obsolete(c, raw);
699         D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
700
701  out_node:
702         kfree(node);
703         return ret;
704  bail:
705         ret = -EBADFD;
706         goto out_node;
707 }
708
709 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
710                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
711 {
712         struct jffs2_full_dnode *new_fn;
713         struct jffs2_raw_inode ri;
714         struct jffs2_node_frag *last_frag;
715         union jffs2_device_node dev;
716         char *mdata = NULL;
717         int mdatalen = 0;
718         uint32_t alloclen, ilen;
719         int ret;
720
721         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
722             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
723                 /* For these, we don't actually need to read the old node */
724                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
725                 mdata = (char *)&dev;
726                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
727         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
728                 mdatalen = fn->size;
729                 mdata = kmalloc(fn->size, GFP_KERNEL);
730                 if (!mdata) {
731                         printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
732                         return -ENOMEM;
733                 }
734                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
735                 if (ret) {
736                         printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
737                         kfree(mdata);
738                         return ret;
739                 }
740                 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
741
742         }
743
744         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
745                                 JFFS2_SUMMARY_INODE_SIZE);
746         if (ret) {
747                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
748                        sizeof(ri)+ mdatalen, ret);
749                 goto out;
750         }
751
752         last_frag = frag_last(&f->fragtree);
753         if (last_frag)
754                 /* Fetch the inode length from the fragtree rather then
755                  * from i_size since i_size may have not been updated yet */
756                 ilen = last_frag->ofs + last_frag->size;
757         else
758                 ilen = JFFS2_F_I_SIZE(f);
759
760         memset(&ri, 0, sizeof(ri));
761         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
762         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
763         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
764         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
765
766         ri.ino = cpu_to_je32(f->inocache->ino);
767         ri.version = cpu_to_je32(++f->highest_version);
768         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
769         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
770         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
771         ri.isize = cpu_to_je32(ilen);
772         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
773         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
774         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
775         ri.offset = cpu_to_je32(0);
776         ri.csize = cpu_to_je32(mdatalen);
777         ri.dsize = cpu_to_je32(mdatalen);
778         ri.compr = JFFS2_COMPR_NONE;
779         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
780         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
781
782         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
783
784         if (IS_ERR(new_fn)) {
785                 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
786                 ret = PTR_ERR(new_fn);
787                 goto out;
788         }
789         jffs2_mark_node_obsolete(c, fn->raw);
790         jffs2_free_full_dnode(fn);
791         f->metadata = new_fn;
792  out:
793         if (S_ISLNK(JFFS2_F_I_MODE(f)))
794                 kfree(mdata);
795         return ret;
796 }
797
798 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
799                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
800 {
801         struct jffs2_full_dirent *new_fd;
802         struct jffs2_raw_dirent rd;
803         uint32_t alloclen;
804         int ret;
805
806         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
807         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
808         rd.nsize = strlen(fd->name);
809         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
810         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
811
812         rd.pino = cpu_to_je32(f->inocache->ino);
813         rd.version = cpu_to_je32(++f->highest_version);
814         rd.ino = cpu_to_je32(fd->ino);
815         /* If the times on this inode were set by explicit utime() they can be different,
816            so refrain from splatting them. */
817         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
818                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
819         else
820                 rd.mctime = cpu_to_je32(0);
821         rd.type = fd->type;
822         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
823         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
824
825         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
826                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
827         if (ret) {
828                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
829                        sizeof(rd)+rd.nsize, ret);
830                 return ret;
831         }
832         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
833
834         if (IS_ERR(new_fd)) {
835                 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
836                 return PTR_ERR(new_fd);
837         }
838         jffs2_add_fd_to_list(c, new_fd, &f->dents);
839         return 0;
840 }
841
842 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
843                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
844 {
845         struct jffs2_full_dirent **fdp = &f->dents;
846         int found = 0;
847
848         /* On a medium where we can't actually mark nodes obsolete
849            pernamently, such as NAND flash, we need to work out
850            whether this deletion dirent is still needed to actively
851            delete a 'real' dirent with the same name that's still
852            somewhere else on the flash. */
853         if (!jffs2_can_mark_obsolete(c)) {
854                 struct jffs2_raw_dirent *rd;
855                 struct jffs2_raw_node_ref *raw;
856                 int ret;
857                 size_t retlen;
858                 int name_len = strlen(fd->name);
859                 uint32_t name_crc = crc32(0, fd->name, name_len);
860                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
861
862                 rd = kmalloc(rawlen, GFP_KERNEL);
863                 if (!rd)
864                         return -ENOMEM;
865
866                 /* Prevent the erase code from nicking the obsolete node refs while
867                    we're looking at them. I really don't like this extra lock but
868                    can't see any alternative. Suggestions on a postcard to... */
869                 mutex_lock(&c->erase_free_sem);
870
871                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
872
873                         cond_resched();
874
875                         /* We only care about obsolete ones */
876                         if (!(ref_obsolete(raw)))
877                                 continue;
878
879                         /* Any dirent with the same name is going to have the same length... */
880                         if (ref_totlen(c, NULL, raw) != rawlen)
881                                 continue;
882
883                         /* Doesn't matter if there's one in the same erase block. We're going to
884                            delete it too at the same time. */
885                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
886                                 continue;
887
888                         D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
889
890                         /* This is an obsolete node belonging to the same directory, and it's of the right
891                            length. We need to take a closer look...*/
892                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
893                         if (ret) {
894                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
895                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
896                                 continue;
897                         }
898                         if (retlen != rawlen) {
899                                 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
900                                        retlen, rawlen, ref_offset(raw));
901                                 continue;
902                         }
903
904                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
905                                 continue;
906
907                         /* If the name CRC doesn't match, skip */
908                         if (je32_to_cpu(rd->name_crc) != name_crc)
909                                 continue;
910
911                         /* If the name length doesn't match, or it's another deletion dirent, skip */
912                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
913                                 continue;
914
915                         /* OK, check the actual name now */
916                         if (memcmp(rd->name, fd->name, name_len))
917                                 continue;
918
919                         /* OK. The name really does match. There really is still an older node on
920                            the flash which our deletion dirent obsoletes. So we have to write out
921                            a new deletion dirent to replace it */
922                         mutex_unlock(&c->erase_free_sem);
923
924                         D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
925                                   ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
926                         kfree(rd);
927
928                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
929                 }
930
931                 mutex_unlock(&c->erase_free_sem);
932                 kfree(rd);
933         }
934
935         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
936            we should update the metadata node with those times accordingly */
937
938         /* No need for it any more. Just mark it obsolete and remove it from the list */
939         while (*fdp) {
940                 if ((*fdp) == fd) {
941                         found = 1;
942                         *fdp = fd->next;
943                         break;
944                 }
945                 fdp = &(*fdp)->next;
946         }
947         if (!found) {
948                 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
949         }
950         jffs2_mark_node_obsolete(c, fd->raw);
951         jffs2_free_full_dirent(fd);
952         return 0;
953 }
954
955 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
956                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
957                                       uint32_t start, uint32_t end)
958 {
959         struct jffs2_raw_inode ri;
960         struct jffs2_node_frag *frag;
961         struct jffs2_full_dnode *new_fn;
962         uint32_t alloclen, ilen;
963         int ret;
964
965         D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
966                   f->inocache->ino, start, end));
967
968         memset(&ri, 0, sizeof(ri));
969
970         if(fn->frags > 1) {
971                 size_t readlen;
972                 uint32_t crc;
973                 /* It's partially obsoleted by a later write. So we have to
974                    write it out again with the _same_ version as before */
975                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
976                 if (readlen != sizeof(ri) || ret) {
977                         printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
978                         goto fill;
979                 }
980                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
981                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
982                                ref_offset(fn->raw),
983                                je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
984                         return -EIO;
985                 }
986                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
987                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
988                                ref_offset(fn->raw),
989                                je32_to_cpu(ri.totlen), sizeof(ri));
990                         return -EIO;
991                 }
992                 crc = crc32(0, &ri, sizeof(ri)-8);
993                 if (crc != je32_to_cpu(ri.node_crc)) {
994                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
995                                ref_offset(fn->raw),
996                                je32_to_cpu(ri.node_crc), crc);
997                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
998                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
999                                start, end, f->inocache->ino);
1000                         goto fill;
1001                 }
1002                 if (ri.compr != JFFS2_COMPR_ZERO) {
1003                         printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
1004                         printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1005                                start, end, f->inocache->ino);
1006                         goto fill;
1007                 }
1008         } else {
1009         fill:
1010                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1011                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1012                 ri.totlen = cpu_to_je32(sizeof(ri));
1013                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1014
1015                 ri.ino = cpu_to_je32(f->inocache->ino);
1016                 ri.version = cpu_to_je32(++f->highest_version);
1017                 ri.offset = cpu_to_je32(start);
1018                 ri.dsize = cpu_to_je32(end - start);
1019                 ri.csize = cpu_to_je32(0);
1020                 ri.compr = JFFS2_COMPR_ZERO;
1021         }
1022
1023         frag = frag_last(&f->fragtree);
1024         if (frag)
1025                 /* Fetch the inode length from the fragtree rather then
1026                  * from i_size since i_size may have not been updated yet */
1027                 ilen = frag->ofs + frag->size;
1028         else
1029                 ilen = JFFS2_F_I_SIZE(f);
1030
1031         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1032         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1033         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1034         ri.isize = cpu_to_je32(ilen);
1035         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1036         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1037         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1038         ri.data_crc = cpu_to_je32(0);
1039         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1040
1041         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1042                                      JFFS2_SUMMARY_INODE_SIZE);
1043         if (ret) {
1044                 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1045                        sizeof(ri), ret);
1046                 return ret;
1047         }
1048         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1049
1050         if (IS_ERR(new_fn)) {
1051                 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1052                 return PTR_ERR(new_fn);
1053         }
1054         if (je32_to_cpu(ri.version) == f->highest_version) {
1055                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1056                 if (f->metadata) {
1057                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1058                         jffs2_free_full_dnode(f->metadata);
1059                         f->metadata = NULL;
1060                 }
1061                 return 0;
1062         }
1063
1064         /*
1065          * We should only get here in the case where the node we are
1066          * replacing had more than one frag, so we kept the same version
1067          * number as before. (Except in case of error -- see 'goto fill;'
1068          * above.)
1069          */
1070         D1(if(unlikely(fn->frags <= 1)) {
1071                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1072                        fn->frags, je32_to_cpu(ri.version), f->highest_version,
1073                        je32_to_cpu(ri.ino));
1074         });
1075
1076         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1077         mark_ref_normal(new_fn->raw);
1078
1079         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1080              frag; frag = frag_next(frag)) {
1081                 if (frag->ofs > fn->size + fn->ofs)
1082                         break;
1083                 if (frag->node == fn) {
1084                         frag->node = new_fn;
1085                         new_fn->frags++;
1086                         fn->frags--;
1087                 }
1088         }
1089         if (fn->frags) {
1090                 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1091                 BUG();
1092         }
1093         if (!new_fn->frags) {
1094                 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1095                 BUG();
1096         }
1097
1098         jffs2_mark_node_obsolete(c, fn->raw);
1099         jffs2_free_full_dnode(fn);
1100
1101         return 0;
1102 }
1103
1104 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1105                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1106                                        uint32_t start, uint32_t end)
1107 {
1108         struct jffs2_full_dnode *new_fn;
1109         struct jffs2_raw_inode ri;
1110         uint32_t alloclen, offset, orig_end, orig_start;
1111         int ret = 0;
1112         unsigned char *comprbuf = NULL, *writebuf;
1113         unsigned long pg;
1114         unsigned char *pg_ptr;
1115
1116         memset(&ri, 0, sizeof(ri));
1117
1118         D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1119                   f->inocache->ino, start, end));
1120
1121         orig_end = end;
1122         orig_start = start;
1123
1124         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1125                 /* Attempt to do some merging. But only expand to cover logically
1126                    adjacent frags if the block containing them is already considered
1127                    to be dirty. Otherwise we end up with GC just going round in
1128                    circles dirtying the nodes it already wrote out, especially
1129                    on NAND where we have small eraseblocks and hence a much higher
1130                    chance of nodes having to be split to cross boundaries. */
1131
1132                 struct jffs2_node_frag *frag;
1133                 uint32_t min, max;
1134
1135                 min = start & ~(PAGE_CACHE_SIZE-1);
1136                 max = min + PAGE_CACHE_SIZE;
1137
1138                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1139
1140                 /* BUG_ON(!frag) but that'll happen anyway... */
1141
1142                 BUG_ON(frag->ofs != start);
1143
1144                 /* First grow down... */
1145                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1146
1147                         /* If the previous frag doesn't even reach the beginning, there's
1148                            excessive fragmentation. Just merge. */
1149                         if (frag->ofs > min) {
1150                                 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1151                                           frag->ofs, frag->ofs+frag->size));
1152                                 start = frag->ofs;
1153                                 continue;
1154                         }
1155                         /* OK. This frag holds the first byte of the page. */
1156                         if (!frag->node || !frag->node->raw) {
1157                                 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1158                                           frag->ofs, frag->ofs+frag->size));
1159                                 break;
1160                         } else {
1161
1162                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1163                                    in a block which is still considered clean? If so, don't obsolete it.
1164                                    If not, cover it anyway. */
1165
1166                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1167                                 struct jffs2_eraseblock *jeb;
1168
1169                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1170
1171                                 if (jeb == c->gcblock) {
1172                                         D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1173                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1174                                         start = frag->ofs;
1175                                         break;
1176                                 }
1177                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1178                                         D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1179                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1180                                         break;
1181                                 }
1182
1183                                 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1184                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1185                                 start = frag->ofs;
1186                                 break;
1187                         }
1188                 }
1189
1190                 /* ... then up */
1191
1192                 /* Find last frag which is actually part of the node we're to GC. */
1193                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1194
1195                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1196
1197                         /* If the previous frag doesn't even reach the beginning, there's lots
1198                            of fragmentation. Just merge. */
1199                         if (frag->ofs+frag->size < max) {
1200                                 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1201                                           frag->ofs, frag->ofs+frag->size));
1202                                 end = frag->ofs + frag->size;
1203                                 continue;
1204                         }
1205
1206                         if (!frag->node || !frag->node->raw) {
1207                                 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1208                                           frag->ofs, frag->ofs+frag->size));
1209                                 break;
1210                         } else {
1211
1212                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1213                                    in a block which is still considered clean? If so, don't obsolete it.
1214                                    If not, cover it anyway. */
1215
1216                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1217                                 struct jffs2_eraseblock *jeb;
1218
1219                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1220
1221                                 if (jeb == c->gcblock) {
1222                                         D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1223                                                   frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1224                                         end = frag->ofs + frag->size;
1225                                         break;
1226                                 }
1227                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1228                                         D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1229                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1230                                         break;
1231                                 }
1232
1233                                 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1234                                                   frag->ofs, frag->ofs+frag->size, jeb->offset));
1235                                 end = frag->ofs + frag->size;
1236                                 break;
1237                         }
1238                 }
1239                 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1240                           orig_start, orig_end, start, end));
1241
1242                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1243                 BUG_ON(end < orig_end);
1244                 BUG_ON(start > orig_start);
1245         }
1246
1247         /* First, use readpage() to read the appropriate page into the page cache */
1248         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1249          *    triggered garbage collection in the first place?
1250          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1251          *    page OK. We'll actually write it out again in commit_write, which is a little
1252          *    suboptimal, but at least we're correct.
1253          */
1254         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1255
1256         if (IS_ERR(pg_ptr)) {
1257                 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1258                 return PTR_ERR(pg_ptr);
1259         }
1260
1261         offset = start;
1262         while(offset < orig_end) {
1263                 uint32_t datalen;
1264                 uint32_t cdatalen;
1265                 uint16_t comprtype = JFFS2_COMPR_NONE;
1266
1267                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1268                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1269
1270                 if (ret) {
1271                         printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1272                                sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1273                         break;
1274                 }
1275                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1276                 datalen = end - offset;
1277
1278                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1279
1280                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1281
1282                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1283                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1284                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1285                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1286
1287                 ri.ino = cpu_to_je32(f->inocache->ino);
1288                 ri.version = cpu_to_je32(++f->highest_version);
1289                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1290                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1291                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1292                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1293                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1294                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1295                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1296                 ri.offset = cpu_to_je32(offset);
1297                 ri.csize = cpu_to_je32(cdatalen);
1298                 ri.dsize = cpu_to_je32(datalen);
1299                 ri.compr = comprtype & 0xff;
1300                 ri.usercompr = (comprtype >> 8) & 0xff;
1301                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1302                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1303
1304                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1305
1306                 jffs2_free_comprbuf(comprbuf, writebuf);
1307
1308                 if (IS_ERR(new_fn)) {
1309                         printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1310                         ret = PTR_ERR(new_fn);
1311                         break;
1312                 }
1313                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1314                 offset += datalen;
1315                 if (f->metadata) {
1316                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1317                         jffs2_free_full_dnode(f->metadata);
1318                         f->metadata = NULL;
1319                 }
1320         }
1321
1322         jffs2_gc_release_page(c, pg_ptr, &pg);
1323         return ret;
1324 }