Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[linux-3.10.git] / fs / jffs2 / gc.c
1 /*
2  * JFFS2 -- Journalling Flash File System, Version 2.
3  *
4  * Copyright © 2001-2007 Red Hat, Inc.
5  * Copyright © 2004-2010 David Woodhouse <dwmw2@infradead.org>
6  *
7  * Created by David Woodhouse <dwmw2@infradead.org>
8  *
9  * For licensing information, see the file 'LICENCE' in this directory.
10  *
11  */
12
13 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
14
15 #include <linux/kernel.h>
16 #include <linux/mtd/mtd.h>
17 #include <linux/slab.h>
18 #include <linux/pagemap.h>
19 #include <linux/crc32.h>
20 #include <linux/compiler.h>
21 #include <linux/stat.h>
22 #include "nodelist.h"
23 #include "compr.h"
24
25 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
26                                           struct jffs2_inode_cache *ic,
27                                           struct jffs2_raw_node_ref *raw);
28 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
29                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
30 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
31                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
32 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
33                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
34 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
35                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
36                                       uint32_t start, uint32_t end);
37 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
38                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
39                                        uint32_t start, uint32_t end);
40 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
41                                struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
42
43 /* Called with erase_completion_lock held */
44 static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
45 {
46         struct jffs2_eraseblock *ret;
47         struct list_head *nextlist = NULL;
48         int n = jiffies % 128;
49
50         /* Pick an eraseblock to garbage collect next. This is where we'll
51            put the clever wear-levelling algorithms. Eventually.  */
52         /* We possibly want to favour the dirtier blocks more when the
53            number of free blocks is low. */
54 again:
55         if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
56                 jffs2_dbg(1, "Picking block from bad_used_list to GC next\n");
57                 nextlist = &c->bad_used_list;
58         } else if (n < 50 && !list_empty(&c->erasable_list)) {
59                 /* Note that most of them will have gone directly to be erased.
60                    So don't favour the erasable_list _too_ much. */
61                 jffs2_dbg(1, "Picking block from erasable_list to GC next\n");
62                 nextlist = &c->erasable_list;
63         } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
64                 /* Most of the time, pick one off the very_dirty list */
65                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next\n");
66                 nextlist = &c->very_dirty_list;
67         } else if (n < 126 && !list_empty(&c->dirty_list)) {
68                 jffs2_dbg(1, "Picking block from dirty_list to GC next\n");
69                 nextlist = &c->dirty_list;
70         } else if (!list_empty(&c->clean_list)) {
71                 jffs2_dbg(1, "Picking block from clean_list to GC next\n");
72                 nextlist = &c->clean_list;
73         } else if (!list_empty(&c->dirty_list)) {
74                 jffs2_dbg(1, "Picking block from dirty_list to GC next (clean_list was empty)\n");
75
76                 nextlist = &c->dirty_list;
77         } else if (!list_empty(&c->very_dirty_list)) {
78                 jffs2_dbg(1, "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n");
79                 nextlist = &c->very_dirty_list;
80         } else if (!list_empty(&c->erasable_list)) {
81                 jffs2_dbg(1, "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n");
82
83                 nextlist = &c->erasable_list;
84         } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
85                 /* There are blocks are wating for the wbuf sync */
86                 jffs2_dbg(1, "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n");
87                 spin_unlock(&c->erase_completion_lock);
88                 jffs2_flush_wbuf_pad(c);
89                 spin_lock(&c->erase_completion_lock);
90                 goto again;
91         } else {
92                 /* Eep. All were empty */
93                 jffs2_dbg(1, "No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n");
94                 return NULL;
95         }
96
97         ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
98         list_del(&ret->list);
99         c->gcblock = ret;
100         ret->gc_node = ret->first_node;
101         if (!ret->gc_node) {
102                 pr_warn("Eep. ret->gc_node for block at 0x%08x is NULL\n",
103                         ret->offset);
104                 BUG();
105         }
106
107         /* Have we accidentally picked a clean block with wasted space ? */
108         if (ret->wasted_size) {
109                 jffs2_dbg(1, "Converting wasted_size %08x to dirty_size\n",
110                           ret->wasted_size);
111                 ret->dirty_size += ret->wasted_size;
112                 c->wasted_size -= ret->wasted_size;
113                 c->dirty_size += ret->wasted_size;
114                 ret->wasted_size = 0;
115         }
116
117         return ret;
118 }
119
120 /* jffs2_garbage_collect_pass
121  * Make a single attempt to progress GC. Move one node, and possibly
122  * start erasing one eraseblock.
123  */
124 int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
125 {
126         struct jffs2_inode_info *f;
127         struct jffs2_inode_cache *ic;
128         struct jffs2_eraseblock *jeb;
129         struct jffs2_raw_node_ref *raw;
130         uint32_t gcblock_dirty;
131         int ret = 0, inum, nlink;
132         int xattr = 0;
133
134         if (mutex_lock_interruptible(&c->alloc_sem))
135                 return -EINTR;
136
137         for (;;) {
138                 spin_lock(&c->erase_completion_lock);
139                 if (!c->unchecked_size)
140                         break;
141
142                 /* We can't start doing GC yet. We haven't finished checking
143                    the node CRCs etc. Do it now. */
144
145                 /* checked_ino is protected by the alloc_sem */
146                 if (c->checked_ino > c->highest_ino && xattr) {
147                         pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
148                                 c->unchecked_size);
149                         jffs2_dbg_dump_block_lists_nolock(c);
150                         spin_unlock(&c->erase_completion_lock);
151                         mutex_unlock(&c->alloc_sem);
152                         return -ENOSPC;
153                 }
154
155                 spin_unlock(&c->erase_completion_lock);
156
157                 if (!xattr)
158                         xattr = jffs2_verify_xattr(c);
159
160                 spin_lock(&c->inocache_lock);
161
162                 ic = jffs2_get_ino_cache(c, c->checked_ino++);
163
164                 if (!ic) {
165                         spin_unlock(&c->inocache_lock);
166                         continue;
167                 }
168
169                 if (!ic->pino_nlink) {
170                         jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
171                                   ic->ino);
172                         spin_unlock(&c->inocache_lock);
173                         jffs2_xattr_delete_inode(c, ic);
174                         continue;
175                 }
176                 switch(ic->state) {
177                 case INO_STATE_CHECKEDABSENT:
178                 case INO_STATE_PRESENT:
179                         jffs2_dbg(1, "Skipping ino #%u already checked\n",
180                                   ic->ino);
181                         spin_unlock(&c->inocache_lock);
182                         continue;
183
184                 case INO_STATE_GC:
185                 case INO_STATE_CHECKING:
186                         pr_warn("Inode #%u is in state %d during CRC check phase!\n",
187                                 ic->ino, ic->state);
188                         spin_unlock(&c->inocache_lock);
189                         BUG();
190
191                 case INO_STATE_READING:
192                         /* We need to wait for it to finish, lest we move on
193                            and trigger the BUG() above while we haven't yet
194                            finished checking all its nodes */
195                         jffs2_dbg(1, "Waiting for ino #%u to finish reading\n",
196                                   ic->ino);
197                         /* We need to come back again for the _same_ inode. We've
198                          made no progress in this case, but that should be OK */
199                         c->checked_ino--;
200
201                         mutex_unlock(&c->alloc_sem);
202                         sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
203                         return 0;
204
205                 default:
206                         BUG();
207
208                 case INO_STATE_UNCHECKED:
209                         ;
210                 }
211                 ic->state = INO_STATE_CHECKING;
212                 spin_unlock(&c->inocache_lock);
213
214                 jffs2_dbg(1, "%s(): triggering inode scan of ino#%u\n",
215                           __func__, ic->ino);
216
217                 ret = jffs2_do_crccheck_inode(c, ic);
218                 if (ret)
219                         pr_warn("Returned error for crccheck of ino #%u. Expect badness...\n",
220                                 ic->ino);
221
222                 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
223                 mutex_unlock(&c->alloc_sem);
224                 return ret;
225         }
226
227         /* If there are any blocks which need erasing, erase them now */
228         if (!list_empty(&c->erase_complete_list) ||
229             !list_empty(&c->erase_pending_list)) {
230                 spin_unlock(&c->erase_completion_lock);
231                 mutex_unlock(&c->alloc_sem);
232                 jffs2_dbg(1, "%s(): erasing pending blocks\n", __func__);
233                 if (jffs2_erase_pending_blocks(c, 1))
234                         return 0;
235
236                 jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n");
237                 mutex_lock(&c->alloc_sem);
238                 spin_lock(&c->erase_completion_lock);
239         }
240
241         /* First, work out which block we're garbage-collecting */
242         jeb = c->gcblock;
243
244         if (!jeb)
245                 jeb = jffs2_find_gc_block(c);
246
247         if (!jeb) {
248                 /* Couldn't find a free block. But maybe we can just erase one and make 'progress'? */
249                 if (c->nr_erasing_blocks) {
250                         spin_unlock(&c->erase_completion_lock);
251                         mutex_unlock(&c->alloc_sem);
252                         return -EAGAIN;
253                 }
254                 jffs2_dbg(1, "Couldn't find erase block to garbage collect!\n");
255                 spin_unlock(&c->erase_completion_lock);
256                 mutex_unlock(&c->alloc_sem);
257                 return -EIO;
258         }
259
260         jffs2_dbg(1, "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n",
261                   jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size);
262         D1(if (c->nextblock)
263            printk(KERN_DEBUG "Nextblock at  %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
264
265         if (!jeb->used_size) {
266                 mutex_unlock(&c->alloc_sem);
267                 goto eraseit;
268         }
269
270         raw = jeb->gc_node;
271         gcblock_dirty = jeb->dirty_size;
272
273         while(ref_obsolete(raw)) {
274                 jffs2_dbg(1, "Node at 0x%08x is obsolete... skipping\n",
275                           ref_offset(raw));
276                 raw = ref_next(raw);
277                 if (unlikely(!raw)) {
278                         pr_warn("eep. End of raw list while still supposedly nodes to GC\n");
279                         pr_warn("erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
280                                 jeb->offset, jeb->free_size,
281                                 jeb->dirty_size, jeb->used_size);
282                         jeb->gc_node = raw;
283                         spin_unlock(&c->erase_completion_lock);
284                         mutex_unlock(&c->alloc_sem);
285                         BUG();
286                 }
287         }
288         jeb->gc_node = raw;
289
290         jffs2_dbg(1, "Going to garbage collect node at 0x%08x\n",
291                   ref_offset(raw));
292
293         if (!raw->next_in_ino) {
294                 /* Inode-less node. Clean marker, snapshot or something like that */
295                 spin_unlock(&c->erase_completion_lock);
296                 if (ref_flags(raw) == REF_PRISTINE) {
297                         /* It's an unknown node with JFFS2_FEATURE_RWCOMPAT_COPY */
298                         jffs2_garbage_collect_pristine(c, NULL, raw);
299                 } else {
300                         /* Just mark it obsolete */
301                         jffs2_mark_node_obsolete(c, raw);
302                 }
303                 mutex_unlock(&c->alloc_sem);
304                 goto eraseit_lock;
305         }
306
307         ic = jffs2_raw_ref_to_ic(raw);
308
309 #ifdef CONFIG_JFFS2_FS_XATTR
310         /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
311          * We can decide whether this node is inode or xattr by ic->class.     */
312         if (ic->class == RAWNODE_CLASS_XATTR_DATUM
313             || ic->class == RAWNODE_CLASS_XATTR_REF) {
314                 spin_unlock(&c->erase_completion_lock);
315
316                 if (ic->class == RAWNODE_CLASS_XATTR_DATUM) {
317                         ret = jffs2_garbage_collect_xattr_datum(c, (struct jffs2_xattr_datum *)ic, raw);
318                 } else {
319                         ret = jffs2_garbage_collect_xattr_ref(c, (struct jffs2_xattr_ref *)ic, raw);
320                 }
321                 goto test_gcnode;
322         }
323 #endif
324
325         /* We need to hold the inocache. Either the erase_completion_lock or
326            the inocache_lock are sufficient; we trade down since the inocache_lock
327            causes less contention. */
328         spin_lock(&c->inocache_lock);
329
330         spin_unlock(&c->erase_completion_lock);
331
332         jffs2_dbg(1, "%s(): collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n",
333                   __func__, jeb->offset, ref_offset(raw), ref_flags(raw),
334                   ic->ino);
335
336         /* Three possibilities:
337            1. Inode is already in-core. We must iget it and do proper
338               updating to its fragtree, etc.
339            2. Inode is not in-core, node is REF_PRISTINE. We lock the
340               inocache to prevent a read_inode(), copy the node intact.
341            3. Inode is not in-core, node is not pristine. We must iget()
342               and take the slow path.
343         */
344
345         switch(ic->state) {
346         case INO_STATE_CHECKEDABSENT:
347                 /* It's been checked, but it's not currently in-core.
348                    We can just copy any pristine nodes, but have
349                    to prevent anyone else from doing read_inode() while
350                    we're at it, so we set the state accordingly */
351                 if (ref_flags(raw) == REF_PRISTINE)
352                         ic->state = INO_STATE_GC;
353                 else {
354                         jffs2_dbg(1, "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
355                                   ic->ino);
356                 }
357                 break;
358
359         case INO_STATE_PRESENT:
360                 /* It's in-core. GC must iget() it. */
361                 break;
362
363         case INO_STATE_UNCHECKED:
364         case INO_STATE_CHECKING:
365         case INO_STATE_GC:
366                 /* Should never happen. We should have finished checking
367                    by the time we actually start doing any GC, and since
368                    we're holding the alloc_sem, no other garbage collection
369                    can happen.
370                 */
371                 pr_crit("Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
372                         ic->ino, ic->state);
373                 mutex_unlock(&c->alloc_sem);
374                 spin_unlock(&c->inocache_lock);
375                 BUG();
376
377         case INO_STATE_READING:
378                 /* Someone's currently trying to read it. We must wait for
379                    them to finish and then go through the full iget() route
380                    to do the GC. However, sometimes read_inode() needs to get
381                    the alloc_sem() (for marking nodes invalid) so we must
382                    drop the alloc_sem before sleeping. */
383
384                 mutex_unlock(&c->alloc_sem);
385                 jffs2_dbg(1, "%s(): waiting for ino #%u in state %d\n",
386                           __func__, ic->ino, ic->state);
387                 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
388                 /* And because we dropped the alloc_sem we must start again from the
389                    beginning. Ponder chance of livelock here -- we're returning success
390                    without actually making any progress.
391
392                    Q: What are the chances that the inode is back in INO_STATE_READING
393                    again by the time we next enter this function? And that this happens
394                    enough times to cause a real delay?
395
396                    A: Small enough that I don't care :)
397                 */
398                 return 0;
399         }
400
401         /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
402            node intact, and we don't have to muck about with the fragtree etc.
403            because we know it's not in-core. If it _was_ in-core, we go through
404            all the iget() crap anyway */
405
406         if (ic->state == INO_STATE_GC) {
407                 spin_unlock(&c->inocache_lock);
408
409                 ret = jffs2_garbage_collect_pristine(c, ic, raw);
410
411                 spin_lock(&c->inocache_lock);
412                 ic->state = INO_STATE_CHECKEDABSENT;
413                 wake_up(&c->inocache_wq);
414
415                 if (ret != -EBADFD) {
416                         spin_unlock(&c->inocache_lock);
417                         goto test_gcnode;
418                 }
419
420                 /* Fall through if it wanted us to, with inocache_lock held */
421         }
422
423         /* Prevent the fairly unlikely race where the gcblock is
424            entirely obsoleted by the final close of a file which had
425            the only valid nodes in the block, followed by erasure,
426            followed by freeing of the ic because the erased block(s)
427            held _all_ the nodes of that inode.... never been seen but
428            it's vaguely possible. */
429
430         inum = ic->ino;
431         nlink = ic->pino_nlink;
432         spin_unlock(&c->inocache_lock);
433
434         f = jffs2_gc_fetch_inode(c, inum, !nlink);
435         if (IS_ERR(f)) {
436                 ret = PTR_ERR(f);
437                 goto release_sem;
438         }
439         if (!f) {
440                 ret = 0;
441                 goto release_sem;
442         }
443
444         ret = jffs2_garbage_collect_live(c, jeb, raw, f);
445
446         jffs2_gc_release_inode(c, f);
447
448  test_gcnode:
449         if (jeb->dirty_size == gcblock_dirty && !ref_obsolete(jeb->gc_node)) {
450                 /* Eep. This really should never happen. GC is broken */
451                 pr_err("Error garbage collecting node at %08x!\n",
452                        ref_offset(jeb->gc_node));
453                 ret = -ENOSPC;
454         }
455  release_sem:
456         mutex_unlock(&c->alloc_sem);
457
458  eraseit_lock:
459         /* If we've finished this block, start it erasing */
460         spin_lock(&c->erase_completion_lock);
461
462  eraseit:
463         if (c->gcblock && !c->gcblock->used_size) {
464                 jffs2_dbg(1, "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n",
465                           c->gcblock->offset);
466                 /* We're GC'ing an empty block? */
467                 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
468                 c->gcblock = NULL;
469                 c->nr_erasing_blocks++;
470                 jffs2_garbage_collect_trigger(c);
471         }
472         spin_unlock(&c->erase_completion_lock);
473
474         return ret;
475 }
476
477 static int jffs2_garbage_collect_live(struct jffs2_sb_info *c,  struct jffs2_eraseblock *jeb,
478                                       struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
479 {
480         struct jffs2_node_frag *frag;
481         struct jffs2_full_dnode *fn = NULL;
482         struct jffs2_full_dirent *fd;
483         uint32_t start = 0, end = 0, nrfrags = 0;
484         int ret = 0;
485
486         mutex_lock(&f->sem);
487
488         /* Now we have the lock for this inode. Check that it's still the one at the head
489            of the list. */
490
491         spin_lock(&c->erase_completion_lock);
492
493         if (c->gcblock != jeb) {
494                 spin_unlock(&c->erase_completion_lock);
495                 jffs2_dbg(1, "GC block is no longer gcblock. Restart\n");
496                 goto upnout;
497         }
498         if (ref_obsolete(raw)) {
499                 spin_unlock(&c->erase_completion_lock);
500                 jffs2_dbg(1, "node to be GC'd was obsoleted in the meantime.\n");
501                 /* They'll call again */
502                 goto upnout;
503         }
504         spin_unlock(&c->erase_completion_lock);
505
506         /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
507         if (f->metadata && f->metadata->raw == raw) {
508                 fn = f->metadata;
509                 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
510                 goto upnout;
511         }
512
513         /* FIXME. Read node and do lookup? */
514         for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
515                 if (frag->node && frag->node->raw == raw) {
516                         fn = frag->node;
517                         end = frag->ofs + frag->size;
518                         if (!nrfrags++)
519                                 start = frag->ofs;
520                         if (nrfrags == frag->node->frags)
521                                 break; /* We've found them all */
522                 }
523         }
524         if (fn) {
525                 if (ref_flags(raw) == REF_PRISTINE) {
526                         ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
527                         if (!ret) {
528                                 /* Urgh. Return it sensibly. */
529                                 frag->node->raw = f->inocache->nodes;
530                         }
531                         if (ret != -EBADFD)
532                                 goto upnout;
533                 }
534                 /* We found a datanode. Do the GC */
535                 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
536                         /* It crosses a page boundary. Therefore, it must be a hole. */
537                         ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
538                 } else {
539                         /* It could still be a hole. But we GC the page this way anyway */
540                         ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
541                 }
542                 goto upnout;
543         }
544
545         /* Wasn't a dnode. Try dirent */
546         for (fd = f->dents; fd; fd=fd->next) {
547                 if (fd->raw == raw)
548                         break;
549         }
550
551         if (fd && fd->ino) {
552                 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
553         } else if (fd) {
554                 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
555         } else {
556                 pr_warn("Raw node at 0x%08x wasn't in node lists for ino #%u\n",
557                         ref_offset(raw), f->inocache->ino);
558                 if (ref_obsolete(raw)) {
559                         pr_warn("But it's obsolete so we don't mind too much\n");
560                 } else {
561                         jffs2_dbg_dump_node(c, ref_offset(raw));
562                         BUG();
563                 }
564         }
565  upnout:
566         mutex_unlock(&f->sem);
567
568         return ret;
569 }
570
571 static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
572                                           struct jffs2_inode_cache *ic,
573                                           struct jffs2_raw_node_ref *raw)
574 {
575         union jffs2_node_union *node;
576         size_t retlen;
577         int ret;
578         uint32_t phys_ofs, alloclen;
579         uint32_t crc, rawlen;
580         int retried = 0;
581
582         jffs2_dbg(1, "Going to GC REF_PRISTINE node at 0x%08x\n",
583                   ref_offset(raw));
584
585         alloclen = rawlen = ref_totlen(c, c->gcblock, raw);
586
587         /* Ask for a small amount of space (or the totlen if smaller) because we
588            don't want to force wastage of the end of a block if splitting would
589            work. */
590         if (ic && alloclen > sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN)
591                 alloclen = sizeof(struct jffs2_raw_inode) + JFFS2_MIN_DATA_LEN;
592
593         ret = jffs2_reserve_space_gc(c, alloclen, &alloclen, rawlen);
594         /* 'rawlen' is not the exact summary size; it is only an upper estimation */
595
596         if (ret)
597                 return ret;
598
599         if (alloclen < rawlen) {
600                 /* Doesn't fit untouched. We'll go the old route and split it */
601                 return -EBADFD;
602         }
603
604         node = kmalloc(rawlen, GFP_KERNEL);
605         if (!node)
606                 return -ENOMEM;
607
608         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
609         if (!ret && retlen != rawlen)
610                 ret = -EIO;
611         if (ret)
612                 goto out_node;
613
614         crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
615         if (je32_to_cpu(node->u.hdr_crc) != crc) {
616                 pr_warn("Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
617                         ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
618                 goto bail;
619         }
620
621         switch(je16_to_cpu(node->u.nodetype)) {
622         case JFFS2_NODETYPE_INODE:
623                 crc = crc32(0, node, sizeof(node->i)-8);
624                 if (je32_to_cpu(node->i.node_crc) != crc) {
625                         pr_warn("Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
626                                 ref_offset(raw), je32_to_cpu(node->i.node_crc),
627                                 crc);
628                         goto bail;
629                 }
630
631                 if (je32_to_cpu(node->i.dsize)) {
632                         crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
633                         if (je32_to_cpu(node->i.data_crc) != crc) {
634                                 pr_warn("Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
635                                         ref_offset(raw),
636                                         je32_to_cpu(node->i.data_crc), crc);
637                                 goto bail;
638                         }
639                 }
640                 break;
641
642         case JFFS2_NODETYPE_DIRENT:
643                 crc = crc32(0, node, sizeof(node->d)-8);
644                 if (je32_to_cpu(node->d.node_crc) != crc) {
645                         pr_warn("Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
646                                 ref_offset(raw),
647                                 je32_to_cpu(node->d.node_crc), crc);
648                         goto bail;
649                 }
650
651                 if (strnlen(node->d.name, node->d.nsize) != node->d.nsize) {
652                         pr_warn("Name in dirent node at 0x%08x contains zeroes\n",
653                                 ref_offset(raw));
654                         goto bail;
655                 }
656
657                 if (node->d.nsize) {
658                         crc = crc32(0, node->d.name, node->d.nsize);
659                         if (je32_to_cpu(node->d.name_crc) != crc) {
660                                 pr_warn("Name CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
661                                         ref_offset(raw),
662                                         je32_to_cpu(node->d.name_crc), crc);
663                                 goto bail;
664                         }
665                 }
666                 break;
667         default:
668                 /* If it's inode-less, we don't _know_ what it is. Just copy it intact */
669                 if (ic) {
670                         pr_warn("Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
671                                 ref_offset(raw), je16_to_cpu(node->u.nodetype));
672                         goto bail;
673                 }
674         }
675
676         /* OK, all the CRCs are good; this node can just be copied as-is. */
677  retry:
678         phys_ofs = write_ofs(c);
679
680         ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
681
682         if (ret || (retlen != rawlen)) {
683                 pr_notice("Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
684                           rawlen, phys_ofs, ret, retlen);
685                 if (retlen) {
686                         jffs2_add_physical_node_ref(c, phys_ofs | REF_OBSOLETE, rawlen, NULL);
687                 } else {
688                         pr_notice("Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n",
689                                   phys_ofs);
690                 }
691                 if (!retried) {
692                         /* Try to reallocate space and retry */
693                         uint32_t dummy;
694                         struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
695
696                         retried = 1;
697
698                         jffs2_dbg(1, "Retrying failed write of REF_PRISTINE node.\n");
699
700                         jffs2_dbg_acct_sanity_check(c,jeb);
701                         jffs2_dbg_acct_paranoia_check(c, jeb);
702
703                         ret = jffs2_reserve_space_gc(c, rawlen, &dummy, rawlen);
704                                                 /* this is not the exact summary size of it,
705                                                         it is only an upper estimation */
706
707                         if (!ret) {
708                                 jffs2_dbg(1, "Allocated space at 0x%08x to retry failed write.\n",
709                                           phys_ofs);
710
711                                 jffs2_dbg_acct_sanity_check(c,jeb);
712                                 jffs2_dbg_acct_paranoia_check(c, jeb);
713
714                                 goto retry;
715                         }
716                         jffs2_dbg(1, "Failed to allocate space to retry failed write: %d!\n",
717                                   ret);
718                 }
719
720                 if (!ret)
721                         ret = -EIO;
722                 goto out_node;
723         }
724         jffs2_add_physical_node_ref(c, phys_ofs | REF_PRISTINE, rawlen, ic);
725
726         jffs2_mark_node_obsolete(c, raw);
727         jffs2_dbg(1, "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n",
728                   ref_offset(raw));
729
730  out_node:
731         kfree(node);
732         return ret;
733  bail:
734         ret = -EBADFD;
735         goto out_node;
736 }
737
738 static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
739                                         struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
740 {
741         struct jffs2_full_dnode *new_fn;
742         struct jffs2_raw_inode ri;
743         struct jffs2_node_frag *last_frag;
744         union jffs2_device_node dev;
745         char *mdata = NULL;
746         int mdatalen = 0;
747         uint32_t alloclen, ilen;
748         int ret;
749
750         if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
751             S_ISCHR(JFFS2_F_I_MODE(f)) ) {
752                 /* For these, we don't actually need to read the old node */
753                 mdatalen = jffs2_encode_dev(&dev, JFFS2_F_I_RDEV(f));
754                 mdata = (char *)&dev;
755                 jffs2_dbg(1, "%s(): Writing %d bytes of kdev_t\n",
756                           __func__, mdatalen);
757         } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
758                 mdatalen = fn->size;
759                 mdata = kmalloc(fn->size, GFP_KERNEL);
760                 if (!mdata) {
761                         pr_warn("kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
762                         return -ENOMEM;
763                 }
764                 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
765                 if (ret) {
766                         pr_warn("read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n",
767                                 ret);
768                         kfree(mdata);
769                         return ret;
770                 }
771                 jffs2_dbg(1, "%s(): Writing %d bites of symlink target\n",
772                           __func__, mdatalen);
773
774         }
775
776         ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &alloclen,
777                                 JFFS2_SUMMARY_INODE_SIZE);
778         if (ret) {
779                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
780                         sizeof(ri) + mdatalen, ret);
781                 goto out;
782         }
783
784         last_frag = frag_last(&f->fragtree);
785         if (last_frag)
786                 /* Fetch the inode length from the fragtree rather then
787                  * from i_size since i_size may have not been updated yet */
788                 ilen = last_frag->ofs + last_frag->size;
789         else
790                 ilen = JFFS2_F_I_SIZE(f);
791
792         memset(&ri, 0, sizeof(ri));
793         ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
794         ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
795         ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
796         ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
797
798         ri.ino = cpu_to_je32(f->inocache->ino);
799         ri.version = cpu_to_je32(++f->highest_version);
800         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
801         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
802         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
803         ri.isize = cpu_to_je32(ilen);
804         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
805         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
806         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
807         ri.offset = cpu_to_je32(0);
808         ri.csize = cpu_to_je32(mdatalen);
809         ri.dsize = cpu_to_je32(mdatalen);
810         ri.compr = JFFS2_COMPR_NONE;
811         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
812         ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
813
814         new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, ALLOC_GC);
815
816         if (IS_ERR(new_fn)) {
817                 pr_warn("Error writing new dnode: %ld\n", PTR_ERR(new_fn));
818                 ret = PTR_ERR(new_fn);
819                 goto out;
820         }
821         jffs2_mark_node_obsolete(c, fn->raw);
822         jffs2_free_full_dnode(fn);
823         f->metadata = new_fn;
824  out:
825         if (S_ISLNK(JFFS2_F_I_MODE(f)))
826                 kfree(mdata);
827         return ret;
828 }
829
830 static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
831                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
832 {
833         struct jffs2_full_dirent *new_fd;
834         struct jffs2_raw_dirent rd;
835         uint32_t alloclen;
836         int ret;
837
838         rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
839         rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
840         rd.nsize = strlen(fd->name);
841         rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
842         rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
843
844         rd.pino = cpu_to_je32(f->inocache->ino);
845         rd.version = cpu_to_je32(++f->highest_version);
846         rd.ino = cpu_to_je32(fd->ino);
847         /* If the times on this inode were set by explicit utime() they can be different,
848            so refrain from splatting them. */
849         if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
850                 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
851         else
852                 rd.mctime = cpu_to_je32(0);
853         rd.type = fd->type;
854         rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
855         rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
856
857         ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &alloclen,
858                                 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
859         if (ret) {
860                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
861                         sizeof(rd)+rd.nsize, ret);
862                 return ret;
863         }
864         new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, ALLOC_GC);
865
866         if (IS_ERR(new_fd)) {
867                 pr_warn("jffs2_write_dirent in garbage_collect_dirent failed: %ld\n",
868                         PTR_ERR(new_fd));
869                 return PTR_ERR(new_fd);
870         }
871         jffs2_add_fd_to_list(c, new_fd, &f->dents);
872         return 0;
873 }
874
875 static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
876                                         struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
877 {
878         struct jffs2_full_dirent **fdp = &f->dents;
879         int found = 0;
880
881         /* On a medium where we can't actually mark nodes obsolete
882            pernamently, such as NAND flash, we need to work out
883            whether this deletion dirent is still needed to actively
884            delete a 'real' dirent with the same name that's still
885            somewhere else on the flash. */
886         if (!jffs2_can_mark_obsolete(c)) {
887                 struct jffs2_raw_dirent *rd;
888                 struct jffs2_raw_node_ref *raw;
889                 int ret;
890                 size_t retlen;
891                 int name_len = strlen(fd->name);
892                 uint32_t name_crc = crc32(0, fd->name, name_len);
893                 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
894
895                 rd = kmalloc(rawlen, GFP_KERNEL);
896                 if (!rd)
897                         return -ENOMEM;
898
899                 /* Prevent the erase code from nicking the obsolete node refs while
900                    we're looking at them. I really don't like this extra lock but
901                    can't see any alternative. Suggestions on a postcard to... */
902                 mutex_lock(&c->erase_free_sem);
903
904                 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
905
906                         cond_resched();
907
908                         /* We only care about obsolete ones */
909                         if (!(ref_obsolete(raw)))
910                                 continue;
911
912                         /* Any dirent with the same name is going to have the same length... */
913                         if (ref_totlen(c, NULL, raw) != rawlen)
914                                 continue;
915
916                         /* Doesn't matter if there's one in the same erase block. We're going to
917                            delete it too at the same time. */
918                         if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
919                                 continue;
920
921                         jffs2_dbg(1, "Check potential deletion dirent at %08x\n",
922                                   ref_offset(raw));
923
924                         /* This is an obsolete node belonging to the same directory, and it's of the right
925                            length. We need to take a closer look...*/
926                         ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
927                         if (ret) {
928                                 pr_warn("%s(): Read error (%d) reading obsolete node at %08x\n",
929                                         __func__, ret, ref_offset(raw));
930                                 /* If we can't read it, we don't need to continue to obsolete it. Continue */
931                                 continue;
932                         }
933                         if (retlen != rawlen) {
934                                 pr_warn("%s(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
935                                         __func__, retlen, rawlen,
936                                         ref_offset(raw));
937                                 continue;
938                         }
939
940                         if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
941                                 continue;
942
943                         /* If the name CRC doesn't match, skip */
944                         if (je32_to_cpu(rd->name_crc) != name_crc)
945                                 continue;
946
947                         /* If the name length doesn't match, or it's another deletion dirent, skip */
948                         if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
949                                 continue;
950
951                         /* OK, check the actual name now */
952                         if (memcmp(rd->name, fd->name, name_len))
953                                 continue;
954
955                         /* OK. The name really does match. There really is still an older node on
956                            the flash which our deletion dirent obsoletes. So we have to write out
957                            a new deletion dirent to replace it */
958                         mutex_unlock(&c->erase_free_sem);
959
960                         jffs2_dbg(1, "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
961                                   ref_offset(fd->raw), fd->name,
962                                   ref_offset(raw), je32_to_cpu(rd->ino));
963                         kfree(rd);
964
965                         return jffs2_garbage_collect_dirent(c, jeb, f, fd);
966                 }
967
968                 mutex_unlock(&c->erase_free_sem);
969                 kfree(rd);
970         }
971
972         /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
973            we should update the metadata node with those times accordingly */
974
975         /* No need for it any more. Just mark it obsolete and remove it from the list */
976         while (*fdp) {
977                 if ((*fdp) == fd) {
978                         found = 1;
979                         *fdp = fd->next;
980                         break;
981                 }
982                 fdp = &(*fdp)->next;
983         }
984         if (!found) {
985                 pr_warn("Deletion dirent \"%s\" not found in list for ino #%u\n",
986                         fd->name, f->inocache->ino);
987         }
988         jffs2_mark_node_obsolete(c, fd->raw);
989         jffs2_free_full_dirent(fd);
990         return 0;
991 }
992
993 static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
994                                       struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
995                                       uint32_t start, uint32_t end)
996 {
997         struct jffs2_raw_inode ri;
998         struct jffs2_node_frag *frag;
999         struct jffs2_full_dnode *new_fn;
1000         uint32_t alloclen, ilen;
1001         int ret;
1002
1003         jffs2_dbg(1, "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
1004                   f->inocache->ino, start, end);
1005
1006         memset(&ri, 0, sizeof(ri));
1007
1008         if(fn->frags > 1) {
1009                 size_t readlen;
1010                 uint32_t crc;
1011                 /* It's partially obsoleted by a later write. So we have to
1012                    write it out again with the _same_ version as before */
1013                 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
1014                 if (readlen != sizeof(ri) || ret) {
1015                         pr_warn("Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n",
1016                                 ret, readlen);
1017                         goto fill;
1018                 }
1019                 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
1020                         pr_warn("%s(): Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
1021                                 __func__, ref_offset(fn->raw),
1022                                 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
1023                         return -EIO;
1024                 }
1025                 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
1026                         pr_warn("%s(): Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
1027                                 __func__, ref_offset(fn->raw),
1028                                 je32_to_cpu(ri.totlen), sizeof(ri));
1029                         return -EIO;
1030                 }
1031                 crc = crc32(0, &ri, sizeof(ri)-8);
1032                 if (crc != je32_to_cpu(ri.node_crc)) {
1033                         pr_warn("%s: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
1034                                 __func__, ref_offset(fn->raw),
1035                                 je32_to_cpu(ri.node_crc), crc);
1036                         /* FIXME: We could possibly deal with this by writing new holes for each frag */
1037                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1038                                 start, end, f->inocache->ino);
1039                         goto fill;
1040                 }
1041                 if (ri.compr != JFFS2_COMPR_ZERO) {
1042                         pr_warn("%s(): Node 0x%08x wasn't a hole node!\n",
1043                                 __func__, ref_offset(fn->raw));
1044                         pr_warn("Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1045                                 start, end, f->inocache->ino);
1046                         goto fill;
1047                 }
1048         } else {
1049         fill:
1050                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1051                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1052                 ri.totlen = cpu_to_je32(sizeof(ri));
1053                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1054
1055                 ri.ino = cpu_to_je32(f->inocache->ino);
1056                 ri.version = cpu_to_je32(++f->highest_version);
1057                 ri.offset = cpu_to_je32(start);
1058                 ri.dsize = cpu_to_je32(end - start);
1059                 ri.csize = cpu_to_je32(0);
1060                 ri.compr = JFFS2_COMPR_ZERO;
1061         }
1062
1063         frag = frag_last(&f->fragtree);
1064         if (frag)
1065                 /* Fetch the inode length from the fragtree rather then
1066                  * from i_size since i_size may have not been updated yet */
1067                 ilen = frag->ofs + frag->size;
1068         else
1069                 ilen = JFFS2_F_I_SIZE(f);
1070
1071         ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1072         ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1073         ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1074         ri.isize = cpu_to_je32(ilen);
1075         ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1076         ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1077         ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1078         ri.data_crc = cpu_to_je32(0);
1079         ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1080
1081         ret = jffs2_reserve_space_gc(c, sizeof(ri), &alloclen,
1082                                      JFFS2_SUMMARY_INODE_SIZE);
1083         if (ret) {
1084                 pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1085                         sizeof(ri), ret);
1086                 return ret;
1087         }
1088         new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, ALLOC_GC);
1089
1090         if (IS_ERR(new_fn)) {
1091                 pr_warn("Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1092                 return PTR_ERR(new_fn);
1093         }
1094         if (je32_to_cpu(ri.version) == f->highest_version) {
1095                 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1096                 if (f->metadata) {
1097                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1098                         jffs2_free_full_dnode(f->metadata);
1099                         f->metadata = NULL;
1100                 }
1101                 return 0;
1102         }
1103
1104         /*
1105          * We should only get here in the case where the node we are
1106          * replacing had more than one frag, so we kept the same version
1107          * number as before. (Except in case of error -- see 'goto fill;'
1108          * above.)
1109          */
1110         D1(if(unlikely(fn->frags <= 1)) {
1111                         pr_warn("%s(): Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1112                                 __func__, fn->frags, je32_to_cpu(ri.version),
1113                                 f->highest_version, je32_to_cpu(ri.ino));
1114         });
1115
1116         /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1117         mark_ref_normal(new_fn->raw);
1118
1119         for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1120              frag; frag = frag_next(frag)) {
1121                 if (frag->ofs > fn->size + fn->ofs)
1122                         break;
1123                 if (frag->node == fn) {
1124                         frag->node = new_fn;
1125                         new_fn->frags++;
1126                         fn->frags--;
1127                 }
1128         }
1129         if (fn->frags) {
1130                 pr_warn("%s(): Old node still has frags!\n", __func__);
1131                 BUG();
1132         }
1133         if (!new_fn->frags) {
1134                 pr_warn("%s(): New node has no frags!\n", __func__);
1135                 BUG();
1136         }
1137
1138         jffs2_mark_node_obsolete(c, fn->raw);
1139         jffs2_free_full_dnode(fn);
1140
1141         return 0;
1142 }
1143
1144 static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *orig_jeb,
1145                                        struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1146                                        uint32_t start, uint32_t end)
1147 {
1148         struct jffs2_full_dnode *new_fn;
1149         struct jffs2_raw_inode ri;
1150         uint32_t alloclen, offset, orig_end, orig_start;
1151         int ret = 0;
1152         unsigned char *comprbuf = NULL, *writebuf;
1153         unsigned long pg;
1154         unsigned char *pg_ptr;
1155
1156         memset(&ri, 0, sizeof(ri));
1157
1158         jffs2_dbg(1, "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1159                   f->inocache->ino, start, end);
1160
1161         orig_end = end;
1162         orig_start = start;
1163
1164         if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1165                 /* Attempt to do some merging. But only expand to cover logically
1166                    adjacent frags if the block containing them is already considered
1167                    to be dirty. Otherwise we end up with GC just going round in
1168                    circles dirtying the nodes it already wrote out, especially
1169                    on NAND where we have small eraseblocks and hence a much higher
1170                    chance of nodes having to be split to cross boundaries. */
1171
1172                 struct jffs2_node_frag *frag;
1173                 uint32_t min, max;
1174
1175                 min = start & ~(PAGE_CACHE_SIZE-1);
1176                 max = min + PAGE_CACHE_SIZE;
1177
1178                 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1179
1180                 /* BUG_ON(!frag) but that'll happen anyway... */
1181
1182                 BUG_ON(frag->ofs != start);
1183
1184                 /* First grow down... */
1185                 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1186
1187                         /* If the previous frag doesn't even reach the beginning, there's
1188                            excessive fragmentation. Just merge. */
1189                         if (frag->ofs > min) {
1190                                 jffs2_dbg(1, "Expanding down to cover partial frag (0x%x-0x%x)\n",
1191                                           frag->ofs, frag->ofs+frag->size);
1192                                 start = frag->ofs;
1193                                 continue;
1194                         }
1195                         /* OK. This frag holds the first byte of the page. */
1196                         if (!frag->node || !frag->node->raw) {
1197                                 jffs2_dbg(1, "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1198                                           frag->ofs, frag->ofs+frag->size);
1199                                 break;
1200                         } else {
1201
1202                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1203                                    in a block which is still considered clean? If so, don't obsolete it.
1204                                    If not, cover it anyway. */
1205
1206                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1207                                 struct jffs2_eraseblock *jeb;
1208
1209                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1210
1211                                 if (jeb == c->gcblock) {
1212                                         jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1213                                                   frag->ofs,
1214                                                   frag->ofs + frag->size,
1215                                                   ref_offset(raw));
1216                                         start = frag->ofs;
1217                                         break;
1218                                 }
1219                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1220                                         jffs2_dbg(1, "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1221                                                   frag->ofs,
1222                                                   frag->ofs + frag->size,
1223                                                   jeb->offset);
1224                                         break;
1225                                 }
1226
1227                                 jffs2_dbg(1, "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1228                                           frag->ofs,
1229                                           frag->ofs + frag->size,
1230                                           jeb->offset);
1231                                 start = frag->ofs;
1232                                 break;
1233                         }
1234                 }
1235
1236                 /* ... then up */
1237
1238                 /* Find last frag which is actually part of the node we're to GC. */
1239                 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1240
1241                 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1242
1243                         /* If the previous frag doesn't even reach the beginning, there's lots
1244                            of fragmentation. Just merge. */
1245                         if (frag->ofs+frag->size < max) {
1246                                 jffs2_dbg(1, "Expanding up to cover partial frag (0x%x-0x%x)\n",
1247                                           frag->ofs, frag->ofs+frag->size);
1248                                 end = frag->ofs + frag->size;
1249                                 continue;
1250                         }
1251
1252                         if (!frag->node || !frag->node->raw) {
1253                                 jffs2_dbg(1, "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1254                                           frag->ofs, frag->ofs+frag->size);
1255                                 break;
1256                         } else {
1257
1258                                 /* OK, it's a frag which extends to the beginning of the page. Does it live
1259                                    in a block which is still considered clean? If so, don't obsolete it.
1260                                    If not, cover it anyway. */
1261
1262                                 struct jffs2_raw_node_ref *raw = frag->node->raw;
1263                                 struct jffs2_eraseblock *jeb;
1264
1265                                 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1266
1267                                 if (jeb == c->gcblock) {
1268                                         jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1269                                                   frag->ofs,
1270                                                   frag->ofs + frag->size,
1271                                                   ref_offset(raw));
1272                                         end = frag->ofs + frag->size;
1273                                         break;
1274                                 }
1275                                 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1276                                         jffs2_dbg(1, "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1277                                                   frag->ofs,
1278                                                   frag->ofs + frag->size,
1279                                                   jeb->offset);
1280                                         break;
1281                                 }
1282
1283                                 jffs2_dbg(1, "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1284                                           frag->ofs,
1285                                           frag->ofs + frag->size,
1286                                           jeb->offset);
1287                                 end = frag->ofs + frag->size;
1288                                 break;
1289                         }
1290                 }
1291                 jffs2_dbg(1, "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1292                           orig_start, orig_end, start, end);
1293
1294                 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1295                 BUG_ON(end < orig_end);
1296                 BUG_ON(start > orig_start);
1297         }
1298
1299         /* First, use readpage() to read the appropriate page into the page cache */
1300         /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1301          *    triggered garbage collection in the first place?
1302          * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1303          *    page OK. We'll actually write it out again in commit_write, which is a little
1304          *    suboptimal, but at least we're correct.
1305          */
1306         pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1307
1308         if (IS_ERR(pg_ptr)) {
1309                 pr_warn("read_cache_page() returned error: %ld\n",
1310                         PTR_ERR(pg_ptr));
1311                 return PTR_ERR(pg_ptr);
1312         }
1313
1314         offset = start;
1315         while(offset < orig_end) {
1316                 uint32_t datalen;
1317                 uint32_t cdatalen;
1318                 uint16_t comprtype = JFFS2_COMPR_NONE;
1319
1320                 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN,
1321                                         &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1322
1323                 if (ret) {
1324                         pr_warn("jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1325                                 sizeof(ri) + JFFS2_MIN_DATA_LEN, ret);
1326                         break;
1327                 }
1328                 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1329                 datalen = end - offset;
1330
1331                 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1332
1333                 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1334
1335                 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1336                 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1337                 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1338                 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1339
1340                 ri.ino = cpu_to_je32(f->inocache->ino);
1341                 ri.version = cpu_to_je32(++f->highest_version);
1342                 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1343                 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1344                 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1345                 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1346                 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1347                 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1348                 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1349                 ri.offset = cpu_to_je32(offset);
1350                 ri.csize = cpu_to_je32(cdatalen);
1351                 ri.dsize = cpu_to_je32(datalen);
1352                 ri.compr = comprtype & 0xff;
1353                 ri.usercompr = (comprtype >> 8) & 0xff;
1354                 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1355                 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
1356
1357                 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, ALLOC_GC);
1358
1359                 jffs2_free_comprbuf(comprbuf, writebuf);
1360
1361                 if (IS_ERR(new_fn)) {
1362                         pr_warn("Error writing new dnode: %ld\n",
1363                                 PTR_ERR(new_fn));
1364                         ret = PTR_ERR(new_fn);
1365                         break;
1366                 }
1367                 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1368                 offset += datalen;
1369                 if (f->metadata) {
1370                         jffs2_mark_node_obsolete(c, f->metadata->raw);
1371                         jffs2_free_full_dnode(f->metadata);
1372                         f->metadata = NULL;
1373                 }
1374         }
1375
1376         jffs2_gc_release_page(c, pg_ptr, &pg);
1377         return ret;
1378 }