f2fs: check the use of macros on block counts and addresses
[linux-3.10.git] / fs / f2fs / segment.c
1 /*
2  * fs/f2fs/segment.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include <linux/bio.h>
14 #include <linux/blkdev.h>
15 #include <linux/prefetch.h>
16 #include <linux/kthread.h>
17 #include <linux/vmalloc.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "segment.h"
22 #include "node.h"
23 #include <trace/events/f2fs.h>
24
25 #define __reverse_ffz(x) __reverse_ffs(~(x))
26
27 static struct kmem_cache *discard_entry_slab;
28 static struct kmem_cache *sit_entry_set_slab;
29
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 *
 * Returns the position of the first set bit in @word under f2fs's
 * reversed-in-byte bitmap convention.  @word must be non-zero.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff) == 0) {
		num += 32;
		word >>= 32;
	}
#endif
	if ((word & 0xffff) == 0) {
		num += 16;
		word >>= 16;
	}
	if ((word & 0xff) == 0) {
		num += 8;
		word >>= 8;
	}
	/*
	 * Below byte granularity the bit order is reversed, so these tests
	 * probe the HIGH nibble/pair first and shift only when a bit was
	 * found there — the mirror image of the generic __ffs() pattern.
	 */
	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;
	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;
	if ((word & 0x2) == 0)
		num += 1;
	return num;
}
64
/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * Example:
 *                             LSB <--> MSB
 *   f2fs_set_bit(0, bitmap) => 0000 0001
 *   f2fs_set_bit(7, bitmap) => 1000 0000
 *
 * Returns the index of the next set bit at or after @offset, or @size
 * when no such bit exists.
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	/* bit index of the first bit of the word containing @offset */
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	/* from here, @size counts bits remaining from the word boundary */
	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	/*
	 * Mask off bits before @offset in the first word, honoring the
	 * reversed bit order inside each byte: @quot covers the whole
	 * bytes past the offset's byte, @submask keeps the in-byte bits
	 * at or after the offset (reversed order).
	 */
	tmp = *(p++);
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	mask = ~0UL << quot;
	submask = (unsigned char)(0xff << rest) >> rest;
	submask <<= quot;
	mask &= submask;
	tmp &= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	/* scan whole words */
	while (size & ~(BITS_PER_LONG-1)) {
		tmp = *(p++);
		if (tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;
found_first:
	/* trim bits beyond @size in the final, partial word */
	tmp &= (~0UL >> (BITS_PER_LONG - size));
	if (tmp == 0UL)		/* Are any bits set? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffs(tmp);
}
123
/*
 * Zero-bit counterpart of __find_rev_next_bit(): returns the index of
 * the next CLEAR bit at or after @offset under f2fs's reversed-in-byte
 * bit order, or @size when every remaining bit is set.
 */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	/* bit index of the first bit of the word containing @offset */
	unsigned long result = offset & ~(BITS_PER_LONG - 1);
	unsigned long tmp;
	unsigned long mask, submask;
	unsigned long quot, rest;

	if (offset >= size)
		return size;

	/* from here, @size counts bits remaining from the word boundary */
	size -= result;
	offset %= BITS_PER_LONG;
	if (!offset)
		goto aligned;

	/*
	 * Force the bits before @offset to one so they cannot be reported
	 * as zero: @mask covers the whole bytes before the offset's byte,
	 * @submask the in-byte bits before the offset (reversed order).
	 */
	tmp = *(p++);
	quot = (offset >> 3) << 3;
	rest = offset & 0x7;
	mask = ~(~0UL << quot);
	submask = (unsigned char)~((unsigned char)(0xff << rest) >> rest);
	submask <<= quot;
	mask += submask;
	tmp |= mask;
	if (size < BITS_PER_LONG)
		goto found_first;
	if (~tmp)
		goto found_middle;

	size -= BITS_PER_LONG;
	result += BITS_PER_LONG;
aligned:
	/* scan whole words; ~tmp is non-zero iff some bit is clear */
	while (size & ~(BITS_PER_LONG - 1)) {
		tmp = *(p++);
		if (~tmp)
			goto found_middle;
		result += BITS_PER_LONG;
		size -= BITS_PER_LONG;
	}
	if (!size)
		return result;
	tmp = *p;

found_first:
	/* force bits beyond @size to one in the final, partial word */
	tmp |= ~0UL << size;
	if (tmp == ~0UL)	/* Are any bits zero? */
		return result + size;	/* Nope. */
found_middle:
	return result + __reverse_ffz(tmp);
}
175
176 /*
177  * This function balances dirty node and dentry pages.
178  * In addition, it controls garbage collection.
179  */
180 void f2fs_balance_fs(struct f2fs_sb_info *sbi)
181 {
182         /*
183          * We should do GC or end up with checkpoint, if there are so many dirty
184          * dir/node pages without enough free segments.
185          */
186         if (has_not_enough_free_secs(sbi, 0)) {
187                 mutex_lock(&sbi->gc_mutex);
188                 f2fs_gc(sbi);
189         }
190 }
191
192 void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
193 {
194         /* check the # of cached NAT entries and prefree segments */
195         if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK) ||
196                                 excess_prefree_segs(sbi))
197                 f2fs_sync_fs(sbi->sb, true);
198 }
199
200 static int issue_flush_thread(void *data)
201 {
202         struct f2fs_sb_info *sbi = data;
203         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
204         wait_queue_head_t *q = &fcc->flush_wait_queue;
205 repeat:
206         if (kthread_should_stop())
207                 return 0;
208
209         if (!llist_empty(&fcc->issue_list)) {
210                 struct bio *bio = bio_alloc(GFP_NOIO, 0);
211                 struct flush_cmd *cmd, *next;
212                 int ret;
213
214                 fcc->dispatch_list = llist_del_all(&fcc->issue_list);
215                 fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);
216
217                 bio->bi_bdev = sbi->sb->s_bdev;
218                 ret = submit_bio_wait(WRITE_FLUSH, bio);
219
220                 llist_for_each_entry_safe(cmd, next,
221                                           fcc->dispatch_list, llnode) {
222                         cmd->ret = ret;
223                         complete(&cmd->wait);
224                 }
225                 bio_put(bio);
226                 fcc->dispatch_list = NULL;
227         }
228
229         wait_event_interruptible(*q,
230                 kthread_should_stop() || !llist_empty(&fcc->issue_list));
231         goto repeat;
232 }
233
/*
 * Issue a cache flush for the filesystem's block device.
 *
 * With FLUSH_MERGE, the request is queued for the flush thread so many
 * concurrent callers can share a single device flush.  Returns 0 on
 * success (or when barriers are disabled), otherwise the error from the
 * flush submission.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
	struct flush_cmd cmd;

	trace_f2fs_issue_flush(sbi->sb, test_opt(sbi, NOBARRIER),
					test_opt(sbi, FLUSH_MERGE));

	/* NOBARRIER mounts skip device flushes entirely */
	if (test_opt(sbi, NOBARRIER))
		return 0;

	/* without merging, issue a synchronous flush directly */
	if (!test_opt(sbi, FLUSH_MERGE))
		return blkdev_issue_flush(sbi->sb->s_bdev, GFP_KERNEL, NULL);

	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/* wake the flush thread unless a batch is already in flight */
	if (!fcc->dispatch_list)
		wake_up(&fcc->flush_wait_queue);

	wait_for_completion(&cmd.wait);

	return cmd.ret;
}
259
260 int create_flush_cmd_control(struct f2fs_sb_info *sbi)
261 {
262         dev_t dev = sbi->sb->s_bdev->bd_dev;
263         struct flush_cmd_control *fcc;
264         int err = 0;
265
266         fcc = kzalloc(sizeof(struct flush_cmd_control), GFP_KERNEL);
267         if (!fcc)
268                 return -ENOMEM;
269         init_waitqueue_head(&fcc->flush_wait_queue);
270         init_llist_head(&fcc->issue_list);
271         SM_I(sbi)->cmd_control_info = fcc;
272         fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
273                                 "f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
274         if (IS_ERR(fcc->f2fs_issue_flush)) {
275                 err = PTR_ERR(fcc->f2fs_issue_flush);
276                 kfree(fcc);
277                 SM_I(sbi)->cmd_control_info = NULL;
278                 return err;
279         }
280
281         return err;
282 }
283
284 void destroy_flush_cmd_control(struct f2fs_sb_info *sbi)
285 {
286         struct flush_cmd_control *fcc = SM_I(sbi)->cmd_control_info;
287
288         if (fcc && fcc->f2fs_issue_flush)
289                 kthread_stop(fcc->f2fs_issue_flush);
290         kfree(fcc);
291         SM_I(sbi)->cmd_control_info = NULL;
292 }
293
/*
 * Mark @segno in the seglist bitmap for @dirty_type and bump its
 * counter.  For DIRTY, the segment is additionally tracked in the
 * bitmap matching its SIT type.  Callers hold dirty_i->seglist_lock.
 */
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		/* a segment's SIT type must index a per-type bitmap (< DIRTY) */
		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}
318
/*
 * Clear @segno from the @dirty_type seglist bitmap and counter.  For
 * DIRTY, also drop it from its per-type bitmap, and clear the section's
 * victim bit once the whole section has no valid blocks left.
 * Callers hold dirty_i->seglist_lock.
 */
static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		/* section fully invalid: it is no longer a GC victim */
		if (get_valid_blocks(sbi, segno, sbi->segs_per_sec) == 0)
			clear_bit(GET_SECNO(sbi, segno),
						dirty_i->victim_secmap);
	}
}
339
/*
 * Re-evaluate the dirty status of @segno after its valid-block count
 * changed.
 *
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, 0);

	if (valid_blocks == 0) {
		/* fully invalid: prefree, to be freed after checkpoint */
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		/* partially valid: dirty, candidate for cleaning */
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}
369
370 static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
371                                 block_t blkstart, block_t blklen)
372 {
373         sector_t start = SECTOR_FROM_BLOCK(blkstart);
374         sector_t len = SECTOR_FROM_BLOCK(blklen);
375         trace_f2fs_issue_discard(sbi->sb, blkstart, blklen);
376         return blkdev_issue_discard(sbi->sb->s_bdev, start, len, GFP_NOFS, 0);
377 }
378
379 void discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
380 {
381         if (f2fs_issue_discard(sbi, blkaddr, 1)) {
382                 struct page *page = grab_meta_page(sbi, blkaddr);
383                 /* zero-filled page */
384                 set_page_dirty(page);
385                 f2fs_put_page(page, 1);
386         }
387 }
388
/*
 * Gather discard candidates for the segment at cpc->trim_start.
 *
 * Blocks that were valid at the last checkpoint but have been freed
 * since (set in ckpt_valid_map, clear in cur_valid_map) are queued as
 * discard_entry items on SM_I(sbi)->discard_list; they are actually
 * issued later by clear_prefree_segments().
 */
static void add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct list_head *head = &SM_I(sbi)->discard_list;
	struct discard_entry *new;
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *dmap;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason == CP_DISCARD);	/* explicit trim request */
	int i;

	if (!force && !test_opt(sbi, DISCARD))
		return;

	/* trim of an entirely free segment: queue it as one big discard */
	if (force && !se->valid_blocks) {
		struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
		/*
		 * if this segment is registered in the prefree list, then
		 * we should skip adding a discard candidate, and let the
		 * checkpoint do that later.
		 */
		mutex_lock(&dirty_i->seglist_lock);
		if (test_bit(cpc->trim_start, dirty_i->dirty_segmap[PRE])) {
			mutex_unlock(&dirty_i->seglist_lock);
			cpc->trimmed += sbi->blocks_per_seg;
			return;
		}
		mutex_unlock(&dirty_i->seglist_lock);

		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
		INIT_LIST_HEAD(&new->list);
		new->blkaddr = START_BLOCK(sbi, cpc->trim_start);
		new->len = sbi->blocks_per_seg;
		list_add_tail(&new->list, head);
		SM_I(sbi)->nr_discards += sbi->blocks_per_seg;
		cpc->trimmed += sbi->blocks_per_seg;
		return;
	}

	/* zero block will be discarded through the prefree list */
	if (!se->valid_blocks || se->valid_blocks == max_blocks)
		return;

	dmap = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!dmap)
		return;

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	/* dmap = blocks valid at checkpoint but invalid now */
	for (i = 0; i < entries; i++)
		dmap[i] = (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	/* queue each run of discardable blocks, honoring the global cap */
	while (force || SM_I(sbi)->nr_discards <= SM_I(sbi)->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);

		/* run too short to be worth a discard command */
		if (end - start < cpc->trim_minlen)
			continue;

		new = f2fs_kmem_cache_alloc(discard_entry_slab, GFP_NOFS);
		INIT_LIST_HEAD(&new->list);
		new->blkaddr = START_BLOCK(sbi, cpc->trim_start) + start;
		new->len = end - start;
		cpc->trimmed += end - start;

		list_add_tail(&new->list, head);
		SM_I(sbi)->nr_discards += end - start;
	}
	kfree(dmap);
}
464
465 void release_discard_addrs(struct f2fs_sb_info *sbi)
466 {
467         struct list_head *head = &(SM_I(sbi)->discard_list);
468         struct discard_entry *entry, *this;
469
470         /* drop caches */
471         list_for_each_entry_safe(entry, this, head, list) {
472                 list_del(&entry->list);
473                 kmem_cache_free(discard_entry_slab, entry);
474         }
475 }
476
477 /*
478  * Should call clear_prefree_segments after checkpoint is done.
479  */
480 static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
481 {
482         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
483         unsigned int segno;
484
485         mutex_lock(&dirty_i->seglist_lock);
486         for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
487                 __set_test_and_free(sbi, segno);
488         mutex_unlock(&dirty_i->seglist_lock);
489 }
490
/*
 * Called after a checkpoint completes: clear the prefree segments from
 * the PRE bitmap and, when the discard option is set, issue discards
 * for them — whole runs first, then the small per-block candidates
 * collected by add_discard_addrs().
 */
void clear_prefree_segments(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &(SM_I(sbi)->discard_list);
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;

	mutex_lock(&dirty_i->seglist_lock);

	/* walk each contiguous run of prefree segments */
	while (1) {
		int i;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		for (i = start; i < end; i++)
			clear_bit(i, prefree_map);

		dirty_i->nr_dirty[PRE] -= end - start;

		if (!test_opt(sbi, DISCARD))
			continue;

		/* one discard request covers the whole run */
		f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
	list_for_each_entry_safe(entry, this, head, list) {
		f2fs_issue_discard(sbi, entry->blkaddr, entry->len);
		list_del(&entry->list);
		SM_I(sbi)->nr_discards -= entry->len;
		kmem_cache_free(discard_entry_slab, entry);
	}
}
530
531 static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
532 {
533         struct sit_info *sit_i = SIT_I(sbi);
534
535         if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
536                 sit_i->dirty_sentries++;
537                 return false;
538         }
539
540         return true;
541 }
542
543 static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
544                                         unsigned int segno, int modified)
545 {
546         struct seg_entry *se = get_seg_entry(sbi, segno);
547         se->type = type;
548         if (modified)
549                 __mark_sit_entry_dirty(sbi, segno);
550 }
551
/*
 * Apply a valid-block delta to the SIT entry covering @blkaddr.
 * @del is +1 when the block becomes valid, -1 when it is invalidated.
 * On bitmap/counter inconsistency the problem is logged and, when error
 * handling permits, the valid count is rebuilt from the bitmap.
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool check_map = false;

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* range check; the shift detects overflow of the 16-bit counter */
	if (new_vblocks < 0 || new_vblocks > sbi->blocks_per_seg ||
	    (new_vblocks >> (sizeof(unsigned short) << 3)))
		if (f2fs_handle_error(sbi))
			check_map = true;

	se->mtime = get_mtime(sbi);
	SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		/* bit already set: this block was validated twice */
		if (f2fs_set_bit(offset, se->cur_valid_map))
			if (f2fs_handle_error(sbi))
				check_map = true;
	} else {
		/* bit already clear: this block was invalidated twice */
		if (!f2fs_clear_bit(offset, se->cur_valid_map))
			if (f2fs_handle_error(sbi))
				check_map = true;
	}

	/* inconsistency detected: recount the bitmap and repair the counter */
	if (unlikely(check_map)) {
		int i;
		long int vblocks = 0;

		f2fs_msg(sbi->sb, KERN_ERR,
				"cannot %svalidate block %u in segment %u with %hu valid blocks",
				(del < 0) ? "in" : "",
				offset, segno, se->valid_blocks);

		/* assume the count was stale to start */
		del = 0;
		for (i = 0; i < sbi->blocks_per_seg; i++)
			if (f2fs_test_bit(i, se->cur_valid_map))
				vblocks++;
		if (vblocks != se->valid_blocks) {
			f2fs_msg(sbi->sb, KERN_INFO, "correcting valid block "
				"counts %d -> %ld", se->valid_blocks, vblocks);
			/* make accounting corrections */
			del = vblocks - se->valid_blocks;
		}
	}
	se->valid_blocks += del;

	/* blocks not valid at the last checkpoint adjust the ckpt count too */
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}
618
/*
 * Account a block relocation from @old to @new: validate the new block,
 * invalidate the old one (skipped when @old maps to no segment), then
 * refresh the dirty status of both affected segments.
 */
void refresh_sit_entry(struct f2fs_sb_info *sbi, block_t old, block_t new)
{
	update_sit_entry(sbi, new, 1);
	if (GET_SEGNO(sbi, old) != NULL_SEGNO)
		update_sit_entry(sbi, old, -1);

	/* locate_dirty_segment() itself ignores NULL_SEGNO */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new));
}
628
/*
 * Invalidate the block at @addr: drop it from its segment's valid-block
 * bitmap and counters, then update the segment's dirty status.
 * Blocks at NEW_ADDR are ignored; NULL_ADDR is a bug.
 */
void invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	/* out-of-range segment: report and bail out if handling allows */
	if (segno >= TOTAL_SEGS(sbi)) {
		f2fs_msg(sbi->sb, KERN_ERR, "invalid segment number %u", segno);
		if (f2fs_handle_error(sbi))
			return;
	}

	/* add it into sit main buffer */
	mutex_lock(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	mutex_unlock(&sit_i->sentry_lock);
}
654
655 /*
656  * This function should be resided under the curseg_mutex lock
657  */
658 static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
659                                         struct f2fs_summary *sum)
660 {
661         struct curseg_info *curseg = CURSEG_I(sbi, type);
662         void *addr = curseg->sum_blk;
663         addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
664         memcpy(addr, sum, sizeof(struct f2fs_summary));
665 }
666
667 /*
668  * Calculate the number of current summary pages for writing
669  */
670 int npages_for_summary_flush(struct f2fs_sb_info *sbi)
671 {
672         int valid_sum_count = 0;
673         int i, sum_in_page;
674
675         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
676                 if (sbi->ckpt->alloc_type[i] == SSR)
677                         valid_sum_count += sbi->blocks_per_seg;
678                 else
679                         valid_sum_count += curseg_blkoff(sbi, i);
680         }
681
682         sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
683                         SUM_FOOTER_SIZE) / SUMMARY_SIZE;
684         if (valid_sum_count <= sum_in_page)
685                 return 1;
686         else if ((valid_sum_count - sum_in_page) <=
687                 (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
688                 return 2;
689         return 3;
690 }
691
/*
 * Caller should put this summary page
 */
struct page *get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	/* read the on-disk summary block of @segno as a meta page */
	return get_meta_page(sbi, GET_SUM_BLOCK(sbi, segno));
}
699
700 static void write_sum_page(struct f2fs_sb_info *sbi,
701                         struct f2fs_summary_block *sum_blk, block_t blk_addr)
702 {
703         struct page *page = grab_meta_page(sbi, blk_addr);
704         void *kaddr = page_address(page);
705         memcpy(kaddr, sum_blk, PAGE_CACHE_SIZE);
706         set_page_dirty(page);
707         f2fs_put_page(page, 1);
708 }
709
710 static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
711 {
712         struct curseg_info *curseg = CURSEG_I(sbi, type);
713         unsigned int segno = curseg->segno + 1;
714         struct free_segmap_info *free_i = FREE_I(sbi);
715
716         if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
717                 return !test_bit(segno, free_i->free_segmap);
718         return 0;
719 }
720
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 *
 * @newseg carries the current segment in and the chosen segment out.
 * @new_sec forces allocation from a fresh section; @dir chooses the
 * scan direction (ALLOC_LEFT/ALLOC_RIGHT).
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = *newseg / sbi->segs_per_sec;
	unsigned int old_zoneno = GET_ZONENO_FROM_SEGNO(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;	/* still willing to try another zone */
	int go_left = 0;
	int i;

	write_lock(&free_i->segmap_lock);

	/*
	 * Fast path: staying inside the current section, take the next
	 * free segment within it when one exists.
	 */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
					MAIN_SEGS(sbi), *newseg + 1);
		if (segno - *newseg < sbi->segs_per_sec -
					(*newseg % sbi->segs_per_sec))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		/* no free section at or after the hint */
		if (dir == ALLOC_RIGHT) {
			/* wrap around to the first free section */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			/* scan leftward from the hint instead */
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk left until a free section is found, wrapping via bit 0 */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	hint = secno;
	segno = secno * sbi->segs_per_sec;
	zoneno = secno / sbi->secs_per_zone;

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	/* avoid a zone that already hosts one of the current segments */
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	write_unlock(&free_i->segmap_lock);
}
813
814 static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
815 {
816         struct curseg_info *curseg = CURSEG_I(sbi, type);
817         struct summary_footer *sum_footer;
818
819         curseg->segno = curseg->next_segno;
820         curseg->zone = GET_ZONENO_FROM_SEGNO(sbi, curseg->segno);
821         curseg->next_blkoff = 0;
822         curseg->next_segno = NULL_SEGNO;
823
824         sum_footer = &(curseg->sum_blk->footer);
825         memset(sum_footer, 0, sizeof(struct summary_footer));
826         if (IS_DATASEG(type))
827                 SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
828         if (IS_NODESEG(type))
829                 SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
830         __set_sit_entry_type(sbi, type, curseg->segno, modified);
831 }
832
833 /*
834  * Allocate a current working segment.
835  * This function always allocates a free segment in LFS manner.
836  */
837 static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
838 {
839         struct curseg_info *curseg = CURSEG_I(sbi, type);
840         unsigned int segno = curseg->segno;
841         int dir = ALLOC_LEFT;
842
843         write_sum_page(sbi, curseg->sum_blk,
844                                 GET_SUM_BLOCK(sbi, segno));
845         if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
846                 dir = ALLOC_RIGHT;
847
848         if (test_opt(sbi, NOHEAP))
849                 dir = ALLOC_RIGHT;
850
851         get_new_segment(sbi, &segno, new_sec, dir);
852         curseg->next_segno = segno;
853         reset_curseg(sbi, type, 1);
854         curseg->alloc_type = LFS;
855 }
856
857 static void __next_free_blkoff(struct f2fs_sb_info *sbi,
858                         struct curseg_info *seg, block_t start)
859 {
860         struct seg_entry *se = get_seg_entry(sbi, seg->segno);
861         int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
862         unsigned long target_map[entries];
863         unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
864         unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
865         int i, pos;
866
867         for (i = 0; i < entries; i++)
868                 target_map[i] = ckpt_map[i] | cur_map[i];
869
870         pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
871
872         seg->next_blkoff = pos;
873 }
874
875 /*
876  * If a segment is written by LFS manner, next block offset is just obtained
877  * by increasing the current block offset. However, if a segment is written by
878  * SSR manner, next block offset obtained by calling __next_free_blkoff
879  */
880 static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
881                                 struct curseg_info *seg)
882 {
883         if (seg->alloc_type == SSR)
884                 __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
885         else
886                 seg->next_blkoff++;
887 }
888
/*
 * This function always allocates a used segment (from dirty seglist) by SSR
 * manner, so it should recover the existing segment information of valid blocks
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type, bool reuse)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	/* flush the summary block of the segment being abandoned */
	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	/* the reused segment becomes a log; drop it from dirty/prefree lists */
	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	/* position next_blkoff at the segment's first invalid block */
	__next_free_blkoff(sbi, curseg, 0);

	if (reuse) {
		/* recover the existing on-disk summary entries of the segment */
		sum_page = get_sum_page(sbi, new_segno);
		sum_node = (struct f2fs_summary_block *)page_address(sum_page);
		memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
		f2fs_put_page(sum_page, 1);
	}
}
921
922 static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
923 {
924         struct curseg_info *curseg = CURSEG_I(sbi, type);
925         const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
926
927         if (IS_NODESEG(type) || !has_not_enough_free_secs(sbi, 0))
928                 return v_ops->get_victim(sbi,
929                                 &(curseg)->next_segno, BG_GC, type, SSR);
930
931         /* For data segments, let's do SSR more intensively */
932         for (; type >= CURSEG_HOT_DATA; type--)
933                 if (v_ops->get_victim(sbi, &(curseg)->next_segno,
934                                                 BG_GC, type, SSR))
935                         return 1;
936         return 0;
937 }
938
/*
 * flush out current segment and replace it with new segment
 * This function should be returned with success, otherwise BUG
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	/* NOTE: the order of these conditions is the allocation policy */
	if (force)
		new_curseg(sbi, type, true);
	else if (type == CURSEG_WARM_NODE)
		/* warm node log always grows in LFS manner */
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (need_SSR(sbi) && get_ssr_segment(sbi, type))
		/* short on free sections: reuse a dirty segment via SSR */
		change_curseg(sbi, type, true);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}
961
962 void allocate_new_segments(struct f2fs_sb_info *sbi)
963 {
964         struct curseg_info *curseg;
965         unsigned int old_curseg;
966         int i;
967
968         for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
969                 curseg = CURSEG_I(sbi, i);
970                 old_curseg = curseg->segno;
971                 SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
972                 locate_dirty_segment(sbi, old_curseg);
973         }
974 }
975
/* default allocator: LFS first, falling back to SSR under free-space pressure */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};
979
980 int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
981 {
982         __u64 start = range->start >> sbi->log_blocksize;
983         __u64 end = start + (range->len >> sbi->log_blocksize) - 1;
984         unsigned int start_segno, end_segno;
985         struct cp_control cpc;
986
987         if (range->minlen > SEGMENT_SIZE(sbi) || start >= MAX_BLKADDR(sbi) ||
988                                                 range->len < sbi->blocksize)
989                 return -EINVAL;
990
991         if (end <= MAIN_BLKADDR(sbi))
992                 goto out;
993
994         /* start/end segment number in main_area */
995         start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
996         end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
997                                                 GET_SEGNO(sbi, end);
998         cpc.reason = CP_DISCARD;
999         cpc.trim_start = start_segno;
1000         cpc.trim_end = end_segno;
1001         cpc.trim_minlen = range->minlen >> sbi->log_blocksize;
1002         cpc.trimmed = 0;
1003
1004         /* do checkpoint to issue discard commands safely */
1005         write_checkpoint(sbi, &cpc);
1006 out:
1007         range->len = cpc.trimmed << sbi->log_blocksize;
1008         return 0;
1009 }
1010
1011 static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
1012 {
1013         struct curseg_info *curseg = CURSEG_I(sbi, type);
1014         if (curseg->next_blkoff < sbi->blocks_per_seg)
1015                 return true;
1016         return false;
1017 }
1018
1019 static int __get_segment_type_2(struct page *page, enum page_type p_type)
1020 {
1021         if (p_type == DATA)
1022                 return CURSEG_HOT_DATA;
1023         else
1024                 return CURSEG_HOT_NODE;
1025 }
1026
1027 static int __get_segment_type_4(struct page *page, enum page_type p_type)
1028 {
1029         if (p_type == DATA) {
1030                 struct inode *inode = page->mapping->host;
1031
1032                 if (S_ISDIR(inode->i_mode))
1033                         return CURSEG_HOT_DATA;
1034                 else
1035                         return CURSEG_COLD_DATA;
1036         } else {
1037                 if (IS_DNODE(page) && !is_cold_node(page))
1038                         return CURSEG_HOT_NODE;
1039                 else
1040                         return CURSEG_COLD_NODE;
1041         }
1042 }
1043
1044 static int __get_segment_type_6(struct page *page, enum page_type p_type)
1045 {
1046         if (p_type == DATA) {
1047                 struct inode *inode = page->mapping->host;
1048
1049                 if (S_ISDIR(inode->i_mode))
1050                         return CURSEG_HOT_DATA;
1051                 else if (is_cold_data(page) || file_is_cold(inode))
1052                         return CURSEG_COLD_DATA;
1053                 else
1054                         return CURSEG_WARM_DATA;
1055         } else {
1056                 if (IS_DNODE(page))
1057                         return is_cold_node(page) ? CURSEG_WARM_NODE :
1058                                                 CURSEG_HOT_NODE;
1059                 else
1060                         return CURSEG_COLD_NODE;
1061         }
1062 }
1063
1064 static int __get_segment_type(struct page *page, enum page_type p_type)
1065 {
1066         switch (F2FS_P_SB(page)->active_logs) {
1067         case 2:
1068                 return __get_segment_type_2(page, p_type);
1069         case 4:
1070                 return __get_segment_type_4(page, p_type);
1071         }
1072         /* NR_CURSEG_TYPE(6) logs by default */
1073         f2fs_bug_on(F2FS_P_SB(page),
1074                 F2FS_P_SB(page)->active_logs != NR_CURSEG_TYPE);
1075         return __get_segment_type_6(page, p_type);
1076 }
1077
/*
 * Allocate the next free block in the current segment of log @type,
 * record @sum in the segment's summary block, refresh SIT state, and
 * switch to a new segment when the current one fills up.
 */
void allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;

	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/*
	 * __add_sum_entry should be resided under the curseg_mutex
	 * because, this function updates a summary entry in the
	 * current summary block.
	 */
	__add_sum_entry(sbi, type, sum);

	mutex_lock(&sit_i->sentry_lock);
	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	/* current segment is full: move this log to another segment */
	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	refresh_sit_entry(sbi, old_blkaddr, *new_blkaddr);

	mutex_unlock(&sit_i->sentry_lock);

	/* node pages record the next free block address in their footer */
	if (page && IS_NODESEG(type))
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

	mutex_unlock(&curseg->curseg_mutex);
}
1118
1119 static void do_write_page(struct f2fs_sb_info *sbi, struct page *page,
1120                         block_t old_blkaddr, block_t *new_blkaddr,
1121                         struct f2fs_summary *sum, struct f2fs_io_info *fio)
1122 {
1123         int type = __get_segment_type(page, fio->type);
1124
1125         allocate_data_block(sbi, page, old_blkaddr, new_blkaddr, sum, type);
1126
1127         /* writeout dirty page into bdev */
1128         f2fs_submit_page_mbio(sbi, page, *new_blkaddr, fio);
1129 }
1130
/*
 * Write a meta page with synchronous, high-priority META I/O flags.
 */
void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
{
	struct f2fs_io_info fio = {
		.type = META,
		.rw = WRITE_SYNC | REQ_META | REQ_PRIO
	};

	set_page_writeback(page);
	/* for meta pages the page index is the on-disk block address */
	f2fs_submit_page_mbio(sbi, page, page->index, &fio);
}
1141
/*
 * Write a node page: build a summary entry carrying only the nid
 * (offset and version are zero for node blocks) and go through the
 * common allocate-and-write path.
 */
void write_node_page(struct f2fs_sb_info *sbi, struct page *page,
		struct f2fs_io_info *fio,
		unsigned int nid, block_t old_blkaddr, block_t *new_blkaddr)
{
	struct f2fs_summary sum;
	set_summary(&sum, nid, 0, 0);
	do_write_page(sbi, page, old_blkaddr, new_blkaddr, &sum, fio);
}
1150
/*
 * Write a data page: the summary entry records the owning node (nid),
 * the offset within that node, and the node version, so the block can
 * be relocated or recovered later.
 */
void write_data_page(struct page *page, struct dnode_of_data *dn,
		block_t *new_blkaddr, struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct f2fs_summary sum;
	struct node_info ni;

	/* caller must have reserved a block address for this page */
	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

	do_write_page(sbi, page, dn->data_blkaddr, new_blkaddr, &sum, fio);
}
1164
/*
 * Rewrite a data page in place at its existing block address: no new
 * allocation and no summary update, just resubmit the I/O.
 */
void rewrite_data_page(struct page *page, block_t old_blkaddr,
					struct f2fs_io_info *fio)
{
	f2fs_submit_page_mbio(F2FS_P_SB(page), page, old_blkaddr, fio);
}
1170
/*
 * Recovery path: install @sum for the block at @new_blkaddr.  The curseg
 * of the matching type is temporarily pointed at the segment owning
 * @new_blkaddr so the summary entry lands in the right summary block.
 */
void recover_data_page(struct f2fs_sb_info *sbi,
			struct page *page, struct f2fs_summary *sum,
			block_t old_blkaddr, block_t new_blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	/* an empty non-curseg segment has no meaningful type: choose one */
	if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
		if (old_blkaddr == NULL_ADDR)
			type = CURSEG_COLD_DATA;
		else
			type = CURSEG_WARM_DATA;
	}
	curseg = CURSEG_I(sbi, type);

	/* lock order: curseg_mutex before sentry_lock */
	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	old_cursegno = curseg->segno;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type, true);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	refresh_sit_entry(sbi, old_blkaddr, new_blkaddr);
	/* the segment we moved away from may now be dirty */
	locate_dirty_segment(sbi, old_cursegno);

	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
}
1213
/*
 * Check whether @page is queued in the pending (not yet submitted)
 * merged bio of the write channel for @type.
 */
static inline bool is_merged_page(struct f2fs_sb_info *sbi,
					struct page *page, enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = &sbi->write_io[btype];
	struct bio_vec *bvec;
	int i;

	down_read(&io->io_rwsem);
	if (!io->bio)
		goto out;

	/* scan every bio_vec of the cached bio for this page */
	bio_for_each_segment_all(bvec, io->bio, i) {
		if (page == bvec->bv_page) {
			up_read(&io->io_rwsem);
			return true;
		}
	}

out:
	up_read(&io->io_rwsem);
	return false;
}
1237
/*
 * Wait for @page's writeback to complete.  If the page is still sitting
 * in an unsubmitted merged bio, force that bio out first so the wait
 * cannot stall indefinitely.
 */
void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type)
{
	if (PageWriteback(page)) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		if (is_merged_page(sbi, page, type))
			f2fs_submit_merged_bio(sbi, type, WRITE);
		wait_on_page_writeback(page);
	}
}
1249
/*
 * Restore the three data cursegs and the NAT/SIT journal caches from the
 * compacted summary area written at checkpoint time.  Layout: the first
 * page starts with the NAT journal, then the SIT journal, then summary
 * entries packed back-to-back across as many pages as needed.
 */
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = get_meta_page(sbi, start++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
						SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		/* SSR segments may hold valid entries over the whole segment */
		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			/* keep reading from this page while entries still fit
			 * before the footer area */
			if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			/* page exhausted: advance to the next summary block */
			f2fs_put_page(page, 1);
			page = NULL;

			page = get_meta_page(sbi, start++);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}
1310
/*
 * Restore one curseg of @type from its full (non-compacted) summary
 * block.  The source block address depends on whether the checkpoint was
 * taken at umount and on whether the log is a data or node log.
 * Returns 0 on success or a negative errno from node summary recovery.
 */
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		/* umount checkpoints store all six summaries contiguously */
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			/* otherwise read the segment's own summary block */
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = get_meta_page(sbi, blk_addr);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (is_set_ckpt_flags(ckpt, CP_UMOUNT_FLAG)) {
			/* node summaries carry no offset/version after umount */
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			int err;

			/* rebuild node summaries by scanning the segment */
			err = restore_node_summary(sbi, segno, sum);
			if (err) {
				f2fs_put_page(new, 1);
				return err;
			}
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
	memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
	f2fs_put_page(new, 1);
	return 0;
}
1376
1377 static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
1378 {
1379         int type = CURSEG_HOT_DATA;
1380         int err;
1381
1382         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG)) {
1383                 /* restore for compacted data summary */
1384                 if (read_compacted_summaries(sbi))
1385                         return -EINVAL;
1386                 type = CURSEG_HOT_NODE;
1387         }
1388
1389         for (; type <= CURSEG_COLD_NODE; type++) {
1390                 err = read_normal_summaries(sbi, type);
1391                 if (err)
1392                         return err;
1393         }
1394
1395         return 0;
1396 }
1397
/*
 * Write the data-log summaries in compacted form starting at @blkaddr.
 * Mirrors read_compacted_summaries(): the NAT journal, then the SIT
 * journal, then summary entries packed back-to-back across pages.
 */
static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
						SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		/* SSR segments must dump every entry in the segment */
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			/* previous page was flushed: grab the next one */
			if (!page) {
				page = grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			/* keep packing while entries fit before the footer */
			if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	/* flush the final, partially filled page */
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}
1454
1455 static void write_normal_summaries(struct f2fs_sb_info *sbi,
1456                                         block_t blkaddr, int type)
1457 {
1458         int i, end;
1459         if (IS_DATASEG(type))
1460                 end = type + NR_CURSEG_DATA_TYPE;
1461         else
1462                 end = type + NR_CURSEG_NODE_TYPE;
1463
1464         for (i = type; i < end; i++) {
1465                 struct curseg_info *sum = CURSEG_I(sbi, i);
1466                 mutex_lock(&sum->curseg_mutex);
1467                 write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
1468                 mutex_unlock(&sum->curseg_mutex);
1469         }
1470 }
1471
1472 void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1473 {
1474         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_COMPACT_SUM_FLAG))
1475                 write_compacted_summaries(sbi, start_blk);
1476         else
1477                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
1478 }
1479
1480 void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
1481 {
1482         if (is_set_ckpt_flags(F2FS_CKPT(sbi), CP_UMOUNT_FLAG))
1483                 write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
1484 }
1485
1486 int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
1487                                         unsigned int val, int alloc)
1488 {
1489         int i;
1490
1491         if (type == NAT_JOURNAL) {
1492                 for (i = 0; i < nats_in_cursum(sum); i++) {
1493                         if (le32_to_cpu(nid_in_journal(sum, i)) == val)
1494                                 return i;
1495                 }
1496                 if (alloc && nats_in_cursum(sum) < NAT_JOURNAL_ENTRIES)
1497                         return update_nats_in_cursum(sum, 1);
1498         } else if (type == SIT_JOURNAL) {
1499                 for (i = 0; i < sits_in_cursum(sum); i++)
1500                         if (le32_to_cpu(segno_in_journal(sum, i)) == val)
1501                                 return i;
1502                 if (alloc && sits_in_cursum(sum) < SIT_JOURNAL_ENTRIES)
1503                         return update_sits_in_cursum(sum, 1);
1504         }
1505         return -1;
1506 }
1507
1508 static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
1509                                         unsigned int segno)
1510 {
1511         struct sit_info *sit_i = SIT_I(sbi);
1512         unsigned int offset = SIT_BLOCK_OFFSET(segno);
1513         block_t blk_addr = sit_i->sit_base_addr + offset;
1514
1515         check_seg_range(sbi, segno);
1516
1517         /* calculate sit block address */
1518         if (f2fs_test_bit(offset, sit_i->sit_bitmap))
1519                 blk_addr += sit_i->sit_blocks;
1520
1521         return get_meta_page(sbi, blk_addr);
1522 }
1523
/*
 * Prepare the inactive copy of the SIT block covering @start for update:
 * copy the current block's contents into it, mark it dirty, and flip the
 * bitmap so the new copy becomes the active one at the next checkpoint.
 */
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *src_page, *dst_page;
	pgoff_t src_off, dst_off;
	void *src_addr, *dst_addr;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	/* get current sit block page without lock */
	src_page = get_meta_page(sbi, src_off);
	dst_page = grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);

	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	/* flip the bitmap: the copy we just wrote is now the current one */
	set_to_next_sit(sit_i, start);

	return dst_page;
}
1551
/*
 * Allocate and initialize an empty sit_entry_set.
 * NOTE(review): the result is used without a NULL check — assumes
 * f2fs_kmem_cache_alloc cannot fail (e.g. retries internally); verify.
 */
static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_ATOMIC);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}
1561
/* Unlink @ses from its list and return it to the slab cache. */
static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}
1567
/*
 * Keep @head sorted by ascending entry_cnt after @ses's count increased:
 * move @ses forward past any following sets with smaller counts.
 */
static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	/* already at the tail: nothing to reorder */
	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	/* insert @ses just before the first set with a count >= ours */
	list_move_tail(&ses->set_list, &next->set_list);
}
1582
/*
 * Account one dirty SIT entry for @segno: bump the count of the entry
 * set covering its SIT block (keeping the list sorted), or create a new
 * set if this SIT block has none yet.
 */
static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	/* first dirty entry in this SIT block: start a new set */
	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}
1602
/*
 * Account every segment marked in the dirty-sentries bitmap into the
 * per-SIT-block entry sets.
 */
static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}
1613
/*
 * Drain the SIT journal held in the cold data summary block: mark each
 * journaled segno dirty and, where needed, account it in the entry sets,
 * then empty the journal so its slots can be reused.
 */
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int i;

	for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(sum, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		/* account in the entry sets when __mark_sit_entry_dirty did
		 * not — NOTE(review): depends on its return semantics; verify */
		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	/* reset the journal entry count to zero */
	update_sits_in_cursum(sum, -sits_in_cursum(sum));
}
1632
/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 *
 * Dirty SIT entries are flushed either into the SIT journal kept in the
 * cold data summary block (cheap, no extra SIT page writes) or, once the
 * journal space is exhausted, into the SIT pages themselves.
 */
void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	/* the SIT journal lives in the current cold data summary block */
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	mutex_lock(&curseg->curseg_mutex);
	mutex_lock(&sit_i->sentry_lock);

	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there are no enough space in journal to store dirty sit
	 * entries, remove all entries from journal and add and account
	 * them in sit entry set.
	 */
	if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		/* one set covers at most one on-disk SIT block */
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		/*
		 * once one set no longer fits in the journal, fall back to
		 * SIT pages for this and all remaining sets
		 */
		if (to_journal &&
			!__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (!to_journal) {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);

			/* add discard candidates */
			if (SM_I(sbi)->nr_discards < SM_I(sbi)->max_discards) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc);
			}

			if (to_journal) {
				/* alloc=1: create the journal slot if absent */
				offset = lookup_journal_in_cursum(sum,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(sum, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
						&sit_in_journal(sum, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
			}

			/* entry is now clean; drop it from all accounting */
			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (!to_journal)
			f2fs_put_page(page, 1);

		/* every entry of this set must have been flushed above */
		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	/* on a discard checkpoint, scan the remaining requested trim range */
	if (cpc->reason == CP_DISCARD) {
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc);
	}
	mutex_unlock(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);

	set_prefree_as_free_segments(sbi);
}
1740
1741 static int build_sit_info(struct f2fs_sb_info *sbi)
1742 {
1743         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
1744         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1745         struct sit_info *sit_i;
1746         unsigned int sit_segs, start;
1747         char *src_bitmap, *dst_bitmap;
1748         unsigned int bitmap_size;
1749
1750         /* allocate memory for SIT information */
1751         sit_i = kzalloc(sizeof(struct sit_info), GFP_KERNEL);
1752         if (!sit_i)
1753                 return -ENOMEM;
1754
1755         SM_I(sbi)->sit_info = sit_i;
1756
1757         sit_i->sentries = vzalloc(MAIN_SEGS(sbi) * sizeof(struct seg_entry));
1758         if (!sit_i->sentries)
1759                 return -ENOMEM;
1760
1761         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1762         sit_i->dirty_sentries_bitmap = kzalloc(bitmap_size, GFP_KERNEL);
1763         if (!sit_i->dirty_sentries_bitmap)
1764                 return -ENOMEM;
1765
1766         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1767                 sit_i->sentries[start].cur_valid_map
1768                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1769                 sit_i->sentries[start].ckpt_valid_map
1770                         = kzalloc(SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
1771                 if (!sit_i->sentries[start].cur_valid_map
1772                                 || !sit_i->sentries[start].ckpt_valid_map)
1773                         return -ENOMEM;
1774         }
1775
1776         if (sbi->segs_per_sec > 1) {
1777                 sit_i->sec_entries = vzalloc(MAIN_SECS(sbi) *
1778                                         sizeof(struct sec_entry));
1779                 if (!sit_i->sec_entries)
1780                         return -ENOMEM;
1781         }
1782
1783         /* get information related with SIT */
1784         sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;
1785
1786         /* setup SIT bitmap from ckeckpoint pack */
1787         bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
1788         src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);
1789
1790         dst_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
1791         if (!dst_bitmap)
1792                 return -ENOMEM;
1793
1794         /* init SIT information */
1795         sit_i->s_ops = &default_salloc_ops;
1796
1797         sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
1798         sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
1799         sit_i->written_valid_blocks = le64_to_cpu(ckpt->valid_block_count);
1800         sit_i->sit_bitmap = dst_bitmap;
1801         sit_i->bitmap_size = bitmap_size;
1802         sit_i->dirty_sentries = 0;
1803         sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
1804         sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
1805         sit_i->mounted_time = CURRENT_TIME_SEC.tv_sec;
1806         mutex_init(&sit_i->sentry_lock);
1807         return 0;
1808 }
1809
1810 static int build_free_segmap(struct f2fs_sb_info *sbi)
1811 {
1812         struct free_segmap_info *free_i;
1813         unsigned int bitmap_size, sec_bitmap_size;
1814
1815         /* allocate memory for free segmap information */
1816         free_i = kzalloc(sizeof(struct free_segmap_info), GFP_KERNEL);
1817         if (!free_i)
1818                 return -ENOMEM;
1819
1820         SM_I(sbi)->free_info = free_i;
1821
1822         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1823         free_i->free_segmap = kmalloc(bitmap_size, GFP_KERNEL);
1824         if (!free_i->free_segmap)
1825                 return -ENOMEM;
1826
1827         sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1828         free_i->free_secmap = kmalloc(sec_bitmap_size, GFP_KERNEL);
1829         if (!free_i->free_secmap)
1830                 return -ENOMEM;
1831
1832         /* set all segments as dirty temporarily */
1833         memset(free_i->free_segmap, 0xff, bitmap_size);
1834         memset(free_i->free_secmap, 0xff, sec_bitmap_size);
1835
1836         /* init free segmap information */
1837         free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
1838         free_i->free_segments = 0;
1839         free_i->free_sections = 0;
1840         rwlock_init(&free_i->segmap_lock);
1841         return 0;
1842 }
1843
1844 static int build_curseg(struct f2fs_sb_info *sbi)
1845 {
1846         struct curseg_info *array;
1847         int i;
1848
1849         array = kcalloc(NR_CURSEG_TYPE, sizeof(*array), GFP_KERNEL);
1850         if (!array)
1851                 return -ENOMEM;
1852
1853         SM_I(sbi)->curseg_array = array;
1854
1855         for (i = 0; i < NR_CURSEG_TYPE; i++) {
1856                 mutex_init(&array[i].curseg_mutex);
1857                 array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
1858                 if (!array[i].sum_blk)
1859                         return -ENOMEM;
1860                 array[i].segno = NULL_SEGNO;
1861                 array[i].next_blkoff = 0;
1862         }
1863         return restore_curseg_summaries(sbi);
1864 }
1865
/*
 * Populate the in-memory seg_entry array from the on-disk SIT, preferring
 * the (newer) copy cached in the SIT journal of the cold data summary
 * block over the SIT page contents. SIT pages are read ahead in batches
 * of MAX_BIO_BLOCKS.
 */
static void build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_summary_block *sum = curseg->sum_blk;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int nrpages = MAX_BIO_BLOCKS(sbi);

	do {
		/* prefetch the next window of SIT blocks */
		readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT);

		/* segment range covered by the blocks just read ahead */
		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct seg_entry *se = &sit_i->sentries[start];
			struct f2fs_sit_block *sit_blk;
			struct f2fs_sit_entry sit;
			struct page *page;

			/* first try the SIT journal under the curseg lock */
			mutex_lock(&curseg->curseg_mutex);
			for (i = 0; i < sits_in_cursum(sum); i++) {
				if (le32_to_cpu(segno_in_journal(sum, i))
								== start) {
					sit = sit_in_journal(sum, i);
					mutex_unlock(&curseg->curseg_mutex);
					goto got_it;
				}
			}
			mutex_unlock(&curseg->curseg_mutex);

			/* not journaled: fetch the raw entry from its page */
			page = get_current_sit_page(sbi, start);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);
got_it:
			check_block_count(sbi, start, &sit);
			seg_info_from_raw_sit(se, &sit);
			/* accumulate per-section totals when sections span
			 * multiple segments */
			if (sbi->segs_per_sec > 1) {
				struct sec_entry *e = get_sec_entry(sbi, start);
				e->valid_blocks += se->valid_blocks;
			}
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);
}
1914
1915 static void init_free_segmap(struct f2fs_sb_info *sbi)
1916 {
1917         unsigned int start;
1918         int type;
1919
1920         for (start = 0; start < MAIN_SEGS(sbi); start++) {
1921                 struct seg_entry *sentry = get_seg_entry(sbi, start);
1922                 if (!sentry->valid_blocks)
1923                         __set_free(sbi, start);
1924         }
1925
1926         /* set use the current segments */
1927         for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
1928                 struct curseg_info *curseg_t = CURSEG_I(sbi, type);
1929                 __set_test_and_inuse(sbi, curseg_t->segno);
1930         }
1931 }
1932
1933 static void init_dirty_segmap(struct f2fs_sb_info *sbi)
1934 {
1935         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1936         struct free_segmap_info *free_i = FREE_I(sbi);
1937         unsigned int segno = 0, offset = 0;
1938         unsigned short valid_blocks;
1939
1940         while (1) {
1941                 /* find dirty segment based on free segmap */
1942                 segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
1943                 if (segno >= MAIN_SEGS(sbi))
1944                         break;
1945                 offset = segno + 1;
1946                 valid_blocks = get_valid_blocks(sbi, segno, 0);
1947                 if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
1948                         continue;
1949                 if (valid_blocks > sbi->blocks_per_seg) {
1950                         f2fs_bug_on(sbi, 1);
1951                         continue;
1952                 }
1953                 mutex_lock(&dirty_i->seglist_lock);
1954                 __locate_dirty_segment(sbi, segno, DIRTY);
1955                 mutex_unlock(&dirty_i->seglist_lock);
1956         }
1957 }
1958
1959 static int init_victim_secmap(struct f2fs_sb_info *sbi)
1960 {
1961         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1962         unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
1963
1964         dirty_i->victim_secmap = kzalloc(bitmap_size, GFP_KERNEL);
1965         if (!dirty_i->victim_secmap)
1966                 return -ENOMEM;
1967         return 0;
1968 }
1969
1970 static int build_dirty_segmap(struct f2fs_sb_info *sbi)
1971 {
1972         struct dirty_seglist_info *dirty_i;
1973         unsigned int bitmap_size, i;
1974
1975         /* allocate memory for dirty segments list information */
1976         dirty_i = kzalloc(sizeof(struct dirty_seglist_info), GFP_KERNEL);
1977         if (!dirty_i)
1978                 return -ENOMEM;
1979
1980         SM_I(sbi)->dirty_info = dirty_i;
1981         mutex_init(&dirty_i->seglist_lock);
1982
1983         bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
1984
1985         for (i = 0; i < NR_DIRTY_TYPE; i++) {
1986                 dirty_i->dirty_segmap[i] = kzalloc(bitmap_size, GFP_KERNEL);
1987                 if (!dirty_i->dirty_segmap[i])
1988                         return -ENOMEM;
1989         }
1990
1991         init_dirty_segmap(sbi);
1992         return init_victim_secmap(sbi);
1993 }
1994
1995 /*
1996  * Update min, max modified time for cost-benefit GC algorithm
1997  */
1998 static void init_min_max_mtime(struct f2fs_sb_info *sbi)
1999 {
2000         struct sit_info *sit_i = SIT_I(sbi);
2001         unsigned int segno;
2002
2003         mutex_lock(&sit_i->sentry_lock);
2004
2005         sit_i->min_mtime = LLONG_MAX;
2006
2007         for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
2008                 unsigned int i;
2009                 unsigned long long mtime = 0;
2010
2011                 for (i = 0; i < sbi->segs_per_sec; i++)
2012                         mtime += get_seg_entry(sbi, segno + i)->mtime;
2013
2014                 mtime = div_u64(mtime, sbi->segs_per_sec);
2015
2016                 if (sit_i->min_mtime > mtime)
2017                         sit_i->min_mtime = mtime;
2018         }
2019         sit_i->max_mtime = get_mtime(sbi);
2020         mutex_unlock(&sit_i->sentry_lock);
2021 }
2022
2023 int build_segment_manager(struct f2fs_sb_info *sbi)
2024 {
2025         struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
2026         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
2027         struct f2fs_sm_info *sm_info;
2028         int err;
2029
2030         sm_info = kzalloc(sizeof(struct f2fs_sm_info), GFP_KERNEL);
2031         if (!sm_info)
2032                 return -ENOMEM;
2033
2034         /* init sm info */
2035         sbi->sm_info = sm_info;
2036         sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
2037         sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
2038         sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
2039         sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
2040         sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
2041         sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
2042         sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
2043         sm_info->rec_prefree_segments = sm_info->main_segments *
2044                                         DEF_RECLAIM_PREFREE_SEGMENTS / 100;
2045         sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
2046         sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
2047         sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
2048
2049         INIT_LIST_HEAD(&sm_info->discard_list);
2050         sm_info->nr_discards = 0;
2051         sm_info->max_discards = 0;
2052
2053         INIT_LIST_HEAD(&sm_info->sit_entry_set);
2054
2055         if (test_opt(sbi, FLUSH_MERGE) && !f2fs_readonly(sbi->sb)) {
2056                 err = create_flush_cmd_control(sbi);
2057                 if (err)
2058                         return err;
2059         }
2060
2061         err = build_sit_info(sbi);
2062         if (err)
2063                 return err;
2064         err = build_free_segmap(sbi);
2065         if (err)
2066                 return err;
2067         err = build_curseg(sbi);
2068         if (err)
2069                 return err;
2070
2071         /* reinit free segmap based on SIT */
2072         build_sit_entries(sbi);
2073
2074         init_free_segmap(sbi);
2075         err = build_dirty_segmap(sbi);
2076         if (err)
2077                 return err;
2078
2079         init_min_max_mtime(sbi);
2080         return 0;
2081 }
2082
2083 static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
2084                 enum dirty_type dirty_type)
2085 {
2086         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2087
2088         mutex_lock(&dirty_i->seglist_lock);
2089         kfree(dirty_i->dirty_segmap[dirty_type]);
2090         dirty_i->nr_dirty[dirty_type] = 0;
2091         mutex_unlock(&dirty_i->seglist_lock);
2092 }
2093
2094 static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
2095 {
2096         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2097         kfree(dirty_i->victim_secmap);
2098 }
2099
2100 static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
2101 {
2102         struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
2103         int i;
2104
2105         if (!dirty_i)
2106                 return;
2107
2108         /* discard pre-free/dirty segments list */
2109         for (i = 0; i < NR_DIRTY_TYPE; i++)
2110                 discard_dirty_segmap(sbi, i);
2111
2112         destroy_victim_secmap(sbi);
2113         SM_I(sbi)->dirty_info = NULL;
2114         kfree(dirty_i);
2115 }
2116
2117 static void destroy_curseg(struct f2fs_sb_info *sbi)
2118 {
2119         struct curseg_info *array = SM_I(sbi)->curseg_array;
2120         int i;
2121
2122         if (!array)
2123                 return;
2124         SM_I(sbi)->curseg_array = NULL;
2125         for (i = 0; i < NR_CURSEG_TYPE; i++)
2126                 kfree(array[i].sum_blk);
2127         kfree(array);
2128 }
2129
2130 static void destroy_free_segmap(struct f2fs_sb_info *sbi)
2131 {
2132         struct free_segmap_info *free_i = SM_I(sbi)->free_info;
2133         if (!free_i)
2134                 return;
2135         SM_I(sbi)->free_info = NULL;
2136         kfree(free_i->free_segmap);
2137         kfree(free_i->free_secmap);
2138         kfree(free_i);
2139 }
2140
2141 static void destroy_sit_info(struct f2fs_sb_info *sbi)
2142 {
2143         struct sit_info *sit_i = SIT_I(sbi);
2144         unsigned int start;
2145
2146         if (!sit_i)
2147                 return;
2148
2149         if (sit_i->sentries) {
2150                 for (start = 0; start < MAIN_SEGS(sbi); start++) {
2151                         kfree(sit_i->sentries[start].cur_valid_map);
2152                         kfree(sit_i->sentries[start].ckpt_valid_map);
2153                 }
2154         }
2155         vfree(sit_i->sentries);
2156         vfree(sit_i->sec_entries);
2157         kfree(sit_i->dirty_sentries_bitmap);
2158
2159         SM_I(sbi)->sit_info = NULL;
2160         kfree(sit_i->sit_bitmap);
2161         kfree(sit_i);
2162 }
2163
2164 void destroy_segment_manager(struct f2fs_sb_info *sbi)
2165 {
2166         struct f2fs_sm_info *sm_info = SM_I(sbi);
2167
2168         if (!sm_info)
2169                 return;
2170         destroy_flush_cmd_control(sbi);
2171         destroy_dirty_segmap(sbi);
2172         destroy_curseg(sbi);
2173         destroy_free_segmap(sbi);
2174         destroy_sit_info(sbi);
2175         sbi->sm_info = NULL;
2176         kfree(sm_info);
2177 }
2178
2179 int __init create_segment_manager_caches(void)
2180 {
2181         discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
2182                         sizeof(struct discard_entry));
2183         if (!discard_entry_slab)
2184                 goto fail;
2185
2186         sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
2187                         sizeof(struct nat_entry_set));
2188         if (!sit_entry_set_slab)
2189                 goto destory_discard_entry;
2190         return 0;
2191
2192 destory_discard_entry:
2193         kmem_cache_destroy(discard_entry_slab);
2194 fail:
2195         return -ENOMEM;
2196 }
2197
2198 void destroy_segment_manager_caches(void)
2199 {
2200         kmem_cache_destroy(sit_entry_set_slab);
2201         kmem_cache_destroy(discard_entry_slab);
2202 }