f2fs: use meta_inode cache to improve roll-forward speed
1 /*
2  * fs/f2fs/checkpoint.c
3  *
4  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5  *             http://www.samsung.com/
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/fs.h>
12 #include <linux/bio.h>
13 #include <linux/mpage.h>
14 #include <linux/writeback.h>
15 #include <linux/blkdev.h>
16 #include <linux/f2fs_fs.h>
17 #include <linux/pagevec.h>
18 #include <linux/swap.h>
19
20 #include "f2fs.h"
21 #include "node.h"
22 #include "segment.h"
23 #include <trace/events/f2fs.h>
24
25 static struct kmem_cache *ino_entry_slab;
26 static struct kmem_cache *inode_entry_slab;
27
28 /*
29  * We guarantee no failure on the returned page.
30  */
31 struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
32 {
33         struct address_space *mapping = META_MAPPING(sbi);
34         struct page *page = NULL;
35 repeat:
36         page = grab_cache_page(mapping, index);
37         if (!page) {
38                 cond_resched();
39                 goto repeat;
40         }
41         f2fs_wait_on_page_writeback(page, META);
42         SetPageUptodate(page);
43         return page;
44 }
45
46 /*
47  * We guarantee no failure on the returned page.
48  */
49 struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
50 {
51         struct address_space *mapping = META_MAPPING(sbi);
52         struct page *page;
53 repeat:
54         page = grab_cache_page(mapping, index);
55         if (!page) {
56                 cond_resched();
57                 goto repeat;
58         }
59         if (PageUptodate(page))
60                 goto out;
61
62         if (f2fs_submit_page_bio(sbi, page, index,
63                                 READ_SYNC | REQ_META | REQ_PRIO))
64                 goto repeat;
65
66         lock_page(page);
67         if (unlikely(page->mapping != mapping)) {
68                 f2fs_put_page(page, 1);
69                 goto repeat;
70         }
71 out:
72         mark_page_accessed(page);
73         return page;
74 }
75
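/*
 * Used on the roll-forward recovery path: if the requested block is not
 * already up-to-date in the meta inode's page cache, read ahead a batch
 * of POR meta pages first, so recovery does not fetch them one by one.
 */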
76 struct page *get_meta_page_ra(struct f2fs_sb_info *sbi, pgoff_t index)
77 {
78         bool readahead = false;
79         struct page *page;
80
81         page = find_get_page(META_MAPPING(sbi), index);
82         if (!page || !PageUptodate(page))
83                 readahead = true;
84         f2fs_put_page(page, 0);
85
86         if (readahead)
87                 ra_meta_pages(sbi, index,
88                                 MAX_BIO_BLOCKS(max_hw_blocks(sbi)), META_POR);
89         return get_meta_page(sbi, index);
90 }
91
92 static inline block_t get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
93 {
94         switch (type) {
95         case META_NAT:
96                 return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
97         case META_SIT:
98                 return SIT_BLK_CNT(sbi);
99         case META_SSA:
100         case META_CP:
101                 return 0;
102         case META_POR:
103                 return SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi);
104         default:
105                 BUG();
106         }
107 }
108
109 /*
110  * Readahead CP/NAT/SIT/SSA/POR pages
111  */
112 int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages, int type)
113 {
114         block_t prev_blk_addr = 0;
115         struct page *page;
116         block_t blkno = start;
117         block_t max_blks = get_max_meta_blks(sbi, type);
118         block_t min_blks = SM_I(sbi)->seg0_blkaddr;
119
120         struct f2fs_io_info fio = {
121                 .type = META,
122                 .rw = READ_SYNC | REQ_META | REQ_PRIO
123         };
124
125         for (; nrpages-- > 0; blkno++) {
126                 block_t blk_addr;
127
128                 switch (type) {
129                 case META_NAT:
130                         /* get nat block addr */
131                         if (unlikely(blkno >= max_blks))
132                                 blkno = 0;
133                         blk_addr = current_nat_addr(sbi,
134                                         blkno * NAT_ENTRY_PER_BLOCK);
135                         break;
136                 case META_SIT:
137                         /* get sit block addr */
138                         if (unlikely(blkno >= max_blks))
139                                 goto out;
140                         blk_addr = current_sit_addr(sbi,
141                                         blkno * SIT_ENTRY_PER_BLOCK);
142                         if (blkno != start && prev_blk_addr + 1 != blk_addr)
143                                 goto out;
144                         prev_blk_addr = blk_addr;
145                         break;
146                 case META_SSA:
147                 case META_CP:
148                 case META_POR:
149                         if (unlikely(blkno >= max_blks))
150                                 goto out;
151                         if (unlikely(blkno < min_blks))
152                                 goto out;
153                         blk_addr = blkno;
154                         break;
155                 default:
156                         BUG();
157                 }
158
159                 page = grab_cache_page(META_MAPPING(sbi), blk_addr);
160                 if (!page)
161                         continue;
162                 if (PageUptodate(page)) {
163                         mark_page_accessed(page);
164                         f2fs_put_page(page, 1);
165                         continue;
166                 }
167
168                 f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
169                 mark_page_accessed(page);
170                 f2fs_put_page(page, 0);
171         }
172 out:
173         f2fs_submit_merged_bio(sbi, META, READ);
174         return blkno - start;
175 }
176
177 static int f2fs_write_meta_page(struct page *page,
178                                 struct writeback_control *wbc)
179 {
180         struct f2fs_sb_info *sbi = F2FS_P_SB(page);
181
182         trace_f2fs_writepage(page, META);
183
184         if (unlikely(sbi->por_doing))
185                 goto redirty_out;
186         if (wbc->for_reclaim)
187                 goto redirty_out;
188         if (unlikely(f2fs_cp_error(sbi)))
189                 goto redirty_out;
190
191         f2fs_wait_on_page_writeback(page, META);
192         write_meta_page(sbi, page);
193         dec_page_count(sbi, F2FS_DIRTY_META);
194         unlock_page(page);
195         return 0;
196
197 redirty_out:
198         redirty_page_for_writepage(wbc, page);
199         return AOP_WRITEPAGE_ACTIVATE;
200 }
201
202 static int f2fs_write_meta_pages(struct address_space *mapping,
203                                 struct writeback_control *wbc)
204 {
205         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
206         long diff, written;
207
208         trace_f2fs_writepages(mapping->host, wbc, META);
209
210         /* collect a number of dirty meta pages and write them together */
211         if (wbc->for_kupdate ||
212                 get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
213                 goto skip_write;
214
215         /* if mounting has failed, skip writing node pages */
216         mutex_lock(&sbi->cp_mutex);
217         diff = nr_pages_to_write(sbi, META, wbc);
218         written = sync_meta_pages(sbi, META, wbc->nr_to_write);
219         mutex_unlock(&sbi->cp_mutex);
220         wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
221         return 0;
222
223 skip_write:
224         wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
225         return 0;
226 }
227
228 long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
229                                                 long nr_to_write)
230 {
231         struct address_space *mapping = META_MAPPING(sbi);
232         pgoff_t index = 0, end = LONG_MAX;
233         struct pagevec pvec;
234         long nwritten = 0;
235         struct writeback_control wbc = {
236                 .for_reclaim = 0,
237         };
238
239         pagevec_init(&pvec, 0);
240
241         while (index <= end) {
242                 int i, nr_pages;
243                 nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
244                                 PAGECACHE_TAG_DIRTY,
245                                 min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
246                 if (unlikely(nr_pages == 0))
247                         break;
248
249                 for (i = 0; i < nr_pages; i++) {
250                         struct page *page = pvec.pages[i];
251
252                         lock_page(page);
253
254                         if (unlikely(page->mapping != mapping)) {
255 continue_unlock:
256                                 unlock_page(page);
257                                 continue;
258                         }
259                         if (!PageDirty(page)) {
260                                 /* someone wrote it for us */
261                                 goto continue_unlock;
262                         }
263
264                         if (!clear_page_dirty_for_io(page))
265                                 goto continue_unlock;
266
267                         if (f2fs_write_meta_page(page, &wbc)) {
268                                 unlock_page(page);
269                                 break;
270                         }
271                         nwritten++;
272                         if (unlikely(nwritten >= nr_to_write))
273                                 break;
274                 }
275                 pagevec_release(&pvec);
276                 cond_resched();
277         }
278
279         if (nwritten)
280                 f2fs_submit_merged_bio(sbi, type, WRITE);
281
282         return nwritten;
283 }
284
285 static int f2fs_set_meta_page_dirty(struct page *page)
286 {
287         trace_f2fs_set_page_dirty(page, META);
288
289         SetPageUptodate(page);
290         if (!PageDirty(page)) {
291                 __set_page_dirty_nobuffers(page);
292                 inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
293                 return 1;
294         }
295         return 0;
296 }
297
298 const struct address_space_operations f2fs_meta_aops = {
299         .writepage      = f2fs_write_meta_page,
300         .writepages     = f2fs_write_meta_pages,
301         .set_page_dirty = f2fs_set_meta_page_dirty,
302 };
303
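/*
 * Per-type ino caches (ORPHAN_INO, APPEND_INO, UPDATE_INO): each entry
 * lives in a radix tree for fast lookup and on a list for traversal,
 * both protected by the matching ino_lock[type].
 */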
304 static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
305 {
306         struct ino_entry *e;
307 retry:
308         spin_lock(&sbi->ino_lock[type]);
309
310         e = radix_tree_lookup(&sbi->ino_root[type], ino);
311         if (!e) {
312                 e = kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
313                 if (!e) {
314                         spin_unlock(&sbi->ino_lock[type]);
315                         goto retry;
316                 }
317                 if (radix_tree_insert(&sbi->ino_root[type], ino, e)) {
318                         spin_unlock(&sbi->ino_lock[type]);
319                         kmem_cache_free(ino_entry_slab, e);
320                         goto retry;
321                 }
322                 memset(e, 0, sizeof(struct ino_entry));
323                 e->ino = ino;
324
325                 list_add_tail(&e->list, &sbi->ino_list[type]);
326         }
327         spin_unlock(&sbi->ino_lock[type]);
328 }
329
330 static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
331 {
332         struct ino_entry *e;
333
334         spin_lock(&sbi->ino_lock[type]);
335         e = radix_tree_lookup(&sbi->ino_root[type], ino);
336         if (e) {
337                 list_del(&e->list);
338                 radix_tree_delete(&sbi->ino_root[type], ino);
339                 if (type == ORPHAN_INO)
340                         sbi->n_orphans--;
341                 spin_unlock(&sbi->ino_lock[type]);
342                 kmem_cache_free(ino_entry_slab, e);
343                 return;
344         }
345         spin_unlock(&sbi->ino_lock[type]);
346 }
347
348 void add_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
349 {
350         /* add new dirty ino entry into list */
351         __add_ino_entry(sbi, ino, type);
352 }
353
354 void remove_dirty_inode(struct f2fs_sb_info *sbi, nid_t ino, int type)
355 {
356         /* remove dirty ino entry from list */
357         __remove_ino_entry(sbi, ino, type);
358 }
359
360 /* mode should be APPEND_INO or UPDATE_INO */
361 bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
362 {
363         struct ino_entry *e;
364         spin_lock(&sbi->ino_lock[mode]);
365         e = radix_tree_lookup(&sbi->ino_root[mode], ino);
366         spin_unlock(&sbi->ino_lock[mode]);
367         return e ? true : false;
368 }
369
370 void release_dirty_inode(struct f2fs_sb_info *sbi)
371 {
372         struct ino_entry *e, *tmp;
373         int i;
374
375         for (i = APPEND_INO; i <= UPDATE_INO; i++) {
376                 spin_lock(&sbi->ino_lock[i]);
377                 list_for_each_entry_safe(e, tmp, &sbi->ino_list[i], list) {
378                         list_del(&e->list);
379                         radix_tree_delete(&sbi->ino_root[i], e->ino);
380                         kmem_cache_free(ino_entry_slab, e);
381                 }
382                 spin_unlock(&sbi->ino_lock[i]);
383         }
384 }
385
386 int acquire_orphan_inode(struct f2fs_sb_info *sbi)
387 {
388         int err = 0;
389
390         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
391         if (unlikely(sbi->n_orphans >= sbi->max_orphans))
392                 err = -ENOSPC;
393         else
394                 sbi->n_orphans++;
395         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
396
397         return err;
398 }
399
400 void release_orphan_inode(struct f2fs_sb_info *sbi)
401 {
402         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
403         if (sbi->n_orphans == 0) {
404                 f2fs_msg(sbi->sb, KERN_ERR, "releasing "
405                         "unacquired orphan inode");
406                 f2fs_handle_error(sbi);
407         } else
408                 sbi->n_orphans--;
409         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
410 }
411
412 void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
413 {
414         /* add new orphan ino entry into list */
415         __add_ino_entry(sbi, ino, ORPHAN_INO);
416 }
417
418 void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
419 {
420         /* remove orphan entry from orphan list */
421         __remove_ino_entry(sbi, ino, ORPHAN_INO);
422 }
423
424 static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
425 {
426         struct inode *inode = f2fs_iget(sbi->sb, ino);
427         if (IS_ERR(inode)) {
428                 f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d",
429                                 ino);
430                 f2fs_handle_error(sbi);
431                 return;
432         }
433         clear_nlink(inode);
434
435         /* truncate all the data during iput */
436         iput(inode);
437 }
438
439 void recover_orphan_inodes(struct f2fs_sb_info *sbi)
440 {
441         block_t start_blk, orphan_blkaddr, i, j;
442
443         if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
444                 return;
445
446         sbi->por_doing = true;
447
448         start_blk = __start_cp_addr(sbi) + 1 +
449                 le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
450         orphan_blkaddr = __start_sum_addr(sbi) - 1;
451
452         ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);
453
454         for (i = 0; i < orphan_blkaddr; i++) {
455                 struct page *page = get_meta_page(sbi, start_blk + i);
456                 struct f2fs_orphan_block *orphan_blk;
457
458                 orphan_blk = (struct f2fs_orphan_block *)page_address(page);
459                 for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
460                         nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
461                         recover_orphan_inode(sbi, ino);
462                 }
463                 f2fs_put_page(page, 1);
464         }
465         /* clear Orphan Flag */
466         clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
467         sbi->por_doing = false;
468         return;
469 }
470
471 static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
472 {
473         struct list_head *head;
474         struct f2fs_orphan_block *orphan_blk = NULL;
475         unsigned int nentries = 0;
476         unsigned short index;
477         unsigned short orphan_blocks =
478                         (unsigned short)GET_ORPHAN_BLOCKS(sbi->n_orphans);
479         struct page *page = NULL;
480         struct ino_entry *orphan = NULL;
481
482         for (index = 0; index < orphan_blocks; index++)
483                 grab_meta_page(sbi, start_blk + index);
484
485         index = 1;
486         spin_lock(&sbi->ino_lock[ORPHAN_INO]);
487         head = &sbi->ino_list[ORPHAN_INO];
488
489         /* loop over each orphan inode entry and write it into the journal block */
490         list_for_each_entry(orphan, head, list) {
491                 if (!page) {
492                         page = find_get_page(META_MAPPING(sbi), start_blk++);
493                         f2fs_bug_on(sbi, !page);
494                         orphan_blk =
495                                 (struct f2fs_orphan_block *)page_address(page);
496                         memset(orphan_blk, 0, sizeof(*orphan_blk));
497                         f2fs_put_page(page, 0);
498                 }
499
500                 orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
501
502                 if (nentries == F2FS_ORPHANS_PER_BLOCK) {
503                         /*
504                          * once an orphan block is full of 1020 entries,
505                          * we need to flush the current orphan block
506                          * and bring another one into memory
507                          */
508                         orphan_blk->blk_addr = cpu_to_le16(index);
509                         orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
510                         orphan_blk->entry_count = cpu_to_le32(nentries);
511                         set_page_dirty(page);
512                         f2fs_put_page(page, 1);
513                         index++;
514                         nentries = 0;
515                         page = NULL;
516                 }
517         }
518
519         if (page) {
520                 orphan_blk->blk_addr = cpu_to_le16(index);
521                 orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
522                 orphan_blk->entry_count = cpu_to_le32(nentries);
523                 set_page_dirty(page);
524                 f2fs_put_page(page, 1);
525         }
526
527         spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
528 }
529
530 static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
531                                 block_t cp_addr, unsigned long long *version)
532 {
533         struct page *cp_page_1, *cp_page_2 = NULL;
534         unsigned long blk_size = sbi->blocksize;
535         struct f2fs_checkpoint *cp_block;
536         unsigned long long cur_version = 0, pre_version = 0;
537         size_t crc_offset;
538         __u32 crc = 0;
539
540         /* Read the 1st cp block in this CP pack */
541         cp_page_1 = get_meta_page(sbi, cp_addr);
542
543         /* get the version number */
544         cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
545         crc_offset = le32_to_cpu(cp_block->checksum_offset);
546         if (crc_offset >= blk_size)
547                 goto invalid_cp1;
548
549         crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
550         if (!f2fs_crc_valid(crc, cp_block, crc_offset))
551                 goto invalid_cp1;
552
553         pre_version = cur_cp_version(cp_block);
554
555         /* Read the 2nd cp block in this CP pack */
556         cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
557         cp_page_2 = get_meta_page(sbi, cp_addr);
558
559         cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
560         crc_offset = le32_to_cpu(cp_block->checksum_offset);
561         if (crc_offset >= blk_size)
562                 goto invalid_cp2;
563
564         crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
565         if (!f2fs_crc_valid(crc, cp_block, crc_offset))
566                 goto invalid_cp2;
567
568         cur_version = cur_cp_version(cp_block);
569
570         if (cur_version == pre_version) {
571                 *version = cur_version;
572                 f2fs_put_page(cp_page_2, 1);
573                 return cp_page_1;
574         }
575 invalid_cp2:
576         f2fs_put_page(cp_page_2, 1);
577 invalid_cp1:
578         f2fs_put_page(cp_page_1, 1);
579         return NULL;
580 }
581
582 int get_valid_checkpoint(struct f2fs_sb_info *sbi)
583 {
584         struct f2fs_checkpoint *cp_block;
585         struct f2fs_super_block *fsb = sbi->raw_super;
586         struct page *cp1, *cp2, *cur_page;
587         unsigned long blk_size = sbi->blocksize;
588         unsigned long long cp1_version = 0, cp2_version = 0;
589         unsigned long long cp_start_blk_no;
590         unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
591         block_t cp_blk_no;
592         int i;
593
594         sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
595         if (!sbi->ckpt)
596                 return -ENOMEM;
597         /*
598          * Finding out the valid cp block involves reading both
599          * sets (cp pack 1 and cp pack 2)
600          */
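        /*
         * Pack 1 starts at cp_blkaddr and pack 2 one segment later; the pack
         * whose first and last cp blocks both pass the CRC check and which
         * holds the newer version number (see ver_after below) is taken as
         * the valid one.
         */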
601         cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
602         cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);
603
604         /* The second checkpoint pack should start at the next segment */
605         cp_start_blk_no += ((unsigned long long)1) <<
606                                 le32_to_cpu(fsb->log_blocks_per_seg);
607         cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);
608
609         if (cp1 && cp2) {
610                 if (ver_after(cp2_version, cp1_version))
611                         cur_page = cp2;
612                 else
613                         cur_page = cp1;
614         } else if (cp1) {
615                 cur_page = cp1;
616         } else if (cp2) {
617                 cur_page = cp2;
618         } else {
619                 goto fail_no_cp;
620         }
621
622         cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
623         memcpy(sbi->ckpt, cp_block, blk_size);
624
625         if (cp_blks <= 1)
626                 goto done;
627
628         cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
629         if (cur_page == cp2)
630                 cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);
631
632         for (i = 1; i < cp_blks; i++) {
633                 void *sit_bitmap_ptr;
634                 unsigned char *ckpt = (unsigned char *)sbi->ckpt;
635
636                 cur_page = get_meta_page(sbi, cp_blk_no + i);
637                 sit_bitmap_ptr = page_address(cur_page);
638                 memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
639                 f2fs_put_page(cur_page, 1);
640         }
641 done:
642         f2fs_put_page(cp1, 1);
643         f2fs_put_page(cp2, 1);
644         return 0;
645
646 fail_no_cp:
647         kfree(sbi->ckpt);
648         return -EINVAL;
649 }
650
651 static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
652 {
653         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
654
655         if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
656                 return -EEXIST;
657
658         set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
659         F2FS_I(inode)->dirty_dir = new;
660         list_add_tail(&new->list, &sbi->dir_inode_list);
661         stat_inc_dirty_dir(sbi);
662         return 0;
663 }
664
665 void update_dirty_page(struct inode *inode, struct page *page)
666 {
667         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
668         struct dir_inode_entry *new;
669         int ret = 0;
670
671         if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode))
672                 return;
673
674         if (!S_ISDIR(inode->i_mode)) {
675                 inode_inc_dirty_pages(inode);
676                 goto out;
677         }
678
679         new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
680         new->inode = inode;
681         INIT_LIST_HEAD(&new->list);
682
683         spin_lock(&sbi->dir_inode_lock);
684         ret = __add_dirty_inode(inode, new);
685         inode_inc_dirty_pages(inode);
686         spin_unlock(&sbi->dir_inode_lock);
687
688         if (ret)
689                 kmem_cache_free(inode_entry_slab, new);
690 out:
691         SetPagePrivate(page);
692 }
693
694 void add_dirty_dir_inode(struct inode *inode)
695 {
696         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
697         struct dir_inode_entry *new =
698                         f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
699         int ret = 0;
700
701         new->inode = inode;
702         INIT_LIST_HEAD(&new->list);
703
704         spin_lock(&sbi->dir_inode_lock);
705         ret = __add_dirty_inode(inode, new);
706         spin_unlock(&sbi->dir_inode_lock);
707
708         if (ret)
709                 kmem_cache_free(inode_entry_slab, new);
710 }
711
712 void remove_dirty_dir_inode(struct inode *inode)
713 {
714         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
715         struct dir_inode_entry *entry;
716
717         if (!S_ISDIR(inode->i_mode))
718                 return;
719
720         spin_lock(&sbi->dir_inode_lock);
721         if (get_dirty_pages(inode) ||
722                         !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
723                 spin_unlock(&sbi->dir_inode_lock);
724                 return;
725         }
726
727         entry = F2FS_I(inode)->dirty_dir;
728         list_del(&entry->list);
729         F2FS_I(inode)->dirty_dir = NULL;
730         clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
731         stat_dec_dirty_dir(sbi);
732         spin_unlock(&sbi->dir_inode_lock);
733         kmem_cache_free(inode_entry_slab, entry);
734
735         /* Only from the recovery routine */
736         if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
737                 clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
738                 iput(inode);
739         }
740 }
741
742 void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
743 {
744         struct list_head *head;
745         struct dir_inode_entry *entry;
746         struct inode *inode;
747 retry:
748         spin_lock(&sbi->dir_inode_lock);
749
750         head = &sbi->dir_inode_list;
751         if (list_empty(head)) {
752                 spin_unlock(&sbi->dir_inode_lock);
753                 return;
754         }
755         entry = list_entry(head->next, struct dir_inode_entry, list);
756         inode = igrab(entry->inode);
757         spin_unlock(&sbi->dir_inode_lock);
758         if (inode) {
759                 filemap_fdatawrite(inode->i_mapping);
760                 iput(inode);
761         } else {
762                 /*
763                  * We should submit the bio, since there exist several
764                  * dentry pages under writeback in the freeing inode.
765                  */
766                 f2fs_submit_merged_bio(sbi, DATA, WRITE);
767         }
768         goto retry;
769 }
770
771 /*
772  * Freeze all the FS-operations for checkpoint.
773  */
774 static int block_operations(struct f2fs_sb_info *sbi)
775 {
776         struct writeback_control wbc = {
777                 .sync_mode = WB_SYNC_ALL,
778                 .nr_to_write = LONG_MAX,
779                 .for_reclaim = 0,
780         };
781         struct blk_plug plug;
782         int err = 0;
783
784         blk_start_plug(&plug);
785
786 retry_flush_dents:
787         f2fs_lock_all(sbi);
788         /* write all the dirty dentry pages */
789         if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
790                 f2fs_unlock_all(sbi);
791                 sync_dirty_dir_inodes(sbi);
792                 if (unlikely(f2fs_cp_error(sbi))) {
793                         err = -EIO;
794                         goto out;
795                 }
796                 goto retry_flush_dents;
797         }
798
799         /*
800          * POR: we should ensure that there are no dirty node pages
801          * until the nat/sit flush is finished.
802          */
803 retry_flush_nodes:
804         down_write(&sbi->node_write);
805
806         if (get_pages(sbi, F2FS_DIRTY_NODES)) {
807                 up_write(&sbi->node_write);
808                 sync_node_pages(sbi, 0, &wbc);
809                 if (unlikely(f2fs_cp_error(sbi))) {
810                         f2fs_unlock_all(sbi);
811                         err = -EIO;
812                         goto out;
813                 }
814                 goto retry_flush_nodes;
815         }
816 out:
817         blk_finish_plug(&plug);
818         return err;
819 }
820
821 static void unblock_operations(struct f2fs_sb_info *sbi)
822 {
823         up_write(&sbi->node_write);
824         f2fs_unlock_all(sbi);
825 }
826
827 static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
828 {
829         DEFINE_WAIT(wait);
830
831         for (;;) {
832                 prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);
833
834                 if (!get_pages(sbi, F2FS_WRITEBACK))
835                         break;
836
837                 io_schedule();
838         }
839         finish_wait(&sbi->cp_wait, &wait);
840 }
841
842 static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
843 {
844         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
845         struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
846         struct f2fs_nm_info *nm_i = NM_I(sbi);
847         nid_t last_nid = nm_i->next_scan_nid;
848         block_t start_blk;
849         struct page *cp_page;
850         unsigned int data_sum_blocks, orphan_blocks;
851         __u32 crc32 = 0;
852         void *kaddr;
853         int i;
854         int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
855
856         /*
857          * This avoids conducting wrong roll-forward operations and uses
858          * meta pages, so it should be called prior to sync_meta_pages below.
859          */
860         discard_next_dnode(sbi, NEXT_FREE_BLKADDR(sbi, curseg));
861
862         /* Flush all the NAT/SIT pages */
863         while (get_pages(sbi, F2FS_DIRTY_META)) {
864                 sync_meta_pages(sbi, META, LONG_MAX);
865                 if (unlikely(f2fs_cp_error(sbi)))
866                         return;
867         }
868
869         next_free_nid(sbi, &last_nid);
870
871         /*
872          * modify checkpoint
873          * version number is already updated
874          */
875         ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
876         ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
877         ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
878         for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
879                 ckpt->cur_node_segno[i] =
880                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
881                 ckpt->cur_node_blkoff[i] =
882                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
883                 ckpt->alloc_type[i + CURSEG_HOT_NODE] =
884                                 curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
885         }
886         for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
887                 ckpt->cur_data_segno[i] =
888                         cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
889                 ckpt->cur_data_blkoff[i] =
890                         cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
891                 ckpt->alloc_type[i + CURSEG_HOT_DATA] =
892                                 curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
893         }
894
895         ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
896         ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
897         ckpt->next_free_nid = cpu_to_le32(last_nid);
898
899         /* 2 cp + n data seg summaries + orphan inode blocks */
900         data_sum_blocks = npages_for_summary_flush(sbi);
901         if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
902                 set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
903         else
904                 clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
905
906         orphan_blocks = GET_ORPHAN_BLOCKS(sbi->n_orphans);
907         ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
908                         orphan_blocks);
909
910         if (is_umount) {
911                 set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
912                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
913                                 cp_payload_blks + data_sum_blocks +
914                                 orphan_blocks + NR_CURSEG_NODE_TYPE);
915         } else {
916                 clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
917                 ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
918                                 cp_payload_blks + data_sum_blocks +
919                                 orphan_blocks);
920         }
921
922         if (sbi->n_orphans)
923                 set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
924         else
925                 clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
926
927         if (sbi->need_fsck)
928                 set_ckpt_flags(ckpt, CP_FSCK_FLAG);
929
930         /* update SIT/NAT bitmap */
931         get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
932         get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
933
934         crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
935         *((__le32 *)((unsigned char *)ckpt +
936                                 le32_to_cpu(ckpt->checksum_offset)))
937                                 = cpu_to_le32(crc32);
938
939         start_blk = __start_cp_addr(sbi);
940
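        /*
         * Write the CP pack in order: the checkpoint block itself, cp_payload
         * blocks, orphan inode blocks (if any), data segment summaries, node
         * segment summaries (umount only), and finally a second copy of the
         * checkpoint block that closes the pack.
         */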
941         /* write out checkpoint buffer at block 0 */
942         cp_page = grab_meta_page(sbi, start_blk++);
943         kaddr = page_address(cp_page);
944         memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
945         set_page_dirty(cp_page);
946         f2fs_put_page(cp_page, 1);
947
948         for (i = 1; i < 1 + cp_payload_blks; i++) {
949                 cp_page = grab_meta_page(sbi, start_blk++);
950                 kaddr = page_address(cp_page);
951                 memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
952                                 (1 << sbi->log_blocksize));
953                 set_page_dirty(cp_page);
954                 f2fs_put_page(cp_page, 1);
955         }
956
957         if (sbi->n_orphans) {
958                 write_orphan_inodes(sbi, start_blk);
959                 start_blk += orphan_blocks;
960         }
961
962         write_data_summaries(sbi, start_blk);
963         start_blk += data_sum_blocks;
964         if (is_umount) {
965                 write_node_summaries(sbi, start_blk);
966                 start_blk += NR_CURSEG_NODE_TYPE;
967         }
968
969         /* write out the checkpoint block */
970         cp_page = grab_meta_page(sbi, start_blk);
971         kaddr = page_address(cp_page);
972         memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
973         set_page_dirty(cp_page);
974         f2fs_put_page(cp_page, 1);
975
976         /* wait for previously submitted node/meta page writeback */
977         wait_on_all_pages_writeback(sbi);
978
979         if (unlikely(f2fs_cp_error(sbi)))
980                 return;
981
982         filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
983         filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
984
985         /* update user_block_counts */
986         sbi->last_valid_block_count = sbi->total_valid_block_count;
987         sbi->alloc_valid_block_count = 0;
988
989         /* Here, we have only one bio that carries the CP pack */
990         sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
991
992         release_dirty_inode(sbi);
993
994         if (unlikely(f2fs_cp_error(sbi)))
995                 return;
996
997         clear_prefree_segments(sbi);
998         F2FS_RESET_SB_DIRT(sbi);
999 }
1000
1001 /*
1002  * We guarantee that this checkpoint procedure will not fail.
1003  */
1004 void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
1005 {
1006         struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1007         unsigned long long ckpt_ver;
1008
1009         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");
1010
1011         mutex_lock(&sbi->cp_mutex);
1012
1013         if (!sbi->s_dirty)
1014                 goto out;
1015         if (unlikely(f2fs_cp_error(sbi)))
1016                 goto out;
1017         if (block_operations(sbi))
1018                 goto out;
1019
1020         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");
1021
1022         f2fs_submit_merged_bio(sbi, DATA, WRITE);
1023         f2fs_submit_merged_bio(sbi, NODE, WRITE);
1024         f2fs_submit_merged_bio(sbi, META, WRITE);
1025
1026         /*
1027          * update checkpoint pack index
1028          * Increase the version number so that
1029          * SIT entries and seg summaries are written to the correct place
1030          */
1031         ckpt_ver = cur_cp_version(ckpt);
1032         ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);
1033
1034         /* write cached NAT/SIT entries to NAT/SIT area */
1035         flush_nat_entries(sbi);
1036         flush_sit_entries(sbi);
1037
1038         /* unlock all the fs_lock[] in do_checkpoint() */
1039         do_checkpoint(sbi, is_umount);
1040
1041         unblock_operations(sbi);
1042         stat_inc_cp_count(sbi->stat_info);
1043 out:
1044         mutex_unlock(&sbi->cp_mutex);
1045         trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
1046 }
1047
1048 void init_ino_entry_info(struct f2fs_sb_info *sbi)
1049 {
1050         int i;
1051
1052         for (i = 0; i < MAX_INO_ENTRY; i++) {
1053                 INIT_RADIX_TREE(&sbi->ino_root[i], GFP_ATOMIC);
1054                 spin_lock_init(&sbi->ino_lock[i]);
1055                 INIT_LIST_HEAD(&sbi->ino_list[i]);
1056         }
1057
1058         /*
1059          * considering 512 blocks in a segment, 8 blocks are needed for cp
1060          * and log segment summaries. The remaining blocks are used to keep
1061          * orphan entries. With the limitation of one reserved segment
1062          * for the cp pack, we can have at most 1020 * 504 orphan entries.
1063          */
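        /*
         * For example, with 512 blocks per segment and 8 of them taken by the
         * cp blocks and the six log segment summaries:
         * (512 - 8) * 1020 = 504 * 1020 = 514,080 orphan entries at most.
         */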
1064         sbi->n_orphans = 0;
1065         sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1066                         NR_CURSEG_TYPE) * F2FS_ORPHANS_PER_BLOCK;
1067 }
1068
1069 int __init create_checkpoint_caches(void)
1070 {
1071         ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
1072                         sizeof(struct ino_entry));
1073         if (!ino_entry_slab)
1074                 return -ENOMEM;
1075         inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
1076                         sizeof(struct dir_inode_entry));
1077         if (!inode_entry_slab) {
1078                 kmem_cache_destroy(ino_entry_slab);
1079                 return -ENOMEM;
1080         }
1081         return 0;
1082 }
1083
1084 void destroy_checkpoint_caches(void)
1085 {
1086         kmem_cache_destroy(ino_entry_slab);
1087         kmem_cache_destroy(inode_entry_slab);
1088 }