/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include <trace/events/f2fs.h>

static struct kmem_cache *ino_entry_slab;
static struct kmem_cache *inode_entry_slab;

/*
 * We guarantee no failure on the returned page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = META_MAPPING(sbi);
        struct page *page = NULL;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }
        f2fs_wait_on_page_writeback(page, META);
        SetPageUptodate(page);
        return page;
}

/*
 * We guarantee no failure on the returned page.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
        struct address_space *mapping = META_MAPPING(sbi);
        struct page *page;
repeat:
        page = grab_cache_page(mapping, index);
        if (!page) {
                cond_resched();
                goto repeat;
        }
        if (PageUptodate(page))
                goto out;

        if (f2fs_submit_page_bio(sbi, page, index,
                                READ_SYNC | REQ_META | REQ_PRIO))
                goto repeat;

        lock_page(page);
        if (unlikely(page->mapping != mapping)) {
                f2fs_put_page(page, 1);
                goto repeat;
        }
out:
        mark_page_accessed(page);
        return page;
}

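/*
 * Number of valid blocks in each meta area, used to bound readahead:
 * NAT readahead wraps back to block 0, SIT readahead stops at the end,
 * and SSA/CP readahead uses raw block addresses so no bound is needed.
 */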
static inline int get_max_meta_blks(struct f2fs_sb_info *sbi, int type)
{
        switch (type) {
        case META_NAT:
                return NM_I(sbi)->max_nid / NAT_ENTRY_PER_BLOCK;
        case META_SIT:
                return SIT_BLK_CNT(sbi);
        case META_SSA:
        case META_CP:
                return 0;
        default:
                BUG();
        }
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, int start, int nrpages, int type)
{
        block_t prev_blk_addr = 0;
        struct page *page;
        int blkno = start;
        int max_blks = get_max_meta_blks(sbi, type);

        struct f2fs_io_info fio = {
                .type = META,
                .rw = READ_SYNC | REQ_META | REQ_PRIO
        };

        for (; nrpages-- > 0; blkno++) {
                block_t blk_addr;

                switch (type) {
                case META_NAT:
                        /* get nat block addr */
                        if (unlikely(blkno >= max_blks))
                                blkno = 0;
                        blk_addr = current_nat_addr(sbi,
                                        blkno * NAT_ENTRY_PER_BLOCK);
                        break;
                case META_SIT:
                        /* get sit block addr */
                        if (unlikely(blkno >= max_blks))
                                goto out;
                        blk_addr = current_sit_addr(sbi,
                                        blkno * SIT_ENTRY_PER_BLOCK);
                        if (blkno != start && prev_blk_addr + 1 != blk_addr)
                                goto out;
                        prev_blk_addr = blk_addr;
                        break;
                case META_SSA:
                case META_CP:
                        /* get ssa/cp block addr */
                        blk_addr = blkno;
                        break;
                default:
                        BUG();
                }

                page = grab_cache_page(META_MAPPING(sbi), blk_addr);
                if (!page)
                        continue;
                if (PageUptodate(page)) {
                        mark_page_accessed(page);
                        f2fs_put_page(page, 1);
                        continue;
                }

                f2fs_submit_page_mbio(sbi, page, blk_addr, &fio);
                mark_page_accessed(page);
                f2fs_put_page(page, 0);
        }
out:
        f2fs_submit_merged_bio(sbi, META, READ);
        return blkno - start;
}

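/*
 * Write back a single dirty meta page.  The page is redirtied while
 * recovery (POR) is running or when called from reclaim, and is dropped
 * without I/O once the checkpoint carries an error flag.
 */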
static int f2fs_write_meta_page(struct page *page,
                                struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

        trace_f2fs_writepage(page, META);

        if (unlikely(sbi->por_doing))
                goto redirty_out;
        if (wbc->for_reclaim)
                goto redirty_out;

        /* Should not write any meta pages if an IO error has occurred */
        if (unlikely(is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ERROR_FLAG)))
                goto no_write;

        f2fs_wait_on_page_writeback(page, META);
        write_meta_page(sbi, page);
no_write:
        dec_page_count(sbi, F2FS_DIRTY_META);
        unlock_page(page);
        return 0;

redirty_out:
        redirty_page_for_writepage(wbc, page);
        return AOP_WRITEPAGE_ACTIVATE;
}

static int f2fs_write_meta_pages(struct address_space *mapping,
                                struct writeback_control *wbc)
{
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);
        long diff, written;

        trace_f2fs_writepages(mapping->host, wbc, META);

        /* collect a number of dirty meta pages and write them together */
        if (wbc->for_kupdate ||
                get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
                goto skip_write;

        /* if mounting has failed, skip writing node pages */
        mutex_lock(&sbi->cp_mutex);
        diff = nr_pages_to_write(sbi, META, wbc);
        written = sync_meta_pages(sbi, META, wbc->nr_to_write);
        mutex_unlock(&sbi->cp_mutex);
        wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
        return 0;

skip_write:
        wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
        return 0;
}

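/*
 * Walk the dirty pages of the meta address space and write out at most
 * nr_to_write of them, then submit the merged bio.  Returns the number
 * of pages written.
 */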
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
                                                long nr_to_write)
{
        struct address_space *mapping = META_MAPPING(sbi);
        pgoff_t index = 0, end = LONG_MAX;
        struct pagevec pvec;
        long nwritten = 0;
        struct writeback_control wbc = {
                .for_reclaim = 0,
        };

        pagevec_init(&pvec, 0);

        while (index <= end) {
                int i, nr_pages;
                nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
                                PAGECACHE_TAG_DIRTY,
                                min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
                if (unlikely(nr_pages == 0))
                        break;

                for (i = 0; i < nr_pages; i++) {
                        struct page *page = pvec.pages[i];

                        lock_page(page);

                        if (unlikely(page->mapping != mapping)) {
continue_unlock:
                                unlock_page(page);
                                continue;
                        }
                        if (!PageDirty(page)) {
                                /* someone wrote it for us */
                                goto continue_unlock;
                        }

                        if (!clear_page_dirty_for_io(page))
                                goto continue_unlock;

                        if (f2fs_write_meta_page(page, &wbc)) {
                                unlock_page(page);
                                break;
                        }
                        nwritten++;
                        if (unlikely(nwritten >= nr_to_write))
                                break;
                }
                pagevec_release(&pvec);
                cond_resched();
        }

        if (nwritten)
                f2fs_submit_merged_bio(sbi, type, WRITE);

        return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
        struct address_space *mapping = page->mapping;
        struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb);

        trace_f2fs_set_page_dirty(page, META);

        SetPageUptodate(page);
        if (!PageDirty(page)) {
                __set_page_dirty_nobuffers(page);
                inc_page_count(sbi, F2FS_DIRTY_META);
                return 1;
        }
        return 0;
}

const struct address_space_operations f2fs_meta_aops = {
        .writepage      = f2fs_write_meta_page,
        .writepages     = f2fs_write_meta_pages,
        .set_page_dirty = f2fs_set_meta_page_dirty,
};

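/*
 * Per-type inode number (ino) management: each entry type keeps a list
 * of ino entries, sorted by inode number and protected by its own spin
 * lock.  In this file only orphan inodes (ORPHAN_INO) use it.
 */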
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
        struct ino_entry *new, *e;

        new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_ATOMIC);
        new->ino = ino;

        spin_lock(&sbi->ino_lock[type]);
        list_for_each_entry(e, &sbi->ino_list[type], list) {
                if (e->ino == ino) {
                        spin_unlock(&sbi->ino_lock[type]);
                        kmem_cache_free(ino_entry_slab, new);
                        return;
                }
                if (e->ino > ino)
                        break;
        }

        /* add new entry into list which is sorted by inode number */
        list_add_tail(&new->list, &e->list);
        spin_unlock(&sbi->ino_lock[type]);
}

static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
        struct ino_entry *e;

        spin_lock(&sbi->ino_lock[type]);
        list_for_each_entry(e, &sbi->ino_list[type], list) {
                if (e->ino == ino) {
                        list_del(&e->list);
                        sbi->n_orphans--;
                        spin_unlock(&sbi->ino_lock[type]);
                        kmem_cache_free(ino_entry_slab, e);
                        return;
                }
        }
        spin_unlock(&sbi->ino_lock[type]);
}

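/*
 * Reserve a slot for a new orphan inode; fails with -ENOSPC once the
 * per-checkpoint limit (sbi->max_orphans) has been reached.
 */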
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
        int err = 0;

        spin_lock(&sbi->ino_lock[ORPHAN_INO]);
        if (unlikely(sbi->n_orphans >= sbi->max_orphans))
                err = -ENOSPC;
        else
                sbi->n_orphans++;
        spin_unlock(&sbi->ino_lock[ORPHAN_INO]);

        return err;
}

void release_orphan_inode(struct f2fs_sb_info *sbi)
{
        spin_lock(&sbi->ino_lock[ORPHAN_INO]);
        if (sbi->n_orphans == 0) {
                f2fs_msg(sbi->sb, KERN_ERR, "releasing "
                        "unacquired orphan inode");
                f2fs_handle_error(sbi);
        } else
                sbi->n_orphans--;
        spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

void add_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        /* add new orphan entry into list which is sorted by inode number */
        __add_ino_entry(sbi, ino, ORPHAN_INO);
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        /* remove orphan entry from orphan list */
        __remove_ino_entry(sbi, ino, ORPHAN_INO);
}

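/*
 * Re-read an orphan inode recorded in the checkpoint and drop its last
 * reference, so its data is truncated and the inode freed during iput.
 */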
static void recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
        struct inode *inode = f2fs_iget(sbi->sb, ino);
        if (IS_ERR(inode)) {
                f2fs_msg(sbi->sb, KERN_ERR, "unable to recover orphan inode %d",
                                ino);
                f2fs_handle_error(sbi);
                return;
        }
        clear_nlink(inode);

        /* truncate all the data during iput */
        iput(inode);
}

void recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
        block_t start_blk, orphan_blkaddr, i, j;

        if (!is_set_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG))
                return;

        sbi->por_doing = true;

        start_blk = __start_cp_addr(sbi) + 1 +
                le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
        orphan_blkaddr = __start_sum_addr(sbi) - 1;

        ra_meta_pages(sbi, start_blk, orphan_blkaddr, META_CP);

        for (i = 0; i < orphan_blkaddr; i++) {
                struct page *page = get_meta_page(sbi, start_blk + i);
                struct f2fs_orphan_block *orphan_blk;

                orphan_blk = (struct f2fs_orphan_block *)page_address(page);
                for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
                        nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
                        recover_orphan_inode(sbi, ino);
                }
                f2fs_put_page(page, 1);
        }
        /* clear Orphan Flag */
        clear_ckpt_flags(F2FS_CKPT(sbi), CP_ORPHAN_PRESENT_FLAG);
        sbi->por_doing = false;
        return;
}

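/*
 * Pack the in-memory orphan list into f2fs_orphan_block pages starting
 * at start_blk; the pages are grabbed up front and dirtied here so the
 * checkpoint flush writes them out.
 */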
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
        struct list_head *head;
        struct f2fs_orphan_block *orphan_blk = NULL;
        unsigned int nentries = 0;
        unsigned short index;
        unsigned short orphan_blocks = (unsigned short)((sbi->n_orphans +
                (F2FS_ORPHANS_PER_BLOCK - 1)) / F2FS_ORPHANS_PER_BLOCK);
        struct page *page = NULL;
        struct ino_entry *orphan = NULL;

        for (index = 0; index < orphan_blocks; index++)
                grab_meta_page(sbi, start_blk + index);

        index = 1;
        spin_lock(&sbi->ino_lock[ORPHAN_INO]);
        head = &sbi->ino_list[ORPHAN_INO];

        /* loop over each orphan inode entry and write it into the journal block */
        list_for_each_entry(orphan, head, list) {
                if (!page) {
                        page = find_get_page(META_MAPPING(sbi), start_blk++);
                        f2fs_bug_on(!page);
                        orphan_blk =
                                (struct f2fs_orphan_block *)page_address(page);
                        memset(orphan_blk, 0, sizeof(*orphan_blk));
                        f2fs_put_page(page, 0);
                }

                orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

                if (nentries == F2FS_ORPHANS_PER_BLOCK) {
                        /*
                         * an orphan block is full of 1020 entries,
                         * so we need to flush the current orphan block
                         * and bring another one into memory
                         */
                        orphan_blk->blk_addr = cpu_to_le16(index);
                        orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
                        orphan_blk->entry_count = cpu_to_le32(nentries);
                        set_page_dirty(page);
                        f2fs_put_page(page, 1);
                        index++;
                        nentries = 0;
                        page = NULL;
                }
        }

        if (page) {
                orphan_blk->blk_addr = cpu_to_le16(index);
                orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
                orphan_blk->entry_count = cpu_to_le32(nentries);
                set_page_dirty(page);
                f2fs_put_page(page, 1);
        }

        spin_unlock(&sbi->ino_lock[ORPHAN_INO]);
}

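/*
 * Read one checkpoint pack and verify it: both the first and the last
 * block must carry a valid CRC and the same version number.  Returns
 * the first cp page on success, or NULL if the pack is invalid.
 */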
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
                                block_t cp_addr, unsigned long long *version)
{
        struct page *cp_page_1, *cp_page_2 = NULL;
        unsigned long blk_size = sbi->blocksize;
        struct f2fs_checkpoint *cp_block;
        unsigned long long cur_version = 0, pre_version = 0;
        size_t crc_offset;
        __u32 crc = 0;

        /* Read the 1st cp block in this CP pack */
        cp_page_1 = get_meta_page(sbi, cp_addr);

        /* get the version number */
        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_1);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp1;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp1;

        pre_version = cur_cp_version(cp_block);

        /* Read the 2nd cp block in this CP pack */
        cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
        cp_page_2 = get_meta_page(sbi, cp_addr);

        cp_block = (struct f2fs_checkpoint *)page_address(cp_page_2);
        crc_offset = le32_to_cpu(cp_block->checksum_offset);
        if (crc_offset >= blk_size)
                goto invalid_cp2;

        crc = le32_to_cpu(*((__u32 *)((unsigned char *)cp_block + crc_offset)));
        if (!f2fs_crc_valid(crc, cp_block, crc_offset))
                goto invalid_cp2;

        cur_version = cur_cp_version(cp_block);

        if (cur_version == pre_version) {
                *version = cur_version;
                f2fs_put_page(cp_page_2, 1);
                return cp_page_1;
        }
invalid_cp2:
        f2fs_put_page(cp_page_2, 1);
invalid_cp1:
        f2fs_put_page(cp_page_1, 1);
        return NULL;
}

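/*
 * Load the newer of the two checkpoint packs into sbi->ckpt, including
 * any extra cp payload blocks.
 */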
int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
        struct f2fs_checkpoint *cp_block;
        struct f2fs_super_block *fsb = sbi->raw_super;
        struct page *cp1, *cp2, *cur_page;
        unsigned long blk_size = sbi->blocksize;
        unsigned long long cp1_version = 0, cp2_version = 0;
        unsigned long long cp_start_blk_no;
        unsigned int cp_blks = 1 + le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);
        block_t cp_blk_no;
        int i;

        sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
        if (!sbi->ckpt)
                return -ENOMEM;
        /*
         * Finding the valid cp block involves reading both
         * sets (cp pack 1 and cp pack 2)
         */
        cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
        cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

        /* The second checkpoint pack should start at the next segment */
        cp_start_blk_no += ((unsigned long long)1) <<
                                le32_to_cpu(fsb->log_blocks_per_seg);
        cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

        if (cp1 && cp2) {
                if (ver_after(cp2_version, cp1_version))
                        cur_page = cp2;
                else
                        cur_page = cp1;
        } else if (cp1) {
                cur_page = cp1;
        } else if (cp2) {
                cur_page = cp2;
        } else {
                goto fail_no_cp;
        }

        cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
        memcpy(sbi->ckpt, cp_block, blk_size);

        if (cp_blks <= 1)
                goto done;

        cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
        if (cur_page == cp2)
                cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

        for (i = 1; i < cp_blks; i++) {
                void *sit_bitmap_ptr;
                unsigned char *ckpt = (unsigned char *)sbi->ckpt;

                cur_page = get_meta_page(sbi, cp_blk_no + i);
                sit_bitmap_ptr = page_address(cur_page);
                memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
                f2fs_put_page(cur_page, 1);
        }
done:
        f2fs_put_page(cp1, 1);
        f2fs_put_page(cp2, 1);
        return 0;

fail_no_cp:
        kfree(sbi->ckpt);
        return -EINVAL;
}

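/*
 * Dirty directory tracking: directories with dirty dentry pages are
 * kept on sbi->dir_inode_list so that a checkpoint can write all dirty
 * dentries back before freezing operations.
 */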
static int __add_dirty_inode(struct inode *inode, struct dir_inode_entry *new)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);

        if (is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR))
                return -EEXIST;

        set_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
        F2FS_I(inode)->dirty_dir = new;
        list_add_tail(&new->list, &sbi->dir_inode_list);
        stat_inc_dirty_dir(sbi);
        return 0;
}

void set_dirty_dir_page(struct inode *inode, struct page *page)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new;
        int ret = 0;

        if (!S_ISDIR(inode->i_mode))
                return;

        new = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        ret = __add_dirty_inode(inode, new);
        inode_inc_dirty_dents(inode);
        SetPagePrivate(page);
        spin_unlock(&sbi->dir_inode_lock);

        if (ret)
                kmem_cache_free(inode_entry_slab, new);
}

void add_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *new =
                        f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
        int ret = 0;

        new->inode = inode;
        INIT_LIST_HEAD(&new->list);

        spin_lock(&sbi->dir_inode_lock);
        ret = __add_dirty_inode(inode, new);
        spin_unlock(&sbi->dir_inode_lock);

        if (ret)
                kmem_cache_free(inode_entry_slab, new);
}

void remove_dirty_dir_inode(struct inode *inode)
{
        struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb);
        struct dir_inode_entry *entry;

        if (!S_ISDIR(inode->i_mode))
                return;

        spin_lock(&sbi->dir_inode_lock);
        if (get_dirty_dents(inode) ||
                        !is_inode_flag_set(F2FS_I(inode), FI_DIRTY_DIR)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }

        entry = F2FS_I(inode)->dirty_dir;
        list_del(&entry->list);
        F2FS_I(inode)->dirty_dir = NULL;
        clear_inode_flag(F2FS_I(inode), FI_DIRTY_DIR);
        stat_dec_dirty_dir(sbi);
        spin_unlock(&sbi->dir_inode_lock);
        kmem_cache_free(inode_entry_slab, entry);

        /* Only from the recovery routine */
        if (is_inode_flag_set(F2FS_I(inode), FI_DELAY_IPUT)) {
                clear_inode_flag(F2FS_I(inode), FI_DELAY_IPUT);
                iput(inode);
        }
}

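/*
 * Write back the dirty dentry pages of every inode on the dirty
 * directory list, repeating until the list drains.
 */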
void sync_dirty_dir_inodes(struct f2fs_sb_info *sbi)
{
        struct list_head *head;
        struct dir_inode_entry *entry;
        struct inode *inode;
retry:
        spin_lock(&sbi->dir_inode_lock);

        head = &sbi->dir_inode_list;
        if (list_empty(head)) {
                spin_unlock(&sbi->dir_inode_lock);
                return;
        }
        entry = list_entry(head->next, struct dir_inode_entry, list);
        inode = igrab(entry->inode);
        spin_unlock(&sbi->dir_inode_lock);
        if (inode) {
                filemap_fdatawrite(inode->i_mapping);
                iput(inode);
        } else {
                /*
                 * We should submit the bio, since several dentry pages
                 * may still be under writeback in the inode being freed.
                 */
                f2fs_submit_merged_bio(sbi, DATA, WRITE);
        }
        goto retry;
}

/*
 * Freeze all the FS-operations for checkpoint.
 */
static void block_operations(struct f2fs_sb_info *sbi)
{
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_ALL,
                .nr_to_write = LONG_MAX,
                .for_reclaim = 0,
        };
        struct blk_plug plug;

        blk_start_plug(&plug);

retry_flush_dents:
        f2fs_lock_all(sbi);
        /* write all the dirty dentry pages */
        if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
                f2fs_unlock_all(sbi);
                sync_dirty_dir_inodes(sbi);
                goto retry_flush_dents;
        }

        /*
         * POR: we should ensure that there are no dirty node pages
         * until the nat/sit flush is finished.
         */
retry_flush_nodes:
        mutex_lock(&sbi->node_write);

        if (get_pages(sbi, F2FS_DIRTY_NODES)) {
                mutex_unlock(&sbi->node_write);
                sync_node_pages(sbi, 0, &wbc);
                goto retry_flush_nodes;
        }
        blk_finish_plug(&plug);
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
        mutex_unlock(&sbi->node_write);
        f2fs_unlock_all(sbi);
}

static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
        DEFINE_WAIT(wait);

        for (;;) {
                prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

                if (!get_pages(sbi, F2FS_WRITEBACK))
                        break;

                io_schedule();
        }
        finish_wait(&sbi->cp_wait, &wait);
}

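/*
 * Build and write the checkpoint pack: flush dirty meta pages, fill in
 * the f2fs_checkpoint block, then write the cp block, cp payload,
 * orphan blocks, summaries, and finally the trailing cp block that
 * commits the whole pack.
 */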
static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        nid_t last_nid = 0;
        block_t start_blk;
        struct page *cp_page;
        unsigned int data_sum_blocks, orphan_blocks;
        __u32 crc32 = 0;
        void *kaddr;
        int i;
        int cp_payload_blks = le32_to_cpu(F2FS_RAW_SUPER(sbi)->cp_payload);

        /*
         * This avoids conducting wrong roll-forward operations and uses
         * meta pages, so it should be called prior to sync_meta_pages below.
         */
        discard_next_dnode(sbi);

        /* Flush all the NAT/SIT pages */
        while (get_pages(sbi, F2FS_DIRTY_META))
                sync_meta_pages(sbi, META, LONG_MAX);

        next_free_nid(sbi, &last_nid);

        /*
         * modify checkpoint
         * version number is already updated
         */
        ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
        ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
        ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
        for (i = 0; i < 3; i++) {
                ckpt->cur_node_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
                ckpt->cur_node_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
                ckpt->alloc_type[i + CURSEG_HOT_NODE] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
        }
        for (i = 0; i < 3; i++) {
                ckpt->cur_data_segno[i] =
                        cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
                ckpt->cur_data_blkoff[i] =
                        cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
                ckpt->alloc_type[i + CURSEG_HOT_DATA] =
                                curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
        }

        ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
        ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
        ckpt->next_free_nid = cpu_to_le32(last_nid);

        /* 2 cp + n data seg summary + orphan inode blocks */
        data_sum_blocks = npages_for_summary_flush(sbi);
        if (data_sum_blocks < 3)
                set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);

        orphan_blocks = (sbi->n_orphans + F2FS_ORPHANS_PER_BLOCK - 1)
                                        / F2FS_ORPHANS_PER_BLOCK;
        ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
                        orphan_blocks);

        if (is_umount) {
                set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                                cp_payload_blks + data_sum_blocks +
                                orphan_blocks + NR_CURSEG_NODE_TYPE);
        } else {
                clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
                ckpt->cp_pack_total_block_count = cpu_to_le32(2 +
                                cp_payload_blks + data_sum_blocks +
                                orphan_blocks);
        }

        if (sbi->n_orphans)
                set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
        else
                clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

        /* update SIT/NAT bitmap */
        get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
        get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

        crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
        *((__le32 *)((unsigned char *)ckpt +
                                le32_to_cpu(ckpt->checksum_offset)))
                                = cpu_to_le32(crc32);

        start_blk = __start_cp_addr(sbi);

        /* write out checkpoint buffer at block 0 */
        cp_page = grab_meta_page(sbi, start_blk++);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        for (i = 1; i < 1 + cp_payload_blks; i++) {
                cp_page = grab_meta_page(sbi, start_blk++);
                kaddr = page_address(cp_page);
                memcpy(kaddr, (char *)ckpt + i * F2FS_BLKSIZE,
                                (1 << sbi->log_blocksize));
                set_page_dirty(cp_page);
                f2fs_put_page(cp_page, 1);
        }

        if (sbi->n_orphans) {
                write_orphan_inodes(sbi, start_blk);
                start_blk += orphan_blocks;
        }

        write_data_summaries(sbi, start_blk);
        start_blk += data_sum_blocks;
        if (is_umount) {
                write_node_summaries(sbi, start_blk);
                start_blk += NR_CURSEG_NODE_TYPE;
        }

        /* write out the trailing checkpoint block */
        cp_page = grab_meta_page(sbi, start_blk);
        kaddr = page_address(cp_page);
        memcpy(kaddr, ckpt, (1 << sbi->log_blocksize));
        set_page_dirty(cp_page);
        f2fs_put_page(cp_page, 1);

        /* wait for previously submitted node/meta page writeback */
        wait_on_all_pages_writeback(sbi);

        filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
        filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);

        /* update user_block_counts */
        sbi->last_valid_block_count = sbi->total_valid_block_count;
        sbi->alloc_valid_block_count = 0;

        /* Here, we have only one bio carrying the CP pack */
        sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

        if (unlikely(!is_set_ckpt_flags(ckpt, CP_ERROR_FLAG))) {
                clear_prefree_segments(sbi);
                F2FS_RESET_SB_DIRT(sbi);
        }
}

/*
 * We guarantee that this checkpoint procedure will not fail.
 */
void write_checkpoint(struct f2fs_sb_info *sbi, bool is_umount)
{
        struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
        unsigned long long ckpt_ver;

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "start block_ops");

        mutex_lock(&sbi->cp_mutex);
        block_operations(sbi);

        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish block_ops");

        f2fs_submit_merged_bio(sbi, DATA, WRITE);
        f2fs_submit_merged_bio(sbi, NODE, WRITE);
        f2fs_submit_merged_bio(sbi, META, WRITE);

        /*
         * update checkpoint pack index
         * Increase the version number so that
         * SIT entries and seg summaries are written to the correct place
         */
        ckpt_ver = cur_cp_version(ckpt);
        ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

        /* write cached NAT/SIT entries to NAT/SIT area */
        flush_nat_entries(sbi);
        flush_sit_entries(sbi);

        /* unlock all the fs_lock[] in do_checkpoint() */
        do_checkpoint(sbi, is_umount);

        unblock_operations(sbi);
        mutex_unlock(&sbi->cp_mutex);

        stat_inc_cp_count(sbi->stat_info);
        trace_f2fs_write_checkpoint(sbi->sb, is_umount, "finish checkpoint");
}

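/*
 * Set up the per-type ino lists and locks, and size the orphan pool
 * from the space left in the checkpoint segment.
 */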
void init_ino_entry_info(struct f2fs_sb_info *sbi)
{
        int i;

        for (i = 0; i < MAX_INO_ENTRY; i++) {
                spin_lock_init(&sbi->ino_lock[i]);
                INIT_LIST_HEAD(&sbi->ino_list[i]);
        }

        /*
         * Considering 512 blocks in a segment, 8 blocks are needed for cp
         * and log segment summaries. The remaining blocks are used to keep
         * orphan entries. With the limitation of one reserved segment
         * for the cp pack, we can have at most 1020 * 504 orphan entries.
         */
        sbi->n_orphans = 0;
        sbi->max_orphans = (sbi->blocks_per_seg - 2 - NR_CURSEG_TYPE)
                                * F2FS_ORPHANS_PER_BLOCK;
}

int __init create_checkpoint_caches(void)
{
        ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
                        sizeof(struct ino_entry));
        if (!ino_entry_slab)
                return -ENOMEM;
        inode_entry_slab = f2fs_kmem_cache_create("f2fs_dirty_dir_entry",
                        sizeof(struct dir_inode_entry));
        if (!inode_entry_slab) {
                kmem_cache_destroy(ino_entry_slab);
                return -ENOMEM;
        }
        return 0;
}

void destroy_checkpoint_caches(void)
{
        kmem_cache_destroy(ino_entry_slab);
        kmem_cache_destroy(inode_entry_slab);
}