f2fs: support IO alignment for DATA and NODE writes
This patch implements IO alignment by filling dummy blocks in DATA and NODE
write bios. If we can guarantee, for example, 32KB or 64KB for such the IOs,
we can eliminate underlying dummy page problem which FTL conducts in order to
close MLC or TLC partial written pages.
Note that,
- it requires "-o mode=lfs".
- IO size should be power of 2, not exceed BIO_MAX_PAGES, 256.
- read IO is still 4KB.
- do checkpoint at fsync, if dummy NODE page was written.
Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a06b2d1..12d235f 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -93,6 +93,17 @@
struct page *page = bvec->bv_page;
enum count_type type = WB_DATA_TYPE(page);
+ if (IS_DUMMY_WRITTEN_PAGE(page)) {
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ unlock_page(page);
+ mempool_free(page, sbi->write_io_dummy);
+
+ if (unlikely(bio->bi_error))
+ f2fs_stop_checkpoint(sbi, true);
+ continue;
+ }
+
fscrypt_pullback_bio_page(&page, true);
if (unlikely(bio->bi_error)) {
@@ -171,10 +182,42 @@
struct bio *bio, enum page_type type)
{
if (!is_read_io(bio_op(bio))) {
+ unsigned int start;
+
if (f2fs_sb_mounted_blkzoned(sbi->sb) &&
current->plug && (type == DATA || type == NODE))
blk_finish_plug(current->plug);
+
+ if (type != DATA && type != NODE)
+ goto submit_io;
+
+ start = bio->bi_iter.bi_size >> F2FS_BLKSIZE_BITS;
+ start %= F2FS_IO_SIZE(sbi);
+
+ if (start == 0)
+ goto submit_io;
+
+ /* fill dummy pages */
+ for (; start < F2FS_IO_SIZE(sbi); start++) {
+ struct page *page =
+ mempool_alloc(sbi->write_io_dummy,
+ GFP_NOIO | __GFP_ZERO | __GFP_NOFAIL);
+ f2fs_bug_on(sbi, !page);
+
+ SetPagePrivate(page);
+ set_page_private(page, (unsigned long)DUMMY_WRITTEN_PAGE);
+ lock_page(page);
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE)
+ f2fs_bug_on(sbi, 1);
+ }
+ /*
+ * In the NODE case, we lose next block address chain. So, we
+ * need to do checkpoint in f2fs_sync_file.
+ */
+ if (type == NODE)
+ set_sbi_flag(sbi, SBI_NEED_CP);
}
+submit_io:
if (is_read_io(bio_op(bio)))
trace_f2fs_submit_read_bio(sbi->sb, type, bio);
else
@@ -319,13 +362,14 @@
return 0;
}
-void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
+int f2fs_submit_page_mbio(struct f2fs_io_info *fio)
{
struct f2fs_sb_info *sbi = fio->sbi;
enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
struct f2fs_bio_info *io;
bool is_read = is_read_io(fio->op);
struct page *bio_page;
+ int err = 0;
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
@@ -346,6 +390,12 @@
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
+ if ((fio->type == DATA || fio->type == NODE) &&
+ fio->new_blkaddr & F2FS_IO_SIZE_MASK(sbi)) {
+ err = -EAGAIN;
+ dec_page_count(sbi, WB_DATA_TYPE(bio_page));
+ goto out_fail;
+ }
io->bio = __bio_alloc(sbi, fio->new_blkaddr,
BIO_MAX_PAGES, is_read);
io->fio = *fio;
@@ -359,9 +409,10 @@
io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
-
+out_fail:
up_write(&io->io_rwsem);
trace_f2fs_submit_page_mbio(fio->page, fio);
+ return err;
}
static void __set_data_blkaddr(struct dnode_of_data *dn)