ext4 crypto: implement the ext4 encryption write path
Michael Halcrow [Wed, 15 Apr 2015 00:48:49 +0000 (20:48 -0400)]
Pulls block_write_begin() into fs/ext4/inode.c because it might need
to do a low-level read of the existing data, in which case we need to
decrypt it.

Change-Id: If317411eb54d5fb8d0b5570289e2cbefe78b389d
Signed-off-by: Michael Halcrow <mhalcrow@google.com>
Signed-off-by: Ildar Muslukhov <ildarm@google.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Theodore Ts'o <tytso@google.com>

fs/ext4/extents.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/page-io.c

index 115c02f..b188d0b 100644 (file)
@@ -3111,6 +3111,9 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
        ee_len    = ext4_ext_get_actual_len(ex);
        ee_pblock = ext4_ext_pblock(ex);
 
+       if (ext4_encrypted_inode(inode))
+               return ext4_encrypted_zeroout(inode, ex);
+
        ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
        if (ret > 0)
                ret = 0;
@@ -4911,6 +4914,20 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        ext4_lblk_t lblk;
        unsigned int blkbits = inode->i_blkbits;
 
+       /*
+        * Encrypted inodes can't handle collapse range or insert
+        * range since we would need to re-encrypt blocks with a
+        * different IV or XTS tweak (which are based on the logical
+        * block number).
+        *
+        * XXX It's not clear why zero range isn't working, but we'll
+        * leave it disabled for encrypted inodes for now.  This is a
+        * bug we should fix....
+        */
+       if (ext4_encrypted_inode(inode) &&
+           (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE)))
+               return -EOPNOTSUPP;
+
        /* Return error if mode is not supported */
        if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
                     FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE))
index ac644c3..e554ca3 100644 (file)
@@ -997,6 +997,11 @@ got:
        ei->i_block_group = group;
        ei->i_last_alloc_group = ~0;
 
+       /* If the directory encrypted, then we should encrypt the inode. */
+       if ((S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) &&
+           ext4_encrypted_inode(dir))
+               ext4_set_inode_flag(inode, EXT4_INODE_ENCRYPT);
+
        ext4_set_inode_flags(inode);
        if (IS_DIRSYNC(inode))
                ext4_handle_sync(handle);
index 49b9c65..042d091 100644 (file)
@@ -878,6 +878,95 @@ int do_journal_get_write_access(handle_t *handle,
 
 static int ext4_get_block_write_nolock(struct inode *inode, sector_t iblock,
                   struct buffer_head *bh_result, int create);
+
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
+                                 get_block_t *get_block)
+{
+       unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+       unsigned to = from + len;
+       struct inode *inode = page->mapping->host;
+       unsigned block_start, block_end;
+       sector_t block;
+       int err = 0;
+       unsigned blocksize = inode->i_sb->s_blocksize;
+       unsigned bbits;
+       struct buffer_head *bh, *head, *wait[2], **wait_bh = wait;
+       bool decrypt = false;
+
+       BUG_ON(!PageLocked(page));
+       BUG_ON(from > PAGE_CACHE_SIZE);
+       BUG_ON(to > PAGE_CACHE_SIZE);
+       BUG_ON(from > to);
+
+       if (!page_has_buffers(page))
+               create_empty_buffers(page, blocksize, 0);
+       head = page_buffers(page);
+       bbits = ilog2(blocksize);
+       block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+
+       for (bh = head, block_start = 0; bh != head || !block_start;
+           block++, block_start = block_end, bh = bh->b_this_page) {
+               block_end = block_start + blocksize;
+               if (block_end <= from || block_start >= to) {
+                       if (PageUptodate(page)) {
+                               if (!buffer_uptodate(bh))
+                                       set_buffer_uptodate(bh);
+                       }
+                       continue;
+               }
+               if (buffer_new(bh))
+                       clear_buffer_new(bh);
+               if (!buffer_mapped(bh)) {
+                       WARN_ON(bh->b_size != blocksize);
+                       err = get_block(inode, block, bh, 1);
+                       if (err)
+                               break;
+                       if (buffer_new(bh)) {
+                               unmap_underlying_metadata(bh->b_bdev,
+                                                         bh->b_blocknr);
+                               if (PageUptodate(page)) {
+                                       clear_buffer_new(bh);
+                                       set_buffer_uptodate(bh);
+                                       mark_buffer_dirty(bh);
+                                       continue;
+                               }
+                               if (block_end > to || block_start < from)
+                                       zero_user_segments(page, to, block_end,
+                                                          block_start, from);
+                               continue;
+                       }
+               }
+               if (PageUptodate(page)) {
+                       if (!buffer_uptodate(bh))
+                               set_buffer_uptodate(bh);
+                       continue;
+               }
+               if (!buffer_uptodate(bh) && !buffer_delay(bh) &&
+                   !buffer_unwritten(bh) &&
+                   (block_start < from || block_end > to)) {
+                       ll_rw_block(READ, 1, &bh);
+                       *wait_bh++ = bh;
+                       decrypt = ext4_encrypted_inode(inode) &&
+                               S_ISREG(inode->i_mode);
+               }
+       }
+       /*
+        * If we issued read requests, let them complete.
+        */
+       while (wait_bh > wait) {
+               wait_on_buffer(*--wait_bh);
+               if (!buffer_uptodate(*wait_bh))
+                       err = -EIO;
+       }
+       if (unlikely(err))
+               page_zero_new_buffers(page, from, to);
+       else if (decrypt)
+               err = ext4_decrypt_one(inode, page);
+       return err;
+}
+#endif
+
 static int ext4_write_begin(struct file *file, struct address_space *mapping,
                            loff_t pos, unsigned len, unsigned flags,
                            struct page **pagep, void **fsdata)
@@ -940,11 +1029,19 @@ retry_journal:
        /* In case writeback began while the page was unlocked */
        wait_for_stable_page(page);
 
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       if (ext4_should_dioread_nolock(inode))
+               ret = ext4_block_write_begin(page, pos, len,
+                                            ext4_get_block_write);
+       else
+               ret = ext4_block_write_begin(page, pos, len,
+                                            ext4_get_block);
+#else
        if (ext4_should_dioread_nolock(inode))
                ret = __block_write_begin(page, pos, len, ext4_get_block_write);
        else
                ret = __block_write_begin(page, pos, len, ext4_get_block);
-
+#endif
        if (!ret && ext4_should_journal_data(inode)) {
                ret = ext4_walk_page_buffers(handle, page_buffers(page),
                                             from, to, NULL,
@@ -2600,7 +2697,12 @@ retry_journal:
        /* In case writeback began while the page was unlocked */
        wait_for_stable_page(page);
 
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       ret = ext4_block_write_begin(page, pos, len,
+                                    ext4_da_get_block_prep);
+#else
        ret = __block_write_begin(page, pos, len, ext4_da_get_block_prep);
+#endif
        if (ret < 0) {
                unlock_page(page);
                ext4_journal_stop(handle);
@@ -3058,6 +3160,9 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
                get_block_func = ext4_get_block_write;
                dio_flags = DIO_LOCKING;
        }
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       BUG_ON(ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode));
+#endif
        ret = __blockdev_direct_IO(rw, iocb, inode,
                                   inode->i_sb->s_bdev, iov,
                                   offset, nr_segs,
@@ -3122,6 +3227,11 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb,
        size_t count = iov_length(iov, nr_segs);
        ssize_t ret;
 
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+       if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode))
+               return 0;
+#endif
+
        /*
         * If we are doing data journalling we don't support O_DIRECT
         */
index fee0726..20ff238 100644 (file)
@@ -69,6 +69,10 @@ static void ext4_finish_bio(struct bio *bio)
 
        bio_for_each_segment_all(bvec, bio, i) {
                struct page *page = bvec->bv_page;
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+               struct page *data_page = NULL;
+               struct ext4_crypto_ctx *ctx = NULL;
+#endif
                struct buffer_head *bh, *head;
                unsigned bio_start = bvec->bv_offset;
                unsigned bio_end = bio_start + bvec->bv_len;
@@ -78,6 +82,15 @@ static void ext4_finish_bio(struct bio *bio)
                if (!page)
                        continue;
 
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+               if (!page->mapping) {
+                       /* The bounce data pages are unmapped. */
+                       data_page = page;
+                       ctx = (struct ext4_crypto_ctx *)page_private(data_page);
+                       page = ctx->control_page;
+               }
+#endif
+
                if (error) {
                        SetPageError(page);
                        set_bit(AS_EIO, &page->mapping->flags);
@@ -102,8 +115,13 @@ static void ext4_finish_bio(struct bio *bio)
                } while ((bh = bh->b_this_page) != head);
                bit_spin_unlock(BH_Uptodate_Lock, &head->b_state);
                local_irq_restore(flags);
-               if (!under_io)
+               if (!under_io) {
+#ifdef CONFIG_EXT4_FS_ENCRYPTION
+                       if (ctx)
+                               ext4_restore_control_page(data_page);
+#endif
                        end_page_writeback(page);
+               }
        }
 }
 
@@ -378,6 +396,7 @@ static int io_submit_init_bio(struct ext4_io_submit *io,
 
 static int io_submit_add_bh(struct ext4_io_submit *io,
                            struct inode *inode,
+                           struct page *page,
                            struct buffer_head *bh)
 {
        int ret;
@@ -391,7 +410,7 @@ submit_and_retry:
                if (ret)
                        return ret;
        }
-       ret = bio_add_page(io->io_bio, bh->b_page, bh->b_size, bh_offset(bh));
+       ret = bio_add_page(io->io_bio, page, bh->b_size, bh_offset(bh));
        if (ret != bh->b_size)
                goto submit_and_retry;
        io->io_next_block++;
@@ -404,6 +423,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                        struct writeback_control *wbc,
                        bool keep_towrite)
 {
+       struct page *data_page = NULL;
        struct inode *inode = page->mapping->host;
        unsigned block_start, blocksize;
        struct buffer_head *bh, *head;
@@ -463,19 +483,29 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                set_buffer_async_write(bh);
        } while ((bh = bh->b_this_page) != head);
 
-       /* Now submit buffers to write */
        bh = head = page_buffers(page);
+
+       if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+               data_page = ext4_encrypt(inode, page);
+               if (IS_ERR(data_page)) {
+                       ret = PTR_ERR(data_page);
+                       data_page = NULL;
+                       goto out;
+               }
+       }
+
+       /* Now submit buffers to write */
        do {
                if (!buffer_async_write(bh))
                        continue;
-               ret = io_submit_add_bh(io, inode, bh);
+               ret = io_submit_add_bh(io, inode,
+                                      data_page ? data_page : page, bh);
                if (ret) {
                        /*
                         * We only get here on ENOMEM.  Not much else
                         * we can do but mark the page as dirty, and
                         * better luck next time.
                         */
-                       redirty_page_for_writepage(wbc, page);
                        break;
                }
                nr_submitted++;
@@ -484,6 +514,11 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
 
        /* Error stopped previous loop? Clean up buffers... */
        if (ret) {
+       out:
+               if (data_page)
+                       ext4_restore_control_page(data_page);
+               printk_ratelimited(KERN_ERR "%s: ret = %d\n", __func__, ret);
+               redirty_page_for_writepage(wbc, page);
                do {
                        clear_buffer_async_write(bh);
                        bh = bh->b_this_page;