fallocate should be a file operation
Christoph Hellwig [Fri, 14 Jan 2011 12:07:43 +0000 (13:07 +0100)]
Currently all filesystems except XFS implement fallocate asynchronously,
while XFS forced a commit.  Both of these are suboptimal - in case of O_SYNC
I/O we really want our allocation on disk, especially for the !KEEP_SIZE
case where we actually grow the file with user-visible zeroes.  On the
other hand always commiting the transaction is a bad idea for fast-path
uses of fallocate like for example in recent Samba versions.   Given
that block allocation is a data plane operation anyway change it from
an inode operation to a file operation so that we have the file structure
available that lets us check for O_SYNC.

This also includes moving the code around for a few of the filesystems,
and remove the already unnedded S_ISDIR checks given that we only wire
up fallocate for regular files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>

13 files changed:
Documentation/filesystems/Locking
fs/btrfs/file.c
fs/btrfs/inode.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/file.c
fs/gfs2/file.c
fs/gfs2/ops_inode.c
fs/ocfs2/file.c
fs/open.c
fs/xfs/linux-2.6/xfs_file.c
fs/xfs/linux-2.6/xfs_iops.c
include/linux/fs.h

index 651d523..4471a41 100644 (file)
@@ -60,7 +60,6 @@ ata *);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
        void (*truncate_range)(struct inode *, loff_t, loff_t);
-       long (*fallocate)(struct inode *inode, int mode, loff_t offset, loff_t len);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
 
 locking rules:
@@ -88,7 +87,6 @@ getxattr:     no
 listxattr:     no
 removexattr:   yes
 truncate_range:        yes
-fallocate:     no
 fiemap:                no
        Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
 victim.
@@ -437,6 +435,7 @@ prototypes:
        ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *,
                        size_t, unsigned int);
        int (*setlease)(struct file *, long, struct file_lock **);
+       long (*fallocate)(struct file *, int, loff_t, loff_t);
 };
 
 locking rules:
index 66836d8..a9e0a4e 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/string.h>
 #include <linux/backing-dev.h>
 #include <linux/mpage.h>
+#include <linux/falloc.h>
 #include <linux/swap.h>
 #include <linux/writeback.h>
 #include <linux/statfs.h>
@@ -1237,6 +1238,117 @@ static int btrfs_file_mmap(struct file  *filp, struct vm_area_struct *vma)
        return 0;
 }
 
+static long btrfs_fallocate(struct file *file, int mode,
+                           loff_t offset, loff_t len)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct extent_state *cached_state = NULL;
+       u64 cur_offset;
+       u64 last_byte;
+       u64 alloc_start;
+       u64 alloc_end;
+       u64 alloc_hint = 0;
+       u64 locked_end;
+       u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+       struct extent_map *em;
+       int ret;
+
+       alloc_start = offset & ~mask;
+       alloc_end =  (offset + len + mask) & ~mask;
+
+       /* We only support the FALLOC_FL_KEEP_SIZE mode */
+       if (mode & ~FALLOC_FL_KEEP_SIZE)
+               return -EOPNOTSUPP;
+
+       /*
+        * wait for ordered IO before we have any locks.  We'll loop again
+        * below with the locks held.
+        */
+       btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
+
+       mutex_lock(&inode->i_mutex);
+       ret = inode_newsize_ok(inode, alloc_end);
+       if (ret)
+               goto out;
+
+       if (alloc_start > inode->i_size) {
+               ret = btrfs_cont_expand(inode, alloc_start);
+               if (ret)
+                       goto out;
+       }
+
+       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
+       if (ret)
+               goto out;
+
+       locked_end = alloc_end - 1;
+       while (1) {
+               struct btrfs_ordered_extent *ordered;
+
+               /* the extent lock is ordered inside the running
+                * transaction
+                */
+               lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
+                                locked_end, 0, &cached_state, GFP_NOFS);
+               ordered = btrfs_lookup_first_ordered_extent(inode,
+                                                           alloc_end - 1);
+               if (ordered &&
+                   ordered->file_offset + ordered->len > alloc_start &&
+                   ordered->file_offset < alloc_end) {
+                       btrfs_put_ordered_extent(ordered);
+                       unlock_extent_cached(&BTRFS_I(inode)->io_tree,
+                                            alloc_start, locked_end,
+                                            &cached_state, GFP_NOFS);
+                       /*
+                        * we can't wait on the range with the transaction
+                        * running or with the extent lock held
+                        */
+                       btrfs_wait_ordered_range(inode, alloc_start,
+                                                alloc_end - alloc_start);
+               } else {
+                       if (ordered)
+                               btrfs_put_ordered_extent(ordered);
+                       break;
+               }
+       }
+
+       cur_offset = alloc_start;
+       while (1) {
+               em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+                                     alloc_end - cur_offset, 0);
+               BUG_ON(IS_ERR(em) || !em);
+               last_byte = min(extent_map_end(em), alloc_end);
+               last_byte = (last_byte + mask) & ~mask;
+               if (em->block_start == EXTENT_MAP_HOLE ||
+                   (cur_offset >= inode->i_size &&
+                    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+                       ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
+                                                       last_byte - cur_offset,
+                                                       1 << inode->i_blkbits,
+                                                       offset + len,
+                                                       &alloc_hint);
+                       if (ret < 0) {
+                               free_extent_map(em);
+                               break;
+                       }
+               }
+               free_extent_map(em);
+
+               cur_offset = last_byte;
+               if (cur_offset >= alloc_end) {
+                       ret = 0;
+                       break;
+               }
+       }
+       unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+                            &cached_state, GFP_NOFS);
+
+       btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
+out:
+       mutex_unlock(&inode->i_mutex);
+       return ret;
+}
+
 const struct file_operations btrfs_file_operations = {
        .llseek         = generic_file_llseek,
        .read           = do_sync_read,
@@ -1248,6 +1360,7 @@ const struct file_operations btrfs_file_operations = {
        .open           = generic_file_open,
        .release        = btrfs_release_file,
        .fsync          = btrfs_sync_file,
+       .fallocate      = btrfs_fallocate,
        .unlocked_ioctl = btrfs_ioctl,
 #ifdef CONFIG_COMPAT
        .compat_ioctl   = btrfs_ioctl,
index 64daf2a..902afbf 100644 (file)
@@ -7098,116 +7098,6 @@ int btrfs_prealloc_file_range_trans(struct inode *inode,
                                           min_size, actual_len, alloc_hint, trans);
 }
 
-static long btrfs_fallocate(struct inode *inode, int mode,
-                           loff_t offset, loff_t len)
-{
-       struct extent_state *cached_state = NULL;
-       u64 cur_offset;
-       u64 last_byte;
-       u64 alloc_start;
-       u64 alloc_end;
-       u64 alloc_hint = 0;
-       u64 locked_end;
-       u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
-       struct extent_map *em;
-       int ret;
-
-       alloc_start = offset & ~mask;
-       alloc_end =  (offset + len + mask) & ~mask;
-
-       /* We only support the FALLOC_FL_KEEP_SIZE mode */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
-               return -EOPNOTSUPP;
-
-       /*
-        * wait for ordered IO before we have any locks.  We'll loop again
-        * below with the locks held.
-        */
-       btrfs_wait_ordered_range(inode, alloc_start, alloc_end - alloc_start);
-
-       mutex_lock(&inode->i_mutex);
-       ret = inode_newsize_ok(inode, alloc_end);
-       if (ret)
-               goto out;
-
-       if (alloc_start > inode->i_size) {
-               ret = btrfs_cont_expand(inode, alloc_start);
-               if (ret)
-                       goto out;
-       }
-
-       ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start);
-       if (ret)
-               goto out;
-
-       locked_end = alloc_end - 1;
-       while (1) {
-               struct btrfs_ordered_extent *ordered;
-
-               /* the extent lock is ordered inside the running
-                * transaction
-                */
-               lock_extent_bits(&BTRFS_I(inode)->io_tree, alloc_start,
-                                locked_end, 0, &cached_state, GFP_NOFS);
-               ordered = btrfs_lookup_first_ordered_extent(inode,
-                                                           alloc_end - 1);
-               if (ordered &&
-                   ordered->file_offset + ordered->len > alloc_start &&
-                   ordered->file_offset < alloc_end) {
-                       btrfs_put_ordered_extent(ordered);
-                       unlock_extent_cached(&BTRFS_I(inode)->io_tree,
-                                            alloc_start, locked_end,
-                                            &cached_state, GFP_NOFS);
-                       /*
-                        * we can't wait on the range with the transaction
-                        * running or with the extent lock held
-                        */
-                       btrfs_wait_ordered_range(inode, alloc_start,
-                                                alloc_end - alloc_start);
-               } else {
-                       if (ordered)
-                               btrfs_put_ordered_extent(ordered);
-                       break;
-               }
-       }
-
-       cur_offset = alloc_start;
-       while (1) {
-               em = btrfs_get_extent(inode, NULL, 0, cur_offset,
-                                     alloc_end - cur_offset, 0);
-               BUG_ON(IS_ERR(em) || !em);
-               last_byte = min(extent_map_end(em), alloc_end);
-               last_byte = (last_byte + mask) & ~mask;
-               if (em->block_start == EXTENT_MAP_HOLE ||
-                   (cur_offset >= inode->i_size &&
-                    !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
-                       ret = btrfs_prealloc_file_range(inode, mode, cur_offset,
-                                                       last_byte - cur_offset,
-                                                       1 << inode->i_blkbits,
-                                                       offset + len,
-                                                       &alloc_hint);
-                       if (ret < 0) {
-                               free_extent_map(em);
-                               break;
-                       }
-               }
-               free_extent_map(em);
-
-               cur_offset = last_byte;
-               if (cur_offset >= alloc_end) {
-                       ret = 0;
-                       break;
-               }
-       }
-       unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
-                            &cached_state, GFP_NOFS);
-
-       btrfs_free_reserved_data_space(inode, alloc_end - alloc_start);
-out:
-       mutex_unlock(&inode->i_mutex);
-       return ret;
-}
-
 static int btrfs_set_page_dirty(struct page *page)
 {
        return __set_page_dirty_nobuffers(page);
@@ -7310,7 +7200,6 @@ static const struct inode_operations btrfs_file_inode_operations = {
        .listxattr      = btrfs_listxattr,
        .removexattr    = btrfs_removexattr,
        .permission     = btrfs_permission,
-       .fallocate      = btrfs_fallocate,
        .fiemap         = btrfs_fiemap,
 };
 static const struct inode_operations btrfs_special_inode_operations = {
index 1de65f5..0c8d97b 100644 (file)
@@ -2065,7 +2065,7 @@ extern int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 extern void ext4_ext_truncate(struct inode *);
 extern void ext4_ext_init(struct super_block *);
 extern void ext4_ext_release(struct super_block *);
-extern long ext4_fallocate(struct inode *inode, int mode, loff_t offset,
+extern long ext4_fallocate(struct file *file, int mode, loff_t offset,
                          loff_t len);
 extern int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
                          ssize_t len);
index 4bdd160..63a7581 100644 (file)
@@ -3627,14 +3627,15 @@ static void ext4_falloc_update_inode(struct inode *inode,
 }
 
 /*
- * preallocate space for a file. This implements ext4's fallocate inode
+ * preallocate space for a file. This implements ext4's fallocate file
  * operation, which gets called from sys_fallocate system call.
  * For block-mapped files, posix_fallocate should fall back to the method
  * of writing zeroes to the required new blocks (the same behavior which is
  * expected for file systems which do not support fallocate() system call).
  */
-long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
+long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 {
+       struct inode *inode = file->f_path.dentry->d_inode;
        handle_t *handle;
        loff_t new_size;
        unsigned int max_blocks;
@@ -3655,10 +3656,6 @@ long ext4_fallocate(struct inode *inode, int mode, loff_t offset, loff_t len)
        if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
                return -EOPNOTSUPP;
 
-       /* preallocation to directories is currently not supported */
-       if (S_ISDIR(inode->i_mode))
-               return -ENODEV;
-
        map.m_lblk = offset >> blkbits;
        /*
         * We can't just convert len to max_blocks because
index bb003dc..2e8322c 100644 (file)
@@ -210,6 +210,7 @@ const struct file_operations ext4_file_operations = {
        .fsync          = ext4_sync_file,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
+       .fallocate      = ext4_fallocate,
 };
 
 const struct inode_operations ext4_file_inode_operations = {
@@ -223,7 +224,6 @@ const struct inode_operations ext4_file_inode_operations = {
        .removexattr    = generic_removexattr,
 #endif
        .check_acl      = ext4_check_acl,
-       .fallocate      = ext4_fallocate,
        .fiemap         = ext4_fiemap,
 };
 
index fca6689..7cfdcb9 100644 (file)
@@ -19,6 +19,8 @@
 #include <linux/fs.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/ext2_fs.h>
+#include <linux/falloc.h>
+#include <linux/swap.h>
 #include <linux/crc32.h>
 #include <linux/writeback.h>
 #include <asm/uaccess.h>
@@ -610,6 +612,260 @@ static ssize_t gfs2_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
        return generic_file_aio_write(iocb, iov, nr_segs, pos);
 }
 
+static void empty_write_end(struct page *page, unsigned from,
+                          unsigned to)
+{
+       struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+
+       page_zero_new_buffers(page, from, to);
+       flush_dcache_page(page);
+       mark_page_accessed(page);
+
+       if (!gfs2_is_writeback(ip))
+               gfs2_page_add_databufs(ip, page, from, to);
+
+       block_commit_write(page, from, to);
+}
+
+static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
+{
+       unsigned start, end, next;
+       struct buffer_head *bh, *head;
+       int error;
+
+       if (!page_has_buffers(page)) {
+               error = __block_write_begin(page, from, to - from, gfs2_block_map);
+               if (unlikely(error))
+                       return error;
+
+               empty_write_end(page, from, to);
+               return 0;
+       }
+
+       bh = head = page_buffers(page);
+       next = end = 0;
+       while (next < from) {
+               next += bh->b_size;
+               bh = bh->b_this_page;
+       }
+       start = next;
+       do {
+               next += bh->b_size;
+               if (buffer_mapped(bh)) {
+                       if (end) {
+                               error = __block_write_begin(page, start, end - start,
+                                                           gfs2_block_map);
+                               if (unlikely(error))
+                                       return error;
+                               empty_write_end(page, start, end);
+                               end = 0;
+                       }
+                       start = next;
+               }
+               else
+                       end = next;
+               bh = bh->b_this_page;
+       } while (next < to);
+
+       if (end) {
+               error = __block_write_begin(page, start, end - start, gfs2_block_map);
+               if (unlikely(error))
+                       return error;
+               empty_write_end(page, start, end);
+       }
+
+       return 0;
+}
+
+static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
+                          int mode)
+{
+       struct gfs2_inode *ip = GFS2_I(inode);
+       struct buffer_head *dibh;
+       int error;
+       u64 start = offset >> PAGE_CACHE_SHIFT;
+       unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
+       u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+       pgoff_t curr;
+       struct page *page;
+       unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
+       unsigned int from, to;
+
+       if (!end_offset)
+               end_offset = PAGE_CACHE_SIZE;
+
+       error = gfs2_meta_inode_buffer(ip, &dibh);
+       if (unlikely(error))
+               goto out;
+
+       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
+
+       if (gfs2_is_stuffed(ip)) {
+               error = gfs2_unstuff_dinode(ip, NULL);
+               if (unlikely(error))
+                       goto out;
+       }
+
+       curr = start;
+       offset = start << PAGE_CACHE_SHIFT;
+       from = start_offset;
+       to = PAGE_CACHE_SIZE;
+       while (curr <= end) {
+               page = grab_cache_page_write_begin(inode->i_mapping, curr,
+                                                  AOP_FLAG_NOFS);
+               if (unlikely(!page)) {
+                       error = -ENOMEM;
+                       goto out;
+               }
+
+               if (curr == end)
+                       to = end_offset;
+               error = write_empty_blocks(page, from, to);
+               if (!error && offset + to > inode->i_size &&
+                   !(mode & FALLOC_FL_KEEP_SIZE)) {
+                       i_size_write(inode, offset + to);
+               }
+               unlock_page(page);
+               page_cache_release(page);
+               if (error)
+                       goto out;
+               curr++;
+               offset += PAGE_CACHE_SIZE;
+               from = 0;
+       }
+
+       gfs2_dinode_out(ip, dibh->b_data);
+       mark_inode_dirty(inode);
+
+       brelse(dibh);
+
+out:
+       return error;
+}
+
+static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
+                           unsigned int *data_blocks, unsigned int *ind_blocks)
+{
+       const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
+       unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
+       unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
+
+       for (tmp = max_data; tmp > sdp->sd_diptrs;) {
+               tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
+               max_data -= tmp;
+       }
+       /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
+          so it might end up with fewer data blocks */
+       if (max_data <= *data_blocks)
+               return;
+       *data_blocks = max_data;
+       *ind_blocks = max_blocks - max_data;
+       *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
+       if (*len > max) {
+               *len = max;
+               gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
+       }
+}
+
+static long gfs2_fallocate(struct file *file, int mode, loff_t offset,
+                          loff_t len)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       struct gfs2_sbd *sdp = GFS2_SB(inode);
+       struct gfs2_inode *ip = GFS2_I(inode);
+       unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
+       loff_t bytes, max_bytes;
+       struct gfs2_alloc *al;
+       int error;
+       loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
+       next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
+
+       /* We only support the FALLOC_FL_KEEP_SIZE mode */
+       if (mode & ~FALLOC_FL_KEEP_SIZE)
+               return -EOPNOTSUPP;
+
+       offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
+                sdp->sd_sb.sb_bsize_shift;
+
+       len = next - offset;
+       bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
+       if (!bytes)
+               bytes = UINT_MAX;
+
+       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
+       error = gfs2_glock_nq(&ip->i_gh);
+       if (unlikely(error))
+               goto out_uninit;
+
+       if (!gfs2_write_alloc_required(ip, offset, len))
+               goto out_unlock;
+
+       while (len > 0) {
+               if (len < bytes)
+                       bytes = len;
+               al = gfs2_alloc_get(ip);
+               if (!al) {
+                       error = -ENOMEM;
+                       goto out_unlock;
+               }
+
+               error = gfs2_quota_lock_check(ip);
+               if (error)
+                       goto out_alloc_put;
+
+retry:
+               gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
+
+               al->al_requested = data_blocks + ind_blocks;
+               error = gfs2_inplace_reserve(ip);
+               if (error) {
+                       if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
+                               bytes >>= 1;
+                               goto retry;
+                       }
+                       goto out_qunlock;
+               }
+               max_bytes = bytes;
+               calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
+               al->al_requested = data_blocks + ind_blocks;
+
+               rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
+                         RES_RG_HDR + gfs2_rg_blocks(al);
+               if (gfs2_is_jdata(ip))
+                       rblocks += data_blocks ? data_blocks : 1;
+
+               error = gfs2_trans_begin(sdp, rblocks,
+                                        PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+               if (error)
+                       goto out_trans_fail;
+
+               error = fallocate_chunk(inode, offset, max_bytes, mode);
+               gfs2_trans_end(sdp);
+
+               if (error)
+                       goto out_trans_fail;
+
+               len -= max_bytes;
+               offset += max_bytes;
+               gfs2_inplace_release(ip);
+               gfs2_quota_unlock(ip);
+               gfs2_alloc_put(ip);
+       }
+       goto out_unlock;
+
+out_trans_fail:
+       gfs2_inplace_release(ip);
+out_qunlock:
+       gfs2_quota_unlock(ip);
+out_alloc_put:
+       gfs2_alloc_put(ip);
+out_unlock:
+       gfs2_glock_dq(&ip->i_gh);
+out_uninit:
+       gfs2_holder_uninit(&ip->i_gh);
+       return error;
+}
+
 #ifdef CONFIG_GFS2_FS_LOCKING_DLM
 
 /**
@@ -765,6 +1021,7 @@ const struct file_operations gfs2_file_fops = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
        .setlease       = gfs2_setlease,
+       .fallocate      = gfs2_fallocate,
 };
 
 const struct file_operations gfs2_dir_fops = {
@@ -794,6 +1051,7 @@ const struct file_operations gfs2_file_fops_nolock = {
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
        .setlease       = generic_setlease,
+       .fallocate      = gfs2_fallocate,
 };
 
 const struct file_operations gfs2_dir_fops_nolock = {
index c09528c..d8b26ac 100644 (file)
@@ -18,8 +18,6 @@
 #include <linux/gfs2_ondisk.h>
 #include <linux/crc32.h>
 #include <linux/fiemap.h>
-#include <linux/swap.h>
-#include <linux/falloc.h>
 #include <asm/uaccess.h>
 
 #include "gfs2.h"
@@ -1257,261 +1255,6 @@ static int gfs2_removexattr(struct dentry *dentry, const char *name)
        return ret;
 }
 
-static void empty_write_end(struct page *page, unsigned from,
-                          unsigned to)
-{
-       struct gfs2_inode *ip = GFS2_I(page->mapping->host);
-
-       page_zero_new_buffers(page, from, to);
-       flush_dcache_page(page);
-       mark_page_accessed(page);
-
-       if (!gfs2_is_writeback(ip))
-               gfs2_page_add_databufs(ip, page, from, to);
-
-       block_commit_write(page, from, to);
-}
-
-
-static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
-{
-       unsigned start, end, next;
-       struct buffer_head *bh, *head;
-       int error;
-
-       if (!page_has_buffers(page)) {
-               error = __block_write_begin(page, from, to - from, gfs2_block_map);
-               if (unlikely(error))
-                       return error;
-
-               empty_write_end(page, from, to);
-               return 0;
-       }
-
-       bh = head = page_buffers(page);
-       next = end = 0;
-       while (next < from) {
-               next += bh->b_size;
-               bh = bh->b_this_page;
-       }
-       start = next;
-       do {
-               next += bh->b_size;
-               if (buffer_mapped(bh)) {
-                       if (end) {
-                               error = __block_write_begin(page, start, end - start,
-                                                           gfs2_block_map);
-                               if (unlikely(error))
-                                       return error;
-                               empty_write_end(page, start, end);
-                               end = 0;
-                       }
-                       start = next;
-               }
-               else
-                       end = next;
-               bh = bh->b_this_page;
-       } while (next < to);
-
-       if (end) {
-               error = __block_write_begin(page, start, end - start, gfs2_block_map);
-               if (unlikely(error))
-                       return error;
-               empty_write_end(page, start, end);
-       }
-
-       return 0;
-}
-
-static int fallocate_chunk(struct inode *inode, loff_t offset, loff_t len,
-                          int mode)
-{
-       struct gfs2_inode *ip = GFS2_I(inode);
-       struct buffer_head *dibh;
-       int error;
-       u64 start = offset >> PAGE_CACHE_SHIFT;
-       unsigned int start_offset = offset & ~PAGE_CACHE_MASK;
-       u64 end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
-       pgoff_t curr;
-       struct page *page;
-       unsigned int end_offset = (offset + len) & ~PAGE_CACHE_MASK;
-       unsigned int from, to;
-
-       if (!end_offset)
-               end_offset = PAGE_CACHE_SIZE;
-
-       error = gfs2_meta_inode_buffer(ip, &dibh);
-       if (unlikely(error))
-               goto out;
-
-       gfs2_trans_add_bh(ip->i_gl, dibh, 1);
-
-       if (gfs2_is_stuffed(ip)) {
-               error = gfs2_unstuff_dinode(ip, NULL);
-               if (unlikely(error))
-                       goto out;
-       }
-
-       curr = start;
-       offset = start << PAGE_CACHE_SHIFT;
-       from = start_offset;
-       to = PAGE_CACHE_SIZE;
-       while (curr <= end) {
-               page = grab_cache_page_write_begin(inode->i_mapping, curr,
-                                                  AOP_FLAG_NOFS);
-               if (unlikely(!page)) {
-                       error = -ENOMEM;
-                       goto out;
-               }
-
-               if (curr == end)
-                       to = end_offset;
-               error = write_empty_blocks(page, from, to);
-               if (!error && offset + to > inode->i_size &&
-                   !(mode & FALLOC_FL_KEEP_SIZE)) {
-                       i_size_write(inode, offset + to);
-               }
-               unlock_page(page);
-               page_cache_release(page);
-               if (error)
-                       goto out;
-               curr++;
-               offset += PAGE_CACHE_SIZE;
-               from = 0;
-       }
-
-       gfs2_dinode_out(ip, dibh->b_data);
-       mark_inode_dirty(inode);
-
-       brelse(dibh);
-
-out:
-       return error;
-}
-
-static void calc_max_reserv(struct gfs2_inode *ip, loff_t max, loff_t *len,
-                           unsigned int *data_blocks, unsigned int *ind_blocks)
-{
-       const struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
-       unsigned int max_blocks = ip->i_alloc->al_rgd->rd_free_clone;
-       unsigned int tmp, max_data = max_blocks - 3 * (sdp->sd_max_height - 1);
-
-       for (tmp = max_data; tmp > sdp->sd_diptrs;) {
-               tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
-               max_data -= tmp;
-       }
-       /* This calculation isn't the exact reverse of gfs2_write_calc_reserve,
-          so it might end up with fewer data blocks */
-       if (max_data <= *data_blocks)
-               return;
-       *data_blocks = max_data;
-       *ind_blocks = max_blocks - max_data;
-       *len = ((loff_t)max_data - 3) << sdp->sd_sb.sb_bsize_shift;
-       if (*len > max) {
-               *len = max;
-               gfs2_write_calc_reserv(ip, max, data_blocks, ind_blocks);
-       }
-}
-
-static long gfs2_fallocate(struct inode *inode, int mode, loff_t offset,
-                          loff_t len)
-{
-       struct gfs2_sbd *sdp = GFS2_SB(inode);
-       struct gfs2_inode *ip = GFS2_I(inode);
-       unsigned int data_blocks = 0, ind_blocks = 0, rblocks;
-       loff_t bytes, max_bytes;
-       struct gfs2_alloc *al;
-       int error;
-       loff_t next = (offset + len - 1) >> sdp->sd_sb.sb_bsize_shift;
-       next = (next + 1) << sdp->sd_sb.sb_bsize_shift;
-
-       /* We only support the FALLOC_FL_KEEP_SIZE mode */
-       if (mode & ~FALLOC_FL_KEEP_SIZE)
-               return -EOPNOTSUPP;
-
-       offset = (offset >> sdp->sd_sb.sb_bsize_shift) <<
-                sdp->sd_sb.sb_bsize_shift;
-
-       len = next - offset;
-       bytes = sdp->sd_max_rg_data * sdp->sd_sb.sb_bsize / 2;
-       if (!bytes)
-               bytes = UINT_MAX;
-
-       gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
-       error = gfs2_glock_nq(&ip->i_gh);
-       if (unlikely(error))
-               goto out_uninit;
-
-       if (!gfs2_write_alloc_required(ip, offset, len))
-               goto out_unlock;
-
-       while (len > 0) {
-               if (len < bytes)
-                       bytes = len;
-               al = gfs2_alloc_get(ip);
-               if (!al) {
-                       error = -ENOMEM;
-                       goto out_unlock;
-               }
-
-               error = gfs2_quota_lock_check(ip);
-               if (error)
-                       goto out_alloc_put;
-
-retry:
-               gfs2_write_calc_reserv(ip, bytes, &data_blocks, &ind_blocks);
-
-               al->al_requested = data_blocks + ind_blocks;
-               error = gfs2_inplace_reserve(ip);
-               if (error) {
-                       if (error == -ENOSPC && bytes > sdp->sd_sb.sb_bsize) {
-                               bytes >>= 1;
-                               goto retry;
-                       }
-                       goto out_qunlock;
-               }
-               max_bytes = bytes;
-               calc_max_reserv(ip, len, &max_bytes, &data_blocks, &ind_blocks);
-               al->al_requested = data_blocks + ind_blocks;
-
-               rblocks = RES_DINODE + ind_blocks + RES_STATFS + RES_QUOTA +
-                         RES_RG_HDR + gfs2_rg_blocks(al);
-               if (gfs2_is_jdata(ip))
-                       rblocks += data_blocks ? data_blocks : 1;
-
-               error = gfs2_trans_begin(sdp, rblocks,
-                                        PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
-               if (error)
-                       goto out_trans_fail;
-
-               error = fallocate_chunk(inode, offset, max_bytes, mode);
-               gfs2_trans_end(sdp);
-
-               if (error)
-                       goto out_trans_fail;
-
-               len -= max_bytes;
-               offset += max_bytes;
-               gfs2_inplace_release(ip);
-               gfs2_quota_unlock(ip);
-               gfs2_alloc_put(ip);
-       }
-       goto out_unlock;
-
-out_trans_fail:
-       gfs2_inplace_release(ip);
-out_qunlock:
-       gfs2_quota_unlock(ip);
-out_alloc_put:
-       gfs2_alloc_put(ip);
-out_unlock:
-       gfs2_glock_dq(&ip->i_gh);
-out_uninit:
-       gfs2_holder_uninit(&ip->i_gh);
-       return error;
-}
-
-
 static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                       u64 start, u64 len)
 {
@@ -1562,7 +1305,6 @@ const struct inode_operations gfs2_file_iops = {
        .getxattr = gfs2_getxattr,
        .listxattr = gfs2_listxattr,
        .removexattr = gfs2_removexattr,
-       .fallocate = gfs2_fallocate,
        .fiemap = gfs2_fiemap,
 };
 
index cf254ce..a665195 100644 (file)
@@ -1989,9 +1989,10 @@ int ocfs2_change_file_space(struct file *file, unsigned int cmd,
        return __ocfs2_change_file_space(file, inode, file->f_pos, cmd, sr, 0);
 }
 
-static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
+static long ocfs2_fallocate(struct file *file, int mode, loff_t offset,
                            loff_t len)
 {
+       struct inode *inode = file->f_path.dentry->d_inode;
        struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
        struct ocfs2_space_resv sr;
        int change_size = 1;
@@ -2002,9 +2003,6 @@ static long ocfs2_fallocate(struct inode *inode, int mode, loff_t offset,
        if (!ocfs2_writes_unwritten_extents(osb))
                return -EOPNOTSUPP;
 
-       if (S_ISDIR(inode->i_mode))
-               return -ENODEV;
-
        if (mode & FALLOC_FL_KEEP_SIZE)
                change_size = 0;
 
@@ -2612,7 +2610,6 @@ const struct inode_operations ocfs2_file_iops = {
        .getxattr       = generic_getxattr,
        .listxattr      = ocfs2_listxattr,
        .removexattr    = generic_removexattr,
-       .fallocate      = ocfs2_fallocate,
        .fiemap         = ocfs2_fiemap,
 };
 
@@ -2644,6 +2641,7 @@ const struct file_operations ocfs2_fops = {
        .flock          = ocfs2_flock,
        .splice_read    = ocfs2_file_splice_read,
        .splice_write   = ocfs2_file_splice_write,
+       .fallocate      = ocfs2_fallocate,
 };
 
 const struct file_operations ocfs2_dops = {
index 5b6ef7e..e52389e 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -255,10 +255,10 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
        if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
                return -EFBIG;
 
-       if (!inode->i_op->fallocate)
+       if (!file->f_op->fallocate)
                return -EOPNOTSUPP;
 
-       return inode->i_op->fallocate(inode, mode, offset, len);
+       return file->f_op->fallocate(file, mode, offset, len);
 }
 
 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
index ef51eb4..a55c1b4 100644 (file)
@@ -37,6 +37,7 @@
 #include "xfs_trace.h"
 
 #include <linux/dcache.h>
+#include <linux/falloc.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -882,6 +883,60 @@ out_unlock:
        return ret;
 }
 
+STATIC long
+xfs_file_fallocate(
+       struct file     *file,
+       int             mode,
+       loff_t          offset,
+       loff_t          len)
+{
+       struct inode    *inode = file->f_path.dentry->d_inode;
+       long            error;
+       loff_t          new_size = 0;
+       xfs_flock64_t   bf;
+       xfs_inode_t     *ip = XFS_I(inode);
+       int             cmd = XFS_IOC_RESVSP;
+
+       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
+               return -EOPNOTSUPP;
+
+       bf.l_whence = 0;
+       bf.l_start = offset;
+       bf.l_len = len;
+
+       xfs_ilock(ip, XFS_IOLOCK_EXCL);
+
+       if (mode & FALLOC_FL_PUNCH_HOLE)
+               cmd = XFS_IOC_UNRESVSP;
+
+       /* check the new inode size is valid before allocating */
+       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+           offset + len > i_size_read(inode)) {
+               new_size = offset + len;
+               error = inode_newsize_ok(inode, new_size);
+               if (error)
+                       goto out_unlock;
+       }
+
+       error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
+       if (error)
+               goto out_unlock;
+
+       /* Change file size if needed */
+       if (new_size) {
+               struct iattr iattr;
+
+               iattr.ia_valid = ATTR_SIZE;
+               iattr.ia_size = new_size;
+               error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
+       }
+
+out_unlock:
+       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
+       return error;
+}
+
+
 STATIC int
 xfs_file_open(
        struct inode    *inode,
@@ -1000,6 +1055,7 @@ const struct file_operations xfs_file_operations = {
        .open           = xfs_file_open,
        .release        = xfs_file_release,
        .fsync          = xfs_file_fsync,
+       .fallocate      = xfs_file_fallocate,
 };
 
 const struct file_operations xfs_dir_file_operations = {
index a4ecc21..bd57278 100644 (file)
@@ -46,7 +46,6 @@
 #include <linux/namei.h>
 #include <linux/posix_acl.h>
 #include <linux/security.h>
-#include <linux/falloc.h>
 #include <linux/fiemap.h>
 #include <linux/slab.h>
 
@@ -505,64 +504,6 @@ xfs_vn_setattr(
        return -xfs_setattr(XFS_I(dentry->d_inode), iattr, 0);
 }
 
-STATIC long
-xfs_vn_fallocate(
-       struct inode    *inode,
-       int             mode,
-       loff_t          offset,
-       loff_t          len)
-{
-       long            error;
-       loff_t          new_size = 0;
-       xfs_flock64_t   bf;
-       xfs_inode_t     *ip = XFS_I(inode);
-       int             cmd = XFS_IOC_RESVSP;
-
-       if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
-               return -EOPNOTSUPP;
-
-       /* preallocation on directories not yet supported */
-       error = -ENODEV;
-       if (S_ISDIR(inode->i_mode))
-               goto out_error;
-
-       bf.l_whence = 0;
-       bf.l_start = offset;
-       bf.l_len = len;
-
-       xfs_ilock(ip, XFS_IOLOCK_EXCL);
-
-       if (mode & FALLOC_FL_PUNCH_HOLE)
-               cmd = XFS_IOC_UNRESVSP;
-
-       /* check the new inode size is valid before allocating */
-       if (!(mode & FALLOC_FL_KEEP_SIZE) &&
-           offset + len > i_size_read(inode)) {
-               new_size = offset + len;
-               error = inode_newsize_ok(inode, new_size);
-               if (error)
-                       goto out_unlock;
-       }
-
-       error = -xfs_change_file_space(ip, cmd, &bf, 0, XFS_ATTR_NOLOCK);
-       if (error)
-               goto out_unlock;
-
-       /* Change file size if needed */
-       if (new_size) {
-               struct iattr iattr;
-
-               iattr.ia_valid = ATTR_SIZE;
-               iattr.ia_size = new_size;
-               error = -xfs_setattr(ip, &iattr, XFS_ATTR_NOLOCK);
-       }
-
-out_unlock:
-       xfs_iunlock(ip, XFS_IOLOCK_EXCL);
-out_error:
-       return error;
-}
-
 #define XFS_FIEMAP_FLAGS       (FIEMAP_FLAG_SYNC|FIEMAP_FLAG_XATTR)
 
 /*
@@ -656,7 +597,6 @@ static const struct inode_operations xfs_inode_operations = {
        .getxattr               = generic_getxattr,
        .removexattr            = generic_removexattr,
        .listxattr              = xfs_vn_listxattr,
-       .fallocate              = xfs_vn_fallocate,
        .fiemap                 = xfs_vn_fiemap,
 };
 
index 177b4dd..09b5bd6 100644 (file)
@@ -1552,6 +1552,8 @@ struct file_operations {
        ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
        ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
        int (*setlease)(struct file *, long, struct file_lock **);
+       long (*fallocate)(struct file *file, int mode, loff_t offset,
+                         loff_t len);
 };
 
 #define IPERM_FLAG_RCU 0x0001
@@ -1582,8 +1584,6 @@ struct inode_operations {
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*removexattr) (struct dentry *, const char *);
        void (*truncate_range)(struct inode *, loff_t, loff_t);
-       long (*fallocate)(struct inode *inode, int mode, loff_t offset,
-                         loff_t len);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
                      u64 len);
 } ____cacheline_aligned;