Merge branch 'for-3.4' of git://linux-nfs.org/~bfields/linux

[linux-2.6.git] / fs / nfsd / vfs.c
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 7b2b3f775326c837200a4e430c1b5357399cfc63..296d671654d6a12fbb89567ea6b80cfd26a1829f 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1,7 +1,4 @@
-#define MSNFS  /* HACK HACK */
  /*
- * linux/fs/nfsd/vfs.c
- *
   * File operations used by nfsd. Some of these have been ripped from
   * other parts of the kernel because they weren't exported, others
   * are partial duplicates with added or changed functionality.
@@ -16,48 +13,33 @@
   * Zerocpy NFS support (C) 2002 Hirokazu Takahashi <taka@valinux.co.jp>
   */
  
-#include <linux/string.h>
-#include <linux/time.h>
-#include <linux/errno.h>
  #include <linux/fs.h>
  #include <linux/file.h>
-#include <linux/mount.h>
-#include <linux/major.h>
  #include <linux/splice.h>
-#include <linux/proc_fs.h>
-#include <linux/stat.h>
  #include <linux/fcntl.h>
-#include <linux/net.h>
-#include <linux/unistd.h>
-#include <linux/slab.h>
-#include <linux/pagemap.h>
-#include <linux/in.h>
-#include <linux/module.h>
  #include <linux/namei.h>
-#include <linux/vfs.h>
  #include <linux/delay.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/nfsd/nfsd.h>
-#ifdef CONFIG_NFSD_V3
-#include <linux/nfs3.h>
-#include <linux/nfsd/xdr3.h>
-#endif /* CONFIG_NFSD_V3 */
-#include <linux/nfsd/nfsfh.h>
-#include <linux/quotaops.h>
  #include <linux/fsnotify.h>
-#include <linux/posix_acl.h>
  #include <linux/posix_acl_xattr.h>
  #include <linux/xattr.h>
-#ifdef CONFIG_NFSD_V4
-#include <linux/nfs4.h>
-#include <linux/nfs4_acl.h>
-#include <linux/nfsd_idmap.h>
-#include <linux/security.h>
-#endif /* CONFIG_NFSD_V4 */
  #include <linux/jhash.h>
  #include <linux/ima.h>
-
+#include <linux/slab.h>
  #include <asm/uaccess.h>
+#include <linux/exportfs.h>
+#include <linux/writeback.h>
+
+#ifdef CONFIG_NFSD_V3
+#include "xdr3.h"
+#endif /* CONFIG_NFSD_V3 */
+
+#ifdef CONFIG_NFSD_V4
+#include "acl.h"
+#include "idmap.h"
+#endif /* CONFIG_NFSD_V4 */
+
+#include "nfsd.h"
+#include "vfs.h"
  
  #define NFSDDBG_FACILITY               NFSDDBG_FILEOP
  
@@ -105,18 +87,27 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp,
                             .dentry = dget(dentry)};
         int err = 0;
  
-       while (follow_down(&path.mnt, &path.dentry) &&
-              d_mountpoint(path.dentry))
-               ;
+       err = follow_down(&path);
+       if (err < 0)
+               goto out;
  
         exp2 = rqst_exp_get_by_name(rqstp, &path);
         if (IS_ERR(exp2)) {
-               if (PTR_ERR(exp2) != -ENOENT)
-                       err = PTR_ERR(exp2);
+               err = PTR_ERR(exp2);
+               /*
+                * We normally allow NFS clients to continue
+                * "underneath" a mountpoint that is not exported.
+                * The exception is V4ROOT, where no traversal is ever
+                * allowed without an explicit export of the new
+                * directory.
+                */
+               if (err == -ENOENT && !(exp->ex_flags & NFSEXP_V4ROOT))
+                       err = 0;
                 path_put(&path);
                 goto out;
         }
-       if ((exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
+       if (nfsd_v4client(rqstp) ||
+               (exp->ex_flags & NFSEXP_CROSSMOUNT) || EX_NOHIDE(exp2)) {
                 /* successfully crossed mount point */
                 /*
                  * This is subtle: path.dentry is *not* on path.mnt
@@ -135,6 +126,55 @@ out:
         return err;
  }
  
+static void follow_to_parent(struct path *path)
+{
+       struct dentry *dp;
+
+       while (path->dentry == path->mnt->mnt_root && follow_up(path))
+               ;
+       dp = dget_parent(path->dentry);
+       dput(path->dentry);
+       path->dentry = dp;
+}
+
+static int nfsd_lookup_parent(struct svc_rqst *rqstp, struct dentry *dparent, struct svc_export **exp, struct dentry **dentryp)
+{
+       struct svc_export *exp2;
+       struct path path = {.mnt = mntget((*exp)->ex_path.mnt),
+                           .dentry = dget(dparent)};
+
+       follow_to_parent(&path);
+
+       exp2 = rqst_exp_parent(rqstp, &path);
+       if (PTR_ERR(exp2) == -ENOENT) {
+               *dentryp = dget(dparent);
+       } else if (IS_ERR(exp2)) {
+               path_put(&path);
+               return PTR_ERR(exp2);
+       } else {
+               *dentryp = dget(path.dentry);
+               exp_put(*exp);
+               *exp = exp2;
+       }
+       path_put(&path);
+       return 0;
+}
+
+/*
+ * For nfsd purposes, we treat V4ROOT exports as though there was an
+ * export at *every* directory.
+ */
+int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp)
+{
+       if (d_mountpoint(dentry))
+               return 1;
+       if (nfsd4_is_junction(dentry))
+               return 1;
+       if (!(exp->ex_flags & NFSEXP_V4ROOT))
+               return 0;
+       return dentry->d_inode != NULL;
+}
+
  __be32
  nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                    const char *name, unsigned int len,
@@ -143,16 +183,10 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
         struct svc_export       *exp;
         struct dentry           *dparent;
         struct dentry           *dentry;
-       __be32                  err;
         int                     host_err;
  
         dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name);
  
-       /* Obtain dentry and export. */
-       err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
-       if (err)
-               return err;
-
         dparent = fhp->fh_dentry;
         exp  = fhp->fh_export;
         exp_get(exp);
@@ -163,35 +197,13 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                         dentry = dget(dparent);
                 else if (dparent != exp->ex_path.dentry)
                         dentry = dget_parent(dparent);
-               else if (!EX_NOHIDE(exp))
+               else if (!EX_NOHIDE(exp) && !nfsd_v4client(rqstp))
                         dentry = dget(dparent); /* .. == . just like at / */
                 else {
                         /* checking mountpoint crossing is very different when stepping up */
-                       struct svc_export *exp2 = NULL;
-                       struct dentry *dp;
-                       struct path path = {.mnt = mntget(exp->ex_path.mnt),
-                                           .dentry = dget(dparent)};
-
-                       while (path.dentry == path.mnt->mnt_root &&
-                              follow_up(&path))
-                               ;
-                       dp = dget_parent(path.dentry);
-                       dput(path.dentry);
-                       path.dentry = dp;
-
-                       exp2 = rqst_exp_parent(rqstp, &path);
-                       if (PTR_ERR(exp2) == -ENOENT) {
-                               dentry = dget(dparent);
-                       } else if (IS_ERR(exp2)) {
-                               host_err = PTR_ERR(exp2);
-                               path_put(&path);
+                       host_err = nfsd_lookup_parent(rqstp, dparent, &exp, &dentry);
+                       if (host_err)
                                 goto out_nfserr;
-                       } else {
-                               dentry = dget(path.dentry);
-                               exp_put(exp);
-                               exp = exp2;
-                       }
-                       path_put(&path);
                 }
         } else {
                 fh_lock(fhp);
@@ -202,7 +214,7 @@ nfsd_lookup_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 /*
                  * check if we have crossed a mount point ...
                  */
-               if (d_mountpoint(dentry)) {
+               if (nfsd_mountpoint(dentry, exp)) {
                         if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) {
                                 dput(dentry);
                                 goto out_nfserr;
@@ -238,6 +250,9 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name,
         struct dentry           *dentry;
         __be32 err;
  
+       err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
+       if (err)
+               return err;
         err = nfsd_lookup_dentry(rqstp, fhp, name, len, &exp, &dentry);
         if (err)
                 return err;
@@ -257,6 +272,29 @@ out:
         return err;
  }
  
+static int nfsd_break_lease(struct inode *inode)
+{
+       if (!S_ISREG(inode->i_mode))
+               return 0;
+       return break_lease(inode, O_WRONLY | O_NONBLOCK);
+}
+
+/*
+ * Commit metadata changes to stable storage.
+ */
+static int
+commit_metadata(struct svc_fh *fhp)
+{
+       struct inode *inode = fhp->fh_dentry->d_inode;
+       const struct export_operations *export_ops = inode->i_sb->s_export_op;
+
+       if (!EX_ISSYNC(fhp->fh_export))
+               return 0;
+
+       if (export_ops->commit_metadata)
+               return export_ops->commit_metadata(inode);
+       return sync_inode_metadata(inode, 1);
+}
  
  /*
   * Set various file attributes.
@@ -269,7 +307,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
         struct dentry   *dentry;
         struct inode    *inode;
         int             accmode = NFSD_MAY_SATTR;
-       int             ftype = 0;
+       umode_t         ftype = 0;
         __be32          err;
         int             host_err;
         int             size_change = 0;
@@ -343,16 +381,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                                 goto out;
                 }
  
-               /*
-                * If we are changing the size of the file, then
-                * we need to break all leases.
-                */
-               host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
-               if (host_err == -EWOULDBLOCK)
-                       host_err = -ETIMEDOUT;
-               if (host_err) /* ENOMEM or EWOULDBLOCK */
-                       goto out_nfserr;
-
                 host_err = get_write_access(inode);
                 if (host_err)
                         goto out_nfserr;
@@ -363,7 +391,6 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                         put_write_access(inode);
                         goto out_nfserr;
                 }
-               vfs_dq_init(inode);
         }
  
         /* sanitize the mode change */
@@ -394,7 +421,11 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
  
         err = nfserr_notsync;
         if (!check_guard || guardtime == inode->i_ctime.tv_sec) {
+               host_err = nfsd_break_lease(inode);
+               if (host_err)
+                       goto out_nfserr;
                 fh_lock(fhp);
+
                 host_err = notify_change(dentry, iap);
                 err = nfserrno(host_err);
                 fh_unlock(fhp);
@@ -402,8 +433,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
         if (size_change)
                 put_write_access(inode);
         if (!err)
-               if (EX_ISSYNC(fhp->fh_export))
-                       write_inode_now(inode, 1);
+               commit_metadata(fhp);
  out:
         return err;
  
@@ -474,7 +504,7 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
         unsigned int flags = 0;
  
         /* Get inode */
-       error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, NFSD_MAY_SATTR);
+       error = fh_verify(rqstp, fhp, 0, NFSD_MAY_SATTR);
         if (error)
                 return error;
  
@@ -564,7 +594,34 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
         return error;
  }
  
-#endif /* defined(CONFIG_NFS_V4) */
+/*
+ * NFS junction information is stored in an extended attribute.
+ */
+#define NFSD_JUNCTION_XATTR_NAME       XATTR_TRUSTED_PREFIX "junction.nfs"
+
+/**
+ * nfsd4_is_junction - Test if an object could be an NFS junction
+ *
+ * @dentry: object to test
+ *
+ * Returns 1 if "dentry" appears to contain NFS junction information.
+ * Otherwise 0 is returned.
+ */
+int nfsd4_is_junction(struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+
+       if (inode == NULL)
+               return 0;
+       if (inode->i_mode & S_IXUGO)
+               return 0;
+       if (!(inode->i_mode & S_ISVTX))
+               return 0;
+       if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
+               return 0;
+       return 1;
+}
+#endif /* defined(CONFIG_NFSD_V4) */
  
  #ifdef CONFIG_NFSD_V3
  /*
@@ -668,30 +725,40 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor
  }
  #endif /* CONFIG_NFSD_V3 */
  
+static int nfsd_open_break_lease(struct inode *inode, int access)
+{
+       unsigned int mode;
  
+       if (access & NFSD_MAY_NOT_BREAK_LEASE)
+               return 0;
+       mode = (access & NFSD_MAY_WRITE) ? O_WRONLY : O_RDONLY;
+       return break_lease(inode, mode | O_NONBLOCK);
+}
  
  /*
   * Open an existing file or directory.
- * The access argument indicates the type of open (read/write/lock)
+ * The may_flags argument indicates the type of open (read/write/lock)
+ * and additional flags.
   * N.B. After this call fhp needs an fh_put
   */
  __be32
-nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
-                       int access, struct file **filp)
+nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type,
+                       int may_flags, struct file **filp)
  {
-       const struct cred *cred = current_cred();
         struct dentry   *dentry;
         struct inode    *inode;
         int             flags = O_RDONLY|O_LARGEFILE;
         __be32          err;
-       int             host_err;
+       int             host_err = 0;
+
+       validate_process_creds();
  
         /*
          * If we get here, then the client has already done an "open",
          * and (hopefully) checked permission - so allow OWNER_OVERRIDE
          * in case a chmod has now revoked permission.
          */
-       err = fh_verify(rqstp, fhp, type, access | NFSD_MAY_OWNER_OVERRIDE);
+       err = fh_verify(rqstp, fhp, type, may_flags | NFSD_MAY_OWNER_OVERRIDE);
         if (err)
                 goto out;
  
@@ -702,7 +769,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
          * or any access when mandatory locking enabled
          */
         err = nfserr_perm;
-       if (IS_APPEND(inode) && (access & NFSD_MAY_WRITE))
+       if (IS_APPEND(inode) && (may_flags & NFSD_MAY_WRITE))
                 goto out;
         /*
          * We must ignore files (but only files) which might have mandatory
@@ -715,33 +782,33 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         if (!inode->i_fop)
                 goto out;
  
-       /*
-        * Check to see if there are any leases on this file.
-        * This may block while leases are broken.
-        */
-       host_err = break_lease(inode, O_NONBLOCK | ((access & NFSD_MAY_WRITE) ? FMODE_WRITE : 0));
-       if (host_err == -EWOULDBLOCK)
-               host_err = -ETIMEDOUT;
+       host_err = nfsd_open_break_lease(inode, may_flags);
         if (host_err) /* NOMEM or WOULDBLOCK */
                 goto out_nfserr;
  
-       if (access & NFSD_MAY_WRITE) {
-               if (access & NFSD_MAY_READ)
+       if (may_flags & NFSD_MAY_WRITE) {
+               if (may_flags & NFSD_MAY_READ)
                         flags = O_RDWR|O_LARGEFILE;
                 else
                         flags = O_WRONLY|O_LARGEFILE;
-
-               vfs_dq_init(inode);
         }
         *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_path.mnt),
-                           flags, cred);
+                           flags, current_cred());
         if (IS_ERR(*filp))
                 host_err = PTR_ERR(*filp);
-       else
-               ima_counts_get(*filp);
+       else {
+               host_err = ima_file_check(*filp, may_flags);
+
+               if (may_flags & NFSD_MAY_64BIT_COOKIE)
+                       (*filp)->f_mode |= FMODE_64BITHASH;
+               else
+                       (*filp)->f_mode |= FMODE_32BITHASH;
+       }
+
  out_nfserr:
         err = nfserrno(host_err);
  out:
+       validate_process_creds();
         return err;
  }
  
@@ -754,46 +821,6 @@ nfsd_close(struct file *filp)
         fput(filp);
  }
  
-/*
- * Sync a file
- * As this calls fsync (not fdatasync) there is no need for a write_inode
- * after it.
- */
-static inline int nfsd_dosync(struct file *filp, struct dentry *dp,
-                             const struct file_operations *fop)
-{
-       struct inode *inode = dp->d_inode;
-       int (*fsync) (struct file *, struct dentry *, int);
-       int err;
-
-       err = filemap_fdatawrite(inode->i_mapping);
-       if (err == 0 && fop && (fsync = fop->fsync))
-               err = fsync(filp, dp, 0);
-       if (err == 0)
-               err = filemap_fdatawait(inode->i_mapping);
-
-       return err;
-}
-
-static int
-nfsd_sync(struct file *filp)
-{
-        int err;
-       struct inode *inode = filp->f_path.dentry->d_inode;
-       dprintk("nfsd: sync file %s\n", filp->f_path.dentry->d_name.name);
-       mutex_lock(&inode->i_mutex);
-       err=nfsd_dosync(filp, filp->f_path.dentry, filp->f_op);
-       mutex_unlock(&inode->i_mutex);
-
-       return err;
-}
-
-int
-nfsd_sync_dir(struct dentry *dp)
-{
-       return nfsd_dosync(NULL, dp, dp->d_inode->i_fop);
-}
-
  /*
   * Obtain the readahead parameters for the file
   * specified by (dev, ino).
@@ -818,7 +845,7 @@ nfsd_get_raparms(dev_t dev, ino_t ino)
                 if (ra->p_count == 0)
                         frap = rap;
         }
-       depth = nfsdstats.ra_size*11/10;
+       depth = nfsdstats.ra_size;
         if (!frap) {    
                 spin_unlock(&rab->pb_lock);
                 return NULL;
@@ -854,11 +881,6 @@ nfsd_splice_actor(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
         struct page **pp = rqstp->rq_respages + rqstp->rq_resused;
         struct page *page = buf->page;
         size_t size;
-       int ret;
-
-       ret = buf->ops->confirm(pipe, buf);
-       if (unlikely(ret))
-               return ret;
  
         size = sd->len;
  
@@ -888,36 +910,15 @@ static int nfsd_direct_splice_actor(struct pipe_inode_info *pipe,
         return __splice_from_pipe(pipe, sd, nfsd_splice_actor);
  }
  
-static inline int svc_msnfs(struct svc_fh *ffhp)
-{
-#ifdef MSNFS
-       return (ffhp->fh_export->ex_flags & NFSEXP_MSNFS);
-#else
-       return 0;
-#endif
-}
-
  static __be32
  nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
  {
-       struct inode *inode;
-       struct raparms  *ra;
         mm_segment_t    oldfs;
         __be32          err;
         int             host_err;
  
         err = nfserr_perm;
-       inode = file->f_path.dentry->d_inode;
-
-       if (svc_msnfs(fhp) && !lock_may_read(inode, offset, *count))
-               goto out;
-
-       /* Get readahead parameters */
-       ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
-
-       if (ra && ra->p_set)
-               file->f_ra = ra->p_ra;
  
         if (file->f_op->splice_read && rqstp->rq_splice_ok) {
                 struct splice_desc sd = {
@@ -936,24 +937,13 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 set_fs(oldfs);
         }
  
-       /* Write back readahead params */
-       if (ra) {
-               struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
-               spin_lock(&rab->pb_lock);
-               ra->p_ra = file->f_ra;
-               ra->p_set = 1;
-               ra->p_count--;
-               spin_unlock(&rab->pb_lock);
-       }
-
         if (host_err >= 0) {
                 nfsdstats.io_read += host_err;
                 *count = host_err;
                 err = 0;
-               fsnotify_access(file->f_path.dentry);
+               fsnotify_access(file);
         } else 
                 err = nfserrno(host_err);
-out:
         return err;
  }
  
@@ -967,6 +957,43 @@ static void kill_suid(struct dentry *dentry)
         mutex_unlock(&dentry->d_inode->i_mutex);
  }
  
+/*
+ * Gathered writes: If another process is currently writing to the file,
+ * there's a high chance this is another nfsd (triggered by a bulk write
+ * from a client's biod). Rather than syncing the file with each write
+ * request, we sleep for 10 msec.
+ *
+ * I don't know if this roughly approximates C. Juszak's idea of
+ * gathered writes, but it's a nice and simple solution (IMHO), and it
+ * seems to work:-)
+ *
+ * Note: we do this only in the NFSv2 case, since v3 and higher have a
+ * better tool (separate unstable writes and commits) for solving this
+ * problem.
+ */
+static int wait_for_concurrent_writes(struct file *file)
+{
+       struct inode *inode = file->f_path.dentry->d_inode;
+       static ino_t last_ino;
+       static dev_t last_dev;
+       int err = 0;
+
+       if (atomic_read(&inode->i_writecount) > 1
+           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
+               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
+               msleep(10);
+               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
+       }
+
+       if (inode->i_state & I_DIRTY) {
+               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
+               err = vfs_fsync(file, 0);
+       }
+       last_ino = inode->i_ino;
+       last_dev = inode->i_sb->s_dev;
+       return err;
+}
+
  static __be32
  nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                                 loff_t offset, struct kvec *vec, int vlen,
@@ -979,14 +1006,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
         __be32                  err = 0;
         int                     host_err;
         int                     stable = *stablep;
-
-#ifdef MSNFS
-       err = nfserr_perm;
-
-       if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
-               (!lock_may_write(file->f_path.dentry->d_inode, offset, *cnt)))
-               goto out;
-#endif
+       int                     use_wgather;
  
         dentry = file->f_path.dentry;
         inode = dentry->d_inode;
@@ -997,9 +1017,10 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
          *  -   the sync export option has been set, or
          *  -   the client requested O_SYNC behavior (NFSv3 feature).
          *  -   The file system doesn't support fsync().
-        * When gathered writes have been configured for this volume,
+        * When NFSv2 gathered writes have been configured for this volume,
          * flushing the data to disk is handled separately below.
          */
+       use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
  
         if (!file->f_op->fsync) {/* COMMIT3 cannot work */
                stable = 2;
@@ -1008,7 +1029,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
  
         if (!EX_ISSYNC(exp))
                 stable = 0;
-       if (stable && !EX_WGATHER(exp)) {
+       if (stable && !use_wgather) {
                 spin_lock(&file->f_lock);
                 file->f_flags |= O_SYNC;
                 spin_unlock(&file->f_lock);
@@ -1018,58 +1039,25 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
         oldfs = get_fs(); set_fs(KERNEL_DS);
         host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset);
         set_fs(oldfs);
-       if (host_err >= 0) {
-               *cnt = host_err;
-               nfsdstats.io_write += host_err;
-               fsnotify_modify(file->f_path.dentry);
-       }
+       if (host_err < 0)
+               goto out_nfserr;
+       *cnt = host_err;
+       nfsdstats.io_write += host_err;
+       fsnotify_modify(file);
  
         /* clear setuid/setgid flag after write */
-       if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID)))
+       if (inode->i_mode & (S_ISUID | S_ISGID))
                 kill_suid(dentry);
  
-       if (host_err >= 0 && stable) {
-               static ino_t    last_ino;
-               static dev_t    last_dev;
-
-               /*
-                * Gathered writes: If another process is currently
-                * writing to the file, there's a high chance
-                * this is another nfsd (triggered by a bulk write
-                * from a client's biod). Rather than syncing the
-                * file with each write request, we sleep for 10 msec.
-                *
-                * I don't know if this roughly approximates
-                * C. Juszak's idea of gathered writes, but it's a
-                * nice and simple solution (IMHO), and it seems to
-                * work:-)
-                */
-               if (EX_WGATHER(exp)) {
-                       if (atomic_read(&inode->i_writecount) > 1
-                           || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) {
-                               dprintk("nfsd: write defer %d\n", task_pid_nr(current));
-                               msleep(10);
-                               dprintk("nfsd: write resume %d\n", task_pid_nr(current));
-                       }
-
-                       if (inode->i_state & I_DIRTY) {
-                               dprintk("nfsd: write sync %d\n", task_pid_nr(current));
-                               host_err=nfsd_sync(file);
-                       }
-#if 0
-                       wake_up(&inode->i_wait);
-#endif
-               }
-               last_ino = inode->i_ino;
-               last_dev = inode->i_sb->s_dev;
-       }
+       if (stable && use_wgather)
+               host_err = wait_for_concurrent_writes(file);
  
+out_nfserr:
         dprintk("nfsd: write complete host_err=%d\n", host_err);
         if (host_err >= 0)
                 err = 0;
         else
                 err = nfserrno(host_err);
-out:
         return err;
  }
  
@@ -1078,8 +1066,45 @@ out:
   * on entry. On return, *count contains the number of bytes actually read.
   * N.B. After this call fhp needs an fh_put
   */
+__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
+       loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+{
+       struct file *file;
+       struct inode *inode;
+       struct raparms  *ra;
+       __be32 err;
+
+       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
+       if (err)
+               return err;
+
+       inode = file->f_path.dentry->d_inode;
+
+       /* Get readahead parameters */
+       ra = nfsd_get_raparms(inode->i_sb->s_dev, inode->i_ino);
+
+       if (ra && ra->p_set)
+               file->f_ra = ra->p_ra;
+
+       err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
+
+       /* Write back readahead params */
+       if (ra) {
+               struct raparm_hbucket *rab = &raparm_hash[ra->p_hindex];
+               spin_lock(&rab->pb_lock);
+               ra->p_ra = file->f_ra;
+               ra->p_set = 1;
+               ra->p_count--;
+               spin_unlock(&rab->pb_lock);
+       }
+
+       nfsd_close(file);
+       return err;
+}
+
+/* As above, but use the provided file descriptor. */
  __be32
-nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
+nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 loff_t offset, struct kvec *vec, int vlen,
                 unsigned long *count)
  {
@@ -1091,13 +1116,8 @@ nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
                 if (err)
                         goto out;
                 err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
-       } else {
-               err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file);
-               if (err)
-                       goto out;
-               err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
-               nfsd_close(file);
-       }
+       } else /* Note file may still be NULL in NFSv4 special stateid case: */
+               err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
  out:
         return err;
  }
@@ -1138,8 +1158,9 @@ out:
  #ifdef CONFIG_NFSD_V3
  /*
   * Commit all pending writes to stable storage.
- * Strictly speaking, we could sync just the indicated file region here,
- * but there's currently no way we can ask the VFS to do so.
+ *
+ * Note: we only guarantee that data that lies within the range specified
+ * by the 'offset' and 'count' parameters will be synced.
   *
   * Unfortunately we cannot lock the file to make sure we return full WCC
   * data to the client, as locking happens lower down in the filesystem.
@@ -1149,23 +1170,32 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 loff_t offset, unsigned long count)
  {
         struct file     *file;
-       __be32          err;
+       loff_t          end = LLONG_MAX;
+       __be32          err = nfserr_inval;
  
-       if ((u64)count > ~(u64)offset)
-               return nfserr_inval;
+       if (offset < 0)
+               goto out;
+       if (count != 0) {
+               end = offset + (loff_t)count - 1;
+               if (end < offset)
+                       goto out;
+       }
  
-       err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_WRITE, &file);
+       err = nfsd_open(rqstp, fhp, S_IFREG,
+                       NFSD_MAY_WRITE|NFSD_MAY_NOT_BREAK_LEASE, &file);
         if (err)
-               return err;
+               goto out;
         if (EX_ISSYNC(fhp->fh_export)) {
-               if (file->f_op && file->f_op->fsync) {
-                       err = nfserrno(nfsd_sync(file));
-               } else {
+               int err2 = vfs_fsync_range(file, offset, end, 0);
+
+               if (err2 != -EINVAL)
+                       err = nfserrno(err2);
+               else
                         err = nfserr_notsupp;
-               }
         }
  
         nfsd_close(file);
+out:
         return err;
  }
  #endif /* CONFIG_NFSD_V3 */
@@ -1289,7 +1319,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 goto out;
         }
  
-       host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(fhp);
         if (host_err)
                 goto out_nfserr;
  
@@ -1314,19 +1344,21 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 break;
         }
         if (host_err < 0) {
-               mnt_drop_write(fhp->fh_export->ex_path.mnt);
+               fh_drop_write(fhp);
                 goto out_nfserr;
         }
  
-       if (EX_ISSYNC(fhp->fh_export)) {
-               err = nfserrno(nfsd_sync_dir(dentry));
-               write_inode_now(dchild->d_inode, 1);
-       }
+       err = nfsd_create_setattr(rqstp, resfhp, iap);
  
-       err2 = nfsd_create_setattr(rqstp, resfhp, iap);
+       /*
+        * nfsd_setattr already committed the child.  Transactional filesystems
+        * had a chance to commit changes for both parent and child
+        * simultaneously making the following commit_metadata a noop.
+        */
+       err2 = nfserrno(commit_metadata(fhp));
         if (err2)
                 err = err2;
-       mnt_drop_write(fhp->fh_export->ex_path.mnt);
+       fh_drop_write(fhp);
         /*
          * Update the file handle to get the new inode info.
          */
@@ -1343,19 +1375,25 @@ out_nfserr:
  }
  
  #ifdef CONFIG_NFSD_V3
+
+static inline int nfsd_create_is_exclusive(int createmode)
+{
+       return createmode == NFS3_CREATE_EXCLUSIVE
+              || createmode == NFS4_CREATE_EXCLUSIVE4_1;
+}
+
  /*
- * NFSv3 version of nfsd_create
+ * NFSv3 and NFSv4 version of nfsd_create
   */
  __be32
-nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
+do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 char *fname, int flen, struct iattr *iap,
                 struct svc_fh *resfhp, int createmode, u32 *verifier,
-               int *truncp, int *created)
+               bool *truncp, bool *created)
  {
         struct dentry   *dentry, *dchild = NULL;
         struct inode    *dirp;
         __be32          err;
-       __be32          err2;
         int             host_err;
         __u32           v_mtime=0, v_atime=0;
  
@@ -1367,7 +1405,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 goto out;
         if (!(iap->ia_valid & ATTR_MODE))
                 iap->ia_mode = 0;
-       err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+       err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_EXEC);
         if (err)
                 goto out;
  
@@ -1389,11 +1427,18 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
         if (IS_ERR(dchild))
                 goto out_nfserr;
  
+       /* If file doesn't exist, check for permissions to create one */
+       if (!dchild->d_inode) {
+               err = fh_verify(rqstp, fhp, S_IFDIR, NFSD_MAY_CREATE);
+               if (err)
+                       goto out;
+       }
+
         err = fh_compose(resfhp, fhp->fh_export, dchild, fhp);
         if (err)
                 goto out;
  
-       if (createmode == NFS3_CREATE_EXCLUSIVE) {
+       if (nfsd_create_is_exclusive(createmode)) {
                 /* solaris7 gets confused (bugid 4218508) if these have
                  * the high bit set, so just clear the high bits. If this is
                  * ever changed to use different attrs for storing the
@@ -1404,7 +1449,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 v_atime = verifier[1]&0x7fffffff;
         }
         
-       host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(fhp);
         if (host_err)
                 goto out_nfserr;
         if (dchild->d_inode) {
@@ -1434,30 +1479,30 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
                             && dchild->d_inode->i_atime.tv_sec == v_atime
                             && dchild->d_inode->i_size  == 0 )
                                 break;
+               case NFS4_CREATE_EXCLUSIVE4_1:
+                       if (   dchild->d_inode->i_mtime.tv_sec == v_mtime
+                           && dchild->d_inode->i_atime.tv_sec == v_atime
+                           && dchild->d_inode->i_size  == 0 )
+                               goto set_attr;
                          /* fallthru */
                 case NFS3_CREATE_GUARDED:
                         err = nfserr_exist;
                 }
-               mnt_drop_write(fhp->fh_export->ex_path.mnt);
+               fh_drop_write(fhp);
                 goto out;
         }
  
         host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL);
         if (host_err < 0) {
-               mnt_drop_write(fhp->fh_export->ex_path.mnt);
+               fh_drop_write(fhp);
                 goto out_nfserr;
         }
         if (created)
                 *created = 1;
  
-       if (EX_ISSYNC(fhp->fh_export)) {
-               err = nfserrno(nfsd_sync_dir(dentry));
-               /* setattr will sync the child (or not) */
-       }
-
         nfsd_check_ignore_resizing(iap);
  
-       if (createmode == NFS3_CREATE_EXCLUSIVE) {
+       if (nfsd_create_is_exclusive(createmode)) {
                 /* Cram the verifier into atime/mtime */
                 iap->ia_valid = ATTR_MTIME|ATTR_ATIME
                         | ATTR_MTIME_SET|ATTR_ATIME_SET;
@@ -1469,11 +1514,15 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
         }
  
   set_attr:
-       err2 = nfsd_create_setattr(rqstp, resfhp, iap);
-       if (err2)
-               err = err2;
+       err = nfsd_create_setattr(rqstp, resfhp, iap);
+
+       /*
+        * nfsd_setattr already committed the child (and possibly also the parent).
+        */
+       if (!err)
+               err = nfserrno(commit_metadata(fhp));
  
-       mnt_drop_write(fhp->fh_export->ex_path.mnt);
+       fh_drop_write(fhp);
         /*
          * Update the filehandle to get the new inode info.
          */
@@ -1500,30 +1549,31 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp,
  __be32
  nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp)
  {
-       struct dentry   *dentry;
         struct inode    *inode;
         mm_segment_t    oldfs;
         __be32          err;
         int             host_err;
+       struct path path;
  
         err = fh_verify(rqstp, fhp, S_IFLNK, NFSD_MAY_NOP);
         if (err)
                 goto out;
  
-       dentry = fhp->fh_dentry;
-       inode = dentry->d_inode;
+       path.mnt = fhp->fh_export->ex_path.mnt;
+       path.dentry = fhp->fh_dentry;
+       inode = path.dentry->d_inode;
  
         err = nfserr_inval;
         if (!inode->i_op->readlink)
                 goto out;
  
-       touch_atime(fhp->fh_export->ex_path.mnt, dentry);
+       touch_atime(&path);
         /* N.B. Why does this call need a get_fs()??
          * Remove the set_fs and watch the fireworks:-) --okir
          */
  
         oldfs = get_fs(); set_fs(KERNEL_DS);
-       host_err = inode->i_op->readlink(dentry, buf, *lenp);
+       host_err = inode->i_op->readlink(path.dentry, buf, *lenp);
         set_fs(oldfs);
  
         if (host_err < 0)
@@ -1570,7 +1620,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
         if (IS_ERR(dnew))
                 goto out_nfserr;
  
-       host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(fhp);
         if (host_err)
                 goto out_nfserr;
  
@@ -1586,15 +1636,12 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
                 }
         } else
                 host_err = vfs_symlink(dentry->d_inode, dnew, path);
-
-       if (!host_err) {
-               if (EX_ISSYNC(fhp->fh_export))
-                       host_err = nfsd_sync_dir(dentry);
-       }
         err = nfserrno(host_err);
+       if (!err)
+               err = nfserrno(commit_metadata(fhp));
         fh_unlock(fhp);
  
-       mnt_drop_write(fhp->fh_export->ex_path.mnt);
+       fh_drop_write(fhp);
  
         cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp);
         dput(dnew);
@@ -1616,17 +1663,19 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                                 char *name, int len, struct svc_fh *tfhp)
  {
         struct dentry   *ddir, *dnew, *dold;
-       struct inode    *dirp, *dest;
+       struct inode    *dirp;
         __be32          err;
         int             host_err;
  
         err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_CREATE);
         if (err)
                 goto out;
-       err = fh_verify(rqstp, tfhp, -S_IFDIR, NFSD_MAY_NOP);
+       err = fh_verify(rqstp, tfhp, 0, NFSD_MAY_NOP);
         if (err)
                 goto out;
-
+       err = nfserr_isdir;
+       if (S_ISDIR(tfhp->fh_dentry->d_inode->i_mode))
+               goto out;
         err = nfserr_perm;
         if (!len)
                 goto out;
@@ -1644,27 +1693,33 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
                 goto out_nfserr;
  
         dold = tfhp->fh_dentry;
-       dest = dold->d_inode;
  
-       host_err = mnt_want_write(tfhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(tfhp);
         if (host_err) {
                 err = nfserrno(host_err);
                 goto out_dput;
         }
+       err = nfserr_noent;
+       if (!dold->d_inode)
+               goto out_drop_write;
+       host_err = nfsd_break_lease(dold->d_inode);
+       if (host_err) {
+               err = nfserrno(host_err);
+               goto out_drop_write;
+       }
         host_err = vfs_link(dold, dirp, dnew);
         if (!host_err) {
-               if (EX_ISSYNC(ffhp->fh_export)) {
-                       err = nfserrno(nfsd_sync_dir(ddir));
-                       write_inode_now(dest, 1);
-               }
-               err = 0;
+               err = nfserrno(commit_metadata(ffhp));
+               if (!err)
+                       err = nfserrno(commit_metadata(tfhp));
         } else {
                 if (host_err == -EXDEV && rqstp->rq_vers == 2)
                         err = nfserr_acces;
                 else
                         err = nfserrno(host_err);
         }
-       mnt_drop_write(tfhp->fh_export->ex_path.mnt);
+out_drop_write:
+       fh_drop_write(tfhp);
  out_dput:
         dput(dnew);
  out_unlock:
@@ -1738,29 +1793,29 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
         if (ndentry == trap)
                 goto out_dput_new;
  
-       if (svc_msnfs(ffhp) &&
-               ((atomic_read(&odentry->d_count) > 1)
-                || (atomic_read(&ndentry->d_count) > 1))) {
-                       host_err = -EPERM;
-                       goto out_dput_new;
-       }
-
         host_err = -EXDEV;
         if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                 goto out_dput_new;
-       host_err = mnt_want_write(ffhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(ffhp);
         if (host_err)
                 goto out_dput_new;
  
+       host_err = nfsd_break_lease(odentry->d_inode);
+       if (host_err)
+               goto out_drop_write;
+       if (ndentry->d_inode) {
+               host_err = nfsd_break_lease(ndentry->d_inode);
+               if (host_err)
+                       goto out_drop_write;
+       }
         host_err = vfs_rename(fdir, odentry, tdir, ndentry);
-       if (!host_err && EX_ISSYNC(tfhp->fh_export)) {
-               host_err = nfsd_sync_dir(tdentry);
+       if (!host_err) {
+               host_err = commit_metadata(tfhp);
                 if (!host_err)
-                       host_err = nfsd_sync_dir(fdentry);
+                       host_err = commit_metadata(ffhp);
         }
-
-       mnt_drop_write(ffhp->fh_export->ex_path.mnt);
-
+out_drop_write:
+       fh_drop_write(ffhp);
   out_dput_new:
         dput(ndentry);
   out_dput_old:
@@ -1819,31 +1874,24 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
         if (!type)
                 type = rdentry->d_inode->i_mode & S_IFMT;
  
-       host_err = mnt_want_write(fhp->fh_export->ex_path.mnt);
+       host_err = fh_want_write(fhp);
         if (host_err)
-               goto out_nfserr;
+               goto out_put;
  
-       if (type != S_IFDIR) { /* It's UNLINK */
-#ifdef MSNFS
-               if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) &&
-                       (atomic_read(&rdentry->d_count) > 1)) {
-                       host_err = -EPERM;
-               } else
-#endif
+       host_err = nfsd_break_lease(rdentry->d_inode);
+       if (host_err)
+               goto out_drop_write;
+       if (type != S_IFDIR)
                 host_err = vfs_unlink(dirp, rdentry);
-       } else { /* It's RMDIR */
+       else
                 host_err = vfs_rmdir(dirp, rdentry);
-       }
-
+       if (!host_err)
+               host_err = commit_metadata(fhp);
+out_drop_write:
+       fh_drop_write(fhp);
+out_put:
         dput(rdentry);
  
-       if (host_err)
-               goto out_drop;
-       if (EX_ISSYNC(fhp->fh_export))
-               host_err = nfsd_sync_dir(dentry);
-
-out_drop:
-       mnt_drop_write(fhp->fh_export->ex_path.mnt);
  out_nfserr:
         err = nfserrno(host_err);
  out:
@@ -1981,8 +2029,13 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp,
         __be32          err;
         struct file     *file;
         loff_t          offset = *offsetp;
+       int             may_flags = NFSD_MAY_READ;
+
+       /* NFSv2 supports only 32-bit cookies; later versions get 64-bit */
+       if (rqstp->rq_vers > 2)
+               may_flags |= NFSD_MAY_64BIT_COOKIE;
  
-       err = nfsd_open(rqstp, fhp, S_IFDIR, NFSD_MAY_READ, &file);
+       err = nfsd_open(rqstp, fhp, S_IFDIR, may_flags, &file);
         if (err)
                 goto out;
  
@@ -2009,9 +2062,17 @@ out:
  __be32
  nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat, int access)
  {
-       __be32 err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
-       if (!err && vfs_statfs(fhp->fh_dentry,stat))
-               err = nfserr_io;
+       __be32 err;
+
+       err = fh_verify(rqstp, fhp, 0, NFSD_MAY_NOP | access);
+       if (!err) {
+               struct path path = {
+                       .mnt    = fhp->fh_export->ex_path.mnt,
+                       .dentry = fhp->fh_dentry,
+               };
+               if (vfs_statfs(&path, stat))
+                       err = nfserr_io;
+       }
         return err;
  }
  
@@ -2028,10 +2089,9 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                                         struct dentry *dentry, int acc)
  {
         struct inode    *inode = dentry->d_inode;
-       struct path     path;
         int             err;
  
-       if (acc == NFSD_MAY_NOP)
+       if ((acc & NFSD_MAY_MASK) == NFSD_MAY_NOP)
                 return 0;
  #if 0
         dprintk("nfsd: permission 0x%x%s%s%s%s%s%s%s mode 0%o%s%s%s\n",
@@ -2099,19 +2159,10 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
  
         /* Allow read access to binaries even when mode 111 */
         if (err == -EACCES && S_ISREG(inode->i_mode) &&
-           acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE))
+            (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
+             acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
                 err = inode_permission(inode, MAY_EXEC);
-       if (err)
-               goto nfsd_out;
  
-       /* Do integrity (permission) checking now, but defer incrementing
-        * IMA counts to the actual file open.
-        */
-       path.mnt = exp->ex_path.mnt;
-       path.dentry = dentry;
-       err = ima_path_check(&path, acc & (MAY_READ | MAY_WRITE | MAY_EXEC),
-                            IMA_COUNT_LEAVE);
-nfsd_out:
         return err? nfserrno(err) : 0;
  }
  
@@ -2244,7 +2295,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
         } else
                 size = 0;
  
-       error = mnt_want_write(fhp->fh_export->ex_path.mnt);
+       error = fh_want_write(fhp);
         if (error)
                 goto getout;
         if (size)
@@ -2258,7 +2309,7 @@ nfsd_set_posix_acl(struct svc_fh *fhp, int type, struct posix_acl *acl)
                                 error = 0;
                 }
         }
-       mnt_drop_write(fhp->fh_export->ex_path.mnt);
+       fh_drop_write(fhp);
  
  getout:
         kfree(value);