Merge branch 'for-2.6.34' of git://linux-nfs.org/~bfields/linux
[linux-2.6.git] / fs / nfsd / nfs4xdr.c
index dd81ac1..78c7e24 100644 (file)
  * at the end of nfs4svc_decode_compoundargs.
  */
 
-#include <linux/param.h>
-#include <linux/smp.h>
-#include <linux/fs.h>
 #include <linux/namei.h>
-#include <linux/vfs.h>
+#include <linux/statfs.h>
 #include <linux/utsname.h>
-#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/svc.h>
-#include <linux/sunrpc/clnt.h>
-#include <linux/nfsd/nfsd.h>
-#include <linux/nfsd/state.h>
-#include <linux/nfsd/xdr4.h>
 #include <linux/nfsd_idmap.h>
-#include <linux/nfs4.h>
 #include <linux/nfs4_acl.h>
-#include <linux/sunrpc/gss_api.h>
 #include <linux/sunrpc/svcauth_gss.h>
 
+#include "xdr4.h"
+#include "vfs.h"
+
 #define NFSDDBG_FACILITY               NFSDDBG_XDR
 
 /*
@@ -83,16 +75,6 @@ check_filename(char *str, int len, __be32 err)
        return 0;
 }
 
-/*
- * START OF "GENERIC" DECODE ROUTINES.
- *   These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
 #define DECODE_HEAD                            \
        __be32 *p;                              \
        __be32 status
@@ -189,6 +171,11 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
        return p;
 }
 
+/*
+ * Return nonzero iff both fields of the clientid (cl_boot and cl_id)
+ * are zero.
+ */
+static int zero_clientid(clientid_t *clid)
+{
+       return !clid->cl_boot && !clid->cl_id;
+}
+
 static int
 defer_free(struct nfsd4_compoundargs *argp,
                void (*release)(const void *), void *p)
@@ -231,6 +218,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
 
        bmval[0] = 0;
        bmval[1] = 0;
+       bmval[2] = 0;
 
        READ_BUF(4);
        READ32(bmlen);
@@ -242,13 +230,15 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
                READ32(bmval[0]);
        if (bmlen > 1)
                READ32(bmval[1]);
+       if (bmlen > 2)
+               READ32(bmval[2]);
 
        DECODE_TAIL;
 }
 
 static __be32
-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr,
-    struct nfs4_acl **acl)
+nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
+                  struct iattr *iattr, struct nfs4_acl **acl)
 {
        int expected_len, len = 0;
        u32 dummy32;
@@ -260,15 +250,6 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
        if ((status = nfsd4_decode_bitmap(argp, bmval)))
                return status;
 
-       /*
-        * According to spec, unsupported attributes return ERR_ATTRNOTSUPP;
-        * read-only attributes return ERR_INVAL.
-        */
-       if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
-               return nfserr_attrnotsupp;
-       if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1))
-               return nfserr_inval;
-
        READ_BUF(4);
        READ32(expected_len);
 
@@ -401,7 +382,11 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *ia
                        goto xdr_error;
                }
        }
-       if (len != expected_len)
+       if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
+           || bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
+           || bmval[2] & ~NFSD_WRITEABLE_ATTRS_WORD2)
+               READ_BUF(expected_len - len);
+       else if (len != expected_len)
                goto xdr_error;
 
        DECODE_TAIL;
@@ -494,7 +479,9 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
        if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
                return status;
 
-       if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl)))
+       status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
+                                   &create->cr_acl);
+       if (status)
                goto out;
 
        DECODE_TAIL;
@@ -584,6 +571,8 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
        READ_BUF(lockt->lt_owner.len);
        READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
 
+       if (argp->minorversion && !zero_clientid(&lockt->lt_clientid))
+               return nfserr_inval;
        DECODE_TAIL;
 }
 
@@ -653,13 +642,25 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
                switch (open->op_createmode) {
                case NFS4_CREATE_UNCHECKED:
                case NFS4_CREATE_GUARDED:
-                       if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl)))
+                       status = nfsd4_decode_fattr(argp, open->op_bmval,
+                               &open->op_iattr, &open->op_acl);
+                       if (status)
                                goto out;
                        break;
                case NFS4_CREATE_EXCLUSIVE:
                        READ_BUF(8);
                        COPYMEM(open->op_verf.data, 8);
                        break;
+               case NFS4_CREATE_EXCLUSIVE4_1:
+                       if (argp->minorversion < 1)
+                               goto xdr_error;
+                       READ_BUF(8);
+                       COPYMEM(open->op_verf.data, 8);
+                       status = nfsd4_decode_fattr(argp, open->op_bmval,
+                               &open->op_iattr, &open->op_acl);
+                       if (status)
+                               goto out;
+                       break;
                default:
                        goto xdr_error;
                }
@@ -852,8 +853,8 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
        status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
        if (status)
                return status;
-       return nfsd4_decode_fattr(argp, setattr->sa_bmval,
-                                 &setattr->sa_iattr, &setattr->sa_acl);
+       return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
+                                 &setattr->sa_acl);
 }
 
 static __be32
@@ -994,6 +995,8 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel
        READ_BUF(rlockowner->rl_owner.len);
        READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
 
+       if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
+               return nfserr_inval;
        DECODE_TAIL;
 }
 
@@ -1285,64 +1288,64 @@ static nfsd4_dec nfsd4_dec_ops[] = {
 };
 
 static nfsd4_dec nfsd41_dec_ops[] = {
-       [OP_ACCESS]             (nfsd4_dec)nfsd4_decode_access,
-       [OP_CLOSE]              (nfsd4_dec)nfsd4_decode_close,
-       [OP_COMMIT]             (nfsd4_dec)nfsd4_decode_commit,
-       [OP_CREATE]             (nfsd4_dec)nfsd4_decode_create,
-       [OP_DELEGPURGE]         (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_DELEGRETURN]        (nfsd4_dec)nfsd4_decode_delegreturn,
-       [OP_GETATTR]            (nfsd4_dec)nfsd4_decode_getattr,
-       [OP_GETFH]              (nfsd4_dec)nfsd4_decode_noop,
-       [OP_LINK]               (nfsd4_dec)nfsd4_decode_link,
-       [OP_LOCK]               (nfsd4_dec)nfsd4_decode_lock,
-       [OP_LOCKT]              (nfsd4_dec)nfsd4_decode_lockt,
-       [OP_LOCKU]              (nfsd4_dec)nfsd4_decode_locku,
-       [OP_LOOKUP]             (nfsd4_dec)nfsd4_decode_lookup,
-       [OP_LOOKUPP]            (nfsd4_dec)nfsd4_decode_noop,
-       [OP_NVERIFY]            (nfsd4_dec)nfsd4_decode_verify,
-       [OP_OPEN]               (nfsd4_dec)nfsd4_decode_open,
-       [OP_OPENATTR]           (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_OPEN_CONFIRM]       (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_OPEN_DOWNGRADE]     (nfsd4_dec)nfsd4_decode_open_downgrade,
-       [OP_PUTFH]              (nfsd4_dec)nfsd4_decode_putfh,
-       [OP_PUTPUBFH]           (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_PUTROOTFH]          (nfsd4_dec)nfsd4_decode_noop,
-       [OP_READ]               (nfsd4_dec)nfsd4_decode_read,
-       [OP_READDIR]            (nfsd4_dec)nfsd4_decode_readdir,
-       [OP_READLINK]           (nfsd4_dec)nfsd4_decode_noop,
-       [OP_REMOVE]             (nfsd4_dec)nfsd4_decode_remove,
-       [OP_RENAME]             (nfsd4_dec)nfsd4_decode_rename,
-       [OP_RENEW]              (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_RESTOREFH]          (nfsd4_dec)nfsd4_decode_noop,
-       [OP_SAVEFH]             (nfsd4_dec)nfsd4_decode_noop,
-       [OP_SECINFO]            (nfsd4_dec)nfsd4_decode_secinfo,
-       [OP_SETATTR]            (nfsd4_dec)nfsd4_decode_setattr,
-       [OP_SETCLIENTID]        (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_SETCLIENTID_CONFIRM](nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_VERIFY]             (nfsd4_dec)nfsd4_decode_verify,
-       [OP_WRITE]              (nfsd4_dec)nfsd4_decode_write,
-       [OP_RELEASE_LOCKOWNER]  (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_ACCESS]             = (nfsd4_dec)nfsd4_decode_access,
+       [OP_CLOSE]              = (nfsd4_dec)nfsd4_decode_close,
+       [OP_COMMIT]             = (nfsd4_dec)nfsd4_decode_commit,
+       [OP_CREATE]             = (nfsd4_dec)nfsd4_decode_create,
+       [OP_DELEGPURGE]         = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_DELEGRETURN]        = (nfsd4_dec)nfsd4_decode_delegreturn,
+       [OP_GETATTR]            = (nfsd4_dec)nfsd4_decode_getattr,
+       [OP_GETFH]              = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_LINK]               = (nfsd4_dec)nfsd4_decode_link,
+       [OP_LOCK]               = (nfsd4_dec)nfsd4_decode_lock,
+       [OP_LOCKT]              = (nfsd4_dec)nfsd4_decode_lockt,
+       [OP_LOCKU]              = (nfsd4_dec)nfsd4_decode_locku,
+       [OP_LOOKUP]             = (nfsd4_dec)nfsd4_decode_lookup,
+       [OP_LOOKUPP]            = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_NVERIFY]            = (nfsd4_dec)nfsd4_decode_verify,
+       [OP_OPEN]               = (nfsd4_dec)nfsd4_decode_open,
+       [OP_OPENATTR]           = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_OPEN_CONFIRM]       = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_OPEN_DOWNGRADE]     = (nfsd4_dec)nfsd4_decode_open_downgrade,
+       [OP_PUTFH]              = (nfsd4_dec)nfsd4_decode_putfh,
+       [OP_PUTPUBFH]           = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_PUTROOTFH]          = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_READ]               = (nfsd4_dec)nfsd4_decode_read,
+       [OP_READDIR]            = (nfsd4_dec)nfsd4_decode_readdir,
+       [OP_READLINK]           = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_REMOVE]             = (nfsd4_dec)nfsd4_decode_remove,
+       [OP_RENAME]             = (nfsd4_dec)nfsd4_decode_rename,
+       [OP_RENEW]              = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_RESTOREFH]          = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_SAVEFH]             = (nfsd4_dec)nfsd4_decode_noop,
+       [OP_SECINFO]            = (nfsd4_dec)nfsd4_decode_secinfo,
+       [OP_SETATTR]            = (nfsd4_dec)nfsd4_decode_setattr,
+       [OP_SETCLIENTID]        = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_SETCLIENTID_CONFIRM]= (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_VERIFY]             = (nfsd4_dec)nfsd4_decode_verify,
+       [OP_WRITE]              = (nfsd4_dec)nfsd4_decode_write,
+       [OP_RELEASE_LOCKOWNER]  = (nfsd4_dec)nfsd4_decode_notsupp,
 
        /* new operations for NFSv4.1 */
-       [OP_BACKCHANNEL_CTL]    (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_BIND_CONN_TO_SESSION](nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_EXCHANGE_ID]        (nfsd4_dec)nfsd4_decode_exchange_id,
-       [OP_CREATE_SESSION]     (nfsd4_dec)nfsd4_decode_create_session,
-       [OP_DESTROY_SESSION]    (nfsd4_dec)nfsd4_decode_destroy_session,
-       [OP_FREE_STATEID]       (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_GET_DIR_DELEGATION] (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_GETDEVICEINFO]      (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_GETDEVICELIST]      (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_LAYOUTCOMMIT]       (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_LAYOUTGET]          (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_LAYOUTRETURN]       (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_SECINFO_NO_NAME]    (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_SEQUENCE]           (nfsd4_dec)nfsd4_decode_sequence,
-       [OP_SET_SSV]            (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_TEST_STATEID]       (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_WANT_DELEGATION]    (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_DESTROY_CLIENTID]   (nfsd4_dec)nfsd4_decode_notsupp,
-       [OP_RECLAIM_COMPLETE]   (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_BACKCHANNEL_CTL]    = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_BIND_CONN_TO_SESSION]= (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_EXCHANGE_ID]        = (nfsd4_dec)nfsd4_decode_exchange_id,
+       [OP_CREATE_SESSION]     = (nfsd4_dec)nfsd4_decode_create_session,
+       [OP_DESTROY_SESSION]    = (nfsd4_dec)nfsd4_decode_destroy_session,
+       [OP_FREE_STATEID]       = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_GETDEVICEINFO]      = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_GETDEVICELIST]      = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_LAYOUTCOMMIT]       = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_LAYOUTGET]          = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_LAYOUTRETURN]       = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_SECINFO_NO_NAME]    = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_SEQUENCE]           = (nfsd4_dec)nfsd4_decode_sequence,
+       [OP_SET_SSV]            = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_TEST_STATEID]       = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_WANT_DELEGATION]    = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_DESTROY_CLIENTID]   = (nfsd4_dec)nfsd4_decode_notsupp,
+       [OP_RECLAIM_COMPLETE]   = (nfsd4_dec)nfsd4_decode_notsupp,
 };
 
 struct nfsd4_minorversion_ops {
@@ -1431,7 +1434,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
                }
                op->opnum = ntohl(*argp->p++);
 
-               if (op->opnum >= OP_ACCESS && op->opnum < ops->nops)
+               if (op->opnum >= FIRST_NFS4_OP && op->opnum <= LAST_NFS4_OP)
                        op->status = ops->decoders[op->opnum](argp, &op->u);
                else {
                        op->opnum = OP_ILLEGAL;
@@ -1446,21 +1449,6 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
 
        DECODE_TAIL;
 }
-/*
- * END OF "GENERIC" DECODE ROUTINES.
- */
-
-/*
- * START OF "GENERIC" ENCODE ROUTINES.
- *   These may look a little ugly since they are imported from a "generic"
- * set of XDR encode/decode routines which are intended to be shared by
- * all of our NFSv4 implementations (OpenBSD, MacOS X...).
- *
- * If the pain of reading these is too great, it should be a straightforward
- * task to translate them into Linux-specific versions which are more
- * consistent with the style used in NFSv2/v3...
- */
-#define ENCODE_HEAD              __be32 *p
 
 #define WRITE32(n)               *p++ = htonl(n)
 #define WRITE64(n)               do {                          \
@@ -1472,13 +1460,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
        memcpy(p, ptr, nbytes);                                 \
        p += XDR_QUADLEN(nbytes);                               \
 }} while (0)
-#define WRITECINFO(c)          do {                            \
-       *p++ = htonl(c.atomic);                                 \
-       *p++ = htonl(c.before_ctime_sec);                               \
-       *p++ = htonl(c.before_ctime_nsec);                              \
-       *p++ = htonl(c.after_ctime_sec);                                \
-       *p++ = htonl(c.after_ctime_nsec);                               \
-} while (0)
+
+/*
+ * Store one 32-bit value at *p and advance *p past it.
+ *
+ * The value is converted with htonl(), exactly as the WRITE32() macro
+ * does, so the encoded word is in network byte order.  Storing the
+ * host-order value directly into the __be32 slot would emit byte-swapped
+ * data on little-endian machines.
+ */
+static void write32(__be32 **p, u32 n)
+{
+       *(*p)++ = htonl(n);
+}
+
+/*
+ * Emit a 64-bit value as two consecutive 32-bit words, most significant
+ * word first, using write32() for each half.
+ */
+static void write64(__be32 **p, u64 n)
+{
+       u32 hi = (u32)(n >> 32);
+       u32 lo = (u32)n;
+
+       write32(p, hi);
+       write32(p, lo);
+}
+
+/*
+ * Encode the change attribute as two 32-bit words: the inode's i_version
+ * when IS_I_VERSION(inode) holds, otherwise the ctime seconds and
+ * nanoseconds taken from stat.
+ */
+static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
+{
+       if (!IS_I_VERSION(inode)) {
+               write32(p, stat->ctime.tv_sec);
+               write32(p, stat->ctime.tv_nsec);
+               return;
+       }
+       write64(p, inode->i_version);
+}
+
+/*
+ * Encode a change_info: the atomic flag, then the before/after values.
+ * When c->change_supported is set the before/after change values are
+ * written as 64-bit quantities; otherwise the before/after ctime
+ * seconds and nanoseconds are written as four 32-bit words.
+ */
+static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
+{
+       write32(p, c->atomic);
+       if (!c->change_supported) {
+               write32(p, c->before_ctime_sec);
+               write32(p, c->before_ctime_nsec);
+               write32(p, c->after_ctime_sec);
+               write32(p, c->after_ctime_nsec);
+               return;
+       }
+       write64(p, c->before_change);
+       write64(p, c->after_change);
+}
 
 #define RESERVE_SPACE(nbytes)  do {                            \
        p = resp->p;                                            \
@@ -1575,7 +1591,8 @@ static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location,
 static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat)
 {
        struct svc_fh tmp_fh;
-       char *path, *rootpath;
+       char *path = NULL, *rootpath;
+       size_t rootlen;
 
        fh_init(&tmp_fh, NFS4_FHSIZE);
        *stat = exp_pseudoroot(rqstp, &tmp_fh);
@@ -1585,14 +1602,18 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *
 
        path = exp->ex_pathname;
 
-       if (strncmp(path, rootpath, strlen(rootpath))) {
+       rootlen = strlen(rootpath);
+       if (strncmp(path, rootpath, rootlen)) {
                dprintk("nfsd: fs_locations failed;"
                        "%s is not contained in %s\n", path, rootpath);
                *stat = nfserr_notsupp;
-               return NULL;
+               path = NULL;
+               goto out;
        }
-
-       return path + strlen(rootpath);
+       path += rootlen;
+out:
+       fh_put(&tmp_fh);
+       return path;
 }
 
 /*
@@ -1708,6 +1729,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
 {
        u32 bmval0 = bmval[0];
        u32 bmval1 = bmval[1];
+       u32 bmval2 = bmval[2];
        struct kstat stat;
        struct svc_fh tempfh;
        struct kstatfs statfs;
@@ -1721,12 +1743,16 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
        int err;
        int aclsupport = 0;
        struct nfs4_acl *acl = NULL;
+       struct nfsd4_compoundres *resp = rqstp->rq_resp;
+       u32 minorversion = resp->cstate.minorversion;
 
        BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
-       BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
-       BUG_ON(bmval1 & ~NFSD_SUPPORTED_ATTRS_WORD1);
+       BUG_ON(bmval0 & ~nfsd_suppattrs0(minorversion));
+       BUG_ON(bmval1 & ~nfsd_suppattrs1(minorversion));
+       BUG_ON(bmval2 & ~nfsd_suppattrs2(minorversion));
 
        if (exp->ex_fslocs.migrated) {
+               BUG_ON(bmval[2]);
                status = fattr_handle_absent_fs(&bmval0, &bmval1, &rdattr_err);
                if (status)
                        goto out;
@@ -1764,30 +1790,43 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
                                goto out_nfserr;
                }
        }
-       if (bmval0 & FATTR4_WORD0_FS_LOCATIONS) {
-               if (exp->ex_fslocs.locations == NULL) {
-                       bmval0 &= ~FATTR4_WORD0_FS_LOCATIONS;
-               }
-       }
        if ((buflen -= 16) < 0)
                goto out_resource;
 
-       WRITE32(2);
-       WRITE32(bmval0);
-       WRITE32(bmval1);
+       if (unlikely(bmval2)) {
+               WRITE32(3);
+               WRITE32(bmval0);
+               WRITE32(bmval1);
+               WRITE32(bmval2);
+       } else if (likely(bmval1)) {
+               WRITE32(2);
+               WRITE32(bmval0);
+               WRITE32(bmval1);
+       } else {
+               WRITE32(1);
+               WRITE32(bmval0);
+       }
        attrlenp = p++;                /* to be backfilled later */
 
        if (bmval0 & FATTR4_WORD0_SUPPORTED_ATTRS) {
-               u32 word0 = NFSD_SUPPORTED_ATTRS_WORD0;
+               u32 word0 = nfsd_suppattrs0(minorversion);
+               u32 word1 = nfsd_suppattrs1(minorversion);
+               u32 word2 = nfsd_suppattrs2(minorversion);
+
                if ((buflen -= 12) < 0)
                        goto out_resource;
                if (!aclsupport)
                        word0 &= ~FATTR4_WORD0_ACL;
-               if (!exp->ex_fslocs.locations)
-                       word0 &= ~FATTR4_WORD0_FS_LOCATIONS;
-               WRITE32(2);
-               WRITE32(word0);
-               WRITE32(NFSD_SUPPORTED_ATTRS_WORD1);
+               if (!word2) {
+                       WRITE32(2);
+                       WRITE32(word0);
+                       WRITE32(word1);
+               } else {
+                       WRITE32(3);
+                       WRITE32(word0);
+                       WRITE32(word1);
+                       WRITE32(word2);
+               }
        }
        if (bmval0 & FATTR4_WORD0_TYPE) {
                if ((buflen -= 4) < 0)
@@ -1806,16 +1845,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
                        WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
        }
        if (bmval0 & FATTR4_WORD0_CHANGE) {
-               /*
-                * Note: This _must_ be consistent with the scheme for writing
-                * change_info, so any changes made here must be reflected there
-                * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
-                * macro above.)
-                */
                if ((buflen -= 8) < 0)
                        goto out_resource;
-               WRITE32(stat.ctime.tv_sec);
-               WRITE32(stat.ctime.tv_nsec);
+               write_change(&p, &stat, dentry->d_inode);
        }
        if (bmval0 & FATTR4_WORD0_SIZE) {
                if ((buflen -= 8) < 0)
@@ -2089,14 +2121,27 @@ out_acl:
                 * and this is the root of a cross-mounted filesystem.
                 */
                if (ignore_crossmnt == 0 &&
-                   exp->ex_path.mnt->mnt_root->d_inode == dentry->d_inode) {
-                       err = vfs_getattr(exp->ex_path.mnt->mnt_parent,
-                               exp->ex_path.mnt->mnt_mountpoint, &stat);
+                   dentry == exp->ex_path.mnt->mnt_root) {
+                       struct path path = exp->ex_path;
+                       path_get(&path);
+                       while (follow_up(&path)) {
+                               if (path.dentry != path.mnt->mnt_root)
+                                       break;
+                       }
+                       err = vfs_getattr(path.mnt, path.dentry, &stat);
+                       path_put(&path);
                        if (err)
                                goto out_nfserr;
                }
                WRITE64(stat.ino);
        }
+       if (bmval2 & FATTR4_WORD2_SUPPATTR_EXCLCREAT) {
+               WRITE32(3);
+               WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD0);
+               WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD1);
+               WRITE32(NFSD_SUPPATTR_EXCLCREAT_WORD2);
+       }
+
        *attrlenp = htonl((char *)p - (char *)attrlenp - 4);
        *countp = p - buffer;
        status = nfs_ok;
@@ -2139,6 +2184,15 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
        dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen);
        if (IS_ERR(dentry))
                return nfserrno(PTR_ERR(dentry));
+       if (!dentry->d_inode) {
+               /*
+                * nfsd_buffered_readdir drops the i_mutex between
+                * readdir and calling this callback, leaving a window
+                * where this directory entry could have gone away.
+                */
+               dput(dentry);
+               return nfserr_noent;
+       }
 
        exp_get(exp);
        /*
@@ -2148,11 +2202,14 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
         * we will not follow the cross mount and will fill the attributes
         * directly from the mountpoint dentry.
         */
-       if (d_mountpoint(dentry) && !attributes_need_mount(cd->rd_bmval))
-               ignore_crossmnt = 1;
-       else if (d_mountpoint(dentry)) {
+       if (nfsd_mountpoint(dentry, exp)) {
                int err;
 
+               if (!(exp->ex_flags & NFSEXP_V4ROOT)
+                               && !attributes_need_mount(cd->rd_bmval)) {
+                       ignore_crossmnt = 1;
+                       goto out_encode;
+               }
                /*
                 * Why the heck aren't we just using nfsd_lookup??
                 * Different "."/".." handling?  Something else?
@@ -2168,6 +2225,7 @@ nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd,
                        goto out_put;
 
        }
+out_encode:
        nfserr = nfsd4_encode_fattr(NULL, exp, dentry, p, buflen, cd->rd_bmval,
                                        cd->rd_rqstp, ignore_crossmnt);
 out_put:
@@ -2201,6 +2259,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
        struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common);
        int buflen;
        __be32 *p = cd->buffer;
+       __be32 *cookiep;
        __be32 nfserr = nfserr_toosmall;
 
        /* In nfsv4, "." and ".." never make it onto the wire.. */
@@ -2217,7 +2276,7 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
                goto fail;
 
        *p++ = xdr_one;                             /* mark entry present */
-       cd->offset = p;                             /* remember pointer */
+       cookiep = p;
        p = xdr_encode_hyper(p, NFS_OFFSET_MAX);    /* offset of next entry */
        p = xdr_encode_array(p, name, namlen);      /* name length & name */
 
@@ -2231,6 +2290,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
                goto fail;
        case nfserr_dropit:
                goto fail;
+       case nfserr_noent:
+               goto skip_entry;
        default:
                /*
                 * If the client requested the RDATTR_ERROR attribute,
@@ -2249,6 +2310,8 @@ nfsd4_encode_dirent(void *ccdv, const char *name, int namlen,
        }
        cd->buflen -= (p - cd->buffer);
        cd->buffer = p;
+       cd->offset = cookiep;
+skip_entry:
        cd->common.err = nfs_ok;
        return 0;
 fail:
@@ -2259,7 +2322,7 @@ fail:
 static void
 nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        RESERVE_SPACE(sizeof(stateid_t));
        WRITE32(sid->si_generation);
@@ -2270,7 +2333,7 @@ nfsd4_encode_stateid(struct nfsd4_compoundres *resp, stateid_t *sid)
 static __be32
 nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(8);
@@ -2297,7 +2360,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
 static __be32
 nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(8);
@@ -2310,11 +2373,11 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 static __be32
 nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(32);
-               WRITECINFO(create->cr_cinfo);
+               write_cinfo(&p, &create->cr_cinfo);
                WRITE32(2);
                WRITE32(create->cr_bmval[0]);
                WRITE32(create->cr_bmval[1]);
@@ -2346,7 +2409,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
 {
        struct svc_fh *fhp = *fhpp;
        unsigned int len;
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                len = fhp->fh_handle.fh_size;
@@ -2365,7 +2428,7 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
 static void
 nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denied *ld)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        RESERVE_SPACE(32 + XDR_LEN(ld->ld_sop ? ld->ld_sop->so_owner.len : 0));
        WRITE64(ld->ld_start);
@@ -2421,11 +2484,11 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
 static __be32
 nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(20);
-               WRITECINFO(link->li_cinfo);
+               write_cinfo(&p, &link->li_cinfo);
                ADJUST_ARGS();
        }
        return nfserr;
@@ -2435,7 +2498,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
 static __be32
 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
-       ENCODE_HEAD;
+       __be32 *p;
        ENCODE_SEQID_OP_HEAD;
 
        if (nfserr)
@@ -2443,7 +2506,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
 
        nfsd4_encode_stateid(resp, &open->op_stateid);
        RESERVE_SPACE(40);
-       WRITECINFO(open->op_cinfo);
+       write_cinfo(&p, &open->op_cinfo);
        WRITE32(open->op_rflags);
        WRITE32(2);
        WRITE32(open->op_bmval[0]);
@@ -2530,7 +2593,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
        int v, pn;
        unsigned long maxcount; 
        long len;
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                return nfserr;
@@ -2592,7 +2655,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
 {
        int maxcount;
        char *page;
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                return nfserr;
@@ -2641,7 +2704,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
        int maxcount;
        loff_t offset;
        __be32 *page, *savep, *tailbase;
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                return nfserr;
@@ -2717,11 +2780,11 @@ err_no_verf:
 static __be32
 nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(20);
-               WRITECINFO(remove->rm_cinfo);
+               write_cinfo(&p, &remove->rm_cinfo);
                ADJUST_ARGS();
        }
        return nfserr;
@@ -2730,12 +2793,12 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
 static __be32
 nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename *rename)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(40);
-               WRITECINFO(rename->rn_sinfo);
-               WRITECINFO(rename->rn_tinfo);
+               write_cinfo(&p, &rename->rn_sinfo);
+               write_cinfo(&p, &rename->rn_tinfo);
                ADJUST_ARGS();
        }
        return nfserr;
@@ -2750,7 +2813,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
        u32 nflavs;
        struct exp_flavor_info *flavs;
        struct exp_flavor_info def_flavs[2];
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                goto out;
@@ -2815,7 +2878,7 @@ out:
 static __be32
 nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        RESERVE_SPACE(12);
        if (nfserr) {
@@ -2835,7 +2898,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
 static __be32
 nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(8 + sizeof(nfs4_verifier));
@@ -2855,7 +2918,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct n
 static __be32
 nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (!nfserr) {
                RESERVE_SPACE(16);
@@ -2871,7 +2934,7 @@ static __be32
 nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, int nfserr,
                         struct nfsd4_exchange_id *exid)
 {
-       ENCODE_HEAD;
+       __be32 *p;
        char *major_id;
        char *server_scope;
        int major_id_sz;
@@ -2926,7 +2989,7 @@ static __be32
 nfsd4_encode_create_session(struct nfsd4_compoundres *resp, int nfserr,
                            struct nfsd4_create_session *sess)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                return nfserr;
@@ -2982,7 +3045,7 @@ __be32
 nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
                      struct nfsd4_sequence *seq)
 {
-       ENCODE_HEAD;
+       __be32 *p;
 
        if (nfserr)
                return nfserr;
@@ -3001,6 +3064,7 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, int nfserr,
        WRITE32(0);
 
        ADJUST_ARGS();
+       resp->cstate.datap = p; /* DRC cache data pointer */
        return 0;
 }
 
@@ -3078,11 +3142,59 @@ static nfsd4_enc nfsd4_enc_ops[] = {
        [OP_RECLAIM_COMPLETE]   = (nfsd4_enc)nfsd4_encode_noop,
 };
 
+/*
+ * Calculate the total amount of memory that the compound response has taken
+ * after encoding the current operation, and check it against the session's
+ * reply cache limit, session->se_fchannel.maxresp_cached.
+ *
+ * pad: add on 8 bytes for the next operation's op_code and status so that
+ * there is room to cache a failure on the next operation (0 on the last op).
+ *
+ * Returns 0 if the reply fits, or if there is no session or the slot has
+ * sl_cachethis == 0; otherwise returns nfserr_rep_too_big_to_cache.  The
+ * limit is expected to be at least a page, so it holds the xdr_buf head.
+ */
+static __be32 nfsd4_check_drc_limit(struct nfsd4_compoundres *resp)
+{
+       __be32 status = 0; /* same type as the nfserr_* value returned below */
+       struct xdr_buf *xb = &resp->rqstp->rq_res;
+       struct nfsd4_compoundargs *args = resp->rqstp->rq_argp;
+       struct nfsd4_session *session = NULL;
+       struct nfsd4_slot *slot = resp->cstate.slot;
+       u32 length, tlen = 0, pad = 8;
+
+       if (!nfsd4_has_session(&resp->cstate))
+               return status; /* no session: no limit to enforce */
+
+       session = resp->cstate.session;
+       if (session == NULL || slot->sl_cachethis == 0)
+               return status; /* sl_cachethis clear: no limit applies */
+
+       if (resp->opcnt >= args->opcnt)
+               pad = 0; /* this is the last operation */
+
+       if (xb->page_len == 0) { /* no page data: measure from head base */
+               length = (char *)resp->p - (char *)xb->head[0].iov_base + pad;
+       } else { /* head + pages + bytes written so far into the tail */
+               if (xb->tail[0].iov_base && xb->tail[0].iov_len > 0)
+                       tlen = (char *)resp->p - (char *)xb->tail[0].iov_base;
+
+               length = xb->head[0].iov_len + xb->page_len + tlen + pad;
+       }
+       dprintk("%s length %u, xb->page_len %u tlen %u pad %u\n", __func__,
+               length, xb->page_len, tlen, pad);
+
+       if (length <= session->se_fchannel.maxresp_cached)
+               return status;
+       else
+               return nfserr_rep_too_big_to_cache;
+}
+
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
        __be32 *statp;
-       ENCODE_HEAD;
+       __be32 *p;
 
        RESERVE_SPACE(8);
        WRITE32(op->opnum);
@@ -3094,6 +3206,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
               !nfsd4_enc_ops[op->opnum]);
        op->status = nfsd4_enc_ops[op->opnum](resp, op->status, &op->u);
+       /* nfsd4_check_drc_limit guarantees enough room for error status */
+       if (!op->status && nfsd4_check_drc_limit(resp))
+               op->status = nfserr_rep_too_big_to_cache;
 status:
        /*
         * Note: We write the status directly, instead of using WRITE32(),
@@ -3113,7 +3228,7 @@ status:
 void
 nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
-       ENCODE_HEAD;
+       __be32 *p;
        struct nfs4_replay *rp = op->replay;
 
        BUG_ON(!rp);
@@ -3128,10 +3243,6 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        ADJUST_ARGS();
 }
 
-/*
- * END OF "GENERIC" ENCODE ROUTINES.
- */
-
 int
 nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy)
 {
@@ -3181,6 +3292,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
        /*
         * All that remains is to write the tag and operation count...
         */
+       struct nfsd4_compound_state *cs = &resp->cstate;
        struct kvec *iov;
        p = resp->tagp;
        *p++ = htonl(resp->taglen);
@@ -3194,17 +3306,11 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
                iov = &rqstp->rq_res.head[0];
        iov->iov_len = ((char*)resp->p) - (char*)iov->iov_base;
        BUG_ON(iov->iov_len > PAGE_SIZE);
-       if (nfsd4_has_session(&resp->cstate)) {
-               if (resp->cstate.status == nfserr_replay_cache &&
-                               !nfsd4_not_cached(resp)) {
-                       iov->iov_len = resp->cstate.iovlen;
-               } else {
-                       nfsd4_store_cache_entry(resp);
-                       dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
-                       resp->cstate.slot->sl_inuse = 0;
-               }
-               if (resp->cstate.session)
-                       nfsd4_put_session(resp->cstate.session);
+       if (nfsd4_has_session(cs) && cs->status != nfserr_replay_cache) {
+               nfsd4_store_cache_entry(resp);
+               dprintk("%s: SET SLOT STATE TO AVAILABLE\n", __func__);
+               resp->cstate.slot->sl_inuse = false;
+               nfsd4_put_session(resp->cstate.session);
        }
        return 1;
 }