Merge branch 'for-3.10' of git://linux-nfs.org/~bfields/linux
Linus Torvalds [Fri, 3 May 2013 17:59:39 +0000 (10:59 -0700)]
Pull nfsd changes from J Bruce Fields:
 "Highlights include:

   - Some more DRC cleanup and performance work from Jeff Layton

   - A gss-proxy upcall from Simo Sorce: currently krb5 mounts to the
     server using credentials from Active Directory often fail due to
     limitations of the svcgssd upcall interface.  This replacement
     lifts those limitations.  The existing upcall is still supported
     for backwards compatibility.

   - More NFSv4.1 support: at this point, a user with a current client
     who upgrades from 4.0 to 4.1 should see no regressions.  In
     theory we do everything a 4.1 server is required to do.  Patches
     for a couple minor exceptions are ready for 3.11, and with those
     and some more testing I'd like to turn 4.1 on by default in 3.11."

Fix up semantic conflict as per Stephen Rothwell and linux-next:

Commit 030d794bf498 ("SUNRPC: Use gssproxy upcall for server RPCGSS
authentication") adds two new users of "PDE(inode)->data", but we're
supposed to use "PDE_DATA(inode)" instead since commit d9dda78bad87
("procfs: new helper - PDE_DATA(inode)").

The old PDE() macro is no longer available since commit c30480b92cf4
("proc: Make the PROC_I() and PDE() macros internal to procfs")

* 'for-3.10' of git://linux-nfs.org/~bfields/linux: (60 commits)
  NFSD: SECINFO doesn't handle unsupported pseudoflavors correctly
  NFSD: Simplify GSS flavor encoding in nfsd4_do_encode_secinfo()
  nfsd: make symbol nfsd_reply_cache_shrinker static
  svcauth_gss: fix error return code in rsc_parse()
  nfsd4: don't remap EISDIR errors in rename
  svcrpc: fix gss-proxy to respect user namespaces
  SUNRPC: gssp_procedures[] can be static
  SUNRPC: define {create,destroy}_use_gss_proxy_proc_entry in !PROC case
  nfsd4: better error return to indicate SSV non-support
  nfsd: fix EXDEV checking in rename
  SUNRPC: Use gssproxy upcall for server RPCGSS authentication.
  SUNRPC: Add RPC based upcall mechanism for RPCGSS auth
  SUNRPC: conditionally return endtime from import_sec_context
  SUNRPC: allow disabling idle timeout
  SUNRPC: attempt AF_LOCAL connect on setup
  nfsd: Decode and send 64bit time values
  nfsd4: put_client_renew_locked can be static
  nfsd4: remove unused macro
  nfsd4: remove some useless code
  nfsd4: implement SEQ4_STATUS_RECALLABLE_STATE_REVOKED
  ...

32 files changed:
Documentation/filesystems/nfs/00-INDEX
Documentation/filesystems/nfs/rpc-server-gss.txt [new file with mode: 0644]
fs/nfsd/cache.h
fs/nfsd/netns.h
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfscache.c
fs/nfsd/nfsctl.c
fs/nfsd/state.h
fs/nfsd/vfs.c
fs/nfsd/xdr4.h
fs/nfsd/xdr4cb.h [new file with mode: 0644]
include/linux/sunrpc/clnt.h
include/linux/sunrpc/gss_api.h
include/linux/sunrpc/msg_prot.h
include/linux/sunrpc/xprt.h
net/sunrpc/auth_gss/Makefile
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_mech.c
net/sunrpc/auth_gss/gss_mech_switch.c
net/sunrpc/auth_gss/gss_rpc_upcall.c [new file with mode: 0644]
net/sunrpc/auth_gss/gss_rpc_upcall.h [new file with mode: 0644]
net/sunrpc/auth_gss/gss_rpc_xdr.c [new file with mode: 0644]
net/sunrpc/auth_gss/gss_rpc_xdr.h [new file with mode: 0644]
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/cache.c
net/sunrpc/clnt.c
net/sunrpc/netns.h
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c

index 1716874..66eb6c8 100644 (file)
@@ -20,3 +20,5 @@ rpc-cache.txt
        - introduction to the caching mechanisms in the sunrpc layer.
 idmapper.txt
        - information for configuring request-keys to be used by idmapper
+rpc-server-gss.txt
+       - Information on GSS authentication support in the NFS Server
diff --git a/Documentation/filesystems/nfs/rpc-server-gss.txt b/Documentation/filesystems/nfs/rpc-server-gss.txt
new file mode 100644 (file)
index 0000000..716f4be
--- /dev/null
@@ -0,0 +1,91 @@
+
+rpcsec_gss support for kernel RPC servers
+=========================================
+
+This document gives references to the standards and protocols used to
+implement RPCGSS authentication in kernel RPC servers such as the NFS
+server and the NFS client's NFSv4.0 callback server.  (But note that
+NFSv4.1 and higher don't require the client to act as a server for the
+purposes of authentication.)
+
+RPCGSS is specified in a few IETF documents:
+ - RFC2203 v1: http://tools.ietf.org/rfc/rfc2203.txt
+ - RFC5403 v2: http://tools.ietf.org/rfc/rfc5403.txt
+and there is a 3rd version being proposed:
+ - http://tools.ietf.org/id/draft-williams-rpcsecgssv3.txt
+   (At draft n. 02 at the time of writing)
+
+Background
+----------
+
+The RPCGSS Authentication method describes a way to perform GSSAPI
+Authentication for NFS.  Although GSSAPI is itself completely mechanism
+agnostic, in many cases only the KRB5 mechanism is supported by NFS
+implementations.
+
+The Linux kernel, at the moment, supports only the KRB5 mechanism, and
+depends on GSSAPI extensions that are KRB5 specific.
+
+GSSAPI is a complex library, and implementing it completely in kernel is
+unwarranted. However GSSAPI operations are fundamentally separable into 2
+parts:
+- initial context establishment
+- integrity/privacy protection (signing and encrypting of individual
+  packets)
+
+The former is more complex and policy-independent, but less
+performance-sensitive.  The latter is simpler and needs to be very fast.
+
+Therefore, we perform per-packet integrity and privacy protection in the
+kernel, but leave the initial context establishment to userspace.  We
+need upcalls to request userspace to perform context establishment.
+
+NFS Server Legacy Upcall Mechanism
+----------------------------------
+
+The classic upcall mechanism uses a custom text based upcall mechanism
+to talk to a custom daemon called rpc.svcgssd that is provided by the
+nfs-utils package.
+
+This upcall mechanism has 2 limitations:
+
+A) It can handle tokens that are no bigger than 2KiB
+
+In some Kerberos deployments GSSAPI tokens can be quite big, up to and
+beyond 64KiB in size, due to various authorization extensions attached to
+the Kerberos tickets, which need to be sent through the GSS layer in
+order to perform context establishment.
+
+B) It does not properly handle creds where the user is a member of more
+than a few thousand groups (the current hard limit in the kernel is 65K
+groups) due to a limitation on the size of the buffer that can be sent
+back to the kernel (4KiB).
+
+NFS Server New RPC Upcall Mechanism
+-----------------------------------
+
+The newer upcall mechanism uses RPC over a unix socket to a daemon
+called gss-proxy, implemented by a userspace program called Gssproxy.
+
+The gss_proxy RPC protocol is currently documented here:
+
+       https://fedorahosted.org/gss-proxy/wiki/ProtocolDocumentation
+
+This upcall mechanism uses the kernel rpc client and connects to the gssproxy
+userspace program over a regular unix socket. The gssproxy protocol does not
+suffer from the size limitations of the legacy protocol.
+
+Negotiating Upcall Mechanisms
+-----------------------------
+
+To provide backward compatibility, the kernel defaults to using the
+legacy mechanism.  To switch to the new mechanism, gss-proxy must bind
+to /var/run/gssproxy.sock and then write "1" to
+/proc/net/rpc/use-gss-proxy.  If gss-proxy dies, it must repeat both
+steps.
+
+Once the upcall mechanism is chosen, it cannot be changed.  To prevent
+locking into the legacy mechanisms, the above steps must be performed
+before starting nfsd.  Whoever starts nfsd can guarantee this by reading
+from /proc/net/rpc/use-gss-proxy and checking that it contains a
+"1"--the read will block until gss-proxy has done its write to the file.
index 87fd141..d5c5b3e 100644 (file)
@@ -82,6 +82,7 @@ int   nfsd_reply_cache_init(void);
 void   nfsd_reply_cache_shutdown(void);
 int    nfsd_cache_lookup(struct svc_rqst *);
 void   nfsd_cache_update(struct svc_rqst *, int, __be32 *);
+int    nfsd_reply_cache_stats_open(struct inode *, struct file *);
 
 #ifdef CONFIG_NFSD_V4
 void   nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp);
index 1051beb..849a7c3 100644 (file)
@@ -80,6 +80,7 @@ struct nfsd_net {
         */
        struct list_head client_lru;
        struct list_head close_lru;
+       struct list_head del_recall_lru;
 
        struct delayed_work laundromat_work;
 
index 99bc85f..7f05cd1 100644 (file)
@@ -37,6 +37,7 @@
 #include "nfsd.h"
 #include "state.h"
 #include "netns.h"
+#include "xdr4cb.h"
 
 #define NFSDDBG_FACILITY                NFSDDBG_PROC
 
@@ -53,30 +54,6 @@ enum {
        NFSPROC4_CLNT_CB_SEQUENCE,
 };
 
-#define NFS4_MAXTAGLEN         20
-
-#define NFS4_enc_cb_null_sz            0
-#define NFS4_dec_cb_null_sz            0
-#define cb_compound_enc_hdr_sz         4
-#define cb_compound_dec_hdr_sz         (3 + (NFS4_MAXTAGLEN >> 2))
-#define sessionid_sz                   (NFS4_MAX_SESSIONID_LEN >> 2)
-#define cb_sequence_enc_sz             (sessionid_sz + 4 +             \
-                                       1 /* no referring calls list yet */)
-#define cb_sequence_dec_sz             (op_dec_sz + sessionid_sz + 4)
-
-#define op_enc_sz                      1
-#define op_dec_sz                      2
-#define enc_nfs4_fh_sz                 (1 + (NFS4_FHSIZE >> 2))
-#define enc_stateid_sz                 (NFS4_STATEID_SIZE >> 2)
-#define NFS4_enc_cb_recall_sz          (cb_compound_enc_hdr_sz +       \
-                                       cb_sequence_enc_sz +            \
-                                       1 + enc_stateid_sz +            \
-                                       enc_nfs4_fh_sz)
-
-#define NFS4_dec_cb_recall_sz          (cb_compound_dec_hdr_sz  +      \
-                                       cb_sequence_dec_sz +            \
-                                       op_dec_sz)
-
 struct nfs4_cb_compound_hdr {
        /* args */
        u32             ident;  /* minorversion 0 only */
@@ -817,8 +794,7 @@ static bool nfsd41_cb_get_slot(struct nfs4_client *clp, struct rpc_task *task)
 static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 {
        struct nfsd4_callback *cb = calldata;
-       struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
-       struct nfs4_client *clp = dp->dl_stid.sc_client;
+       struct nfs4_client *clp = cb->cb_clp;
        u32 minorversion = clp->cl_minorversion;
 
        cb->cb_minorversion = minorversion;
@@ -839,8 +815,7 @@ static void nfsd4_cb_prepare(struct rpc_task *task, void *calldata)
 static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
 {
        struct nfsd4_callback *cb = calldata;
-       struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
-       struct nfs4_client *clp = dp->dl_stid.sc_client;
+       struct nfs4_client *clp = cb->cb_clp;
 
        dprintk("%s: minorversion=%d\n", __func__,
                clp->cl_minorversion);
@@ -863,7 +838,7 @@ static void nfsd4_cb_recall_done(struct rpc_task *task, void *calldata)
 {
        struct nfsd4_callback *cb = calldata;
        struct nfs4_delegation *dp = container_of(cb, struct nfs4_delegation, dl_recall);
-       struct nfs4_client *clp = dp->dl_stid.sc_client;
+       struct nfs4_client *clp = cb->cb_clp;
        struct rpc_clnt *current_rpc_client = clp->cl_cb_client;
 
        nfsd4_cb_done(task, calldata);
index ae73175..8ae5abf 100644 (file)
@@ -191,9 +191,18 @@ static __be32 nfsd_check_obj_isreg(struct svc_fh *fh)
        return nfserr_symlink;
 }
 
+static void nfsd4_set_open_owner_reply_cache(struct nfsd4_compound_state *cstate, struct nfsd4_open *open, struct svc_fh *resfh)
+{
+       if (nfsd4_has_session(cstate))
+               return;
+       fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
+                       &resfh->fh_handle);
+}
+
 static __be32
-do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
 {
+       struct svc_fh *current_fh = &cstate->current_fh;
        struct svc_fh *resfh;
        int accmode;
        __be32 status;
@@ -252,9 +261,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o
        if (is_create_with_attrs(open) && open->op_acl != NULL)
                do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval);
 
-       /* set reply cache */
-       fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
-                       &resfh->fh_handle);
+       nfsd4_set_open_owner_reply_cache(cstate, open, resfh);
        accmode = NFSD_MAY_NOP;
        if (open->op_created)
                accmode |= NFSD_MAY_OWNER_OVERRIDE;
@@ -268,8 +275,9 @@ out:
 }
 
 static __be32
-do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+do_open_fhandle(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_open *open)
 {
+       struct svc_fh *current_fh = &cstate->current_fh;
        __be32 status;
 
        /* We don't know the target directory, and therefore can not
@@ -278,9 +286,7 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 
        memset(&open->op_cinfo, 0, sizeof(struct nfsd4_change_info));
 
-       /* set replay cache */
-       fh_copy_shallow(&open->op_openowner->oo_owner.so_replay.rp_openfh,
-                       &current_fh->fh_handle);
+       nfsd4_set_open_owner_reply_cache(cstate, open, current_fh);
 
        open->op_truncate = (open->op_iattr.ia_valid & ATTR_SIZE) &&
                (open->op_iattr.ia_size == 0);
@@ -351,6 +357,10 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        }
        if (status)
                goto out;
+       if (open->op_xdr_error) {
+               status = open->op_xdr_error;
+               goto out;
+       }
 
        status = nfsd4_check_open_attributes(rqstp, cstate, open);
        if (status)
@@ -368,8 +378,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        switch (open->op_claim_type) {
                case NFS4_OPEN_CLAIM_DELEGATE_CUR:
                case NFS4_OPEN_CLAIM_NULL:
-                       status = do_open_lookup(rqstp, &cstate->current_fh,
-                                               open);
+                       status = do_open_lookup(rqstp, cstate, open);
                        if (status)
                                goto out;
                        break;
@@ -382,8 +391,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                goto out;
                case NFS4_OPEN_CLAIM_FH:
                case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
-                       status = do_open_fhandle(rqstp, &cstate->current_fh,
-                                                open);
+                       status = do_open_fhandle(rqstp, cstate, open);
                        if (status)
                                goto out;
                        break;
@@ -409,14 +417,33 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        WARN_ON(status && open->op_created);
 out:
        nfsd4_cleanup_open_state(open, status);
-       if (open->op_openowner)
+       if (open->op_openowner && !nfsd4_has_session(cstate))
                cstate->replay_owner = &open->op_openowner->oo_owner;
-       else
+       nfsd4_bump_seqid(cstate, status);
+       if (!cstate->replay_owner)
                nfs4_unlock_state();
        return status;
 }
 
 /*
+ * OPEN is the only seqid-mutating operation whose decoding can fail
+ * with a seqid-mutating error (specifically, decoding of user names in
+ * the attributes).  Therefore we have to do some processing to look up
+ * the stateowner so that we can bump the seqid.
+ */
+static __be32 nfsd4_open_omfg(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_op *op)
+{
+       struct nfsd4_open *open = (struct nfsd4_open *)&op->u;
+
+       if (!seqid_mutating_err(ntohl(op->status)))
+               return op->status;
+       if (nfsd4_has_session(cstate))
+               return op->status;
+       open->op_xdr_error = op->status;
+       return nfsd4_open(rqstp, cstate, open);
+}
+
+/*
  * filehandle-manipulating ops.
  */
 static __be32
@@ -786,21 +813,11 @@ nfsd4_rename(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        status = nfsd_rename(rqstp, &cstate->save_fh, rename->rn_sname,
                             rename->rn_snamelen, &cstate->current_fh,
                             rename->rn_tname, rename->rn_tnamelen);
-
-       /* the underlying filesystem returns different error's than required
-        * by NFSv4. both save_fh and current_fh have been verified.. */
-       if (status == nfserr_isdir)
-               status = nfserr_exist;
-       else if ((status == nfserr_notdir) &&
-                  (S_ISDIR(cstate->save_fh.fh_dentry->d_inode->i_mode) &&
-                   S_ISDIR(cstate->current_fh.fh_dentry->d_inode->i_mode)))
-               status = nfserr_exist;
-
-       if (!status) {
-               set_change_info(&rename->rn_sinfo, &cstate->current_fh);
-               set_change_info(&rename->rn_tinfo, &cstate->save_fh);
-       }
-       return status;
+       if (status)
+               return status;
+       set_change_info(&rename->rn_sinfo, &cstate->current_fh);
+       set_change_info(&rename->rn_tinfo, &cstate->save_fh);
+       return nfs_ok;
 }
 
 static __be32
@@ -931,14 +948,14 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        nfs4_lock_state();
        status = nfs4_preprocess_stateid_op(SVC_NET(rqstp),
                                        cstate, stateid, WR_STATE, &filp);
-       if (filp)
-               get_file(filp);
-       nfs4_unlock_state();
-
        if (status) {
+               nfs4_unlock_state();
                dprintk("NFSD: nfsd4_write: couldn't process stateid!\n");
                return status;
        }
+       if (filp)
+               get_file(filp);
+       nfs4_unlock_state();
 
        cnt = write->wr_buflen;
        write->wr_how_written = write->wr_stable_how;
@@ -1244,8 +1261,11 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
                 * for example, if there is a miscellaneous XDR error
                 * it will be set to nfserr_bad_xdr.
                 */
-               if (op->status)
+               if (op->status) {
+                       if (op->opnum == OP_OPEN)
+                               op->status = nfsd4_open_omfg(rqstp, cstate, op);
                        goto encode_op;
+               }
 
                /* We must be able to encode a successful response to
                 * this operation, with enough room left over to encode a
@@ -1282,12 +1302,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
                if (op->status)
                        goto encode_op;
 
-               if (opdesc->op_func) {
-                       if (opdesc->op_get_currentstateid)
-                               opdesc->op_get_currentstateid(cstate, &op->u);
-                       op->status = opdesc->op_func(rqstp, cstate, &op->u);
-               } else
-                       BUG_ON(op->status == nfs_ok);
+               if (opdesc->op_get_currentstateid)
+                       opdesc->op_get_currentstateid(cstate, &op->u);
+               op->status = opdesc->op_func(rqstp, cstate, &op->u);
 
                if (!op->status) {
                        if (opdesc->op_set_currentstateid)
index 417c848..316ec84 100644 (file)
@@ -42,6 +42,7 @@
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/addr.h>
 #include "xdr4.h"
+#include "xdr4cb.h"
 #include "vfs.h"
 #include "current_stateid.h"
 
@@ -94,17 +95,32 @@ nfs4_lock_state(void)
        mutex_lock(&client_mutex);
 }
 
-static void free_session(struct kref *);
+static void free_session(struct nfsd4_session *);
 
-/* Must be called under the client_lock */
-static void nfsd4_put_session_locked(struct nfsd4_session *ses)
+void nfsd4_put_session(struct nfsd4_session *ses)
+{
+       atomic_dec(&ses->se_ref);
+}
+
+static bool is_session_dead(struct nfsd4_session *ses)
+{
+       return ses->se_flags & NFS4_SESSION_DEAD;
+}
+
+static __be32 mark_session_dead_locked(struct nfsd4_session *ses)
 {
-       kref_put(&ses->se_ref, free_session);
+       if (atomic_read(&ses->se_ref))
+               return nfserr_jukebox;
+       ses->se_flags |= NFS4_SESSION_DEAD;
+       return nfs_ok;
 }
 
-static void nfsd4_get_session(struct nfsd4_session *ses)
+static __be32 nfsd4_get_session_locked(struct nfsd4_session *ses)
 {
-       kref_get(&ses->se_ref);
+       if (is_session_dead(ses))
+               return nfserr_badsession;
+       atomic_inc(&ses->se_ref);
+       return nfs_ok;
 }
 
 void
@@ -113,6 +129,90 @@ nfs4_unlock_state(void)
        mutex_unlock(&client_mutex);
 }
 
+static bool is_client_expired(struct nfs4_client *clp)
+{
+       return clp->cl_time == 0;
+}
+
+static __be32 mark_client_expired_locked(struct nfs4_client *clp)
+{
+       if (atomic_read(&clp->cl_refcount))
+               return nfserr_jukebox;
+       clp->cl_time = 0;
+       return nfs_ok;
+}
+
+static __be32 mark_client_expired(struct nfs4_client *clp)
+{
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+       __be32 ret;
+
+       spin_lock(&nn->client_lock);
+       ret = mark_client_expired_locked(clp);
+       spin_unlock(&nn->client_lock);
+       return ret;
+}
+
+static __be32 get_client_locked(struct nfs4_client *clp)
+{
+       if (is_client_expired(clp))
+               return nfserr_expired;
+       atomic_inc(&clp->cl_refcount);
+       return nfs_ok;
+}
+
+/* must be called under the client_lock */
+static inline void
+renew_client_locked(struct nfs4_client *clp)
+{
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+       if (is_client_expired(clp)) {
+               WARN_ON(1);
+               printk("%s: client (clientid %08x/%08x) already expired\n",
+                       __func__,
+                       clp->cl_clientid.cl_boot,
+                       clp->cl_clientid.cl_id);
+               return;
+       }
+
+       dprintk("renewing client (clientid %08x/%08x)\n",
+                       clp->cl_clientid.cl_boot,
+                       clp->cl_clientid.cl_id);
+       list_move_tail(&clp->cl_lru, &nn->client_lru);
+       clp->cl_time = get_seconds();
+}
+
+static inline void
+renew_client(struct nfs4_client *clp)
+{
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+       spin_lock(&nn->client_lock);
+       renew_client_locked(clp);
+       spin_unlock(&nn->client_lock);
+}
+
+static void put_client_renew_locked(struct nfs4_client *clp)
+{
+       if (!atomic_dec_and_test(&clp->cl_refcount))
+               return;
+       if (!is_client_expired(clp))
+               renew_client_locked(clp);
+}
+
+void put_client_renew(struct nfs4_client *clp)
+{
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
+       if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
+               return;
+       if (!is_client_expired(clp))
+               renew_client_locked(clp);
+       spin_unlock(&nn->client_lock);
+}
+
+
 static inline u32
 opaque_hashval(const void *ptr, int nbytes)
 {
@@ -126,8 +226,6 @@ opaque_hashval(const void *ptr, int nbytes)
        return x;
 }
 
-static struct list_head del_recall_lru;
-
 static void nfsd4_free_file(struct nfs4_file *f)
 {
        kmem_cache_free(file_slab, f);
@@ -137,7 +235,7 @@ static inline void
 put_nfs4_file(struct nfs4_file *fi)
 {
        if (atomic_dec_and_lock(&fi->fi_ref, &recall_lock)) {
-               list_del(&fi->fi_hash);
+               hlist_del(&fi->fi_hash);
                spin_unlock(&recall_lock);
                iput(fi->fi_inode);
                nfsd4_free_file(fi);
@@ -181,7 +279,7 @@ static unsigned int file_hashval(struct inode *ino)
        return hash_ptr(ino, FILE_HASH_BITS);
 }
 
-static struct list_head file_hashtbl[FILE_HASH_SIZE];
+static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
 
 static void __nfs4_file_get_access(struct nfs4_file *fp, int oflag)
 {
@@ -210,13 +308,7 @@ static void __nfs4_file_put_access(struct nfs4_file *fp, int oflag)
 {
        if (atomic_dec_and_test(&fp->fi_access[oflag])) {
                nfs4_file_put_fd(fp, oflag);
-               /*
-                * It's also safe to get rid of the RDWR open *if*
-                * we no longer have need of the other kind of access
-                * or if we already have the other kind of open:
-                */
-               if (fp->fi_fds[1-oflag]
-                       || atomic_read(&fp->fi_access[1 - oflag]) == 0)
+               if (atomic_read(&fp->fi_access[1 - oflag]) == 0)
                        nfs4_file_put_fd(fp, O_RDWR);
        }
 }
@@ -262,7 +354,7 @@ kmem_cache *slab)
         */
        return stid;
 out_free:
-       kfree(stid);
+       kmem_cache_free(slab, stid);
        return NULL;
 }
 
@@ -313,21 +405,18 @@ alloc_init_deleg(struct nfs4_client *clp, struct nfs4_ol_stateid *stp, struct sv
        return dp;
 }
 
-static void free_stid(struct nfs4_stid *s, struct kmem_cache *slab)
+static void remove_stid(struct nfs4_stid *s)
 {
        struct idr *stateids = &s->sc_client->cl_stateids;
 
        idr_remove(stateids, s->sc_stateid.si_opaque.so_id);
-       kmem_cache_free(slab, s);
 }
 
 void
 nfs4_put_delegation(struct nfs4_delegation *dp)
 {
        if (atomic_dec_and_test(&dp->dl_count)) {
-               dprintk("NFSD: freeing dp %p\n",dp);
-               put_nfs4_file(dp->dl_file);
-               free_stid(&dp->dl_stid, deleg_slab);
+               kmem_cache_free(deleg_slab, dp);
                num_delegations--;
        }
 }
@@ -351,16 +440,45 @@ static void unhash_stid(struct nfs4_stid *s)
 static void
 unhash_delegation(struct nfs4_delegation *dp)
 {
-       unhash_stid(&dp->dl_stid);
        list_del_init(&dp->dl_perclnt);
        spin_lock(&recall_lock);
        list_del_init(&dp->dl_perfile);
        list_del_init(&dp->dl_recall_lru);
        spin_unlock(&recall_lock);
        nfs4_put_deleg_lease(dp->dl_file);
+       put_nfs4_file(dp->dl_file);
+       dp->dl_file = NULL;
+}
+
+
+
+static void destroy_revoked_delegation(struct nfs4_delegation *dp)
+{
+       list_del_init(&dp->dl_recall_lru);
+       remove_stid(&dp->dl_stid);
        nfs4_put_delegation(dp);
 }
 
+static void destroy_delegation(struct nfs4_delegation *dp)
+{
+       unhash_delegation(dp);
+       remove_stid(&dp->dl_stid);
+       nfs4_put_delegation(dp);
+}
+
+static void revoke_delegation(struct nfs4_delegation *dp)
+{
+       struct nfs4_client *clp = dp->dl_stid.sc_client;
+
+       if (clp->cl_minorversion == 0)
+               destroy_delegation(dp);
+       else {
+               unhash_delegation(dp);
+               dp->dl_stid.sc_type = NFS4_REVOKED_DELEG_STID;
+               list_add(&dp->dl_recall_lru, &clp->cl_revoked);
+       }
+}
+
 /* 
  * SETCLIENTID state 
  */
@@ -501,7 +619,8 @@ static void close_generic_stateid(struct nfs4_ol_stateid *stp)
 
 static void free_generic_stateid(struct nfs4_ol_stateid *stp)
 {
-       free_stid(&stp->st_stid, stateid_slab);
+       remove_stid(&stp->st_stid);
+       kmem_cache_free(stateid_slab, stp);
 }
 
 static void release_lock_stateid(struct nfs4_ol_stateid *stp)
@@ -617,6 +736,28 @@ dump_sessionid(const char *fn, struct nfs4_sessionid *sessionid)
 }
 #endif
 
+/*
+ * Bump the seqid on cstate->replay_owner, and clear replay_owner if it
+ * won't be used for replay.
+ */
+void nfsd4_bump_seqid(struct nfsd4_compound_state *cstate, __be32 nfserr)
+{
+       struct nfs4_stateowner *so = cstate->replay_owner;
+
+       if (nfserr == nfserr_replay_me)
+               return;
+
+       if (!seqid_mutating_err(ntohl(nfserr))) {
+               cstate->replay_owner = NULL;
+               return;
+       }
+       if (!so)
+               return;
+       if (so->so_is_open_owner)
+               release_last_closed_stateid(openowner(so));
+       so->so_seqid++;
+       return;
+}
 
 static void
 gen_sessionid(struct nfsd4_session *ses)
@@ -657,17 +798,15 @@ free_session_slots(struct nfsd4_session *ses)
  * We don't actually need to cache the rpc and session headers, so we
  * can allocate a little less for each slot:
  */
-static inline int slot_bytes(struct nfsd4_channel_attrs *ca)
+static inline u32 slot_bytes(struct nfsd4_channel_attrs *ca)
 {
-       return ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
-}
+       u32 size;
 
-static int nfsd4_sanitize_slot_size(u32 size)
-{
-       size -= NFSD_MIN_HDR_SEQ_SZ; /* We don't cache the rpc header */
-       size = min_t(u32, size, NFSD_SLOT_CACHE_SIZE);
-
-       return size;
+       if (ca->maxresp_cached < NFSD_MIN_HDR_SEQ_SZ)
+               size = 0;
+       else
+               size = ca->maxresp_cached - NFSD_MIN_HDR_SEQ_SZ;
+       return size + sizeof(struct nfsd4_slot);
 }
 
 /*
@@ -675,12 +814,12 @@ static int nfsd4_sanitize_slot_size(u32 size)
  * re-negotiate active sessions and reduce their slot usage to make
  * room for new connections. For now we just fail the create session.
  */
-static int nfsd4_get_drc_mem(int slotsize, u32 num)
+static u32 nfsd4_get_drc_mem(struct nfsd4_channel_attrs *ca)
 {
+       u32 slotsize = slot_bytes(ca);
+       u32 num = ca->maxreqs;
        int avail;
 
-       num = min_t(u32, num, NFSD_MAX_SLOTS_PER_SESSION);
-
        spin_lock(&nfsd_drc_lock);
        avail = min((unsigned long)NFSD_MAX_MEM_PER_SESSION,
                    nfsd_drc_max_mem - nfsd_drc_mem_used);
@@ -691,15 +830,19 @@ static int nfsd4_get_drc_mem(int slotsize, u32 num)
        return num;
 }
 
-static void nfsd4_put_drc_mem(int slotsize, int num)
+static void nfsd4_put_drc_mem(struct nfsd4_channel_attrs *ca)
 {
+       int slotsize = slot_bytes(ca);
+
        spin_lock(&nfsd_drc_lock);
-       nfsd_drc_mem_used -= slotsize * num;
+       nfsd_drc_mem_used -= slotsize * ca->maxreqs;
        spin_unlock(&nfsd_drc_lock);
 }
 
-static struct nfsd4_session *__alloc_session(int slotsize, int numslots)
+static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *attrs)
 {
+       int numslots = attrs->maxreqs;
+       int slotsize = slot_bytes(attrs);
        struct nfsd4_session *new;
        int mem, i;
 
@@ -712,8 +855,7 @@ static struct nfsd4_session *__alloc_session(int slotsize, int numslots)
                return NULL;
        /* allocate each struct nfsd4_slot and data cache in one piece */
        for (i = 0; i < numslots; i++) {
-               mem = sizeof(struct nfsd4_slot) + slotsize;
-               new->se_slots[i] = kzalloc(mem, GFP_KERNEL);
+               new->se_slots[i] = kzalloc(slotsize, GFP_KERNEL);
                if (!new->se_slots[i])
                        goto out_free;
        }
@@ -725,21 +867,6 @@ out_free:
        return NULL;
 }
 
-static void init_forechannel_attrs(struct nfsd4_channel_attrs *new,
-                                  struct nfsd4_channel_attrs *req,
-                                  int numslots, int slotsize,
-                                  struct nfsd_net *nn)
-{
-       u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
-
-       new->maxreqs = numslots;
-       new->maxresp_cached = min_t(u32, req->maxresp_cached,
-                                       slotsize + NFSD_MIN_HDR_SEQ_SZ);
-       new->maxreq_sz = min_t(u32, req->maxreq_sz, maxrpc);
-       new->maxresp_sz = min_t(u32, req->maxresp_sz, maxrpc);
-       new->maxops = min_t(u32, req->maxops, NFSD_MAX_OPS_PER_COMPOUND);
-}
-
 static void free_conn(struct nfsd4_conn *c)
 {
        svc_xprt_put(c->cn_xprt);
@@ -756,8 +883,8 @@ static void nfsd4_conn_lost(struct svc_xpt_user *u)
                list_del(&c->cn_persession);
                free_conn(c);
        }
-       spin_unlock(&clp->cl_lock);
        nfsd4_probe_callback(clp);
+       spin_unlock(&clp->cl_lock);
 }
 
 static struct nfsd4_conn *alloc_conn(struct svc_rqst *rqstp, u32 flags)
@@ -841,59 +968,20 @@ static void nfsd4_del_conns(struct nfsd4_session *s)
 
 static void __free_session(struct nfsd4_session *ses)
 {
-       nfsd4_put_drc_mem(slot_bytes(&ses->se_fchannel), ses->se_fchannel.maxreqs);
        free_session_slots(ses);
        kfree(ses);
 }
 
-static void free_session(struct kref *kref)
+static void free_session(struct nfsd4_session *ses)
 {
-       struct nfsd4_session *ses;
-       struct nfsd_net *nn;
-
-       ses = container_of(kref, struct nfsd4_session, se_ref);
-       nn = net_generic(ses->se_client->net, nfsd_net_id);
+       struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
 
        lockdep_assert_held(&nn->client_lock);
        nfsd4_del_conns(ses);
+       nfsd4_put_drc_mem(&ses->se_fchannel);
        __free_session(ses);
 }
 
-void nfsd4_put_session(struct nfsd4_session *ses)
-{
-       struct nfsd_net *nn = net_generic(ses->se_client->net, nfsd_net_id);
-
-       spin_lock(&nn->client_lock);
-       nfsd4_put_session_locked(ses);
-       spin_unlock(&nn->client_lock);
-}
-
-static struct nfsd4_session *alloc_session(struct nfsd4_channel_attrs *fchan,
-                                          struct nfsd_net *nn)
-{
-       struct nfsd4_session *new;
-       int numslots, slotsize;
-       /*
-        * Note decreasing slot size below client's request may
-        * make it difficult for client to function correctly, whereas
-        * decreasing the number of slots will (just?) affect
-        * performance.  When short on memory we therefore prefer to
-        * decrease number of slots instead of their size.
-        */
-       slotsize = nfsd4_sanitize_slot_size(fchan->maxresp_cached);
-       numslots = nfsd4_get_drc_mem(slotsize, fchan->maxreqs);
-       if (numslots < 1)
-               return NULL;
-
-       new = __alloc_session(slotsize, numslots);
-       if (!new) {
-               nfsd4_put_drc_mem(slotsize, numslots);
-               return NULL;
-       }
-       init_forechannel_attrs(&new->se_fchannel, fchan, numslots, slotsize, nn);
-       return new;
-}
-
 static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, struct nfs4_client *clp, struct nfsd4_create_session *cses)
 {
        int idx;
@@ -908,7 +996,7 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
        new->se_flags = cses->flags;
        new->se_cb_prog = cses->callback_prog;
        new->se_cb_sec = cses->cb_sec;
-       kref_init(&new->se_ref);
+       atomic_set(&new->se_ref, 0);
        idx = hash_sessionid(&new->se_sessionid);
        spin_lock(&nn->client_lock);
        list_add(&new->se_hash, &nn->sessionid_hashtbl[idx]);
@@ -916,7 +1004,8 @@ static void init_session(struct svc_rqst *rqstp, struct nfsd4_session *new, stru
        list_add(&new->se_perclnt, &clp->cl_sessions);
        spin_unlock(&clp->cl_lock);
        spin_unlock(&nn->client_lock);
-
+       memcpy(&new->se_fchannel, &cses->fore_channel,
+                       sizeof(struct nfsd4_channel_attrs));
        if (cses->flags & SESSION4_BACK_CHAN) {
                struct sockaddr *sa = svc_addr(rqstp);
                /*
@@ -963,38 +1052,6 @@ unhash_session(struct nfsd4_session *ses)
        spin_unlock(&ses->se_client->cl_lock);
 }
 
-/* must be called under the client_lock */
-static inline void
-renew_client_locked(struct nfs4_client *clp)
-{
-       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
-
-       if (is_client_expired(clp)) {
-               WARN_ON(1);
-               printk("%s: client (clientid %08x/%08x) already expired\n",
-                       __func__,
-                       clp->cl_clientid.cl_boot,
-                       clp->cl_clientid.cl_id);
-               return;
-       }
-
-       dprintk("renewing client (clientid %08x/%08x)\n", 
-                       clp->cl_clientid.cl_boot, 
-                       clp->cl_clientid.cl_id);
-       list_move_tail(&clp->cl_lru, &nn->client_lru);
-       clp->cl_time = get_seconds();
-}
-
-static inline void
-renew_client(struct nfs4_client *clp)
-{
-       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
-
-       spin_lock(&nn->client_lock);
-       renew_client_locked(clp);
-       spin_unlock(&nn->client_lock);
-}
-
 /* SETCLIENTID and SETCLIENTID_CONFIRM Helper functions */
 static int
 STALE_CLIENTID(clientid_t *clid, struct nfsd_net *nn)
@@ -1038,7 +1095,8 @@ free_client(struct nfs4_client *clp)
                ses = list_entry(clp->cl_sessions.next, struct nfsd4_session,
                                se_perclnt);
                list_del(&ses->se_perclnt);
-               nfsd4_put_session_locked(ses);
+               WARN_ON_ONCE(atomic_read(&ses->se_ref));
+               free_session(ses);
        }
        free_svc_cred(&clp->cl_cred);
        kfree(clp->cl_name.data);
@@ -1046,29 +1104,12 @@ free_client(struct nfs4_client *clp)
        kfree(clp);
 }
 
-void
-release_session_client(struct nfsd4_session *session)
-{
-       struct nfs4_client *clp = session->se_client;
-       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
-
-       if (!atomic_dec_and_lock(&clp->cl_refcount, &nn->client_lock))
-               return;
-       if (is_client_expired(clp)) {
-               free_client(clp);
-               session->se_client = NULL;
-       } else
-               renew_client_locked(clp);
-       spin_unlock(&nn->client_lock);
-}
-
 /* must be called under the client_lock */
 static inline void
 unhash_client_locked(struct nfs4_client *clp)
 {
        struct nfsd4_session *ses;
 
-       mark_client_expired(clp);
        list_del(&clp->cl_lru);
        spin_lock(&clp->cl_lock);
        list_for_each_entry(ses, &clp->cl_sessions, se_perclnt)
@@ -1094,7 +1135,7 @@ destroy_client(struct nfs4_client *clp)
        spin_unlock(&recall_lock);
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
-               unhash_delegation(dp);
+               destroy_delegation(dp);
        }
        while (!list_empty(&clp->cl_openowners)) {
                oo = list_entry(clp->cl_openowners.next, struct nfs4_openowner, oo_perclient);
@@ -1110,8 +1151,8 @@ destroy_client(struct nfs4_client *clp)
                rb_erase(&clp->cl_namenode, &nn->unconf_name_tree);
        spin_lock(&nn->client_lock);
        unhash_client_locked(clp);
-       if (atomic_read(&clp->cl_refcount) == 0)
-               free_client(clp);
+       WARN_ON_ONCE(atomic_read(&clp->cl_refcount));
+       free_client(clp);
        spin_unlock(&nn->client_lock);
 }
 
@@ -1290,6 +1331,7 @@ static struct nfs4_client *create_client(struct xdr_netobj name,
        INIT_LIST_HEAD(&clp->cl_delegations);
        INIT_LIST_HEAD(&clp->cl_lru);
        INIT_LIST_HEAD(&clp->cl_callbacks);
+       INIT_LIST_HEAD(&clp->cl_revoked);
        spin_lock_init(&clp->cl_lock);
        nfsd4_init_callback(&clp->cl_cb_null);
        clp->cl_time = get_seconds();
@@ -1371,12 +1413,12 @@ move_to_confirmed(struct nfs4_client *clp)
 }
 
 static struct nfs4_client *
-find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
+find_client_in_id_table(struct list_head *tbl, clientid_t *clid, bool sessions)
 {
        struct nfs4_client *clp;
        unsigned int idhashval = clientid_hashval(clid->cl_id);
 
-       list_for_each_entry(clp, &nn->conf_id_hashtbl[idhashval], cl_idhash) {
+       list_for_each_entry(clp, &tbl[idhashval], cl_idhash) {
                if (same_clid(&clp->cl_clientid, clid)) {
                        if ((bool)clp->cl_minorversion != sessions)
                                return NULL;
@@ -1388,19 +1430,19 @@ find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 }
 
 static struct nfs4_client *
+find_confirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
+{
+       struct list_head *tbl = nn->conf_id_hashtbl;
+
+       return find_client_in_id_table(tbl, clid, sessions);
+}
+
+static struct nfs4_client *
 find_unconfirmed_client(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 {
-       struct nfs4_client *clp;
-       unsigned int idhashval = clientid_hashval(clid->cl_id);
+       struct list_head *tbl = nn->unconf_id_hashtbl;
 
-       list_for_each_entry(clp, &nn->unconf_id_hashtbl[idhashval], cl_idhash) {
-               if (same_clid(&clp->cl_clientid, clid)) {
-                       if ((bool)clp->cl_minorversion != sessions)
-                               return NULL;
-                       return clp;
-               }
-       }
-       return NULL;
+       return find_client_in_id_table(tbl, clid, sessions);
 }
 
 static bool clp_used_exchangeid(struct nfs4_client *clp)
@@ -1604,6 +1646,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
        default:                                /* checked by xdr code */
                WARN_ON_ONCE(1);
        case SP4_SSV:
+               return nfserr_encr_alg_unsupp;
        case SP4_MACH_CRED:
                return nfserr_serverfault;      /* no excuse :-/ */
        }
@@ -1745,10 +1788,55 @@ nfsd4_replay_create_session(struct nfsd4_create_session *cr_ses,
                                /* seqid, slotID, slotID, slotID, status */ \
                        5 ) * sizeof(__be32))
 
-static bool check_forechannel_attrs(struct nfsd4_channel_attrs fchannel)
+static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfsd_net *nn)
+{
+       u32 maxrpc = nn->nfsd_serv->sv_max_mesg;
+
+       if (ca->maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ)
+               return nfserr_toosmall;
+       if (ca->maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ)
+               return nfserr_toosmall;
+       ca->headerpadsz = 0;
+       ca->maxreq_sz = min_t(u32, ca->maxreq_sz, maxrpc);
+       ca->maxresp_sz = min_t(u32, ca->maxresp_sz, maxrpc);
+       ca->maxops = min_t(u32, ca->maxops, NFSD_MAX_OPS_PER_COMPOUND);
+       ca->maxresp_cached = min_t(u32, ca->maxresp_cached,
+                       NFSD_SLOT_CACHE_SIZE + NFSD_MIN_HDR_SEQ_SZ);
+       ca->maxreqs = min_t(u32, ca->maxreqs, NFSD_MAX_SLOTS_PER_SESSION);
+       /*
+        * Note decreasing slot size below client's request may make it
+        * difficult for client to function correctly, whereas
+        * decreasing the number of slots will (just?) affect
+        * performance.  When short on memory we therefore prefer to
+        * decrease number of slots instead of their size.  Clients that
+        * request larger slots than they need will get poor results:
+        */
+       ca->maxreqs = nfsd4_get_drc_mem(ca);
+       if (!ca->maxreqs)
+               return nfserr_jukebox;
+
+       return nfs_ok;
+}
+
+static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
 {
-       return fchannel.maxreq_sz < NFSD_MIN_REQ_HDR_SEQ_SZ
-               || fchannel.maxresp_sz < NFSD_MIN_RESP_HDR_SEQ_SZ;
+       ca->headerpadsz = 0;
+
+       /*
+        * These RPC_MAX_HEADER macros are overkill, especially since we
+        * don't even do gss on the backchannel yet.  But this is still
+        * less than 1k.  Tighten up this estimate in the unlikely event
+        * it turns out to be a problem for some client:
+        */
+       if (ca->maxreq_sz < NFS4_enc_cb_recall_sz + RPC_MAX_HEADER_WITH_AUTH)
+               return nfserr_toosmall;
+       if (ca->maxresp_sz < NFS4_dec_cb_recall_sz + RPC_MAX_REPHEADER_WITH_AUTH)
+               return nfserr_toosmall;
+       ca->maxresp_cached = 0;
+       if (ca->maxops < 2)
+               return nfserr_toosmall;
+
+       return nfs_ok;
 }
 
 __be32
@@ -1766,12 +1854,16 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
        if (cr_ses->flags & ~SESSION4_FLAG_MASK_A)
                return nfserr_inval;
-       if (check_forechannel_attrs(cr_ses->fore_channel))
-               return nfserr_toosmall;
-       new = alloc_session(&cr_ses->fore_channel, nn);
-       if (!new)
-               return nfserr_jukebox;
+       status = check_forechannel_attrs(&cr_ses->fore_channel, nn);
+       if (status)
+               return status;
+       status = check_backchannel_attrs(&cr_ses->back_channel);
+       if (status)
+               return status;
        status = nfserr_jukebox;
+       new = alloc_session(&cr_ses->fore_channel);
+       if (!new)
+               goto out_release_drc_mem;
        conn = alloc_conn_from_crses(rqstp, cr_ses);
        if (!conn)
                goto out_free_session;
@@ -1779,6 +1871,7 @@ nfsd4_create_session(struct svc_rqst *rqstp,
        nfs4_lock_state();
        unconf = find_unconfirmed_client(&cr_ses->clientid, true, nn);
        conf = find_confirmed_client(&cr_ses->clientid, true, nn);
+       WARN_ON_ONCE(conf && unconf);
 
        if (conf) {
                cs_slot = &conf->cl_cs_slot;
@@ -1805,8 +1898,12 @@ nfsd4_create_session(struct svc_rqst *rqstp,
                        goto out_free_conn;
                }
                old = find_confirmed_client_by_name(&unconf->cl_name, nn);
-               if (old)
+               if (old) {
+                       status = mark_client_expired(old);
+                       if (status)
+                               goto out_free_conn;
                        expire_client(old);
+               }
                move_to_confirmed(unconf);
                conf = unconf;
        } else {
@@ -1825,23 +1922,21 @@ nfsd4_create_session(struct svc_rqst *rqstp,
 
        memcpy(cr_ses->sessionid.data, new->se_sessionid.data,
               NFS4_MAX_SESSIONID_LEN);
-       memcpy(&cr_ses->fore_channel, &new->se_fchannel,
-               sizeof(struct nfsd4_channel_attrs));
        cs_slot->sl_seqid++;
        cr_ses->seqid = cs_slot->sl_seqid;
 
        /* cache solo and embedded create sessions under the state lock */
        nfsd4_cache_create_session(cr_ses, cs_slot, status);
        nfs4_unlock_state();
-out:
-       dprintk("%s returns %d\n", __func__, ntohl(status));
        return status;
 out_free_conn:
        nfs4_unlock_state();
        free_conn(conn);
 out_free_session:
        __free_session(new);
-       goto out;
+out_release_drc_mem:
+       nfsd4_put_drc_mem(&cr_ses->fore_channel);
+       return status;
 }
 
 static __be32 nfsd4_map_bcts_dir(u32 *dir)
@@ -1879,30 +1974,30 @@ __be32 nfsd4_bind_conn_to_session(struct svc_rqst *rqstp,
 {
        __be32 status;
        struct nfsd4_conn *conn;
+       struct nfsd4_session *session;
        struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
 
        if (!nfsd4_last_compound_op(rqstp))
                return nfserr_not_only_op;
+       nfs4_lock_state();
        spin_lock(&nn->client_lock);
-       cstate->session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
-       /* Sorta weird: we only need the refcnt'ing because new_conn acquires
-        * client_lock iself: */
-       if (cstate->session) {
-               nfsd4_get_session(cstate->session);
-               atomic_inc(&cstate->session->se_client->cl_refcount);
-       }
+       session = find_in_sessionid_hashtbl(&bcts->sessionid, SVC_NET(rqstp));
        spin_unlock(&nn->client_lock);
-       if (!cstate->session)
-               return nfserr_badsession;
-
+       status = nfserr_badsession;
+       if (!session)
+               goto out;
        status = nfsd4_map_bcts_dir(&bcts->dir);
        if (status)
-               return status;
+               goto out;
        conn = alloc_conn(rqstp, bcts->dir);
+       status = nfserr_jukebox;
        if (!conn)
-               return nfserr_jukebox;
-       nfsd4_init_conn(rqstp, conn, cstate->session);
-       return nfs_ok;
+               goto out;
+       nfsd4_init_conn(rqstp, conn, session);
+       status = nfs_ok;
+out:
+       nfs4_unlock_state();
+       return status;
 }
 
 static bool nfsd4_compound_in_session(struct nfsd4_session *session, struct nfs4_sessionid *sid)
@@ -1918,42 +2013,36 @@ nfsd4_destroy_session(struct svc_rqst *r,
                      struct nfsd4_destroy_session *sessionid)
 {
        struct nfsd4_session *ses;
-       __be32 status = nfserr_badsession;
+       __be32 status;
        struct nfsd_net *nn = net_generic(SVC_NET(r), nfsd_net_id);
 
-       /* Notes:
-        * - The confirmed nfs4_client->cl_sessionid holds destroyed sessinid
-        * - Should we return nfserr_back_chan_busy if waiting for
-        *   callbacks on to-be-destroyed session?
-        * - Do we need to clear any callback info from previous session?
-        */
-
+       nfs4_lock_state();
+       status = nfserr_not_only_op;
        if (nfsd4_compound_in_session(cstate->session, &sessionid->sessionid)) {
                if (!nfsd4_last_compound_op(r))
-                       return nfserr_not_only_op;
+                       goto out;
        }
        dump_sessionid(__func__, &sessionid->sessionid);
        spin_lock(&nn->client_lock);
        ses = find_in_sessionid_hashtbl(&sessionid->sessionid, SVC_NET(r));
-       if (!ses) {
-               spin_unlock(&nn->client_lock);
-               goto out;
-       }
-
+       status = nfserr_badsession;
+       if (!ses)
+               goto out_client_lock;
+       status = mark_session_dead_locked(ses);
+       if (status)
+               goto out_client_lock;
        unhash_session(ses);
        spin_unlock(&nn->client_lock);
 
-       nfs4_lock_state();
        nfsd4_probe_callback_sync(ses->se_client);
-       nfs4_unlock_state();
 
        spin_lock(&nn->client_lock);
-       nfsd4_del_conns(ses);
-       nfsd4_put_session_locked(ses);
-       spin_unlock(&nn->client_lock);
+       free_session(ses);
        status = nfs_ok;
+out_client_lock:
+       spin_unlock(&nn->client_lock);
 out:
-       dprintk("%s returns %d\n", __func__, ntohl(status));
+       nfs4_unlock_state();
        return status;
 }
 
@@ -2013,6 +2102,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
 {
        struct nfsd4_compoundres *resp = rqstp->rq_resp;
        struct nfsd4_session *session;
+       struct nfs4_client *clp;
        struct nfsd4_slot *slot;
        struct nfsd4_conn *conn;
        __be32 status;
@@ -2033,19 +2123,26 @@ nfsd4_sequence(struct svc_rqst *rqstp,
        status = nfserr_badsession;
        session = find_in_sessionid_hashtbl(&seq->sessionid, SVC_NET(rqstp));
        if (!session)
-               goto out;
+               goto out_no_session;
+       clp = session->se_client;
+       status = get_client_locked(clp);
+       if (status)
+               goto out_no_session;
+       status = nfsd4_get_session_locked(session);
+       if (status)
+               goto out_put_client;
 
        status = nfserr_too_many_ops;
        if (nfsd4_session_too_many_ops(rqstp, session))
-               goto out;
+               goto out_put_session;
 
        status = nfserr_req_too_big;
        if (nfsd4_request_too_big(rqstp, session))
-               goto out;
+               goto out_put_session;
 
        status = nfserr_badslot;
        if (seq->slotid >= session->se_fchannel.maxreqs)
-               goto out;
+               goto out_put_session;
 
        slot = session->se_slots[seq->slotid];
        dprintk("%s: slotid %d\n", __func__, seq->slotid);
@@ -2060,7 +2157,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
        if (status == nfserr_replay_cache) {
                status = nfserr_seq_misordered;
                if (!(slot->sl_flags & NFSD4_SLOT_INITIALIZED))
-                       goto out;
+                       goto out_put_session;
                cstate->slot = slot;
                cstate->session = session;
                /* Return the cached reply status and set cstate->status
@@ -2070,7 +2167,7 @@ nfsd4_sequence(struct svc_rqst *rqstp,
                goto out;
        }
        if (status)
-               goto out;
+               goto out_put_session;
 
        nfsd4_sequence_check_conn(conn, session);
        conn = NULL;
@@ -2087,27 +2184,27 @@ nfsd4_sequence(struct svc_rqst *rqstp,
        cstate->session = session;
 
 out:
-       /* Hold a session reference until done processing the compound. */
-       if (cstate->session) {
-               struct nfs4_client *clp = session->se_client;
-
-               nfsd4_get_session(cstate->session);
-               atomic_inc(&clp->cl_refcount);
-               switch (clp->cl_cb_state) {
-               case NFSD4_CB_DOWN:
-                       seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
-                       break;
-               case NFSD4_CB_FAULT:
-                       seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
-                       break;
-               default:
-                       seq->status_flags = 0;
-               }
+       switch (clp->cl_cb_state) {
+       case NFSD4_CB_DOWN:
+               seq->status_flags = SEQ4_STATUS_CB_PATH_DOWN;
+               break;
+       case NFSD4_CB_FAULT:
+               seq->status_flags = SEQ4_STATUS_BACKCHANNEL_FAULT;
+               break;
+       default:
+               seq->status_flags = 0;
        }
+       if (!list_empty(&clp->cl_revoked))
+               seq->status_flags |= SEQ4_STATUS_RECALLABLE_STATE_REVOKED;
+out_no_session:
        kfree(conn);
        spin_unlock(&nn->client_lock);
-       dprintk("%s: return %d\n", __func__, ntohl(status));
        return status;
+out_put_session:
+       nfsd4_put_session(session);
+out_put_client:
+       put_client_renew_locked(clp);
+       goto out_no_session;
 }
 
 __be32
@@ -2120,17 +2217,12 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
        nfs4_lock_state();
        unconf = find_unconfirmed_client(&dc->clientid, true, nn);
        conf = find_confirmed_client(&dc->clientid, true, nn);
+       WARN_ON_ONCE(conf && unconf);
 
        if (conf) {
                clp = conf;
 
-               if (!is_client_expired(conf) && client_has_state(conf)) {
-                       status = nfserr_clientid_busy;
-                       goto out;
-               }
-
-               /* rfc5661 18.50.3 */
-               if (cstate->session && conf == cstate->session->se_client) {
+               if (client_has_state(conf)) {
                        status = nfserr_clientid_busy;
                        goto out;
                }
@@ -2144,7 +2236,6 @@ nfsd4_destroy_clientid(struct svc_rqst *rqstp, struct nfsd4_compound_state *csta
        expire_client(clp);
 out:
        nfs4_unlock_state();
-       dprintk("%s return %d\n", __func__, ntohl(status));
        return status;
 }
 
@@ -2282,8 +2373,12 @@ nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
                expire_client(unconf);
        } else { /* case 3: normal case; new or rebooted client */
                conf = find_confirmed_client_by_name(&unconf->cl_name, nn);
-               if (conf)
+               if (conf) {
+                       status = mark_client_expired(conf);
+                       if (status)
+                               goto out;
                        expire_client(conf);
+               }
                move_to_confirmed(unconf);
                nfsd4_probe_callback(unconf);
        }
@@ -2303,7 +2398,6 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
        unsigned int hashval = file_hashval(ino);
 
        atomic_set(&fp->fi_ref, 1);
-       INIT_LIST_HEAD(&fp->fi_hash);
        INIT_LIST_HEAD(&fp->fi_stateids);
        INIT_LIST_HEAD(&fp->fi_delegations);
        fp->fi_inode = igrab(ino);
@@ -2312,7 +2406,7 @@ static void nfsd4_init_file(struct nfs4_file *fp, struct inode *ino)
        memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
        memset(fp->fi_access, 0, sizeof(fp->fi_access));
        spin_lock(&recall_lock);
-       list_add(&fp->fi_hash, &file_hashtbl[hashval]);
+       hlist_add_head(&fp->fi_hash, &file_hashtbl[hashval]);
        spin_unlock(&recall_lock);
 }
 
@@ -2498,7 +2592,7 @@ find_file(struct inode *ino)
        struct nfs4_file *fp;
 
        spin_lock(&recall_lock);
-       list_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
+       hlist_for_each_entry(fp, &file_hashtbl[hashval], fi_hash) {
                if (fp->fi_inode == ino) {
                        get_nfs4_file(fp);
                        spin_unlock(&recall_lock);
@@ -2521,8 +2615,6 @@ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
        struct nfs4_ol_stateid *stp;
        __be32 ret;
 
-       dprintk("NFSD: nfs4_share_conflict\n");
-
        fp = find_file(ino);
        if (!fp)
                return nfs_ok;
@@ -2541,6 +2633,9 @@ out:
 
 static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
 {
+       struct nfs4_client *clp = dp->dl_stid.sc_client;
+       struct nfsd_net *nn = net_generic(clp->net, nfsd_net_id);
+
        /* We're assuming the state code never drops its reference
         * without first removing the lease.  Since we're in this lease
         * callback (and since the lease code is serialized by the kernel
@@ -2548,7 +2643,7 @@ static void nfsd_break_one_deleg(struct nfs4_delegation *dp)
         * it's safe to take a reference: */
        atomic_inc(&dp->dl_count);
 
-       list_add_tail(&dp->dl_recall_lru, &del_recall_lru);
+       list_add_tail(&dp->dl_recall_lru, &nn->del_recall_lru);
 
        /* only place dl_time is set. protected by lock_flocks*/
        dp->dl_time = get_seconds();
@@ -2694,7 +2789,7 @@ static bool nfsd4_is_deleg_cur(struct nfsd4_open *open)
 }
 
 static __be32
-nfs4_check_deleg(struct nfs4_client *cl, struct nfs4_file *fp, struct nfsd4_open *open,
+nfs4_check_deleg(struct nfs4_client *cl, struct nfsd4_open *open,
                struct nfs4_delegation **dp)
 {
        int flags;
@@ -3019,7 +3114,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
        if (fp) {
                if ((status = nfs4_check_open(fp, open, &stp)))
                        goto out;
-               status = nfs4_check_deleg(cl, fp, open, &dp);
+               status = nfs4_check_deleg(cl, open, &dp);
                if (status)
                        goto out;
        } else {
@@ -3197,13 +3292,12 @@ nfs4_laundromat(struct nfsd_net *nn)
                                clientid_val = t;
                        break;
                }
-               if (atomic_read(&clp->cl_refcount)) {
+               if (mark_client_expired_locked(clp)) {
                        dprintk("NFSD: client in use (clientid %08x)\n",
                                clp->cl_clientid.cl_id);
                        continue;
                }
-               unhash_client_locked(clp);
-               list_add(&clp->cl_lru, &reaplist);
+               list_move(&clp->cl_lru, &reaplist);
        }
        spin_unlock(&nn->client_lock);
        list_for_each_safe(pos, next, &reaplist) {
@@ -3213,7 +3307,7 @@ nfs4_laundromat(struct nfsd_net *nn)
                expire_client(clp);
        }
        spin_lock(&recall_lock);
-       list_for_each_safe(pos, next, &del_recall_lru) {
+       list_for_each_safe(pos, next, &nn->del_recall_lru) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
                if (net_generic(dp->dl_stid.sc_client->net, nfsd_net_id) != nn)
                        continue;
@@ -3228,7 +3322,7 @@ nfs4_laundromat(struct nfsd_net *nn)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               unhash_delegation(dp);
+               revoke_delegation(dp);
        }
        test_val = nn->nfsd4_lease;
        list_for_each_safe(pos, next, &nn->close_lru) {
@@ -3271,16 +3365,6 @@ static inline __be32 nfs4_check_fh(struct svc_fh *fhp, struct nfs4_ol_stateid *s
        return nfs_ok;
 }
 
-static int
-STALE_STATEID(stateid_t *stateid, struct nfsd_net *nn)
-{
-       if (stateid->si_opaque.so_clid.cl_boot == nn->boot_time)
-               return 0;
-       dprintk("NFSD: stale stateid " STATEID_FMT "!\n",
-               STATEID_VAL(stateid));
-       return 1;
-}
-
 static inline int
 access_permit_read(struct nfs4_ol_stateid *stp)
 {
@@ -3397,13 +3481,24 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
        status = check_stateid_generation(stateid, &s->sc_stateid, 1);
        if (status)
                return status;
-       if (!(s->sc_type & (NFS4_OPEN_STID | NFS4_LOCK_STID)))
+       switch (s->sc_type) {
+       case NFS4_DELEG_STID:
+               return nfs_ok;
+       case NFS4_REVOKED_DELEG_STID:
+               return nfserr_deleg_revoked;
+       case NFS4_OPEN_STID:
+       case NFS4_LOCK_STID:
+               ols = openlockstateid(s);
+               if (ols->st_stateowner->so_is_open_owner
+                               && !(openowner(ols->st_stateowner)->oo_flags
+                                               & NFS4_OO_CONFIRMED))
+                       return nfserr_bad_stateid;
                return nfs_ok;
-       ols = openlockstateid(s);
-       if (ols->st_stateowner->so_is_open_owner
-           && !(openowner(ols->st_stateowner)->oo_flags & NFS4_OO_CONFIRMED))
+       default:
+               printk("unknown stateid type %x\n", s->sc_type);
+       case NFS4_CLOSED_STID:
                return nfserr_bad_stateid;
-       return nfs_ok;
+       }
 }
 
 static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
@@ -3411,19 +3506,20 @@ static __be32 nfsd4_lookup_stateid(stateid_t *stateid, unsigned char typemask,
                                   struct nfsd_net *nn)
 {
        struct nfs4_client *cl;
+       __be32 status;
 
        if (ZERO_STATEID(stateid) || ONE_STATEID(stateid))
                return nfserr_bad_stateid;
-       if (STALE_STATEID(stateid, nn))
+       status = lookup_clientid(&stateid->si_opaque.so_clid, sessions,
+                                                       nn, &cl);
+       if (status == nfserr_stale_clientid)
                return nfserr_stale_stateid;
-       cl = find_confirmed_client(&stateid->si_opaque.so_clid, sessions, nn);
-       if (!cl)
-               return nfserr_expired;
+       if (status)
+               return status;
        *s = find_stateid_by_type(cl, stateid, typemask);
        if (!*s)
                return nfserr_bad_stateid;
        return nfs_ok;
-
 }
 
 /*
@@ -3533,6 +3629,7 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 {
        stateid_t *stateid = &free_stateid->fr_stateid;
        struct nfs4_stid *s;
+       struct nfs4_delegation *dp;
        struct nfs4_client *cl = cstate->session->se_client;
        __be32 ret = nfserr_bad_stateid;
 
@@ -3554,6 +3651,11 @@ nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                else
                        ret = nfserr_locks_held;
                break;
+       case NFS4_REVOKED_DELEG_STID:
+               dp = delegstateid(s);
+               destroy_revoked_delegation(dp);
+               ret = nfs_ok;
+               break;
        default:
                ret = nfserr_bad_stateid;
        }
@@ -3578,10 +3680,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
        status = nfsd4_check_seqid(cstate, sop, seqid);
        if (status)
                return status;
-       if (stp->st_stid.sc_type == NFS4_CLOSED_STID)
+       if (stp->st_stid.sc_type == NFS4_CLOSED_STID
+               || stp->st_stid.sc_type == NFS4_REVOKED_DELEG_STID)
                /*
                 * "Closed" stateid's exist *only* to return
-                * nfserr_replay_me from the previous step.
+                * nfserr_replay_me from the previous step, and
+                * revoked delegations are kept only for free_stateid.
                 */
                return nfserr_bad_stateid;
        status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
@@ -3611,7 +3715,8 @@ nfs4_preprocess_seqid_op(struct nfsd4_compound_state *cstate, u32 seqid,
        if (status)
                return status;
        *stpp = openlockstateid(s);
-       cstate->replay_owner = (*stpp)->st_stateowner;
+       if (!nfsd4_has_session(cstate))
+               cstate->replay_owner = (*stpp)->st_stateowner;
 
        return nfs4_seqid_op_checks(cstate, stateid, seqid, *stpp);
 }
@@ -3669,6 +3774,7 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        nfsd4_client_record_create(oo->oo_owner.so_client);
        status = nfs_ok;
 out:
+       nfsd4_bump_seqid(cstate, status);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
        return status;
@@ -3752,31 +3858,12 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
        memcpy(&od->od_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
        status = nfs_ok;
 out:
+       nfsd4_bump_seqid(cstate, status);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
        return status;
 }
 
-void nfsd4_purge_closed_stateid(struct nfs4_stateowner *so)
-{
-       struct nfs4_openowner *oo;
-       struct nfs4_ol_stateid *s;
-
-       if (!so->so_is_open_owner)
-               return;
-       oo = openowner(so);
-       s = oo->oo_last_closed_stid;
-       if (!s)
-               return;
-       if (!(oo->oo_flags & NFS4_OO_PURGE_CLOSE)) {
-               /* Release the last_closed_stid on the next seqid bump: */
-               oo->oo_flags |= NFS4_OO_PURGE_CLOSE;
-               return;
-       }
-       oo->oo_flags &= ~NFS4_OO_PURGE_CLOSE;
-       release_last_closed_stateid(oo);
-}
-
 static void nfsd4_close_open_stateid(struct nfs4_ol_stateid *s)
 {
        unhash_open_stateid(s);
@@ -3805,28 +3892,30 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                        &close->cl_stateid,
                                        NFS4_OPEN_STID|NFS4_CLOSED_STID,
                                        &stp, nn);
+       nfsd4_bump_seqid(cstate, status);
        if (status)
                goto out; 
        oo = openowner(stp->st_stateowner);
-       status = nfs_ok;
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
        nfsd4_close_open_stateid(stp);
-       release_last_closed_stateid(oo);
-       oo->oo_last_closed_stid = stp;
+
+       if (cstate->minorversion) {
+               unhash_stid(&stp->st_stid);
+               free_generic_stateid(stp);
+       } else
+               oo->oo_last_closed_stid = stp;
 
        if (list_empty(&oo->oo_owner.so_stateids)) {
-               if (cstate->minorversion) {
+               if (cstate->minorversion)
                        release_openowner(oo);
-                       cstate->replay_owner = NULL;
-               } else {
+               else {
                        /*
                         * In the 4.0 case we need to keep the owners around a
                         * little while to handle CLOSE replay.
                         */
-                       if (list_empty(&oo->oo_owner.so_stateids))
-                               move_to_close_lru(oo, SVC_NET(rqstp));
+                       move_to_close_lru(oo, SVC_NET(rqstp));
                }
        }
 out:
@@ -3858,7 +3947,7 @@ nfsd4_delegreturn(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        if (status)
                goto out;
 
-       unhash_delegation(dp);
+       destroy_delegation(dp);
 out:
        nfs4_unlock_state();
 
@@ -4236,6 +4325,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
 out:
        if (status && new_state)
                release_lockowner(lock_sop);
+       nfsd4_bump_seqid(cstate, status);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
        if (file_lock)
@@ -4345,6 +4435,7 @@ __be32
 nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
            struct nfsd4_locku *locku)
 {
+       struct nfs4_lockowner *lo;
        struct nfs4_ol_stateid *stp;
        struct file *filp = NULL;
        struct file_lock *file_lock = NULL;
@@ -4377,9 +4468,10 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                status = nfserr_jukebox;
                goto out;
        }
+       lo = lockowner(stp->st_stateowner);
        locks_init_lock(file_lock);
        file_lock->fl_type = F_UNLCK;
-       file_lock->fl_owner = (fl_owner_t)lockowner(stp->st_stateowner);
+       file_lock->fl_owner = (fl_owner_t)lo;
        file_lock->fl_pid = current->tgid;
        file_lock->fl_file = filp;
        file_lock->fl_flags = FL_POSIX;
@@ -4390,21 +4482,21 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                                locku->lu_length);
        nfs4_transform_lock_offset(file_lock);
 
-       /*
-       *  Try to unlock the file in the VFS.
-       */
        err = vfs_lock_file(filp, F_SETLK, file_lock, NULL);
        if (err) {
                dprintk("NFSD: nfs4_locku: vfs_lock_file failed!\n");
                goto out_nfserr;
        }
-       /*
-       * OK, unlock succeeded; the only thing left to do is update the stateid.
-       */
        update_stateid(&stp->st_stid.sc_stateid);
        memcpy(&locku->lu_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
 
+       if (nfsd4_has_session(cstate) && !check_for_locks(stp->st_file, lo)) {
+               WARN_ON_ONCE(cstate->replay_owner);
+               release_lockowner(lo);
+       }
+
 out:
+       nfsd4_bump_seqid(cstate, status);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
        if (file_lock)
@@ -4597,6 +4689,8 @@ nfs4_check_open_reclaim(clientid_t *clid, bool sessions, struct nfsd_net *nn)
 
 u64 nfsd_forget_client(struct nfs4_client *clp, u64 max)
 {
+       if (mark_client_expired(clp))
+               return 0;
        expire_client(clp);
        return 1;
 }
@@ -4703,7 +4797,7 @@ u64 nfsd_forget_client_delegations(struct nfs4_client *clp, u64 max)
        spin_unlock(&recall_lock);
 
        list_for_each_entry_safe(dp, next, &victims, dl_recall_lru)
-               unhash_delegation(dp);
+               revoke_delegation(dp);
 
        return count;
 }
@@ -4775,12 +4869,6 @@ struct nfs4_client *nfsd_find_client(struct sockaddr_storage *addr, size_t addr_
 void
 nfs4_state_init(void)
 {
-       int i;
-
-       for (i = 0; i < FILE_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&file_hashtbl[i]);
-       }
-       INIT_LIST_HEAD(&del_recall_lru);
 }
 
 /*
@@ -4844,6 +4932,7 @@ static int nfs4_state_create_net(struct net *net)
        nn->unconf_name_tree = RB_ROOT;
        INIT_LIST_HEAD(&nn->client_lru);
        INIT_LIST_HEAD(&nn->close_lru);
+       INIT_LIST_HEAD(&nn->del_recall_lru);
        spin_lock_init(&nn->client_lock);
 
        INIT_DELAYED_WORK(&nn->laundromat_work, laundromat_main);
@@ -4956,16 +5045,14 @@ nfs4_state_shutdown_net(struct net *net)
 
        INIT_LIST_HEAD(&reaplist);
        spin_lock(&recall_lock);
-       list_for_each_safe(pos, next, &del_recall_lru) {
+       list_for_each_safe(pos, next, &nn->del_recall_lru) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               if (dp->dl_stid.sc_client->net != net)
-                       continue;
                list_move(&dp->dl_recall_lru, &reaplist);
        }
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               unhash_delegation(dp);
+               destroy_delegation(dp);
        }
 
        nfsd4_client_tracking_exit(net);
index 2502951..6cd86e0 100644 (file)
@@ -344,10 +344,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                           all 32 bits of 'nseconds'. */
                        READ_BUF(12);
                        len += 12;
-                       READ32(dummy32);
-                       if (dummy32)
-                               return nfserr_inval;
-                       READ32(iattr->ia_atime.tv_sec);
+                       READ64(iattr->ia_atime.tv_sec);
                        READ32(iattr->ia_atime.tv_nsec);
                        if (iattr->ia_atime.tv_nsec >= (u32)1000000000)
                                return nfserr_inval;
@@ -370,10 +367,7 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
                           all 32 bits of 'nseconds'. */
                        READ_BUF(12);
                        len += 12;
-                       READ32(dummy32);
-                       if (dummy32)
-                               return nfserr_inval;
-                       READ32(iattr->ia_mtime.tv_sec);
+                       READ64(iattr->ia_mtime.tv_sec);
                        READ32(iattr->ia_mtime.tv_nsec);
                        if (iattr->ia_mtime.tv_nsec >= (u32)1000000000)
                                return nfserr_inval;
@@ -804,6 +798,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
        open->op_iattr.ia_valid = 0;
        open->op_openowner = NULL;
 
+       open->op_xdr_error = 0;
        /* seqid, share_access, share_deny, clientid, ownerlen */
        READ_BUF(4);
        READ32(open->op_seqid);
@@ -1692,36 +1687,6 @@ static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
 } while (0)
 #define ADJUST_ARGS()          resp->p = p
 
-/*
- * Header routine to setup seqid operation replay cache
- */
-#define ENCODE_SEQID_OP_HEAD                                   \
-       __be32 *save;                                           \
-                                                               \
-       save = resp->p;
-
-/*
- * Routine for encoding the result of a "seqid-mutating" NFSv4 operation.  This
- * is where sequence id's are incremented, and the replay cache is filled.
- * Note that we increment sequence id's here, at the last moment, so we're sure
- * we know whether the error to be returned is a sequence id mutating error.
- */
-
-static void encode_seqid_op_tail(struct nfsd4_compoundres *resp, __be32 *save, __be32 nfserr)
-{
-       struct nfs4_stateowner *stateowner = resp->cstate.replay_owner;
-
-       if (seqid_mutating_err(ntohl(nfserr)) && stateowner) {
-               stateowner->so_seqid++;
-               stateowner->so_replay.rp_status = nfserr;
-               stateowner->so_replay.rp_buflen =
-                         (char *)resp->p - (char *)save;
-               memcpy(stateowner->so_replay.rp_buf, save,
-                       stateowner->so_replay.rp_buflen);
-               nfsd4_purge_closed_stateid(stateowner);
-       }
-}
-
 /* Encode as an array of strings the string given with components
  * separated @sep, escaped with esc_enter and esc_exit.
  */
@@ -2401,8 +2366,7 @@ out_acl:
        if (bmval1 & FATTR4_WORD1_TIME_ACCESS) {
                if ((buflen -= 12) < 0)
                        goto out_resource;
-               WRITE32(0);
-               WRITE32(stat.atime.tv_sec);
+               WRITE64((s64)stat.atime.tv_sec);
                WRITE32(stat.atime.tv_nsec);
        }
        if (bmval1 & FATTR4_WORD1_TIME_DELTA) {
@@ -2415,15 +2379,13 @@ out_acl:
        if (bmval1 & FATTR4_WORD1_TIME_METADATA) {
                if ((buflen -= 12) < 0)
                        goto out_resource;
-               WRITE32(0);
-               WRITE32(stat.ctime.tv_sec);
+               WRITE64((s64)stat.ctime.tv_sec);
                WRITE32(stat.ctime.tv_nsec);
        }
        if (bmval1 & FATTR4_WORD1_TIME_MODIFY) {
                if ((buflen -= 12) < 0)
                        goto out_resource;
-               WRITE32(0);
-               WRITE32(stat.mtime.tv_sec);
+               WRITE64((s64)stat.mtime.tv_sec);
                WRITE32(stat.mtime.tv_nsec);
        }
        if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) {
@@ -2661,12 +2623,9 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
 static __be32
 nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close)
 {
-       ENCODE_SEQID_OP_HEAD;
-
        if (!nfserr)
                nfsd4_encode_stateid(resp, &close->cl_stateid);
 
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
@@ -2762,14 +2721,11 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie
 static __be32
 nfsd4_encode_lock(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lock *lock)
 {
-       ENCODE_SEQID_OP_HEAD;
-
        if (!nfserr)
                nfsd4_encode_stateid(resp, &lock->lk_resp_stateid);
        else if (nfserr == nfserr_denied)
                nfsd4_encode_lock_denied(resp, &lock->lk_denied);
 
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
@@ -2784,12 +2740,9 @@ nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
 static __be32
 nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku)
 {
-       ENCODE_SEQID_OP_HEAD;
-
        if (!nfserr)
                nfsd4_encode_stateid(resp, &locku->lu_stateid);
 
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
@@ -2812,7 +2765,6 @@ static __be32
 nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open)
 {
        __be32 *p;
-       ENCODE_SEQID_OP_HEAD;
 
        if (nfserr)
                goto out;
@@ -2884,31 +2836,24 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
        }
        /* XXX save filehandle here */
 out:
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
 static __be32
 nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc)
 {
-       ENCODE_SEQID_OP_HEAD;
-
        if (!nfserr)
                nfsd4_encode_stateid(resp, &oc->oc_resp_stateid);
 
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
 static __be32
 nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od)
 {
-       ENCODE_SEQID_OP_HEAD;
-
        if (!nfserr)
                nfsd4_encode_stateid(resp, &od->od_stateid);
 
-       encode_seqid_op_tail(resp, save, nfserr);
        return nfserr;
 }
 
@@ -3140,10 +3085,11 @@ static __be32
 nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
                         __be32 nfserr, struct svc_export *exp)
 {
-       u32 i, nflavs;
+       u32 i, nflavs, supported;
        struct exp_flavor_info *flavs;
        struct exp_flavor_info def_flavs[2];
-       __be32 *p;
+       __be32 *p, *flavorsp;
+       static bool report = true;
 
        if (nfserr)
                goto out;
@@ -3167,33 +3113,40 @@ nfsd4_do_encode_secinfo(struct nfsd4_compoundres *resp,
                }
        }
 
+       supported = 0;
        RESERVE_SPACE(4);
-       WRITE32(nflavs);
+       flavorsp = p++;         /* to be backfilled later */
        ADJUST_ARGS();
+
        for (i = 0; i < nflavs; i++) {
+               rpc_authflavor_t pf = flavs[i].pseudoflavor;
                struct rpcsec_gss_info info;
 
-               if (rpcauth_get_gssinfo(flavs[i].pseudoflavor, &info) == 0) {
-                       RESERVE_SPACE(4);
+               if (rpcauth_get_gssinfo(pf, &info) == 0) {
+                       supported++;
+                       RESERVE_SPACE(4 + 4 + info.oid.len + 4 + 4);
                        WRITE32(RPC_AUTH_GSS);
-                       ADJUST_ARGS();
-                       RESERVE_SPACE(4 + info.oid.len);
                        WRITE32(info.oid.len);
                        WRITEMEM(info.oid.data, info.oid.len);
-                       ADJUST_ARGS();
-                       RESERVE_SPACE(4);
                        WRITE32(info.qop);
-                       ADJUST_ARGS();
-                       RESERVE_SPACE(4);
                        WRITE32(info.service);
                        ADJUST_ARGS();
-               } else {
+               } else if (pf < RPC_AUTH_MAXFLAVOR) {
+                       supported++;
                        RESERVE_SPACE(4);
-                       WRITE32(flavs[i].pseudoflavor);
+                       WRITE32(pf);
                        ADJUST_ARGS();
+               } else {
+                       if (report)
+                               pr_warn("NFS: SECINFO: security flavor %u "
+                                       "is not supported\n", pf);
                }
        }
 
+       if (nflavs != supported)
+               report = false;
+       *flavorsp = htonl(supported);
+
 out:
        if (exp)
                exp_put(exp);
@@ -3564,6 +3517,7 @@ __be32 nfsd4_check_resp_size(struct nfsd4_compoundres *resp, u32 pad)
 void
 nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
 {
+       struct nfs4_stateowner *so = resp->cstate.replay_owner;
        __be32 *statp;
        __be32 *p;
 
@@ -3580,6 +3534,11 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
        /* nfsd4_check_drc_limit guarantees enough room for error status */
        if (!op->status)
                op->status = nfsd4_check_resp_size(resp, 0);
+       if (so) {
+               so->so_replay.rp_status = op->status;
+               so->so_replay.rp_buflen = (char *)resp->p - (char *)(statp+1);
+               memcpy(so->so_replay.rp_buf, statp+1, so->so_replay.rp_buflen);
+       }
 status:
        /*
         * Note: We write the status directly, instead of using WRITE32(),
@@ -3681,7 +3640,7 @@ nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compo
                        cs->slot->sl_flags &= ~NFSD4_SLOT_INUSE;
                }
                /* Renew the clientid on success and on replay */
-               release_session_client(cs->session);
+               put_client_renew(cs->session->se_client);
                nfsd4_put_session(cs->session);
        }
        return 1;
index ca05f6d..e76244e 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/slab.h>
 #include <linux/sunrpc/addr.h>
 #include <linux/highmem.h>
+#include <linux/log2.h>
+#include <linux/hash.h>
 #include <net/checksum.h>
 
 #include "nfsd.h"
 
 #define NFSDDBG_FACILITY       NFSDDBG_REPCACHE
 
-#define HASHSIZE               64
+/*
+ * We use this value to determine the number of hash buckets from the max
+ * cache size, the idea being that when the cache is at its maximum number
+ * of entries, then this should be the average number of entries per bucket.
+ */
+#define TARGET_BUCKET_SIZE     64
 
 static struct hlist_head *     cache_hash;
 static struct list_head        lru_head;
 static struct kmem_cache       *drc_slab;
-static unsigned int            num_drc_entries;
+
+/* max number of entries allowed in the cache */
 static unsigned int            max_drc_entries;
 
+/* number of significant bits in the hash value */
+static unsigned int            maskbits;
+
 /*
- * Calculate the hash index from an XID.
+ * Stats and other tracking of the duplicate reply cache. All of these and
+ * the "rc" fields in nfsdstats are protected by the cache_lock.
  */
-static inline u32 request_hash(u32 xid)
-{
-       u32 h = xid;
-       h ^= (xid >> 24);
-       return h & (HASHSIZE-1);
-}
+
+/* total number of entries */
+static unsigned int            num_drc_entries;
+
+/* cache misses due only to checksum comparison failures */
+static unsigned int            payload_misses;
+
+/* amount of memory (in bytes) currently consumed by the DRC */
+static unsigned int            drc_mem_usage;
+
+/* longest hash chain seen */
+static unsigned int            longest_chain;
+
+/* size of cache when we saw the longest hash chain */
+static unsigned int            longest_chain_cachesize;
 
 static int     nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *vec);
 static void    cache_cleaner_func(struct work_struct *unused);
 static int     nfsd_reply_cache_shrink(struct shrinker *shrink,
                                        struct shrink_control *sc);
 
-struct shrinker nfsd_reply_cache_shrinker = {
+static struct shrinker nfsd_reply_cache_shrinker = {
        .shrink = nfsd_reply_cache_shrink,
        .seeks  = 1,
 };
@@ -82,6 +103,16 @@ nfsd_cache_size_limit(void)
        return min_t(unsigned int, limit, 256*1024);
 }
 
+/*
+ * Compute the number of hash buckets we need. Divide the max cachesize by
+ * the "target" max bucket size, and round up to next power of two.
+ */
+static unsigned int
+nfsd_hashsize(unsigned int limit)
+{
+       return roundup_pow_of_two(limit / TARGET_BUCKET_SIZE);
+}
+
 static struct svc_cacherep *
 nfsd_reply_cache_alloc(void)
 {
@@ -100,12 +131,15 @@ nfsd_reply_cache_alloc(void)
 static void
 nfsd_reply_cache_free_locked(struct svc_cacherep *rp)
 {
-       if (rp->c_type == RC_REPLBUFF)
+       if (rp->c_type == RC_REPLBUFF && rp->c_replvec.iov_base) {
+               drc_mem_usage -= rp->c_replvec.iov_len;
                kfree(rp->c_replvec.iov_base);
+       }
        if (!hlist_unhashed(&rp->c_hash))
                hlist_del(&rp->c_hash);
        list_del(&rp->c_lru);
        --num_drc_entries;
+       drc_mem_usage -= sizeof(*rp);
        kmem_cache_free(drc_slab, rp);
 }
 
@@ -119,9 +153,13 @@ nfsd_reply_cache_free(struct svc_cacherep *rp)
 
 int nfsd_reply_cache_init(void)
 {
+       unsigned int hashsize;
+
        INIT_LIST_HEAD(&lru_head);
        max_drc_entries = nfsd_cache_size_limit();
        num_drc_entries = 0;
+       hashsize = nfsd_hashsize(max_drc_entries);
+       maskbits = ilog2(hashsize);
 
        register_shrinker(&nfsd_reply_cache_shrinker);
        drc_slab = kmem_cache_create("nfsd_drc", sizeof(struct svc_cacherep),
@@ -129,7 +167,7 @@ int nfsd_reply_cache_init(void)
        if (!drc_slab)
                goto out_nomem;
 
-       cache_hash = kcalloc(HASHSIZE, sizeof(struct hlist_head), GFP_KERNEL);
+       cache_hash = kcalloc(hashsize, sizeof(struct hlist_head), GFP_KERNEL);
        if (!cache_hash)
                goto out_nomem;
 
@@ -180,7 +218,7 @@ static void
 hash_refile(struct svc_cacherep *rp)
 {
        hlist_del_init(&rp->c_hash);
-       hlist_add_head(&rp->c_hash, cache_hash + request_hash(rp->c_xid));
+       hlist_add_head(&rp->c_hash, cache_hash + hash_32(rp->c_xid, maskbits));
 }
 
 static inline bool
@@ -273,6 +311,26 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
        return csum;
 }
 
+static bool
+nfsd_cache_match(struct svc_rqst *rqstp, __wsum csum, struct svc_cacherep *rp)
+{
+       /* Check RPC header info first */
+       if (rqstp->rq_xid != rp->c_xid || rqstp->rq_proc != rp->c_proc ||
+           rqstp->rq_prot != rp->c_prot || rqstp->rq_vers != rp->c_vers ||
+           rqstp->rq_arg.len != rp->c_len ||
+           !rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) ||
+           rpc_get_port(svc_addr(rqstp)) != rpc_get_port((struct sockaddr *)&rp->c_addr))
+               return false;
+
+       /* compare checksum of NFS data */
+       if (csum != rp->c_csum) {
+               ++payload_misses;
+               return false;
+       }
+
+       return true;
+}
+
 /*
  * Search the request hash for an entry that matches the given rqstp.
  * Must be called with cache_lock held. Returns the found entry or
@@ -281,23 +339,30 @@ nfsd_cache_csum(struct svc_rqst *rqstp)
 static struct svc_cacherep *
 nfsd_cache_search(struct svc_rqst *rqstp, __wsum csum)
 {
-       struct svc_cacherep     *rp;
+       struct svc_cacherep     *rp, *ret = NULL;
        struct hlist_head       *rh;
-       __be32                  xid = rqstp->rq_xid;
-       u32                     proto =  rqstp->rq_prot,
-                               vers = rqstp->rq_vers,
-                               proc = rqstp->rq_proc;
+       unsigned int            entries = 0;
 
-       rh = &cache_hash[request_hash(xid)];
+       rh = &cache_hash[hash_32(rqstp->rq_xid, maskbits)];
        hlist_for_each_entry(rp, rh, c_hash) {
-               if (xid == rp->c_xid && proc == rp->c_proc &&
-                   proto == rp->c_prot && vers == rp->c_vers &&
-                   rqstp->rq_arg.len == rp->c_len && csum == rp->c_csum &&
-                   rpc_cmp_addr(svc_addr(rqstp), (struct sockaddr *)&rp->c_addr) &&
-                   rpc_get_port(svc_addr(rqstp)) == rpc_get_port((struct sockaddr *)&rp->c_addr))
-                       return rp;
+               ++entries;
+               if (nfsd_cache_match(rqstp, csum, rp)) {
+                       ret = rp;
+                       break;
+               }
        }
-       return NULL;
+
+       /* tally hash chain length stats */
+       if (entries > longest_chain) {
+               longest_chain = entries;
+               longest_chain_cachesize = num_drc_entries;
+       } else if (entries == longest_chain) {
+               /* prefer to keep the smallest cachesize possible here */
+               longest_chain_cachesize = min(longest_chain_cachesize,
+                                               num_drc_entries);
+       }
+
+       return ret;
 }
 
 /*
@@ -318,55 +383,55 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
        __wsum                  csum;
        unsigned long           age;
        int type = rqstp->rq_cachetype;
-       int rtn;
+       int rtn = RC_DOIT;
 
        rqstp->rq_cacherep = NULL;
        if (type == RC_NOCACHE) {
                nfsdstats.rcnocache++;
-               return RC_DOIT;
+               return rtn;
        }
 
        csum = nfsd_cache_csum(rqstp);
 
+       /*
+        * Since the common case is a cache miss followed by an insert,
+        * preallocate an entry. First, try to reuse the first entry on the LRU;
+        * if that works, then go ahead and prune the LRU list.
+        */
        spin_lock(&cache_lock);
-       rtn = RC_DOIT;
-
-       rp = nfsd_cache_search(rqstp, csum);
-       if (rp)
-               goto found_entry;
-
-       /* Try to use the first entry on the LRU */
        if (!list_empty(&lru_head)) {
                rp = list_first_entry(&lru_head, struct svc_cacherep, c_lru);
                if (nfsd_cache_entry_expired(rp) ||
                    num_drc_entries >= max_drc_entries) {
                        lru_put_end(rp);
                        prune_cache_entries();
-                       goto setup_entry;
+                       goto search_cache;
                }
        }
 
-       /* Drop the lock and allocate a new entry */
+       /* No expired ones available, allocate a new one. */
        spin_unlock(&cache_lock);
        rp = nfsd_reply_cache_alloc();
-       if (!rp) {
-               dprintk("nfsd: unable to allocate DRC entry!\n");
-               return RC_DOIT;
-       }
        spin_lock(&cache_lock);
-       ++num_drc_entries;
+       if (likely(rp)) {
+               ++num_drc_entries;
+               drc_mem_usage += sizeof(*rp);
+       }
 
-       /*
-        * Must search again just in case someone inserted one
-        * after we dropped the lock above.
-        */
+search_cache:
        found = nfsd_cache_search(rqstp, csum);
        if (found) {
-               nfsd_reply_cache_free_locked(rp);
+               if (likely(rp))
+                       nfsd_reply_cache_free_locked(rp);
                rp = found;
                goto found_entry;
        }
 
+       if (!rp) {
+               dprintk("nfsd: unable to allocate DRC entry!\n");
+               goto out;
+       }
+
        /*
         * We're keeping the one we just allocated. Are we now over the
         * limit? Prune one off the tip of the LRU in trade for the one we
@@ -376,7 +441,6 @@ nfsd_cache_lookup(struct svc_rqst *rqstp)
                nfsd_reply_cache_free_locked(list_first_entry(&lru_head,
                                                struct svc_cacherep, c_lru));
 
-setup_entry:
        nfsdstats.rcmisses++;
        rqstp->rq_cacherep = rp;
        rp->c_state = RC_INPROG;
@@ -394,6 +458,7 @@ setup_entry:
 
        /* release any buffer */
        if (rp->c_type == RC_REPLBUFF) {
+               drc_mem_usage -= rp->c_replvec.iov_len;
                kfree(rp->c_replvec.iov_base);
                rp->c_replvec.iov_base = NULL;
        }
@@ -462,6 +527,7 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
        struct svc_cacherep *rp = rqstp->rq_cacherep;
        struct kvec     *resv = &rqstp->rq_res.head[0], *cachv;
        int             len;
+       size_t          bufsize = 0;
 
        if (!rp)
                return;
@@ -483,19 +549,21 @@ nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp)
                break;
        case RC_REPLBUFF:
                cachv = &rp->c_replvec;
-               cachv->iov_base = kmalloc(len << 2, GFP_KERNEL);
+               bufsize = len << 2;
+               cachv->iov_base = kmalloc(bufsize, GFP_KERNEL);
                if (!cachv->iov_base) {
                        nfsd_reply_cache_free(rp);
                        return;
                }
-               cachv->iov_len = len << 2;
-               memcpy(cachv->iov_base, statp, len << 2);
+               cachv->iov_len = bufsize;
+               memcpy(cachv->iov_base, statp, bufsize);
                break;
        case RC_NOCACHE:
                nfsd_reply_cache_free(rp);
                return;
        }
        spin_lock(&cache_lock);
+       drc_mem_usage += bufsize;
        lru_put_end(rp);
        rp->c_secure = rqstp->rq_secure;
        rp->c_type = cachetype;
@@ -523,3 +591,30 @@ nfsd_cache_append(struct svc_rqst *rqstp, struct kvec *data)
        vec->iov_len += data->iov_len;
        return 1;
 }
+
+/*
+ * Note that fields may be added, removed or reordered in the future. Programs
+ * scraping this file for info should test the labels to ensure they're
+ * getting the correct field.
+ */
+static int nfsd_reply_cache_stats_show(struct seq_file *m, void *v)
+{
+       spin_lock(&cache_lock);
+       seq_printf(m, "max entries:           %u\n", max_drc_entries);
+       seq_printf(m, "num entries:           %u\n", num_drc_entries);
+       seq_printf(m, "hash buckets:          %u\n", 1 << maskbits);
+       seq_printf(m, "mem usage:             %u\n", drc_mem_usage);
+       seq_printf(m, "cache hits:            %u\n", nfsdstats.rchits);
+       seq_printf(m, "cache misses:          %u\n", nfsdstats.rcmisses);
+       seq_printf(m, "not cached:            %u\n", nfsdstats.rcnocache);
+       seq_printf(m, "payload misses:        %u\n", payload_misses);
+       seq_printf(m, "longest chain len:     %u\n", longest_chain);
+       seq_printf(m, "cachesize at longest:  %u\n", longest_chain_cachesize);
+       spin_unlock(&cache_lock);
+       return 0;
+}
+
+int nfsd_reply_cache_stats_open(struct inode *inode, struct file *file)
+{
+       return single_open(file, nfsd_reply_cache_stats_show, NULL);
+}
index 5bee031..7f55517 100644 (file)
@@ -35,6 +35,7 @@ enum {
        NFSD_Threads,
        NFSD_Pool_Threads,
        NFSD_Pool_Stats,
+       NFSD_Reply_Cache_Stats,
        NFSD_Versions,
        NFSD_Ports,
        NFSD_MaxBlkSize,
@@ -212,6 +213,13 @@ static const struct file_operations pool_stats_operations = {
        .owner          = THIS_MODULE,
 };
 
+static struct file_operations reply_cache_stats_operations = {
+       .open           = nfsd_reply_cache_stats_open,
+       .read           = seq_read,
+       .llseek         = seq_lseek,
+       .release        = single_release,
+};
+
 /*----------------------------------------------------------------------------*/
 /*
  * payload - write methods
@@ -1047,6 +1055,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent)
                [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO},
+               [NFSD_Reply_Cache_Stats] = {"reply_cache_stats", &reply_cache_stats_operations, S_IRUGO},
                [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR},
                [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO},
                [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO},
@@ -1102,8 +1111,10 @@ static int create_proc_exports_entry(void)
                return -ENOMEM;
        entry = proc_create("exports", 0, entry,
                                 &exports_proc_operations);
-       if (!entry)
+       if (!entry) {
+               remove_proc_entry("fs/nfs", NULL);
                return -ENOMEM;
+       }
        return 0;
 }
 #else /* CONFIG_PROC_FS */
index 1a8c739..274e2a1 100644 (file)
@@ -79,6 +79,8 @@ struct nfs4_stid {
 #define NFS4_DELEG_STID 4
 /* For an open stateid kept around *only* to process close replays: */
 #define NFS4_CLOSED_STID 8
+/* For a deleg stateid kept around only to process free_stateid's: */
+#define NFS4_REVOKED_DELEG_STID 16
        unsigned char sc_type;
        stateid_t sc_stateid;
        struct nfs4_client *sc_client;
@@ -194,9 +196,11 @@ struct nfsd4_conn {
 };
 
 struct nfsd4_session {
-       struct kref             se_ref;
+       atomic_t                se_ref;
        struct list_head        se_hash;        /* hash by sessionid */
        struct list_head        se_perclnt;
+/* See SESSION4_PERSIST, etc. for standard flags; this is internal-only: */
+#define NFS4_SESSION_DEAD      0x010
        u32                     se_flags;
        struct nfs4_client      *se_client;
        struct nfs4_sessionid   se_sessionid;
@@ -236,6 +240,7 @@ struct nfs4_client {
        struct list_head        cl_openowners;
        struct idr              cl_stateids;    /* stateid lookup */
        struct list_head        cl_delegations;
+       struct list_head        cl_revoked;     /* unacknowledged, revoked 4.1 state */
        struct list_head        cl_lru;         /* tail queue */
        struct xdr_netobj       cl_name;        /* id generated by client */
        nfs4_verifier           cl_verifier;    /* generated by client */
@@ -286,18 +291,6 @@ struct nfs4_client {
        struct net              *net;
 };
 
-static inline void
-mark_client_expired(struct nfs4_client *clp)
-{
-       clp->cl_time = 0;
-}
-
-static inline bool
-is_client_expired(struct nfs4_client *clp)
-{
-       return clp->cl_time == 0;
-}
-
 /* struct nfs4_client_reset
  * one per old client. Populates reset_str_hashtbl. Filled from conf_id_hashtbl
  * upon lease reset, or from upcall to state_daemon (to read in state
@@ -365,7 +358,6 @@ struct nfs4_openowner {
        struct nfs4_ol_stateid *oo_last_closed_stid;
        time_t                  oo_time; /* time of placement on so_close_lru */
 #define NFS4_OO_CONFIRMED   1
-#define NFS4_OO_PURGE_CLOSE 2
 #define NFS4_OO_NEW         4
        unsigned char           oo_flags;
 };
@@ -373,7 +365,7 @@ struct nfs4_openowner {
 struct nfs4_lockowner {
        struct nfs4_stateowner  lo_owner; /* must be first element */
        struct list_head        lo_owner_ino_hash; /* hash by owner,file */
-       struct list_head        lo_perstateid; /* for lockowners only */
+       struct list_head        lo_perstateid;
        struct list_head        lo_list; /* for temporary uses */
 };
 
@@ -390,7 +382,7 @@ static inline struct nfs4_lockowner * lockowner(struct nfs4_stateowner *so)
 /* nfs4_file: a file opened by some number of (open) nfs4_stateowners. */
 struct nfs4_file {
        atomic_t                fi_ref;
-       struct list_head        fi_hash;    /* hash by "struct inode *" */
+       struct hlist_node       fi_hash;    /* hash by "struct inode *" */
        struct list_head        fi_stateids;
        struct list_head        fi_delegations;
        /* One each for O_RDONLY, O_WRONLY, O_RDWR: */
@@ -486,8 +478,7 @@ extern void nfs4_put_delegation(struct nfs4_delegation *dp);
 extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(const char *name,
                                                        struct nfsd_net *nn);
 extern bool nfs4_has_reclaimed_state(const char *name, struct nfsd_net *nn);
-extern void release_session_client(struct nfsd4_session *);
-extern void nfsd4_purge_closed_stateid(struct nfs4_stateowner *);
+extern void put_client_renew(struct nfs4_client *clp);
 
 /* nfs4recover operations */
 extern int nfsd4_client_tracking_init(struct net *net);
index 2b2e239..84ce601 100644 (file)
@@ -1758,10 +1758,6 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        tdentry = tfhp->fh_dentry;
        tdir = tdentry->d_inode;
 
-       err = (rqstp->rq_vers == 2) ? nfserr_acces : nfserr_xdev;
-       if (ffhp->fh_export != tfhp->fh_export)
-               goto out;
-
        err = nfserr_perm;
        if (!flen || isdotent(fname, flen) || !tlen || isdotent(tname, tlen))
                goto out;
@@ -1802,6 +1798,8 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
        host_err = -EXDEV;
        if (ffhp->fh_export->ex_path.mnt != tfhp->fh_export->ex_path.mnt)
                goto out_dput_new;
+       if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
+               goto out_dput_new;
 
        host_err = nfsd_break_lease(odentry->d_inode);
        if (host_err)
index 546f898..3b271d2 100644 (file)
@@ -184,7 +184,6 @@ struct nfsd4_lock {
 #define lk_old_lock_stateid     v.old.lock_stateid
 #define lk_old_lock_seqid       v.old.lock_seqid
 
-#define lk_rflags       u.ok.rflags
 #define lk_resp_stateid u.ok.stateid
 #define lk_denied       u.denied
 
@@ -237,6 +236,7 @@ struct nfsd4_open {
        u32             op_share_deny;      /* request */
        u32             op_deleg_want;      /* request */
        stateid_t       op_stateid;         /* response */
+       __be32          op_xdr_error;       /* see nfsd4_open_omfg() */
        u32             op_recall;          /* recall */
        struct nfsd4_change_info  op_cinfo; /* response */
        u32             op_rflags;          /* response */
@@ -623,6 +623,7 @@ extern __be32 nfsd4_test_stateid(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *, struct nfsd4_test_stateid *test_stateid);
 extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
                struct nfsd4_compound_state *, struct nfsd4_free_stateid *free_stateid);
+extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
 #endif
 
 /*
diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h
new file mode 100644 (file)
index 0000000..c5c55df
--- /dev/null
@@ -0,0 +1,23 @@
+#define NFS4_MAXTAGLEN         20
+
+#define NFS4_enc_cb_null_sz            0
+#define NFS4_dec_cb_null_sz            0
+#define cb_compound_enc_hdr_sz         4
+#define cb_compound_dec_hdr_sz         (3 + (NFS4_MAXTAGLEN >> 2))
+#define sessionid_sz                   (NFS4_MAX_SESSIONID_LEN >> 2)
+#define cb_sequence_enc_sz             (sessionid_sz + 4 +             \
+                                       1 /* no referring calls list yet */)
+#define cb_sequence_dec_sz             (op_dec_sz + sessionid_sz + 4)
+
+#define op_enc_sz                      1
+#define op_dec_sz                      2
+#define enc_nfs4_fh_sz                 (1 + (NFS4_FHSIZE >> 2))
+#define enc_stateid_sz                 (NFS4_STATEID_SIZE >> 2)
+#define NFS4_enc_cb_recall_sz          (cb_compound_enc_hdr_sz +       \
+                                       cb_sequence_enc_sz +            \
+                                       1 + enc_stateid_sz +            \
+                                       enc_nfs4_fh_sz)
+
+#define NFS4_dec_cb_recall_sz          (cb_compound_dec_hdr_sz  +      \
+                                       cb_sequence_dec_sz +            \
+                                       op_dec_sz)
index e7d492c..bfe11be 100644 (file)
@@ -125,6 +125,7 @@ struct rpc_create_args {
 #define RPC_CLNT_CREATE_DISCRTRY       (1UL << 5)
 #define RPC_CLNT_CREATE_QUIET          (1UL << 6)
 #define RPC_CLNT_CREATE_INFINITE_SLOTS (1UL << 7)
+#define RPC_CLNT_CREATE_NO_IDLE_TIMEOUT        (1UL << 8)
 
 struct rpc_clnt *rpc_create(struct rpc_create_args *args);
 struct rpc_clnt        *rpc_bind_new_program(struct rpc_clnt *,
index f32b7a4..161463e 100644 (file)
@@ -48,6 +48,7 @@ int gss_import_sec_context(
                size_t                  bufsize,
                struct gss_api_mech     *mech,
                struct gss_ctx          **ctx_id,
+               time_t                  *endtime,
                gfp_t                   gfp_mask);
 u32 gss_get_mic(
                struct gss_ctx          *ctx_id,
@@ -105,6 +106,7 @@ struct gss_api_ops {
                        const void              *input_token,
                        size_t                  bufsize,
                        struct gss_ctx          *ctx_id,
+                       time_t                  *endtime,
                        gfp_t                   gfp_mask);
        u32 (*gss_get_mic)(
                        struct gss_ctx          *ctx_id,
@@ -130,6 +132,10 @@ struct gss_api_ops {
 int gss_mech_register(struct gss_api_mech *);
 void gss_mech_unregister(struct gss_api_mech *);
 
+/* returns a mechanism descriptor given an OID, and increments the mechanism's
+ * reference count. */
+struct gss_api_mech * gss_mech_get_by_OID(struct rpcsec_gss_oid *);
+
 /* Given a GSS security tuple, look up a pseudoflavor */
 rpc_authflavor_t gss_mech_info2flavor(struct rpcsec_gss_info *);
 
index c68a147..aadc6a0 100644 (file)
@@ -138,6 +138,9 @@ typedef __be32      rpc_fraghdr;
 #define RPC_MAX_HEADER_WITH_AUTH \
        (RPC_CALLHDRSIZE + 2*(2+RPC_MAX_AUTH_SIZE/4))
 
+#define RPC_MAX_REPHEADER_WITH_AUTH \
+       (RPC_REPHDRSIZE + (2 + RPC_MAX_AUTH_SIZE/4))
+
 /*
  * RFC1833/RFC3530 rpcbind (v3+) well-known netid's.
  */
index ff53924..cec7b9b 100644 (file)
@@ -256,6 +256,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
 #endif /* CONFIG_SUNRPC_BACKCHANNEL */
 
 #define XPRT_CREATE_INFINITE_SLOTS     (1U)
+#define XPRT_CREATE_NO_IDLE_TIMEOUT    (1U << 1)
 
 struct xprt_create {
        int                     ident;          /* XPRT_TRANSPORT identifier */
index 9e4cb59..14e9e53 100644 (file)
@@ -5,7 +5,8 @@
 obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
 
 auth_rpcgss-y := auth_gss.o gss_generic_token.o \
-       gss_mech_switch.o svcauth_gss.o
+       gss_mech_switch.o svcauth_gss.o \
+       gss_rpc_upcall.o gss_rpc_xdr.o
 
 obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
 
index 51415b0..a764e22 100644 (file)
@@ -238,7 +238,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
                p = ERR_PTR(-EFAULT);
                goto err;
        }
-       ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, GFP_NOFS);
+       ret = gss_import_sec_context(p, seclen, gm, &ctx->gc_gss_ctx, NULL, GFP_NOFS);
        if (ret < 0) {
                p = ERR_PTR(ret);
                goto err;
index 33255ff..0d3c158 100644 (file)
@@ -679,6 +679,7 @@ out_err:
 static int
 gss_import_sec_context_kerberos(const void *p, size_t len,
                                struct gss_ctx *ctx_id,
+                               time_t *endtime,
                                gfp_t gfp_mask)
 {
        const void *end = (const void *)((const char *)p + len);
@@ -694,9 +695,11 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
        else
                ret = gss_import_v2_context(p, end, ctx, gfp_mask);
 
-       if (ret == 0)
+       if (ret == 0) {
                ctx_id->internal_ctx_id = ctx;
-       else
+               if (endtime)
+                       *endtime = ctx->endtime;
+       } else
                kfree(ctx);
 
        dprintk("RPC:       %s: returning %d\n", __func__, ret);
index 79881d6..defa9d3 100644 (file)
@@ -175,7 +175,7 @@ struct gss_api_mech * gss_mech_get_by_name(const char *name)
        return gm;
 }
 
-static struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
+struct gss_api_mech *gss_mech_get_by_OID(struct rpcsec_gss_oid *obj)
 {
        struct gss_api_mech     *pos, *gm = NULL;
        char buf[32];
@@ -386,14 +386,15 @@ int
 gss_import_sec_context(const void *input_token, size_t bufsize,
                       struct gss_api_mech      *mech,
                       struct gss_ctx           **ctx_id,
+                      time_t                   *endtime,
                       gfp_t gfp_mask)
 {
        if (!(*ctx_id = kzalloc(sizeof(**ctx_id), gfp_mask)))
                return -ENOMEM;
        (*ctx_id)->mech_type = gss_mech_get(mech);
 
-       return mech->gm_ops
-               ->gss_import_sec_context(input_token, bufsize, *ctx_id, gfp_mask);
+       return mech->gm_ops->gss_import_sec_context(input_token, bufsize,
+                                               *ctx_id, endtime, gfp_mask);
 }
 
 /* gss_get_mic: compute a mic over message and return mic_token. */
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
new file mode 100644 (file)
index 0000000..d304f41
--- /dev/null
@@ -0,0 +1,358 @@
+/*
+ *  linux/net/sunrpc/gss_rpc_upcall.c
+ *
+ *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/types.h>
+#include <linux/un.h>
+
+#include <linux/sunrpc/svcauth.h>
+#include "gss_rpc_upcall.h"
+
+#define GSSPROXY_SOCK_PATHNAME "/var/run/gssproxy.sock"
+
+#define GSSPROXY_PROGRAM       (400112u)
+#define GSSPROXY_VERS_1                (1u)
+
+/*
+ * Encoding/Decoding functions
+ */
+
+enum {
+       GSSX_NULL = 0,  /* Unused */
+        GSSX_INDICATE_MECHS = 1,
+        GSSX_GET_CALL_CONTEXT = 2,
+        GSSX_IMPORT_AND_CANON_NAME = 3,
+        GSSX_EXPORT_CRED = 4,
+        GSSX_IMPORT_CRED = 5,
+        GSSX_ACQUIRE_CRED = 6,
+        GSSX_STORE_CRED = 7,
+        GSSX_INIT_SEC_CONTEXT = 8,
+        GSSX_ACCEPT_SEC_CONTEXT = 9,
+        GSSX_RELEASE_HANDLE = 10,
+        GSSX_GET_MIC = 11,
+        GSSX_VERIFY = 12,
+        GSSX_WRAP = 13,
+        GSSX_UNWRAP = 14,
+        GSSX_WRAP_SIZE_LIMIT = 15,
+};
+
+#define PROC(proc, name)                               \
+[GSSX_##proc] = {                                      \
+       .p_proc   = GSSX_##proc,                        \
+       .p_encode = (kxdreproc_t)gssx_enc_##name,       \
+       .p_decode = (kxdrdproc_t)gssx_dec_##name,       \
+       .p_arglen = GSSX_ARG_##name##_sz,               \
+       .p_replen = GSSX_RES_##name##_sz,               \
+       .p_statidx = GSSX_##proc,                       \
+       .p_name   = #proc,                              \
+}
+
+static struct rpc_procinfo gssp_procedures[] = {
+       PROC(INDICATE_MECHS, indicate_mechs),
+        PROC(GET_CALL_CONTEXT, get_call_context),
+        PROC(IMPORT_AND_CANON_NAME, import_and_canon_name),
+        PROC(EXPORT_CRED, export_cred),
+        PROC(IMPORT_CRED, import_cred),
+        PROC(ACQUIRE_CRED, acquire_cred),
+        PROC(STORE_CRED, store_cred),
+        PROC(INIT_SEC_CONTEXT, init_sec_context),
+        PROC(ACCEPT_SEC_CONTEXT, accept_sec_context),
+        PROC(RELEASE_HANDLE, release_handle),
+        PROC(GET_MIC, get_mic),
+        PROC(VERIFY, verify),
+        PROC(WRAP, wrap),
+        PROC(UNWRAP, unwrap),
+        PROC(WRAP_SIZE_LIMIT, wrap_size_limit),
+};
+
+
+
+/*
+ * Common transport functions
+ */
+
+static const struct rpc_program gssp_program;
+
+/*
+ * Create an RPC client for the gss-proxy AF_LOCAL socket.  Returns 0 with
+ * the client in *_clnt, or a negative errno with *_clnt set to NULL.
+ */
+static int gssp_rpc_create(struct net *net, struct rpc_clnt **_clnt)
+{
+       static const struct sockaddr_un gssp_localaddr = {
+               .sun_family             = AF_LOCAL,
+               .sun_path               = GSSPROXY_SOCK_PATHNAME,
+       };
+       struct rpc_create_args args = {
+               .net            = net,
+               .protocol       = XPRT_TRANSPORT_LOCAL,
+               .address        = (struct sockaddr *)&gssp_localaddr,
+               .addrsize       = sizeof(gssp_localaddr),
+               .servername     = "localhost",
+               .program        = &gssp_program,
+               .version        = GSSPROXY_VERS_1,
+               .authflavor     = RPC_AUTH_NULL,
+               /*
+                * Note we want connection to be done in the caller's
+                * filesystem namespace.  We therefore turn off the idle
+                * timeout, which would result in reconnections being
+                * done without the correct namespace:
+                */
+               .flags          = RPC_CLNT_CREATE_NOPING |
+                                 RPC_CLNT_CREATE_NO_IDLE_TIMEOUT
+       };
+       struct rpc_clnt *clnt;
+
+       clnt = rpc_create(&args);
+       if (IS_ERR(clnt)) {
+               dprintk("RPC:       failed to create AF_LOCAL gssproxy "
+                               "client (errno %ld).\n", PTR_ERR(clnt));
+               *_clnt = NULL;
+               return PTR_ERR(clnt);   /* already negative: don't negate */
+       }
+
+       dprintk("RPC:       created new gssp local client (gssp_local_clnt: "
+                       "%p)\n", clnt);
+       *_clnt = clnt;
+       return 0;
+}
+
+void init_gssp_clnt(struct sunrpc_net *sn)
+{
+       mutex_init(&sn->gssp_lock);             /* guards sn->gssp_clnt */
+       sn->gssp_clnt = NULL;                   /* no gss-proxy client yet */
+       init_waitqueue_head(&sn->gssp_wq);      /* woken by set_gssp_clnt() */
+}
+
+int set_gssp_clnt(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct rpc_clnt *clnt;
+       int ret;
+
+       mutex_lock(&sn->gssp_lock);
+       ret = gssp_rpc_create(net, &clnt);
+       if (!ret) {
+               if (sn->gssp_clnt)      /* replace any previous client */
+                       rpc_shutdown_client(sn->gssp_clnt);
+               sn->gssp_clnt = clnt;
+       }
+       mutex_unlock(&sn->gssp_lock);
+       wake_up(&sn->gssp_wq);          /* done on failure as well */
+       return ret;
+}
+
+void clear_gssp_clnt(struct sunrpc_net *sn)
+{
+       mutex_lock(&sn->gssp_lock);
+       if (sn->gssp_clnt) {            /* nothing to do if none was set */
+               rpc_shutdown_client(sn->gssp_clnt);
+               sn->gssp_clnt = NULL;
+       }
+       mutex_unlock(&sn->gssp_lock);
+}
+
+static struct rpc_clnt *get_gssp_clnt(struct sunrpc_net *sn)
+{
+       struct rpc_clnt *clnt;
+
+       mutex_lock(&sn->gssp_lock);
+       clnt = sn->gssp_clnt;
+       if (clnt)
+               atomic_inc(&clnt->cl_count);    /* reference for the caller */
+       mutex_unlock(&sn->gssp_lock);
+       return clnt;                            /* NULL if no client is set */
+}
+
+static int gssp_call(struct net *net, struct rpc_message *msg)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct rpc_clnt *clnt;
+       int status;
+
+       clnt = get_gssp_clnt(sn);
+       if (!clnt)
+               return -EIO;            /* no gss-proxy client has been set */
+       status = rpc_call_sync(clnt, msg, 0);
+       if (status < 0) {
+               dprintk("gssp: rpc_call returned error %d\n", -status);
+               switch (status) {       /* remap selected RPC errors */
+               case -EPROTONOSUPPORT:
+                       status = -EINVAL;
+                       break;
+               case -ECONNREFUSED:
+               case -ETIMEDOUT:
+               case -ENOTCONN:
+                       status = -EAGAIN;
+                       break;
+               case -ERESTARTSYS:
+                       if (signalled ())
+                               status = -EINTR;
+                       break;
+               default:                /* anything else is passed through */
+                       break;
+               }
+       }
+       rpc_release_client(clnt);       /* drop get_gssp_clnt()'s reference */
+       return status;
+}
+
+
+/*
+ * Public functions
+ */
+
+/* numbers somewhat arbitrary but large enough for current needs */
+#define GSSX_MAX_OUT_HANDLE    128
+#define GSSX_MAX_SRC_PRINC     256
+#define GSSX_KMEMBUF (GSSX_max_output_handle_sz + \
+                       GSSX_max_oid_sz + \
+                       GSSX_max_princ_sz + \
+                       sizeof(struct svc_cred))
+
+/*
+ * Issue ACCEPT_SEC_CONTEXT to gss-proxy and copy the reply into @data, even
+ * when the call fails.  Returns the status from gssp_call().
+ */
+int gssp_accept_sec_context_upcall(struct net *net,
+                               struct gssp_upcall_data *data)
+{
+       struct gssx_ctx ctxh = { .state = data->in_handle };
+       struct gssx_arg_accept_sec_context arg = {
+               .input_token = data->in_token,
+       };
+       struct gssx_ctx rctxh = {
+               /*
+                * pass in the max length we expect for each of these
+                * buffers but let the xdr code kmalloc them:
+                */
+               .exported_context_token.len = GSSX_max_output_handle_sz,
+               .mech.len = GSS_OID_MAX_LEN,
+               .src_name.display_name.len = GSSX_max_princ_sz
+       };
+       struct gssx_res_accept_sec_context res = {
+               .context_handle = &rctxh,
+               .output_token = &data->out_token
+       };
+       struct rpc_message msg = {
+               .rpc_proc = &gssp_procedures[GSSX_ACCEPT_SEC_CONTEXT],
+               .rpc_argp = &arg,
+               .rpc_resp = &res,
+               .rpc_cred = NULL, /* FIXME ? */
+       };
+       struct xdr_netobj client_name = { 0 , NULL };
+       int ret;
+
+       if (data->in_handle.len != 0)
+               arg.context_handle = &ctxh;
+       res.output_token->len = GSSX_max_output_token_sz;
+
+       /* use nfs/ for targ_name ? */
+       ret = gssp_call(net, &msg);
+
+       /* we need to fetch all data even in case of error so
+        * that we can free special structures if they have been allocated */
+       data->major_status = res.status.major_status;
+       data->minor_status = res.status.minor_status;
+       if (res.context_handle) {
+               data->out_handle = rctxh.exported_context_token;
+               data->mech_oid.len = rctxh.mech.len;
+               if (rctxh.mech.data)    /* NULL if nothing was decoded */
+                       memcpy(data->mech_oid.data, rctxh.mech.data,
+                                               data->mech_oid.len);
+               kfree(rctxh.mech.data); /* contents were copied above */
+               client_name = rctxh.src_name.display_name;
+       }
+
+       if (res.options.count == 1) {
+               gssx_buffer *value = &res.options.data[0].value;
+               /* Currently we only decode CREDS_VALUE, if we add anything
+                * else we'll have to loop and match on the option name */
+               if (value->len == 1) {
+                       /* steal group info from struct svc_cred */
+                       data->creds = *(struct svc_cred *)value->data;
+                       data->found_creds = 1;
+               }
+               /* whether we use it or not, free data */
+               kfree(value->data);
+       }
+
+       if (res.options.count != 0)
+               kfree(res.options.data);
+
+       /* convert to GSS_NT_HOSTBASED_SERVICE form and set into creds */
+       if (data->found_creds && client_name.data != NULL) {
+               char *c;
+
+               data->creds.cr_principal = kstrndup(client_name.data,
+                                               client_name.len, GFP_KERNEL);
+               if (data->creds.cr_principal) {
+                       /* terminate and remove realm part */
+                       c = strchr(data->creds.cr_principal, '@');
+                       if (c) {
+                               *c = '\0';
+                               /* change service-hostname delimiter */
+                               c = strchr(data->creds.cr_principal, '/');
+                               if (c) *c = '@';
+                       }
+                       if (!c) {
+                               /* not a service principal */
+                               kfree(data->creds.cr_principal);
+                               data->creds.cr_principal = NULL;
+                       }
+               }
+       }
+       kfree(client_name.data);
+
+       return ret;
+}
+
+void gssp_free_upcall_data(struct gssp_upcall_data *data)
+{
+       kfree(data->in_handle.data);
+       kfree(data->out_handle.data);
+       kfree(data->out_token.data);
+       /* mech_oid.data is an array embedded in *data: nothing to kfree */
+       free_svc_cred(&data->creds);
+}
+
+/*
+ * Initialization stuff
+ */
+
+static const struct rpc_version gssp_version1 = {
+       .number         = GSSPROXY_VERS_1,
+       .nrprocs        = ARRAY_SIZE(gssp_procedures),
+       .procs          = gssp_procedures,
+};
+
+static const struct rpc_version *gssp_version[] = {
+       NULL,
+       &gssp_version1,
+};
+
+static struct rpc_stat gssp_stats;
+
+static const struct rpc_program gssp_program = {
+       .name           = "gssproxy",
+       .number         = GSSPROXY_PROGRAM,
+       .nrvers         = ARRAY_SIZE(gssp_version),
+       .version        = gssp_version,
+       .stats          = &gssp_stats,
+};
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.h b/net/sunrpc/auth_gss/gss_rpc_upcall.h
new file mode 100644 (file)
index 0000000..1e542ad
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ *  linux/net/sunrpc/gss_rpc_upcall.h
+ *
+ *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _GSS_RPC_UPCALL_H
+#define _GSS_RPC_UPCALL_H
+
+#include <linux/sunrpc/gss_api.h>
+#include <linux/sunrpc/auth_gss.h>
+#include "gss_rpc_xdr.h"
+#include "../netns.h"
+
+struct gssp_upcall_data {
+       struct xdr_netobj in_handle;
+       struct gssp_in_token in_token;
+       struct xdr_netobj out_handle;
+       struct xdr_netobj out_token;
+       struct rpcsec_gss_oid mech_oid;
+       struct svc_cred creds;
+       int found_creds;
+       int major_status;
+       int minor_status;
+};
+
+int gssp_accept_sec_context_upcall(struct net *net,
+                               struct gssp_upcall_data *data);
+void gssp_free_upcall_data(struct gssp_upcall_data *data);
+
+void init_gssp_clnt(struct sunrpc_net *);
+int set_gssp_clnt(struct net *);
+void clear_gssp_clnt(struct sunrpc_net *);
+#endif /* _GSS_RPC_UPCALL_H */
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
new file mode 100644 (file)
index 0000000..5c4c61d
--- /dev/null
@@ -0,0 +1,838 @@
+/*
+ * GSS Proxy upcall module
+ *
+ *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/sunrpc/svcauth.h>
+#include "gss_rpc_xdr.h"
+
+/*
+ * Consume the 4-byte "value follows" word that precedes an optional field
+ * in the reply stream.
+ *
+ * Returns true when the word is non-zero, false when it is zero or when
+ * fewer than 4 bytes could be decoded.  The bool return type cannot carry
+ * an errno, so a truncated stream is reported as "absent"; the caller's
+ * next decode step is then expected to fail on the same short stream.
+ */
+static bool gssx_check_pointer(struct xdr_stream *xdr)
+{
+       __be32 *p;
+
+       /* decode path: read the word from the stream, do not reserve one */
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(p == NULL))
+               return false;
+       return *p != xdr_zero;
+}
+
+/*
+ * Append one XDR boolean to the stream: xdr_one for any non-zero @v,
+ * xdr_zero otherwise.
+ * Returns 0, or -ENOSPC if four bytes cannot be reserved.
+ */
+static int gssx_enc_bool(struct xdr_stream *xdr, int v)
+{
+       __be32 *word = xdr_reserve_space(xdr, 4);
+
+       if (unlikely(!word))
+               return -ENOSPC;
+       if (v)
+               *word = xdr_one;
+       else
+               *word = xdr_zero;
+       return 0;
+}
+
+/*
+ * Decode one 4-byte word into *@v, converted to host byte order.  The
+ * value is stored as read; it is not normalised to 0/1.
+ * Returns 0, or -ENOSPC if the stream has no such word.
+ */
+static int gssx_dec_bool(struct xdr_stream *xdr, u32 *v)
+{
+       __be32 *word = xdr_inline_decode(xdr, 4);
+
+       if (unlikely(!word))
+               return -ENOSPC;
+       *v = be32_to_cpup(word);
+       return 0;
+}
+
+/*
+ * Encode @buf as an XDR opaque: space is reserved for a 4-byte length
+ * plus buf->len bytes and filled by xdr_encode_opaque().
+ * Returns 0, or -ENOSPC if the space cannot be reserved.
+ */
+static int gssx_enc_buffer(struct xdr_stream *xdr,
+                          gssx_buffer *buf)
+{
+       __be32 *dst = xdr_reserve_space(xdr, sizeof(u32) + buf->len);
+
+       if (dst == NULL)
+               return -ENOSPC;
+       xdr_encode_opaque(dst, buf->data, buf->len);
+       return 0;
+}
+
+/*
+ * Encode the input token: a 4-byte length word holding in->page_len,
+ * after which the token pages are handed to xdr_write_pages().
+ * Returns 0, or -ENOSPC if the length word cannot be reserved.
+ */
+static int gssx_enc_in_token(struct xdr_stream *xdr,
+                            struct gssp_in_token *in)
+{
+       __be32 *len_word = xdr_reserve_space(xdr, 4);
+
+       if (len_word == NULL)
+               return -ENOSPC;
+       *len_word = cpu_to_be32(in->page_len);
+
+       /* the token body itself only needs its pages written */
+       xdr_write_pages(xdr, in->pages, in->page_base, in->page_len);
+       return 0;
+}
+
+
+/*
+ * Decode one length-prefixed buffer from the stream into @buf.
+ *
+ * The caller selects the behaviour through @buf on entry:
+ *   - buf->len == 0: the buffer is consumed from the stream and dropped;
+ *   - buf->data != NULL: up to buf->len bytes are copied into buf->data;
+ *   - buf->data == NULL: a copy is allocated with kmemdup().
+ * On success buf->len is set to the decoded length.
+ *
+ * Returns 0, -ENOSPC if the stream is short or the data exceeds buf->len,
+ * or -ENOMEM if the copy cannot be allocated.
+ */
+static int gssx_dec_buffer(struct xdr_stream *xdr,
+                          gssx_buffer *buf)
+{
+       __be32 *p;
+       u32 length;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(!p))
+               return -ENOSPC;
+       length = be32_to_cpup(p);
+
+       p = xdr_inline_decode(xdr, length);
+       if (unlikely(!p))
+               return -ENOSPC;
+
+       /* we intentionally are not interested in this buffer */
+       if (buf->len == 0)
+               return 0;
+
+       if (length > buf->len)
+               return -ENOSPC;
+
+       if (buf->data) {
+               memcpy(buf->data, p, length);
+       } else {
+               buf->data = kmemdup(p, length, GFP_KERNEL);
+               if (!buf->data)
+                       return -ENOMEM;
+       }
+       buf->len = length;
+       return 0;
+}
+
+/* Encode one option as two buffers: its name, then its value.
+ * Returns 0 or the first gssx_enc_buffer() error. */
+static int gssx_enc_option(struct xdr_stream *xdr,
+                          struct gssx_option *opt)
+{
+       int err = gssx_enc_buffer(xdr, &opt->option);
+
+       if (err)
+               return err;
+       return gssx_enc_buffer(xdr, &opt->value);
+}
+
+/* Decode one option as two buffers: its name, then its value.
+ * Returns 0 or the first gssx_dec_buffer() error. */
+static int gssx_dec_option(struct xdr_stream *xdr,
+                          struct gssx_option *opt)
+{
+       int err = gssx_dec_buffer(xdr, &opt->option);
+
+       if (err)
+               return err;
+       return gssx_dec_buffer(xdr, &opt->value);
+}
+
+/*
+ * Encode an option array that must be empty: only a zero element count
+ * is written.
+ * Returns 0, -EINVAL if @oa holds any options, or -ENOSPC if the count
+ * word cannot be reserved.
+ */
+static int dummy_enc_opt_array(struct xdr_stream *xdr,
+                               struct gssx_option_array *oa)
+{
+       __be32 *count_word;
+
+       if (oa->count)
+               return -EINVAL;
+
+       count_word = xdr_reserve_space(xdr, 4);
+       if (count_word == NULL)
+               return -ENOSPC;
+       *count_word = xdr_zero;
+
+       return 0;
+}
+
+/*
+ * Consume an option array from the stream without keeping its contents.
+ *
+ * @oa is always reported as empty (count 0, data NULL).  Each option is
+ * decoded into a zeroed scratch gssx_option, which makes
+ * gssx_dec_buffer() discard the data.
+ *
+ * Returns 0, -ENOSPC if the count word is missing, or the first error
+ * from gssx_dec_option().  Stopping at the first error keeps a bogus
+ * element count from spinning over a stream that is already exhausted.
+ */
+static int dummy_dec_opt_array(struct xdr_stream *xdr,
+                               struct gssx_option_array *oa)
+{
+       struct gssx_option dummy;
+       u32 count, i;
+       __be32 *p;
+       int err;
+
+       oa->count = 0;
+       oa->data = NULL;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(p == NULL))
+               return -ENOSPC;
+       count = be32_to_cpup(p);
+
+       /* zero lengths tell gssx_dec_buffer() to drop the data */
+       memset(&dummy, 0, sizeof(dummy));
+       for (i = 0; i < count; i++) {
+               err = gssx_dec_option(xdr, &dummy);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/*
+ * Copy one s32 from the cursor *@p into *@res and advance the cursor.
+ * The value is copied as stored; no byte-order conversion is done.
+ * Returns 0, or -EINVAL if the read would pass @max or the pointer
+ * arithmetic wraps.
+ */
+static int get_s32(void **p, void *max, s32 *res)
+{
+       char *cur = *p;
+       char *after = cur + sizeof(s32);
+
+       if (unlikely((void *)after > max || after < cur))
+               return -EINVAL;
+       memcpy(res, cur, sizeof(s32));
+       *p = after;
+       return 0;
+}
+
+/*
+ * Decode a "linux_creds_v1" value into @creds.
+ *
+ * The value is a length-prefixed blob holding, as s32 values read with
+ * get_s32(): uid, gid, the number of additional gids, then those gids.
+ * Ids are mapped through init_user_ns.
+ *
+ * On success creds->cr_group_info holds a freshly allocated group list.
+ * On failure nothing stays allocated and creds->cr_group_info is not left
+ * pointing at freed memory.
+ *
+ * Returns 0, -ENOSPC on a short stream, -EINVAL on a malformed blob
+ * (truncated, bad group count, invalid gid) or -ENOMEM.
+ */
+static int gssx_dec_linux_creds(struct xdr_stream *xdr,
+                               struct svc_cred *creds)
+{
+       u32 length;
+       __be32 *p;
+       void *q, *end;
+       s32 tmp;
+       int N, i, err;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(p == NULL))
+               return -ENOSPC;
+
+       length = be32_to_cpup(p);
+
+       /* FIXME: we do not want to use the scratch buffer for this one
+        * may need to use functions that allows us to access an io vector
+        * directly */
+       p = xdr_inline_decode(xdr, length);
+       if (unlikely(p == NULL))
+               return -ENOSPC;
+
+       q = p;
+       end = q + length;
+
+       /* uid */
+       err = get_s32(&q, end, &tmp);
+       if (err)
+               return err;
+       creds->cr_uid = make_kuid(&init_user_ns, tmp);
+
+       /* gid */
+       err = get_s32(&q, end, &tmp);
+       if (err)
+               return err;
+       creds->cr_gid = make_kgid(&init_user_ns, tmp);
+
+       /* number of additional gid's */
+       err = get_s32(&q, end, &tmp);
+       if (err)
+               return err;
+       /* the count comes off the wire: refuse a negative value or one the
+        * rest of the blob cannot hold before sizing an allocation with it */
+       if (tmp < 0 || (size_t)tmp > (size_t)(end - q) / sizeof(s32))
+               return -EINVAL;
+       N = tmp;
+       creds->cr_group_info = groups_alloc(N);
+       if (creds->cr_group_info == NULL)
+               return -ENOMEM;
+
+       /* gid's */
+       for (i = 0; i < N; i++) {
+               kgid_t kgid;
+               err = get_s32(&q, end, &tmp);
+               if (err)
+                       goto out_free_groups;
+               err = -EINVAL;
+               kgid = make_kgid(&init_user_ns, tmp);
+               if (!gid_valid(kgid))
+                       goto out_free_groups;
+               GROUP_AT(creds->cr_group_info, i) = kgid;
+       }
+
+       return 0;
+out_free_groups:
+       groups_free(creds->cr_group_info);
+       /* do not leave a dangling pointer behind for the caller */
+       creds->cr_group_info = NULL;
+       return err;
+}
+
+/*
+ * Decode the option array of an accept_sec_context reply.
+ *
+ * Only one option is recognised: the one whose name equals CREDS_VALUE.
+ * When the array is non-empty, @oa is set up with a single slot whose
+ * value.data points at a kzalloc()ed struct svc_cred and whose value.len
+ * is 1 if credentials were decoded and 0 otherwise.  All other options
+ * are consumed and dropped.  An empty array leaves @oa untouched.
+ *
+ * On error everything allocated here is released and @oa is reset to
+ * count 0 / data NULL, so it never refers to freed memory.
+ *
+ * Returns 0, -ENOSPC on a short stream, -ENOMEM, or an error from
+ * gssx_dec_linux_creds() / gssx_dec_buffer().
+ */
+static int gssx_dec_option_array(struct xdr_stream *xdr,
+                                struct gssx_option_array *oa)
+{
+       struct svc_cred *creds;
+       u32 count, i;
+       __be32 *p;
+       int err;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(p == NULL))
+               return -ENOSPC;
+       count = be32_to_cpup(p);
+       if (count == 0)
+               return 0;
+
+       /* we recognize only 1 currently: CREDS_VALUE */
+       oa->data = kmalloc(sizeof(struct gssx_option), GFP_KERNEL);
+       if (!oa->data) {
+               err = -ENOMEM;
+               goto out_reset;
+       }
+
+       /* zeroed so that no field is ever read uninitialised */
+       creds = kzalloc(sizeof(struct svc_cred), GFP_KERNEL);
+       if (!creds) {
+               err = -ENOMEM;
+               goto out_free_oa;
+       }
+
+       oa->count = 1;
+       oa->data[0].option.data = CREDS_VALUE;
+       oa->data[0].option.len = sizeof(CREDS_VALUE);
+       oa->data[0].value.data = (void *)creds;
+       oa->data[0].value.len = 0;
+
+       for (i = 0; i < count; i++) {
+               gssx_buffer dummy = { 0, NULL };
+               u32 length;
+
+               /* option buffer */
+               err = -ENOSPC;
+               p = xdr_inline_decode(xdr, 4);
+               if (unlikely(p == NULL))
+                       goto out_free_creds;
+
+               length = be32_to_cpup(p);
+               p = xdr_inline_decode(xdr, length);
+               if (unlikely(p == NULL))
+                       goto out_free_creds;
+
+               if (length == sizeof(CREDS_VALUE) &&
+                   memcmp(p, CREDS_VALUE, sizeof(CREDS_VALUE)) == 0) {
+                       /* a repeated creds option replaces the earlier
+                        * one; drop the group list decoded before */
+                       if (oa->data[0].value.len) {
+                               groups_free(creds->cr_group_info);
+                               oa->data[0].value.len = 0;
+                       }
+                       /* We have creds here. parse them */
+                       err = gssx_dec_linux_creds(xdr, creds);
+                       if (err)
+                               goto out_free_creds;
+                       oa->data[0].value.len = 1; /* presence */
+               } else {
+                       /* consume uninteresting buffer */
+                       err = gssx_dec_buffer(xdr, &dummy);
+                       if (err)
+                               goto out_free_creds;
+               }
+       }
+       return 0;
+
+out_free_creds:
+       /* value.len is set only while creds owns a decoded group list */
+       if (oa->data[0].value.len)
+               groups_free(creds->cr_group_info);
+       kfree(creds);
+out_free_oa:
+       kfree(oa->data);
+out_reset:
+       oa->data = NULL;
+       oa->count = 0;
+       return err;
+}
+
+/*
+ * Decode a gssx_status from the stream, field by field in wire order:
+ * major_status, mech, minor_status, the two status strings, server_ctx,
+ * then an option array that is consumed and dropped.
+ * Returns 0, -ENOSPC on a short stream, or the first helper error.
+ */
+static int gssx_dec_status(struct xdr_stream *xdr,
+                          struct gssx_status *status)
+{
+       __be32 *p;
+       int err;
+
+       /* status->major_status */
+       p = xdr_inline_decode(xdr, 8);
+       if (unlikely(!p))
+               return -ENOSPC;
+       xdr_decode_hyper(p, &status->major_status);
+
+       /* status->mech */
+       err = gssx_dec_buffer(xdr, &status->mech);
+       if (err)
+               return err;
+
+       /* status->minor_status */
+       p = xdr_inline_decode(xdr, 8);
+       if (unlikely(!p))
+               return -ENOSPC;
+       xdr_decode_hyper(p, &status->minor_status);
+
+       /* status->major_status_string */
+       err = gssx_dec_buffer(xdr, &status->major_status_string);
+       if (err)
+               return err;
+
+       /* status->minor_status_string */
+       err = gssx_dec_buffer(xdr, &status->minor_status_string);
+       if (err)
+               return err;
+
+       /* status->server_ctx */
+       err = gssx_dec_buffer(xdr, &status->server_ctx);
+       if (err)
+               return err;
+
+       /* status->options: none are expected for now, so they are simply
+        * consumed */
+       return dummy_dec_opt_array(xdr, &status->options);
+}
+
+/*
+ * Encode the call context: locale, server_ctx, then a fixed array of two
+ * options asking for a lucid context (LUCID_OPTION = LUCID_VALUE) and for
+ * user credentials (CREDS_OPTION = CREDS_VALUE).  ctx->options is not
+ * consulted.
+ *
+ * Returns 0, -ENOSPC if stream space runs out, or the first error from
+ * the buffer/option encoders.
+ */
+static int gssx_enc_call_ctx(struct xdr_stream *xdr,
+                            struct gssx_call_ctx *ctx)
+{
+       struct gssx_option opt;
+       __be32 *p;
+       int err;
+
+       /* ctx->locale */
+       err = gssx_enc_buffer(xdr, &ctx->locale);
+       if (err)
+               return err;
+
+       /* ctx->server_ctx */
+       err = gssx_enc_buffer(xdr, &ctx->server_ctx);
+       if (err)
+               return err;
+
+       /* we always want to ask for lucid contexts */
+       /* ctx->options */
+       p = xdr_reserve_space(xdr, 4);
+       if (!p)
+               return -ENOSPC;
+       *p = cpu_to_be32(2);
+
+       /* we want a lucid_v1 context */
+       opt.option.data = LUCID_OPTION;
+       opt.option.len = sizeof(LUCID_OPTION);
+       opt.value.data = LUCID_VALUE;
+       opt.value.len = sizeof(LUCID_VALUE);
+       err = gssx_enc_option(xdr, &opt);
+       if (err)
+               return err;
+
+       /* ..and user creds */
+       opt.option.data = CREDS_OPTION;
+       opt.option.len = sizeof(CREDS_OPTION);
+       opt.value.data = CREDS_VALUE;
+       opt.value.len = sizeof(CREDS_VALUE);
+       err = gssx_enc_option(xdr, &opt);
+
+       return err;
+}
+
+/*
+ * Decode one name attribute: the attr buffer, the value buffer, then an
+ * extensions array that is consumed and dropped.
+ * Returns 0 or the first helper error.
+ */
+static int gssx_dec_name_attr(struct xdr_stream *xdr,
+                            struct gssx_name_attr *attr)
+{
+       int err;
+
+       /* attr->attr */
+       err = gssx_dec_buffer(xdr, &attr->attr);
+       if (err)
+               return err;
+
+       /* attr->value */
+       err = gssx_dec_buffer(xdr, &attr->value);
+       if (err)
+               return err;
+
+       /* attr->extensions */
+       return dummy_dec_opt_array(xdr, &attr->extensions);
+}
+
+/*
+ * Encode a name-attribute array that must be empty: only a zero element
+ * count is written.
+ * Returns 0, -EINVAL if @naa holds any attributes, or -ENOSPC if the
+ * count word cannot be reserved.
+ */
+static int dummy_enc_nameattr_array(struct xdr_stream *xdr,
+                                   struct gssx_name_attr_array *naa)
+{
+       __be32 *count_word;
+
+       if (naa->count)
+               return -EINVAL;
+
+       count_word = xdr_reserve_space(xdr, 4);
+       if (count_word == NULL)
+               return -ENOSPC;
+       *count_word = xdr_zero;
+
+       return 0;
+}
+
+/*
+ * Consume a name-attribute array from the stream without keeping its
+ * contents.
+ *
+ * @naa is always reported as empty (count 0, data NULL).  Each attribute
+ * is decoded into a zeroed scratch gssx_name_attr; the zero lengths make
+ * gssx_dec_buffer() discard the data instead of copying it through
+ * whatever pointer an uninitialised scratch structure would contain.
+ *
+ * Returns 0, -ENOSPC if the count word is missing, or the first error
+ * from gssx_dec_name_attr().
+ */
+static int dummy_dec_nameattr_array(struct xdr_stream *xdr,
+                                   struct gssx_name_attr_array *naa)
+{
+       struct gssx_name_attr dummy;
+       u32 count, i;
+       __be32 *p;
+       int err;
+
+       naa->count = 0;
+       naa->data = NULL;
+
+       p = xdr_inline_decode(xdr, 4);
+       if (unlikely(p == NULL))
+               return -ENOSPC;
+       count = be32_to_cpup(p);
+
+       /* zero lengths tell gssx_dec_buffer() to drop the data */
+       memset(&dummy, 0, sizeof(dummy));
+       for (i = 0; i < count; i++) {
+               err = gssx_dec_name_attr(xdr, &dummy);
+               if (err)
+                       return err;
+       }
+
+       return 0;
+}
+
+/* All-zero placeholders that gssx_enc_name() passes to the encoders for
+ * the wire fields it does not populate. */
+static struct xdr_netobj zero_netobj = {};
+
+static struct gssx_name_attr_array zero_name_attr_array = {};
+
+static struct gssx_option_array zero_option_array = {};
+
+/*
+ * Encode a gssx_name.  Only display_name carries data; name_type,
+ * exported_name and exported_composite_name are sent as empty buffers,
+ * and the attribute and extension arrays are sent empty.
+ * Returns 0 or the first encoder error.
+ */
+static int gssx_enc_name(struct xdr_stream *xdr,
+                        struct gssx_name *name)
+{
+       int err;
+
+       /* name->display_name */
+       err = gssx_enc_buffer(xdr, &name->display_name);
+       if (err)
+               return err;
+
+       /* name->name_type */
+       err = gssx_enc_buffer(xdr, &zero_netobj);
+       if (err)
+               return err;
+
+       /* name->exported_name */
+       err = gssx_enc_buffer(xdr, &zero_netobj);
+       if (err)
+               return err;
+
+       /* name->exported_composite_name */
+       err = gssx_enc_buffer(xdr, &zero_netobj);
+       if (err)
+               return err;
+
+       /* name->name_attributes: left empty until there is something to
+        * pass up */
+       err = dummy_enc_nameattr_array(xdr, &zero_name_attr_array);
+       if (err)
+               return err;
+
+       /* name->extensions: left empty until there are options to pass up */
+       return dummy_enc_opt_array(xdr, &zero_option_array);
+}
+
+/*
+ * Decode a gssx_name.  Only display_name is kept; name_type,
+ * exported_name, exported_composite_name, the attribute array and the
+ * extension array are consumed from the stream and dropped.
+ *
+ * The scratch netobj is zero-initialised: gssx_dec_buffer() reads len and
+ * data on entry, and a zero len is what tells it to discard the buffer.
+ *
+ * Returns 0 or the first decoder error.
+ */
+static int gssx_dec_name(struct xdr_stream *xdr,
+                        struct gssx_name *name)
+{
+       struct xdr_netobj dummy_netobj = { .len = 0 };
+       struct gssx_name_attr_array dummy_name_attr_array;
+       struct gssx_option_array dummy_option_array;
+       int err;
+
+       /* name->display_name */
+       err = gssx_dec_buffer(xdr, &name->display_name);
+       if (err)
+               return err;
+
+       /* name->name_type */
+       err = gssx_dec_buffer(xdr, &dummy_netobj);
+       if (err)
+               return err;
+
+       /* name->exported_name */
+       err = gssx_dec_buffer(xdr, &dummy_netobj);
+       if (err)
+               return err;
+
+       /* name->exported_composite_name */
+       err = gssx_dec_buffer(xdr, &dummy_netobj);
+       if (err)
+               return err;
+
+       /* we assume we have no attributes for now, so simply consume them */
+       /* name->name_attributes */
+       err = dummy_dec_nameattr_array(xdr, &dummy_name_attr_array);
+       if (err)
+               return err;
+
+       /* we assume we have no options for now, so simply consume them */
+       /* name->extensions */
+       err = dummy_dec_opt_array(xdr, &dummy_option_array);
+
+       return err;
+}
+
+/*
+ * Encode a credential-element array that must be empty: only a zero
+ * element count is written.
+ * Returns 0, -EINVAL if @cea holds any elements, or -ENOSPC if the count
+ * word cannot be reserved.
+ */
+static int dummy_enc_credel_array(struct xdr_stream *xdr,
+                                 struct gssx_cred_element_array *cea)
+{
+       __be32 *count_word;
+
+       if (cea->count)
+               return -EINVAL;
+
+       count_word = xdr_reserve_space(xdr, 4);
+       if (count_word == NULL)
+               return -ENOSPC;
+       *count_word = xdr_zero;
+
+       return 0;
+}
+
+/*
+ * Encode a gssx_cred: desired_name, the (necessarily empty) elements
+ * array, cred_handle_reference and the needs_release boolean.
+ * Returns 0 or the first encoder error; encoding stops at that point.
+ */
+static int gssx_enc_cred(struct xdr_stream *xdr,
+                        struct gssx_cred *cred)
+{
+       int err;
+
+       /* cred->desired_name */
+       err = gssx_enc_name(xdr, &cred->desired_name);
+       if (err)
+               return err;
+
+       /* cred->elements */
+       err = dummy_enc_credel_array(xdr, &cred->elements);
+       if (err)
+               return err;
+
+       /* cred->cred_handle_reference */
+       err = gssx_enc_buffer(xdr, &cred->cred_handle_reference);
+       if (err)
+               return err;
+
+       /* cred->needs_release */
+       err = gssx_enc_bool(xdr, cred->needs_release);
+
+       return err;
+}
+
+/*
+ * Encode a gssx_ctx in wire order: exported_context_token, state,
+ * need_release, mech, src_name, targ_name, lifetime, ctx_flags,
+ * locally_initiated, open, then an (empty) option array.
+ * Returns 0, -ENOSPC if stream space runs out, or the first encoder
+ * error.
+ */
+static int gssx_enc_ctx(struct xdr_stream *xdr,
+                       struct gssx_ctx *ctx)
+{
+       __be32 *p;
+       int err;
+
+       /* ctx->exported_context_token */
+       err = gssx_enc_buffer(xdr, &ctx->exported_context_token);
+       if (err)
+               return err;
+
+       /* ctx->state */
+       err = gssx_enc_buffer(xdr, &ctx->state);
+       if (err)
+               return err;
+
+       /* ctx->need_release */
+       err = gssx_enc_bool(xdr, ctx->need_release);
+       if (err)
+               return err;
+
+       /* ctx->mech */
+       err = gssx_enc_buffer(xdr, &ctx->mech);
+       if (err)
+               return err;
+
+       /* ctx->src_name */
+       err = gssx_enc_name(xdr, &ctx->src_name);
+       if (err)
+               return err;
+
+       /* ctx->targ_name */
+       err = gssx_enc_name(xdr, &ctx->targ_name);
+       if (err)
+               return err;
+
+       /* ctx->lifetime and ctx->ctx_flags share one 16-byte reservation */
+       p = xdr_reserve_space(xdr, 8+8);
+       if (p == NULL)
+               return -ENOSPC;
+       p = xdr_encode_hyper(p, ctx->lifetime);
+       xdr_encode_hyper(p, ctx->ctx_flags);
+
+       /* ctx->locally_initiated */
+       err = gssx_enc_bool(xdr, ctx->locally_initiated);
+       if (err)
+               return err;
+
+       /* ctx->open */
+       err = gssx_enc_bool(xdr, ctx->open);
+       if (err)
+               return err;
+
+       /* ctx->options: left empty until there are options to pass up */
+       return dummy_enc_opt_array(xdr, &ctx->options);
+}
+
+/*
+ * Decode a gssx_ctx in wire order: exported_context_token, state,
+ * need_release, mech, src_name, targ_name, lifetime, ctx_flags,
+ * locally_initiated, open, then an option array that is consumed and
+ * dropped.
+ * Returns 0, -ENOSPC on a short stream, or the first decoder error.
+ */
+static int gssx_dec_ctx(struct xdr_stream *xdr,
+                       struct gssx_ctx *ctx)
+{
+       __be32 *p;
+       int err;
+
+       /* ctx->exported_context_token */
+       err = gssx_dec_buffer(xdr, &ctx->exported_context_token);
+       if (err)
+               return err;
+
+       /* ctx->state */
+       err = gssx_dec_buffer(xdr, &ctx->state);
+       if (err)
+               return err;
+
+       /* ctx->need_release */
+       err = gssx_dec_bool(xdr, &ctx->need_release);
+       if (err)
+               return err;
+
+       /* ctx->mech */
+       err = gssx_dec_buffer(xdr, &ctx->mech);
+       if (err)
+               return err;
+
+       /* ctx->src_name */
+       err = gssx_dec_name(xdr, &ctx->src_name);
+       if (err)
+               return err;
+
+       /* ctx->targ_name */
+       err = gssx_dec_name(xdr, &ctx->targ_name);
+       if (err)
+               return err;
+
+       /* ctx->lifetime and ctx->ctx_flags are read from one 16-byte span */
+       p = xdr_inline_decode(xdr, 8+8);
+       if (unlikely(!p))
+               return -ENOSPC;
+       p = xdr_decode_hyper(p, &ctx->lifetime);
+       xdr_decode_hyper(p, &ctx->ctx_flags);
+
+       /* ctx->locally_initiated */
+       err = gssx_dec_bool(xdr, &ctx->locally_initiated);
+       if (err)
+               return err;
+
+       /* ctx->open */
+       err = gssx_dec_bool(xdr, &ctx->open);
+       if (err)
+               return err;
+
+       /* ctx->options: none are expected for now, so they are simply
+        * consumed */
+       return dummy_dec_opt_array(xdr, &ctx->options);
+}
+
+/*
+ * Encode channel bindings: initiator address type and address, acceptor
+ * address type and address, then application_data.
+ * Returns 0, -ENOSPC if stream space runs out, or the first encoder
+ * error.
+ */
+static int gssx_enc_cb(struct xdr_stream *xdr, struct gssx_cb *cb)
+{
+       __be32 *p;
+       int err;
+
+       /* cb->initiator_addrtype */
+       p = xdr_reserve_space(xdr, 8);
+       if (p == NULL)
+               return -ENOSPC;
+       xdr_encode_hyper(p, cb->initiator_addrtype);
+
+       /* cb->initiator_address */
+       err = gssx_enc_buffer(xdr, &cb->initiator_address);
+       if (err)
+               return err;
+
+       /* cb->acceptor_addrtype */
+       p = xdr_reserve_space(xdr, 8);
+       if (p == NULL)
+               return -ENOSPC;
+       xdr_encode_hyper(p, cb->acceptor_addrtype);
+
+       /* cb->acceptor_address */
+       err = gssx_enc_buffer(xdr, &cb->acceptor_address);
+       if (err)
+               return err;
+
+       /* cb->application_data */
+       return gssx_enc_buffer(xdr, &cb->application_data);
+}
+
+/*
+ * Encode the arguments of an accept_sec_context call: call_ctx,
+ * context_handle, cred_handle, input_token, input_cb, ret_deleg_cred and
+ * an (empty) option array.  A NULL context_handle, cred_handle or
+ * input_cb is sent as a single zero word.
+ *
+ * The function returns void, so a failure cannot be handed back to the
+ * caller: encoding stops at the first error, which is reported through
+ * dprintk().
+ */
+void gssx_enc_accept_sec_context(struct rpc_rqst *req,
+                                struct xdr_stream *xdr,
+                                struct gssx_arg_accept_sec_context *arg)
+{
+       int err;
+
+       err = gssx_enc_call_ctx(xdr, &arg->call_ctx);
+       if (err)
+               goto done;
+
+       /* arg->context_handle */
+       if (arg->context_handle)
+               err = gssx_enc_ctx(xdr, arg->context_handle);
+       else
+               err = gssx_enc_bool(xdr, 0);
+       if (err)
+               goto done;
+
+       /* arg->cred_handle */
+       if (arg->cred_handle)
+               err = gssx_enc_cred(xdr, arg->cred_handle);
+       else
+               err = gssx_enc_bool(xdr, 0);
+       if (err)
+               goto done;
+
+       /* arg->input_token */
+       err = gssx_enc_in_token(xdr, &arg->input_token);
+       if (err)
+               goto done;
+
+       /* arg->input_cb */
+       if (arg->input_cb)
+               err = gssx_enc_cb(xdr, arg->input_cb);
+       else
+               err = gssx_enc_bool(xdr, 0);
+       if (err)
+               goto done;
+
+       err = gssx_enc_bool(xdr, arg->ret_deleg_cred);
+       if (err)
+               goto done;
+
+       /* leave options empty for now, will add once we have any options
+        * to pass up at all */
+       /* arg->options */
+       err = dummy_enc_opt_array(xdr, &arg->options);
+
+done:
+       if (err)
+               dprintk("RPC:       gssx_enc_accept_sec_context: %d\n", err);
+}
+
+/*
+ * Decode the result of an accept_sec_context call: status, optional
+ * context_handle, optional output_token, and the option array.
+ *
+ * When an optional field is present it is decoded through the pointer
+ * already stored in @res, so the caller is expected to have pointed
+ * res->context_handle and res->output_token at storage beforehand; when
+ * it is absent the pointer is set to NULL.  A delegated credential handle
+ * in the reply is not supported and is rejected.
+ *
+ * Returns 0, -EINVAL for a delegated credential handle, or the first
+ * decoder error.
+ */
+int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
+                               struct xdr_stream *xdr,
+                               struct gssx_res_accept_sec_context *res)
+{
+       int err = gssx_dec_status(xdr, &res->status);
+
+       /* res->status */
+       if (err)
+               return err;
+
+       /* res->context_handle */
+       if (!gssx_check_pointer(xdr)) {
+               res->context_handle = NULL;
+       } else {
+               err = gssx_dec_ctx(xdr, res->context_handle);
+               if (err)
+                       return err;
+       }
+
+       /* res->output_token */
+       if (!gssx_check_pointer(xdr)) {
+               res->output_token = NULL;
+       } else {
+               err = gssx_dec_buffer(xdr, res->output_token);
+               if (err)
+                       return err;
+       }
+
+       /* res->delegated_cred_handle: we do not support upcall servers
+        * sending this data. */
+       if (gssx_check_pointer(xdr))
+               return -EINVAL;
+
+       /* res->options */
+       return gssx_dec_option_array(xdr, &res->options);
+}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.h b/net/sunrpc/auth_gss/gss_rpc_xdr.h
new file mode 100644 (file)
index 0000000..1c98b27
--- /dev/null
@@ -0,0 +1,264 @@
+/*
+ * GSS Proxy upcall module
+ *
+ *  Copyright (C) 2012 Simo Sorce <simo@redhat.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef _LINUX_GSS_RPC_XDR_H
+#define _LINUX_GSS_RPC_XDR_H
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/clnt.h>
+#include <linux/sunrpc/xprtsock.h>
+
+#ifdef RPC_DEBUG
+# define RPCDBG_FACILITY       RPCDBG_AUTH
+#endif
+
+/* Option names and values that gssx_enc_call_ctx() sends with every
+ * call.  CREDS_VALUE is also the option name gssx_dec_option_array()
+ * looks for in the reply. */
+#define LUCID_OPTION "exported_context_type"
+#define LUCID_VALUE  "linux_lucid_v1"
+#define CREDS_OPTION "exported_creds_type"
+#define CREDS_VALUE  "linux_creds_v1"
+
+/* The variable-length protocol types are all carried as a struct
+ * xdr_netobj (length plus data pointer). */
+typedef struct xdr_netobj gssx_buffer;
+typedef struct xdr_netobj utf8string;
+typedef struct xdr_netobj gssx_OID;
+
+/* Credential usage values; GSSX_C_BOTH equals INITIATE | ACCEPT.
+ * NOTE(review): presumably the values stored in
+ * gssx_cred_element.cred_usage - no visible code uses them; confirm. */
+enum gssx_cred_usage {
+       GSSX_C_INITIATE = 1,
+       GSSX_C_ACCEPT = 2,
+       GSSX_C_BOTH = 3,
+};
+
+/* One option: a name buffer and a value buffer, encoded in that order. */
+struct gssx_option {
+       gssx_buffer option;
+       gssx_buffer value;
+};
+
+/* Counted array of options. */
+struct gssx_option_array {
+       u32 count;                      /* number of entries in data */
+       struct gssx_option *data;
+};
+
+/* Status block of a reply; gssx_dec_status() decodes the members in
+ * declaration order. */
+struct gssx_status {
+       u64 major_status;
+       gssx_OID mech;
+       u64 minor_status;
+       utf8string major_status_string;
+       utf8string minor_status_string;
+       gssx_buffer server_ctx;
+       struct gssx_option_array options;       /* consumed, never kept */
+};
+
+/* Per-call context sent with a request. */
+struct gssx_call_ctx {
+       utf8string locale;
+       gssx_buffer server_ctx;
+       /* not read by gssx_enc_call_ctx(), which always sends its own
+        * fixed pair of options */
+       struct gssx_option_array options;
+};
+
+/* One name attribute: attribute buffer, value buffer and extensions. */
+struct gssx_name_attr {
+       gssx_buffer attr;
+       gssx_buffer value;
+       struct gssx_option_array extensions;
+};
+
+/* Counted array of name attributes. */
+struct gssx_name_attr_array {
+       u32 count;                      /* number of entries in data */
+       struct gssx_name_attr *data;
+};
+
+/* Kernel-side view of a name: only display_name is stored.  The other
+ * wire fields are sent empty by gssx_enc_name() and dropped by
+ * gssx_dec_name(). */
+struct gssx_name {
+       gssx_buffer display_name;
+};
+typedef struct gssx_name gssx_name;
+
+/* One credential element.
+ * NOTE(review): no visible encoder or decoder touches these members;
+ * dummy_enc_credel_array() only accepts an empty element array. */
+struct gssx_cred_element {
+       gssx_name MN;
+       gssx_OID mech;
+       u32 cred_usage;
+       u64 initiator_time_rec;
+       u64 acceptor_time_rec;
+       struct gssx_option_array options;
+};
+
+/* Counted array of credential elements. */
+struct gssx_cred_element_array {
+       u32 count;                      /* number of entries in data */
+       struct gssx_cred_element *data;
+};
+
+/* Credential handle; gssx_enc_cred() encodes the members in declaration
+ * order. */
+struct gssx_cred {
+       gssx_name desired_name;
+       struct gssx_cred_element_array elements;        /* must be empty */
+       gssx_buffer cred_handle_reference;
+       u32 needs_release;              /* sent as an XDR boolean */
+};
+
+/* Security context; gssx_enc_ctx() and gssx_dec_ctx() handle the members
+ * in declaration order.  The u32 flags travel as XDR booleans. */
+struct gssx_ctx {
+       gssx_buffer exported_context_token;
+       gssx_buffer state;
+       u32 need_release;
+       gssx_OID mech;
+       gssx_name src_name;
+       gssx_name targ_name;
+       u64 lifetime;
+       u64 ctx_flags;
+       u32 locally_initiated;
+       u32 open;
+       struct gssx_option_array options;
+};
+
+/* Channel bindings; gssx_enc_cb() encodes the members in declaration
+ * order. */
+struct gssx_cb {
+       u64 initiator_addrtype;
+       gssx_buffer initiator_address;
+       u64 acceptor_addrtype;
+       gssx_buffer acceptor_address;
+       gssx_buffer application_data;
+};
+
+
+/* This structure is not defined in the protocol.
+ * It is used in the kernel to carry around a big buffer
+ * as a set of pages.  gssx_enc_in_token() sends page_len as the length
+ * word and hands the pages to xdr_write_pages(). */
+struct gssp_in_token {
+       struct page **pages;    /* Array of contiguous pages */
+       unsigned int page_base; /* Start of page data */
+       unsigned int page_len;  /* Length of page data */
+};
+
+/* Arguments of accept_sec_context, encoded by
+ * gssx_enc_accept_sec_context().  The pointer members are optional: a
+ * NULL pointer is sent as a single zero word. */
+struct gssx_arg_accept_sec_context {
+       struct gssx_call_ctx call_ctx;
+       struct gssx_ctx *context_handle;
+       struct gssx_cred *cred_handle;
+       struct gssp_in_token input_token;
+       struct gssx_cb *input_cb;
+       u32 ret_deleg_cred;
+       struct gssx_option_array options;       /* must be empty */
+};
+
+/* Result of accept_sec_context, filled by gssx_dec_accept_sec_context().
+ * The decoder writes through context_handle and output_token when the
+ * reply carries those fields and sets them to NULL when it does not. */
+struct gssx_res_accept_sec_context {
+       struct gssx_status status;
+       struct gssx_ctx *context_handle;
+       gssx_buffer *output_token;
+       /* struct gssx_cred *delegated_cred_handle; not used in kernel */
+       struct gssx_option_array options;
+};
+
+
+
+/* Encoder/decoder names for each protocol call.  Only accept_sec_context
+ * has real functions; every other call maps to NULL. */
+#define gssx_enc_indicate_mechs NULL
+#define gssx_dec_indicate_mechs NULL
+#define gssx_enc_get_call_context NULL
+#define gssx_dec_get_call_context NULL
+#define gssx_enc_import_and_canon_name NULL
+#define gssx_dec_import_and_canon_name NULL
+#define gssx_enc_export_cred NULL
+#define gssx_dec_export_cred NULL
+#define gssx_enc_import_cred NULL
+#define gssx_dec_import_cred NULL
+#define gssx_enc_acquire_cred NULL
+#define gssx_dec_acquire_cred NULL
+#define gssx_enc_store_cred NULL
+#define gssx_dec_store_cred NULL
+#define gssx_enc_init_sec_context NULL
+#define gssx_dec_init_sec_context NULL
+void gssx_enc_accept_sec_context(struct rpc_rqst *req,
+                                struct xdr_stream *xdr,
+                                struct gssx_arg_accept_sec_context *args);
+int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
+                               struct xdr_stream *xdr,
+                               struct gssx_res_accept_sec_context *res);
+#define gssx_enc_release_handle NULL
+#define gssx_dec_release_handle NULL
+#define gssx_enc_get_mic NULL
+#define gssx_dec_get_mic NULL
+#define gssx_enc_verify NULL
+#define gssx_dec_verify NULL
+#define gssx_enc_wrap NULL
+#define gssx_dec_wrap NULL
+#define gssx_enc_unwrap NULL
+#define gssx_dec_unwrap NULL
+#define gssx_enc_wrap_size_limit NULL
+#define gssx_dec_wrap_size_limit NULL
+
+/* non implemented calls are set to 0 size */
+#define GSSX_ARG_indicate_mechs_sz 0
+#define GSSX_RES_indicate_mechs_sz 0
+#define GSSX_ARG_get_call_context_sz 0
+#define GSSX_RES_get_call_context_sz 0
+#define GSSX_ARG_import_and_canon_name_sz 0
+#define GSSX_RES_import_and_canon_name_sz 0
+#define GSSX_ARG_export_cred_sz 0
+#define GSSX_RES_export_cred_sz 0
+#define GSSX_ARG_import_cred_sz 0
+#define GSSX_RES_import_cred_sz 0
+#define GSSX_ARG_acquire_cred_sz 0
+#define GSSX_RES_acquire_cred_sz 0
+#define GSSX_ARG_store_cred_sz 0
+#define GSSX_RES_store_cred_sz 0
+#define GSSX_ARG_init_sec_context_sz 0
+#define GSSX_RES_init_sec_context_sz 0
+
+/* Size estimate for the accept_sec_context arguments.  sizeof() on the
+ * option strings counts the terminating NUL, matching the lengths that
+ * gssx_enc_call_ctx() puts on the wire.
+ * NOTE(review): the sums do not round opaque data up to 4-byte XDR
+ * boundaries, and the unit expected by the users of these macros is not
+ * visible here - confirm. */
+#define GSSX_default_in_call_ctx_sz (4 + 4 + 4 + \
+                       8 + sizeof(LUCID_OPTION) + sizeof(LUCID_VALUE) + \
+                       8 + sizeof(CREDS_OPTION) + sizeof(CREDS_VALUE))
+#define GSSX_default_in_ctx_hndl_sz (4 + 4+8 + 4 + 4 + 6*4 + 6*4 + 8 + 8 + \
+                                       4 + 4 + 4)
+#define GSSX_default_in_cred_sz 4 /* we send in no cred_handle */
+#define GSSX_default_in_token_sz 4 /* does *not* include token data */
+#define GSSX_default_in_cb_sz 4 /* we do not use channel bindings */
+#define GSSX_ARG_accept_sec_context_sz (GSSX_default_in_call_ctx_sz + \
+                                       GSSX_default_in_ctx_hndl_sz + \
+                                       GSSX_default_in_cred_sz + \
+                                       GSSX_default_in_token_sz + \
+                                       GSSX_default_in_cb_sz + \
+                                       4 /* no deleg creds boolean */ + \
+                                       4) /* empty options */
+
+/* somewhat arbitrary numbers but large enough (we ignore some of the data
+ * sent down, but it is part of the protocol so we need enough space to take
+ * it in).  The sum is parenthesised so that the macro expands to a single
+ * value in any expression, e.g. when multiplied or subtracted. */
+#define GSSX_default_status_sz (8 + 24 + 8 + 256 + 256 + 16 + 4)
+/* Upper bounds assumed for the variable-size parts of a reply. */
+#define GSSX_max_output_handle_sz 128
+#define GSSX_max_oid_sz 16
+#define GSSX_max_princ_sz 256
+#define GSSX_default_ctx_sz (GSSX_max_output_handle_sz + \
+                            16 + 4 + GSSX_max_oid_sz + \
+                            2 * GSSX_max_princ_sz + \
+                            8 + 8 + 4 + 4 + 4)
+#define GSSX_max_output_token_sz 1024
+/* three 4-byte words plus up to NGROUPS_MAX 4-byte gids; this looks like
+ * the uid, gid, count, gids layout parsed by gssx_dec_linux_creds() */
+#define GSSX_max_creds_sz (4 + 4 + 4 + NGROUPS_MAX * 4)
+#define GSSX_RES_accept_sec_context_sz (GSSX_default_status_sz + \
+                                       GSSX_default_ctx_sz + \
+                                       GSSX_max_output_token_sz + \
+                                       4 + GSSX_max_creds_sz)
+
+/* The remaining calls have NULL encoders/decoders above, so their sizes
+ * are 0 as well. */
+#define GSSX_ARG_release_handle_sz 0
+#define GSSX_RES_release_handle_sz 0
+#define GSSX_ARG_get_mic_sz 0
+#define GSSX_RES_get_mic_sz 0
+#define GSSX_ARG_verify_sz 0
+#define GSSX_RES_verify_sz 0
+#define GSSX_ARG_wrap_sz 0
+#define GSSX_RES_wrap_sz 0
+#define GSSX_ARG_unwrap_sz 0
+#define GSSX_RES_unwrap_sz 0
+#define GSSX_ARG_wrap_size_limit_sz 0
+#define GSSX_RES_wrap_size_limit_sz 0
+
+
+
+#endif /* _LINUX_GSS_RPC_XDR_H */
index c3ba570..871c73c 100644 (file)
@@ -48,8 +48,8 @@
 #include <linux/sunrpc/svcauth.h>
 #include <linux/sunrpc/svcauth_gss.h>
 #include <linux/sunrpc/cache.h>
+#include "gss_rpc_upcall.h"
 
-#include "../netns.h"
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY       RPCDBG_AUTH
@@ -497,7 +497,8 @@ static int rsc_parse(struct cache_detail *cd,
                len = qword_get(&mesg, buf, mlen);
                if (len < 0)
                        goto out;
-               status = gss_import_sec_context(buf, len, gm, &rsci.mechctx, GFP_KERNEL);
+               status = gss_import_sec_context(buf, len, gm, &rsci.mechctx,
+                                               NULL, GFP_KERNEL);
                if (status)
                        goto out;
 
@@ -505,8 +506,10 @@ static int rsc_parse(struct cache_detail *cd,
                len = qword_get(&mesg, buf, mlen);
                if (len > 0) {
                        rsci.cred.cr_principal = kstrdup(buf, GFP_KERNEL);
-                       if (!rsci.cred.cr_principal)
+                       if (!rsci.cred.cr_principal) {
+                               status = -ENOMEM;
                                goto out;
+                       }
                }
 
        }
@@ -987,13 +990,10 @@ gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
 }
 
 static inline int
-gss_read_verf(struct rpc_gss_wire_cred *gc,
-             struct kvec *argv, __be32 *authp,
-             struct xdr_netobj *in_handle,
-             struct xdr_netobj *in_token)
+gss_read_common_verf(struct rpc_gss_wire_cred *gc,
+                    struct kvec *argv, __be32 *authp,
+                    struct xdr_netobj *in_handle)
 {
-       struct xdr_netobj tmpobj;
-
        /* Read the verifier; should be NULL: */
        *authp = rpc_autherr_badverf;
        if (argv->iov_len < 2 * 4)
@@ -1009,6 +1009,23 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
        if (dup_netobj(in_handle, &gc->gc_ctx))
                return SVC_CLOSE;
        *authp = rpc_autherr_badverf;
+
+       return 0;
+}
+
+static inline int
+gss_read_verf(struct rpc_gss_wire_cred *gc,
+             struct kvec *argv, __be32 *authp,
+             struct xdr_netobj *in_handle,
+             struct xdr_netobj *in_token)
+{
+       struct xdr_netobj tmpobj;
+       int res;
+
+       res = gss_read_common_verf(gc, argv, authp, in_handle);
+       if (res)
+               return res;
+
        if (svc_safe_getnetobj(argv, &tmpobj)) {
                kfree(in_handle->data);
                return SVC_DENIED;
@@ -1021,6 +1038,40 @@ gss_read_verf(struct rpc_gss_wire_cred *gc,
        return 0;
 }
 
+/* This depends heavily on how svc_recv sets up rqstp and lays down pages
+ * when reading a request: we are basically guaranteed that the token lies
+ * linearly across rq_pages[], starting at rq_arg.head[0].iov_base (which
+ * is in the first of those pages).  The offset of iov_base within its
+ * page is the page_base we pass to the upcall.
+ */
+static inline int
+gss_read_proxy_verf(struct svc_rqst *rqstp,
+                   struct rpc_gss_wire_cred *gc, __be32 *authp,
+                   struct xdr_netobj *in_handle,
+                   struct gssp_in_token *in_token)
+{
+       struct kvec *argv = &rqstp->rq_arg.head[0];
+       u32 inlen;
+       int res;
+
+       res = gss_read_common_verf(gc, argv, authp, in_handle);
+       if (res)
+               return res;
+
+       inlen = svc_getnl(argv);
+       if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
+               /* don't leak the handle copy made by the common helper */
+               kfree(in_handle->data);
+               return SVC_DENIED;
+       }
+
+       in_token->pages = rqstp->rq_pages;
+       in_token->page_base = (ulong)argv->iov_base & ~PAGE_MASK;
+       in_token->page_len = inlen;
+
+       return 0;
+}
+
 static inline int
 gss_write_resv(struct kvec *resv, size_t size_limit,
               struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
@@ -1048,7 +1099,7 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
  * the upcall results are available, write the verifier and result.
  * Otherwise, drop the request pending an answer to the upcall.
  */
-static int svcauth_gss_handle_init(struct svc_rqst *rqstp,
+static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
                        struct rpc_gss_wire_cred *gc, __be32 *authp)
 {
        struct kvec *argv = &rqstp->rq_arg.head[0];
@@ -1088,6 +1139,287 @@ out:
        return ret;
 }
 
+/* Store an established gss-proxy context in the rsc cache under a freshly
+ * generated handle, which is also handed back through *handle.  The creds
+ * in *ud are moved into the cache entry.  Returns 0 or a negative errno.
+ */
+static int gss_proxy_save_rsc(struct cache_detail *cd,
+                               struct gssp_upcall_data *ud,
+                               uint64_t *handle)
+{
+       struct rsc rsci, *rscp = NULL;
+       static atomic64_t ctxhctr;
+       long long ctxh;
+       struct gss_api_mech *gm = NULL;
+       time_t expiry;
+       int status = -ENOMEM;
+
+       memset(&rsci, 0, sizeof(rsci));
+       /* the handle just needs to be a unique id, use a static counter */
+       ctxh = atomic64_inc_return(&ctxhctr);
+
+       /* make a copy for the caller */
+       *handle = ctxh;
+
+       /* make a copy for the rsc cache */
+       if (dup_to_netobj(&rsci.handle, (char *)handle, sizeof(uint64_t)))
+               goto out;
+       rscp = rsc_lookup(cd, &rsci);
+       if (!rscp)
+               goto out;
+
+       /* creds */
+       if (!ud->found_creds) {
+               /* userspace seems buggy, we should always get at least a
+                * mapping to nobody; don't cache with an unset expiry */
+               dprintk("RPC:       No creds found!\n");
+               status = -EINVAL;
+               goto out;
+       } else {
+               /* steal creds */
+               rsci.cred = ud->creds;
+               memset(&ud->creds, 0, sizeof(struct svc_cred));
+
+               status = -EOPNOTSUPP;
+               /* get mech handle from OID */
+               gm = gss_mech_get_by_OID(&ud->mech_oid);
+               if (!gm)
+                       goto out;
+
+               /* mech-specific data: */
+               status = gss_import_sec_context(ud->out_handle.data,
+                                               ud->out_handle.len,
+                                               gm, &rsci.mechctx,
+                                               &expiry, GFP_KERNEL);
+               if (status)
+                       goto out;
+       }
+
+       rsci.h.expiry_time = expiry;
+       rscp = rsc_update(cd, &rsci, rscp);
+       status = 0;
+out:
+       gss_mech_put(gm);
+       rsc_free(&rsci);
+       if (rscp)
+               cache_put(&rscp->h, cd);
+       else
+               status = -ENOMEM;
+       return status;
+}
+
+static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
+                       struct rpc_gss_wire_cred *gc, __be32 *authp)
+{
+       struct kvec *resv = &rqstp->rq_res.head[0];
+       struct xdr_netobj cli_handle;
+       struct gssp_upcall_data ud;
+       uint64_t handle;
+       int status;
+       int ret;
+       struct net *net = rqstp->rq_xprt->xpt_net;
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       memset(&ud, 0, sizeof(ud));
+       ret = gss_read_proxy_verf(rqstp, gc, authp,
+                                 &ud.in_handle, &ud.in_token);
+       if (ret)
+               return ret;
+
+       ret = SVC_CLOSE;
+
+       /* Perform synchronous upcall to gss-proxy */
+       status = gssp_accept_sec_context_upcall(net, &ud);
+       if (status)
+               goto out;
+
+       dprintk("RPC:       svcauth_gss: gss major status = %d\n",
+                       ud.major_status);
+
+       switch (ud.major_status) {
+       case GSS_S_CONTINUE_NEEDED:
+               cli_handle = ud.out_handle;
+               break;
+       case GSS_S_COMPLETE:
+               status = gss_proxy_save_rsc(sn->rsc_cache, &ud, &handle);
+               if (status)
+                       goto out;
+               cli_handle.data = (u8 *)&handle;
+               cli_handle.len = sizeof(handle);
+               break;
+       default:
+               ret = SVC_CLOSE;
+               goto out;
+       }
+
+       /* Got an answer to the upcall; use it: */
+       if (gss_write_init_verf(sn->rsc_cache, rqstp,
+                               &cli_handle, &ud.major_status))
+               goto out;
+       if (gss_write_resv(resv, PAGE_SIZE,
+                          &cli_handle, &ud.out_token,
+                          ud.major_status, ud.minor_status))
+               goto out;
+
+       ret = SVC_COMPLETE;
+out:
+       gssp_free_upcall_data(&ud);
+       return ret;
+}
+
+DEFINE_SPINLOCK(use_gssp_lock);
+
+static bool use_gss_proxy(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       if (sn->use_gss_proxy != -1)
+               return sn->use_gss_proxy;
+       spin_lock(&use_gssp_lock);
+       /* If you wanted gss-proxy, you should have said so before starting
+        * to accept requests.  Recheck under the lock so that a racing
+        * set_gss_proxy() which already picked a mode is not overwritten. */
+       if (sn->use_gss_proxy == -1)
+               sn->use_gss_proxy = 0;
+       spin_unlock(&use_gssp_lock);
+       return sn->use_gss_proxy;
+}
+
+#ifdef CONFIG_PROC_FS
+
+static int set_gss_proxy(struct net *net, int type)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       int ret = 0;
+
+       WARN_ON_ONCE(type != 0 && type != 1);
+       spin_lock(&use_gssp_lock);
+       if (sn->use_gss_proxy == -1 || sn->use_gss_proxy == type)
+               sn->use_gss_proxy = type;
+       else
+               ret = -EBUSY;   /* a different mode was already chosen */
+       spin_unlock(&use_gssp_lock);
+       wake_up(&sn->gssp_wq);
+       return ret;     /* int, not bool: write_gssp() returns this errno */
+}
+
+static inline bool gssp_ready(struct sunrpc_net *sn)
+{
+       switch (sn->use_gss_proxy) {
+               case -1:
+                       return false;
+               case 0:
+                       return true;
+               case 1:
+                       return sn->gssp_clnt;
+       }
+       WARN_ON_ONCE(1);
+       return false;
+}
+
+static int wait_for_gss_proxy(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       return wait_event_interruptible(sn->gssp_wq, gssp_ready(sn));
+}
+
+
+static ssize_t write_gssp(struct file *file, const char __user *buf,
+                        size_t count, loff_t *ppos)
+{
+       struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
+       char tbuf[20];
+       unsigned long i;
+       int res;
+
+       if (*ppos || count > sizeof(tbuf)-1)
+               return -EINVAL;
+       if (copy_from_user(tbuf, buf, count))
+               return -EFAULT;
+
+       tbuf[count] = 0;
+       res = kstrtoul(tbuf, 0, &i);
+       if (res)
+               return res;
+       if (i != 1)
+               return -EINVAL;
+       res = set_gss_proxy(net, 1);
+       if (res)
+               return res;
+       res = set_gssp_clnt(net);
+       if (res)
+               return res;
+       return count;
+}
+
+static ssize_t read_gssp(struct file *file, char __user *buf,
+                        size_t count, loff_t *ppos)
+{
+       struct net *net = PDE_DATA(file->f_path.dentry->d_inode);
+       unsigned long p = *ppos;
+       char tbuf[10];
+       size_t len;
+       int ret;
+
+       ret = wait_for_gss_proxy(net);
+       if (ret)
+               return ret;
+
+       snprintf(tbuf, sizeof(tbuf), "%d\n", use_gss_proxy(net));
+       len = strlen(tbuf);
+       if (p >= len)
+               return 0;
+       len -= p;
+       if (len > count)
+               len = count;
+       if (copy_to_user(buf, (void *)(tbuf+p), len))
+               return -EFAULT;
+       *ppos += len;
+       return len;
+}
+
+static const struct file_operations use_gss_proxy_ops = {
+       .open = nonseekable_open,
+       .write = write_gssp,
+       .read = read_gssp,
+};
+
+static int create_use_gss_proxy_proc_entry(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+       struct proc_dir_entry **p = &sn->use_gssp_proc;
+
+       sn->use_gss_proxy = -1;
+       *p = proc_create_data("use-gss-proxy", S_IFREG|S_IRUSR|S_IWUSR,
+                             sn->proc_net_rpc,
+                             &use_gss_proxy_ops, net);
+       if (!*p)
+               return -ENOMEM;
+       init_gssp_clnt(sn);
+       return 0;
+}
+
+static void destroy_use_gss_proxy_proc_entry(struct net *net)
+{
+       struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+       if (sn->use_gssp_proc) {
+               remove_proc_entry("use-gss-proxy", sn->proc_net_rpc); 
+               clear_gssp_clnt(sn);
+       }
+}
+#else /* CONFIG_PROC_FS */
+
+static int create_use_gss_proxy_proc_entry(struct net *net)
+{
+       return 0;
+}
+
+static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
+
+#endif /* CONFIG_PROC_FS */
+
 /*
  * Accept an rpcsec packet.
  * If context establishment, punt to user space
@@ -1154,7 +1486,10 @@ svcauth_gss_accept(struct svc_rqst *rqstp, __be32 *authp)
        switch (gc->gc_proc) {
        case RPC_GSS_PROC_INIT:
        case RPC_GSS_PROC_CONTINUE_INIT:
-               return svcauth_gss_handle_init(rqstp, gc, authp);
+               if (use_gss_proxy(SVC_NET(rqstp)))
+                       return svcauth_gss_proxy_init(rqstp, gc, authp);
+               else
+                       return svcauth_gss_legacy_init(rqstp, gc, authp);
        case RPC_GSS_PROC_DATA:
        case RPC_GSS_PROC_DESTROY:
                /* Look up the context, and check the verifier: */
@@ -1531,7 +1866,12 @@ gss_svc_init_net(struct net *net)
        rv = rsi_cache_create_net(net);
        if (rv)
                goto out1;
+       rv = create_use_gss_proxy_proc_entry(net);
+       if (rv)
+               goto out2;
        return 0;
+out2:
+       destroy_use_gss_proxy_proc_entry(net);
 out1:
        rsc_cache_destroy_net(net);
        return rv;
@@ -1540,6 +1880,7 @@ out1:
 void
 gss_svc_shutdown_net(struct net *net)
 {
+       destroy_use_gss_proxy_proc_entry(net);
        rsi_cache_destroy_net(net);
        rsc_cache_destroy_net(net);
 }
index f1889be..80fe5c8 100644 (file)
@@ -986,8 +986,10 @@ static int cache_open(struct inode *inode, struct file *filp,
        nonseekable_open(inode, filp);
        if (filp->f_mode & FMODE_READ) {
                rp = kmalloc(sizeof(*rp), GFP_KERNEL);
-               if (!rp)
+               if (!rp) {
+                       module_put(cd->owner);
                        return -ENOMEM;
+               }
                rp->offset = 0;
                rp->q.reader = 1;
                atomic_inc(&cd->readers);
index d259fa9..3f7930f 100644 (file)
@@ -413,6 +413,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
 
        if (args->flags & RPC_CLNT_CREATE_INFINITE_SLOTS)
                xprtargs.flags |= XPRT_CREATE_INFINITE_SLOTS;
+       if (args->flags & RPC_CLNT_CREATE_NO_IDLE_TIMEOUT)
+               xprtargs.flags |= XPRT_CREATE_NO_IDLE_TIMEOUT;
        /*
         * If the caller chooses not to specify a hostname, whip
         * up a string representation of the passed-in address.
@@ -681,6 +683,7 @@ rpc_release_client(struct rpc_clnt *clnt)
        if (atomic_dec_and_test(&clnt->cl_count))
                rpc_free_auth(clnt);
 }
+EXPORT_SYMBOL_GPL(rpc_release_client);
 
 /**
  * rpc_bind_new_program - bind a new RPC program to an existing client
index ce7bd44..7111a4c 100644 (file)
@@ -23,6 +23,12 @@ struct sunrpc_net {
        struct rpc_clnt *rpcb_local_clnt4;
        spinlock_t rpcb_clnt_lock;
        unsigned int rpcb_users;
+
+       struct mutex gssp_lock;
+       wait_queue_head_t gssp_wq;
+       struct rpc_clnt *gssp_clnt;
+       int use_gss_proxy;
+       struct proc_dir_entry *use_gssp_proc;
 };
 
 extern int sunrpc_net_id;
index 745fca3..095363e 100644 (file)
@@ -1300,6 +1300,8 @@ found:
                                -PTR_ERR(xprt));
                goto out;
        }
+       if (args->flags & XPRT_CREATE_NO_IDLE_TIMEOUT)
+               xprt->idle_timeout = 0;
        INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
        if (xprt_has_timer(xprt))
                setup_timer(&xprt->timer, xprt_init_autodisconnect,
index 9c28258..ffd5034 100644 (file)
@@ -2655,6 +2655,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
                }
                xprt_set_bound(xprt);
                xs_format_peer_addresses(xprt, "local", RPCBIND_NETID_LOCAL);
+               ret = ERR_PTR(xs_local_setup_socket(transport));
+               if (ret)
+                       goto out_err;
                break;
        default:
                ret = ERR_PTR(-EAFNOSUPPORT);