Merge branch 'for-3.3' of git://linux-nfs.org/~bfields/linux
Linus Torvalds [Sat, 14 Jan 2012 20:26:41 +0000 (12:26 -0800)]
* 'for-3.3' of git://linux-nfs.org/~bfields/linux: (31 commits)
  nfsd4: nfsd4_create_clid_dir return value is unused
  NFSD: Change name of extended attribute containing junction
  svcrpc: don't revert to SVC_POOL_DEFAULT on nfsd shutdown
  svcrpc: fix double-free on shutdown of nfsd after changing pool mode
  nfsd4: be forgiving in the absence of the recovery directory
  nfsd4: fix spurious 4.1 post-reboot failures
  NFSD: forget_delegations should use list_for_each_entry_safe
  NFSD: Only reinitilize the recall_lru list under the recall lock
  nfsd4: initialize special stateid's at compile time
  NFSd: use network-namespace-aware cache registering routines
  SUNRPC: create svc_xprt in proper network namespace
  svcrpc: update outdated BKL comment
  nfsd41: allow non-reclaim open-by-fh's in 4.1
  svcrpc: avoid memory-corruption on pool shutdown
  svcrpc: destroy server sockets all at once
  svcrpc: make svc_delete_xprt static
  nfsd: Fix oops when parsing a 0 length export
  nfsd4: Use kmemdup rather than duplicating its implementation
  nfsd4: add a separate (lockowner, inode) lookup
  nfsd4: fix CONFIG_NFSD_FAULT_INJECTION compile error
  ...

26 files changed:
CREDITS
Documentation/filesystems/nfs/00-INDEX
Documentation/filesystems/nfs/fault_injection.txt [new file with mode: 0644]
MAINTAINERS
fs/nfsd/Kconfig
fs/nfsd/Makefile
fs/nfsd/export.c
fs/nfsd/fault_inject.c [new file with mode: 0644]
fs/nfsd/fault_inject.h [new file with mode: 0644]
fs/nfsd/nfs4idmap.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfs4xdr.c
fs/nfsd/nfsctl.c
fs/nfsd/nfsd.h
fs/nfsd/state.h
fs/nfsd/vfs.c
include/linux/sunrpc/svc_xprt.h
include/linux/sunrpc/svcsock.h
net/sunrpc/cache.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/svcsock.c
net/sunrpc/xprtrdma/svc_rdma_transport.c
tools/nfsd/inject_fault.sh [new file with mode: 0755]

diff --git a/CREDITS b/CREDITS
index 44fce98..370b4c7 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -514,6 +514,11 @@ S: Bessemerstraat 21
 S: Amsterdam
 S: The Netherlands
 
+N: NeilBrown
+E: neil@brown.name
+P: 4096R/566281B9 1BC6 29EB D390 D870 7B5F  497A 39EC 9EDD 5662 81B9
+D: NFSD Maintainer 2000-2007
+
 N: Zach Brown
 E: zab@zabbo.net
 D: maestro pci sound
index a57e124..1716874 100644 (file)
@@ -2,6 +2,8 @@
        - this file (nfs-related documentation).
 Exporting
        - explanation of how to make filesystems exportable.
+fault_injection.txt
+       - information for using fault injection on the server
 knfsd-stats.txt
        - statistics which the NFS server makes available to user space.
 nfs.txt
diff --git a/Documentation/filesystems/nfs/fault_injection.txt b/Documentation/filesystems/nfs/fault_injection.txt
new file mode 100644 (file)
index 0000000..426d166
--- /dev/null
@@ -0,0 +1,69 @@
+
+Fault Injection
+===============
+Fault injection is a method for forcing errors that may not normally occur, or
+may be difficult to reproduce.  Forcing these errors in a controlled environment
+can help the developer find and fix bugs before their code is shipped in a
+production system.  Injecting an error on the Linux NFS server will allow us to
+observe how the client reacts and if it manages to recover its state correctly.
+
+NFSD_FAULT_INJECTION must be selected when configuring the kernel to use this
+feature.
+
+
+Using Fault Injection
+=====================
+On the client, mount the fault injection server through NFS v4.0+ and do some
+work over NFS (open files, take locks, ...).
+
+On the server, mount the debugfs filesystem to <debug_dir> and ls
+<debug_dir>/nfsd.  This will show a list of files that will be used for
+injecting faults on the NFS server.  As root, write a number n to the file
+corresponding to the action you want the server to take.  The server will then
+process the first n items it finds.  So if you want to forget 5 locks, echo '5'
+to <debug_dir>/nfsd/forget_locks.  A value of 0 will tell the server to forget
+all corresponding items.  A log message will be created containing the number
+of items forgotten (check dmesg).
+
+Go back to work on the client and check if the client recovered from the error
+correctly.
+
+
+Available Faults
+================
+forget_clients:
+     The NFS server keeps a list of clients that have placed a mount call.  If
+     this list is cleared, the server will have no knowledge of who the client
+     is, forcing the client to reauthenticate with the server.
+
+forget_openowners:
+     The NFS server keeps a list of what files are currently opened and who
+     they were opened by.  Clearing this list will force the client to reopen
+     its files.
+
+forget_locks:
+     The NFS server keeps a list of what files are currently locked in the VFS.
+     Clearing this list will force the client to reclaim its locks (files are
+     unlocked through the VFS as they are cleared from this list).
+
+forget_delegations:
+     A delegation is used to assure the client that a file, or part of a file,
+     has not changed since the delegation was awarded.  Clearing this list will
+     force the client to reacquire its delegation before accessing the file
+     again.
+
+recall_delegations:
+     Delegations can be recalled by the server when another client attempts to
+     access a file.  This test will notify the client that its delegation has
+     been revoked, forcing the client to reacquire the delegation before using
+     the file again.
+
+
+tools/nfsd/inject_fault.sh script
+=================================
+This script has been created to ease the fault injection process.  This script
+will detect the mounted debugfs directory and write to the files located there
+based on the arguments passed by the user.  For example, running
+`inject_fault.sh forget_locks 1` as root will instruct the server to forget
+one lock.  Running `inject_fault.sh forget_locks` will instruct the server to
+forget all locks.
index 7559c1c..4d1ba20 100644 (file)
@@ -3775,7 +3775,6 @@ S:        Odd Fixes
 
 KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
 M:     "J. Bruce Fields" <bfields@fieldses.org>
-M:     Neil Brown <neilb@suse.de>
 L:     linux-nfs@vger.kernel.org
 W:     http://nfs.sourceforge.net/
 S:     Supported
index 10e6366..8df1ea4 100644 (file)
@@ -80,3 +80,13 @@ config NFSD_V4
          available from http://linux-nfs.org/.
 
          If unsure, say N.
+
+config NFSD_FAULT_INJECTION
+       bool "NFS server manual fault injection"
+       depends on NFSD_V4 && DEBUG_KERNEL
+       help
+         This option enables support for manually injecting faults
+         into the NFS server.  This is intended to be used for
+         testing error recovery on the NFS client.
+
+         If unsure, say N.
index 9b118ee..af32ef0 100644 (file)
@@ -6,6 +6,7 @@ obj-$(CONFIG_NFSD)      += nfsd.o
 
 nfsd-y                         := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
                           export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+nfsd-$(CONFIG_NFSD_FAULT_INJECTION) += fault_inject.o
 nfsd-$(CONFIG_NFSD_V2_ACL) += nfs2acl.o
 nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
 nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
index 62f3b90..cf8a6bd 100644 (file)
@@ -87,7 +87,7 @@ static int expkey_parse(struct cache_detail *cd, char *mesg, int mlen)
        struct svc_expkey key;
        struct svc_expkey *ek = NULL;
 
-       if (mesg[mlen-1] != '\n')
+       if (mlen < 1 || mesg[mlen-1] != '\n')
                return -EINVAL;
        mesg[mlen-1] = 0;
 
@@ -1226,12 +1226,12 @@ nfsd_export_init(void)
        int rv;
        dprintk("nfsd: initializing export module.\n");
 
-       rv = cache_register(&svc_export_cache);
+       rv = cache_register_net(&svc_export_cache, &init_net);
        if (rv)
                return rv;
-       rv = cache_register(&svc_expkey_cache);
+       rv = cache_register_net(&svc_expkey_cache, &init_net);
        if (rv)
-               cache_unregister(&svc_export_cache);
+               cache_unregister_net(&svc_export_cache, &init_net);
        return rv;
 
 }
@@ -1255,8 +1255,8 @@ nfsd_export_shutdown(void)
 
        dprintk("nfsd: shutting down export module.\n");
 
-       cache_unregister(&svc_expkey_cache);
-       cache_unregister(&svc_export_cache);
+       cache_unregister_net(&svc_expkey_cache, &init_net);
+       cache_unregister_net(&svc_export_cache, &init_net);
        svcauth_unix_purge();
 
        dprintk("nfsd: export shutdown complete.\n");
diff --git a/fs/nfsd/fault_inject.c b/fs/nfsd/fault_inject.c
new file mode 100644 (file)
index 0000000..ce7f075
--- /dev/null
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Uses debugfs to create fault injection points for client testing
+ */
+
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+
+#include "state.h"
+#include "fault_inject.h"
+
+struct nfsd_fault_inject_op {
+       char *file;
+       void (*func)(u64);
+};
+
+static struct nfsd_fault_inject_op inject_ops[] = {
+       {
+               .file   = "forget_clients",
+               .func   = nfsd_forget_clients,
+       },
+       {
+               .file   = "forget_locks",
+               .func   = nfsd_forget_locks,
+       },
+       {
+               .file   = "forget_openowners",
+               .func   = nfsd_forget_openowners,
+       },
+       {
+               .file   = "forget_delegations",
+               .func   = nfsd_forget_delegations,
+       },
+       {
+               .file   = "recall_delegations",
+               .func   = nfsd_recall_delegations,
+       },
+};
+
+static long int NUM_INJECT_OPS = sizeof(inject_ops) / sizeof(struct nfsd_fault_inject_op);
+static struct dentry *debug_dir;
+
+static int nfsd_inject_set(void *op_ptr, u64 val)
+{
+       struct nfsd_fault_inject_op *op = op_ptr;
+
+       if (val == 0)
+               printk(KERN_INFO "NFSD Fault Injection: %s (all)", op->file);
+       else
+               printk(KERN_INFO "NFSD Fault Injection: %s (n = %llu)", op->file, val);
+
+       op->func(val);
+       return 0;
+}
+
+static int nfsd_inject_get(void *data, u64 *val)
+{
+       return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(fops_nfsd, nfsd_inject_get, nfsd_inject_set, "%llu\n");
+
+void nfsd_fault_inject_cleanup(void)
+{
+       debugfs_remove_recursive(debug_dir);
+}
+
+int nfsd_fault_inject_init(void)
+{
+       unsigned int i;
+       struct nfsd_fault_inject_op *op;
+       mode_t mode = S_IFREG | S_IRUSR | S_IWUSR;
+
+       debug_dir = debugfs_create_dir("nfsd", NULL);
+       if (!debug_dir)
+               goto fail;
+
+       for (i = 0; i < NUM_INJECT_OPS; i++) {
+               op = &inject_ops[i];
+               if (!debugfs_create_file(op->file, mode, debug_dir, op, &fops_nfsd))
+                       goto fail;
+       }
+       return 0;
+
+fail:
+       nfsd_fault_inject_cleanup();
+       return -ENOMEM;
+}
diff --git a/fs/nfsd/fault_inject.h b/fs/nfsd/fault_inject.h
new file mode 100644 (file)
index 0000000..90bd057
--- /dev/null
@@ -0,0 +1,28 @@
+/*
+ * Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+ *
+ * Function definitions for fault injection
+ */
+
+#ifndef LINUX_NFSD_FAULT_INJECT_H
+#define LINUX_NFSD_FAULT_INJECT_H
+
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+int nfsd_fault_inject_init(void);
+void nfsd_fault_inject_cleanup(void);
+void nfsd_forget_clients(u64);
+void nfsd_forget_locks(u64);
+void nfsd_forget_openowners(u64);
+void nfsd_forget_delegations(u64);
+void nfsd_recall_delegations(u64);
+#else /* CONFIG_NFSD_FAULT_INJECTION */
+static inline int nfsd_fault_inject_init(void) { return 0; }
+static inline void nfsd_fault_inject_cleanup(void) {}
+static inline void nfsd_forget_clients(u64 num) {}
+static inline void nfsd_forget_locks(u64 num) {}
+static inline void nfsd_forget_openowners(u64 num) {}
+static inline void nfsd_forget_delegations(u64 num) {}
+static inline void nfsd_recall_delegations(u64 num) {}
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
+#endif /* LINUX_NFSD_FAULT_INJECT_H */
index 55780a2..9409627 100644 (file)
@@ -36,6 +36,7 @@
 #include <linux/seq_file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
+#include <net/net_namespace.h>
 #include "idmap.h"
 #include "nfsd.h"
 
@@ -466,20 +467,20 @@ nfsd_idmap_init(void)
 {
        int rv;
 
-       rv = cache_register(&idtoname_cache);
+       rv = cache_register_net(&idtoname_cache, &init_net);
        if (rv)
                return rv;
-       rv = cache_register(&nametoid_cache);
+       rv = cache_register_net(&nametoid_cache, &init_net);
        if (rv)
-               cache_unregister(&idtoname_cache);
+               cache_unregister_net(&idtoname_cache, &init_net);
        return rv;
 }
 
 void
 nfsd_idmap_shutdown(void)
 {
-       cache_unregister(&idtoname_cache);
-       cache_unregister(&nametoid_cache);
+       cache_unregister_net(&idtoname_cache, &init_net);
+       cache_unregister_net(&nametoid_cache, &init_net);
 }
 
 static int
index c5e28ed..896da74 100644 (file)
@@ -266,10 +266,6 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_
 {
        __be32 status;
 
-       /* Only reclaims from previously confirmed clients are valid */
-       if ((status = nfs4_check_open_reclaim(&open->op_clientid)))
-               return status;
-
        /* We don't know the target directory, and therefore can not
        * set the change info
        */
@@ -373,6 +369,9 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        break;
                case NFS4_OPEN_CLAIM_PREVIOUS:
                        open->op_openowner->oo_flags |= NFS4_OO_CONFIRMED;
+                       status = nfs4_check_open_reclaim(&open->op_clientid);
+                       if (status)
+                               goto out;
                case NFS4_OPEN_CLAIM_FH:
                case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
                        status = do_open_fhandle(rqstp, &cstate->current_fh,
index 80a0be9..0b3e875 100644 (file)
@@ -117,8 +117,7 @@ out_no_tfm:
        return status;
 }
 
-int
-nfsd4_create_clid_dir(struct nfs4_client *clp)
+void nfsd4_create_clid_dir(struct nfs4_client *clp)
 {
        const struct cred *original_cred;
        char *dname = clp->cl_recdir;
@@ -127,13 +126,14 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
 
        dprintk("NFSD: nfsd4_create_clid_dir for \"%s\"\n", dname);
 
-       if (!rec_file || clp->cl_firststate)
-               return 0;
-
+       if (clp->cl_firststate)
+               return;
        clp->cl_firststate = 1;
+       if (!rec_file)
+               return;
        status = nfs4_save_creds(&original_cred);
        if (status < 0)
-               return status;
+               return;
 
        dir = rec_file->f_path.dentry;
        /* lock the parent */
@@ -144,8 +144,15 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                status = PTR_ERR(dentry);
                goto out_unlock;
        }
-       status = -EEXIST;
        if (dentry->d_inode)
+               /*
+                * In the 4.1 case, where we're called from
+                * reclaim_complete(), records from the previous reboot
+                * may still be left, so this is OK.
+                *
+                * In the 4.0 case, we should never get here; but we may
+                * as well be forgiving and just succeed silently.
+                */
                goto out_put;
        status = mnt_want_write_file(rec_file);
        if (status)
@@ -164,7 +171,6 @@ out_unlock:
                                " and is writeable", status,
                                user_recovery_dirname);
        nfs4_reset_creds(original_cred);
-       return status;
 }
 
 typedef int (recdir_func)(struct dentry *, struct dentry *);
index 9ca16dc..e8c98f0 100644 (file)
 time_t nfsd4_lease = 90;     /* default lease time */
 time_t nfsd4_grace = 90;
 static time_t boot_time;
-static stateid_t zerostateid;             /* bits all 0 */
-static stateid_t onestateid;              /* bits all 1 */
+
+#define all_ones {{~0,~0},~0}
+static const stateid_t one_stateid = {
+       .si_generation = ~0,
+       .si_opaque = all_ones,
+};
+static const stateid_t zero_stateid = {
+       /* all fields zero */
+};
+
 static u64 current_sessionid = 1;
 
-#define ZERO_STATEID(stateid) (!memcmp((stateid), &zerostateid, sizeof(stateid_t)))
-#define ONE_STATEID(stateid)  (!memcmp((stateid), &onestateid, sizeof(stateid_t)))
+#define ZERO_STATEID(stateid) (!memcmp((stateid), &zero_stateid, sizeof(stateid_t)))
+#define ONE_STATEID(stateid)  (!memcmp((stateid), &one_stateid, sizeof(stateid_t)))
 
 /* forward declarations */
 static int check_for_locks(struct nfs4_file *filp, struct nfs4_lockowner *lowner);
@@ -133,21 +141,21 @@ unsigned int max_delegations;
  * Open owner state (share locks)
  */
 
-/* hash tables for open owners */
-#define OPEN_OWNER_HASH_BITS              8
-#define OPEN_OWNER_HASH_SIZE             (1 << OPEN_OWNER_HASH_BITS)
-#define OPEN_OWNER_HASH_MASK             (OPEN_OWNER_HASH_SIZE - 1)
+/* hash tables for lock and open owners */
+#define OWNER_HASH_BITS              8
+#define OWNER_HASH_SIZE             (1 << OWNER_HASH_BITS)
+#define OWNER_HASH_MASK             (OWNER_HASH_SIZE - 1)
 
-static unsigned int open_ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
+static unsigned int ownerstr_hashval(u32 clientid, struct xdr_netobj *ownername)
 {
        unsigned int ret;
 
        ret = opaque_hashval(ownername->data, ownername->len);
        ret += clientid;
-       return ret & OPEN_OWNER_HASH_MASK;
+       return ret & OWNER_HASH_MASK;
 }
 
-static struct list_head        open_ownerstr_hashtbl[OPEN_OWNER_HASH_SIZE];
+static struct list_head        ownerstr_hashtbl[OWNER_HASH_SIZE];
 
 /* hash table for nfs4_file */
 #define FILE_HASH_BITS                   8
@@ -514,6 +522,7 @@ static void unhash_lockowner(struct nfs4_lockowner *lo)
 
        list_del(&lo->lo_owner.so_strhash);
        list_del(&lo->lo_perstateid);
+       list_del(&lo->lo_owner_ino_hash);
        while (!list_empty(&lo->lo_owner.so_stateids)) {
                stp = list_first_entry(&lo->lo_owner.so_stateids,
                                struct nfs4_ol_stateid, st_perstateowner);
@@ -985,12 +994,11 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
        clp = kzalloc(sizeof(struct nfs4_client), GFP_KERNEL);
        if (clp == NULL)
                return NULL;
-       clp->cl_name.data = kmalloc(name.len, GFP_KERNEL);
+       clp->cl_name.data = kmemdup(name.data, name.len, GFP_KERNEL);
        if (clp->cl_name.data == NULL) {
                kfree(clp);
                return NULL;
        }
-       memcpy(clp->cl_name.data, name.data, name.len);
        clp->cl_name.len = name.len;
        return clp;
 }
@@ -1058,7 +1066,6 @@ expire_client(struct nfs4_client *clp)
        spin_unlock(&recall_lock);
        while (!list_empty(&reaplist)) {
                dp = list_entry(reaplist.next, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        while (!list_empty(&clp->cl_openowners)) {
@@ -2301,7 +2308,7 @@ nfsd4_free_slabs(void)
        nfsd4_free_slab(&deleg_slab);
 }
 
-static int
+int
 nfsd4_init_slabs(void)
 {
        openowner_slab = kmem_cache_create("nfsd4_openowners",
@@ -2373,7 +2380,7 @@ static inline void *alloc_stateowner(struct kmem_cache *slab, struct xdr_netobj
 
 static void hash_openowner(struct nfs4_openowner *oo, struct nfs4_client *clp, unsigned int strhashval)
 {
-       list_add(&oo->oo_owner.so_strhash, &open_ownerstr_hashtbl[strhashval]);
+       list_add(&oo->oo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
        list_add(&oo->oo_perclient, &clp->cl_openowners);
 }
 
@@ -2436,7 +2443,9 @@ find_openstateowner_str(unsigned int hashval, struct nfsd4_open *open)
        struct nfs4_stateowner *so;
        struct nfs4_openowner *oo;
 
-       list_for_each_entry(so, &open_ownerstr_hashtbl[hashval], so_strhash) {
+       list_for_each_entry(so, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (!so->so_is_open_owner)
+                       continue;
                if (same_owner_str(so, &open->op_owner, &open->op_clientid)) {
                        oo = openowner(so);
                        renew_client(oo->oo_owner.so_client);
@@ -2580,7 +2589,7 @@ nfsd4_process_open1(struct nfsd4_compound_state *cstate,
        if (open->op_file == NULL)
                return nfserr_jukebox;
 
-       strhashval = open_ownerstr_hashval(clientid->cl_id, &open->op_owner);
+       strhashval = ownerstr_hashval(clientid->cl_id, &open->op_owner);
        oo = find_openstateowner_str(strhashval, open);
        open->op_openowner = oo;
        if (!oo) {
@@ -3123,7 +3132,6 @@ nfs4_laundromat(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
        test_val = nfsd4_lease;
@@ -3718,13 +3726,11 @@ out:
 }
 
 
-/* 
- * Lock owner state (byte-range locks)
- */
 #define LOFF_OVERFLOW(start, len)      ((u64)(len) > ~(u64)(start))
-#define LOCK_HASH_BITS              8
-#define LOCK_HASH_SIZE             (1 << LOCK_HASH_BITS)
-#define LOCK_HASH_MASK             (LOCK_HASH_SIZE - 1)
+
+#define LOCKOWNER_INO_HASH_BITS 8
+#define LOCKOWNER_INO_HASH_SIZE (1 << LOCKOWNER_INO_HASH_BITS)
+#define LOCKOWNER_INO_HASH_MASK (LOCKOWNER_INO_HASH_SIZE - 1)
 
 static inline u64
 end_offset(u64 start, u64 len)
@@ -3746,16 +3752,14 @@ last_byte_offset(u64 start, u64 len)
        return end > start ? end - 1: NFS4_MAX_UINT64;
 }
 
-static inline unsigned int
-lock_ownerstr_hashval(struct inode *inode, u32 cl_id,
-               struct xdr_netobj *ownername)
+static unsigned int lockowner_ino_hashval(struct inode *inode, u32 cl_id, struct xdr_netobj *ownername)
 {
        return (file_hashval(inode) + cl_id
                        + opaque_hashval(ownername->data, ownername->len))
-               & LOCK_HASH_MASK;
+               & LOCKOWNER_INO_HASH_MASK;
 }
 
-static struct list_head        lock_ownerstr_hashtbl[LOCK_HASH_SIZE];
+static struct list_head lockowner_ino_hashtbl[LOCKOWNER_INO_HASH_SIZE];
 
 /*
  * TODO: Linux file offsets are _signed_ 64-bit quantities, which means that
@@ -3809,23 +3813,39 @@ nevermind:
                deny->ld_type = NFS4_WRITE_LT;
 }
 
+static bool same_lockowner_ino(struct nfs4_lockowner *lo, struct inode *inode, clientid_t *clid, struct xdr_netobj *owner)
+{
+       struct nfs4_ol_stateid *lst;
+
+       if (!same_owner_str(&lo->lo_owner, owner, clid))
+               return false;
+       lst = list_first_entry(&lo->lo_owner.so_stateids,
+                              struct nfs4_ol_stateid, st_perstateowner);
+       return lst->st_file->fi_inode == inode;
+}
+
 static struct nfs4_lockowner *
 find_lockowner_str(struct inode *inode, clientid_t *clid,
                struct xdr_netobj *owner)
 {
-       unsigned int hashval = lock_ownerstr_hashval(inode, clid->cl_id, owner);
-       struct nfs4_stateowner *op;
+       unsigned int hashval = lockowner_ino_hashval(inode, clid->cl_id, owner);
+       struct nfs4_lockowner *lo;
 
-       list_for_each_entry(op, &lock_ownerstr_hashtbl[hashval], so_strhash) {
-               if (same_owner_str(op, owner, clid))
-                       return lockowner(op);
+       list_for_each_entry(lo, &lockowner_ino_hashtbl[hashval], lo_owner_ino_hash) {
+               if (same_lockowner_ino(lo, inode, clid, owner))
+                       return lo;
        }
        return NULL;
 }
 
 static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, struct nfs4_client *clp, struct nfs4_ol_stateid *open_stp)
 {
-       list_add(&lo->lo_owner.so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+       struct inode *inode = open_stp->st_file->fi_inode;
+       unsigned int inohash = lockowner_ino_hashval(inode,
+                       clp->cl_clientid.cl_id, &lo->lo_owner.so_owner);
+
+       list_add(&lo->lo_owner.so_strhash, &ownerstr_hashtbl[strhashval]);
+       list_add(&lo->lo_owner_ino_hash, &lockowner_ino_hashtbl[inohash]);
        list_add(&lo->lo_perstateid, &open_stp->st_lockowners);
 }
 
@@ -3834,7 +3854,7 @@ static void hash_lockowner(struct nfs4_lockowner *lo, unsigned int strhashval, s
  * Called in nfsd4_lock - therefore, OPEN and OPEN_CONFIRM (if needed) has 
  * occurred. 
  *
- * strhashval = lock_ownerstr_hashval 
+ * strhashval = ownerstr_hashval
  */
 
 static struct nfs4_lockowner *
@@ -3892,6 +3912,37 @@ static void get_lock_access(struct nfs4_ol_stateid *lock_stp, u32 access)
        __set_bit(access, &lock_stp->st_access_bmap);
 }
 
+__be32 lookup_or_create_lock_state(struct nfsd4_compound_state *cstate, struct nfs4_ol_stateid *ost, struct nfsd4_lock *lock, struct nfs4_ol_stateid **lst, bool *new)
+{
+       struct nfs4_file *fi = ost->st_file;
+       struct nfs4_openowner *oo = openowner(ost->st_stateowner);
+       struct nfs4_client *cl = oo->oo_owner.so_client;
+       struct nfs4_lockowner *lo;
+       unsigned int strhashval;
+
+       lo = find_lockowner_str(fi->fi_inode, &cl->cl_clientid, &lock->v.new.owner);
+       if (lo) {
+               if (!cstate->minorversion)
+                       return nfserr_bad_seqid;
+               /* XXX: a lockowner always has exactly one stateid: */
+               *lst = list_first_entry(&lo->lo_owner.so_stateids,
+                               struct nfs4_ol_stateid, st_perstateowner);
+               return nfs_ok;
+       }
+       strhashval = ownerstr_hashval(cl->cl_clientid.cl_id,
+                       &lock->v.new.owner);
+       lo = alloc_init_lock_stateowner(strhashval, cl, ost, lock);
+       if (lo == NULL)
+               return nfserr_jukebox;
+       *lst = alloc_init_lock_stateid(lo, fi, ost);
+       if (*lst == NULL) {
+               release_lockowner(lo);
+               return nfserr_jukebox;
+       }
+       *new = true;
+       return nfs_ok;
+}
+
 /*
  *  LOCK operation 
  */
@@ -3907,7 +3958,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
        struct file_lock file_lock;
        struct file_lock conflock;
        __be32 status = 0;
-       unsigned int strhashval;
+       bool new_state = false;
        int lkflg;
        int err;
 
@@ -3933,10 +3984,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                 * lock stateid.
                 */
                struct nfs4_ol_stateid *open_stp = NULL;
-               
+
+               if (nfsd4_has_session(cstate))
+                       /* See rfc 5661 18.10.3: given clientid is ignored: */
+                       memcpy(&lock->v.new.clientid,
+                               &cstate->session->se_client->cl_clientid,
+                               sizeof(clientid_t));
+
                status = nfserr_stale_clientid;
-               if (!nfsd4_has_session(cstate) &&
-                   STALE_CLIENTID(&lock->lk_new_clientid))
+               if (STALE_CLIENTID(&lock->lk_new_clientid))
                        goto out;
 
                /* validate and update open stateid and open seqid */
@@ -3948,25 +4004,12 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                        goto out;
                open_sop = openowner(open_stp->st_stateowner);
                status = nfserr_bad_stateid;
-               if (!nfsd4_has_session(cstate) &&
-                       !same_clid(&open_sop->oo_owner.so_client->cl_clientid,
+               if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
                                                &lock->v.new.clientid))
                        goto out;
-               /* create lockowner and lock stateid */
-               fp = open_stp->st_file;
-               strhashval = lock_ownerstr_hashval(fp->fi_inode,
-                               open_sop->oo_owner.so_client->cl_clientid.cl_id,
-                               &lock->v.new.owner);
-               /* XXX: Do we need to check for duplicate stateowners on
-                * the same file, or should they just be allowed (and
-                * create new stateids)? */
-               status = nfserr_jukebox;
-               lock_sop = alloc_init_lock_stateowner(strhashval,
-                               open_sop->oo_owner.so_client, open_stp, lock);
-               if (lock_sop == NULL)
-                       goto out;
-               lock_stp = alloc_init_lock_stateid(lock_sop, fp, open_stp);
-               if (lock_stp == NULL)
+               status = lookup_or_create_lock_state(cstate, open_stp, lock,
+                                                       &lock_stp, &new_state);
+               if (status)
                        goto out;
        } else {
                /* lock (lock owner + lock stateid) already exists */
@@ -3976,10 +4019,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                                       NFS4_LOCK_STID, &lock_stp);
                if (status)
                        goto out;
-               lock_sop = lockowner(lock_stp->st_stateowner);
-               fp = lock_stp->st_file;
        }
-       /* lock_sop and lock_stp have been created or found */
+       lock_sop = lockowner(lock_stp->st_stateowner);
+       fp = lock_stp->st_file;
 
        lkflg = setlkflg(lock->lk_type);
        status = nfs4_check_openmode(lock_stp, lkflg);
@@ -4054,7 +4096,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
                break;
        }
 out:
-       if (status && lock->lk_is_new && lock_sop)
+       if (status && new_state)
                release_lockowner(lock_sop);
        if (!cstate->replay_owner)
                nfs4_unlock_state();
@@ -4251,7 +4293,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        struct nfs4_ol_stateid *stp;
        struct xdr_netobj *owner = &rlockowner->rl_owner;
        struct list_head matches;
-       int i;
+       unsigned int hashval = ownerstr_hashval(clid->cl_id, owner);
        __be32 status;
 
        dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
@@ -4266,22 +4308,19 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp,
        nfs4_lock_state();
 
        status = nfserr_locks_held;
-       /* XXX: we're doing a linear search through all the lockowners.
-        * Yipes!  For now we'll just hope clients aren't really using
-        * release_lockowner much, but eventually we have to fix these
-        * data structures. */
        INIT_LIST_HEAD(&matches);
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               list_for_each_entry(sop, &lock_ownerstr_hashtbl[i], so_strhash) {
-                       if (!same_owner_str(sop, owner, clid))
-                               continue;
-                       list_for_each_entry(stp, &sop->so_stateids,
-                                       st_perstateowner) {
-                               lo = lockowner(sop);
-                               if (check_for_locks(stp->st_file, lo))
-                                       goto out;
-                               list_add(&lo->lo_list, &matches);
-                       }
+
+       list_for_each_entry(sop, &ownerstr_hashtbl[hashval], so_strhash) {
+               if (sop->so_is_open_owner)
+                       continue;
+               if (!same_owner_str(sop, owner, clid))
+                       continue;
+               list_for_each_entry(stp, &sop->so_stateids,
+                               st_perstateowner) {
+                       lo = lockowner(sop);
+                       if (check_for_locks(stp->st_file, lo))
+                               goto out;
+                       list_add(&lo->lo_list, &matches);
                }
        }
        /* Clients probably won't expect us to return with some (but not all)
@@ -4394,16 +4433,127 @@ nfs4_check_open_reclaim(clientid_t *clid)
        return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad;
 }
 
+#ifdef CONFIG_NFSD_FAULT_INJECTION
+
+void nfsd_forget_clients(u64 num) /* fault injection: expire up to num clients */
+{
+       struct nfs4_client *clp, *next;
+       int count = 0; /* clients expired so far */
+
+       nfs4_lock_state();
+       list_for_each_entry_safe(clp, next, &client_lru, cl_lru) {
+               nfsd4_remove_clid_dir(clp);
+               expire_client(clp);
+               if (++count == num) /* num == 0 never matches: walk the whole list */
+                       break;
+       }
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d clients\n", count); /* newline ends the log record */
+}
+
+static void release_lockowner_sop(struct nfs4_stateowner *sop)
+{
+       release_lockowner(lockowner(sop)); /* adapter: lets release_lockowner() be used as a stateowner callback */
+}
+
+static void release_openowner_sop(struct nfs4_stateowner *sop)
+{
+       release_openowner(openowner(sop)); /* adapter: lets release_openowner() be used as a stateowner callback */
+}
+
+static int nfsd_release_n_owners(u64 num, bool is_open_owner,
+                               void (*release_sop)(struct nfs4_stateowner *))
+{
+       int i, count = 0; /* count: owners passed to release_sop; also the return value */
+       struct nfs4_stateowner *sop, *next;
+
+       for (i = 0; i < OWNER_HASH_SIZE; i++) { /* visit every bucket of ownerstr_hashtbl */
+               list_for_each_entry_safe(sop, next, &ownerstr_hashtbl[i], so_strhash) {
+                       if (sop->so_is_open_owner != is_open_owner) /* skip owners of the other kind */
+                               continue;
+                       release_sop(sop); /* presumably unlinks sop, hence the _safe iterator -- TODO confirm */
+                       if (++count == num) /* num == 0 never matches: release every match */
+                               return count;
+               }
+       }
+       return count;
+}
+
+void nfsd_forget_locks(u64 num) /* fault injection: release up to num lockowners */
+{
+       int count; /* lockowners actually released */
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, false, release_lockowner_sop); /* false: lockowners only */
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d locks\n", count); /* newline ends the log record */
+}
+
+void nfsd_forget_openowners(u64 num) /* fault injection: release up to num open owners */
+{
+       int count; /* open owners actually released */
+
+       nfs4_lock_state();
+       count = nfsd_release_n_owners(num, true, release_openowner_sop); /* true: open owners only */
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %d open owners\n", count); /* newline ends the log record */
+}
+
+int nfsd_process_n_delegations(u64 num, void (*deleg_func)(struct nfs4_delegation *))
+{
+       int i, count = 0; /* count: delegations passed to deleg_func; also the return value */
+       struct nfs4_file *fp, *fnext;
+       struct nfs4_delegation *dp, *dnext;
+
+       for (i = 0; i < FILE_HASH_SIZE; i++) { /* visit every bucket of file_hashtbl */
+               list_for_each_entry_safe(fp, fnext, &file_hashtbl[i], fi_hash) {
+                       list_for_each_entry_safe(dp, dnext, &fp->fi_delegations, dl_perfile) {
+                               deleg_func(dp); /* may unlink dp (e.g. unhash_delegation), presumably why _safe is used */
+                               if (++count == num) /* num == 0 never matches: process every delegation */
+                                       return count;
+                       }
+               }
+       }
+
+       return count;
+}
+
+void nfsd_forget_delegations(u64 num) /* fault injection: unhash up to num delegations */
+{
+       unsigned int count; /* delegations actually unhashed */
+
+       nfs4_lock_state();
+       count = nfsd_process_n_delegations(num, unhash_delegation);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Forgot %u delegations\n", count); /* %u matches unsigned count */
+}
+
+void nfsd_recall_delegations(u64 num) /* fault injection: recall up to num delegations */
+{
+       unsigned int count; /* delegations actually recalled */
+
+       nfs4_lock_state();
+       spin_lock(&recall_lock); /* held across every nfsd_break_one_deleg() call below */
+       count = nfsd_process_n_delegations(num, nfsd_break_one_deleg);
+       spin_unlock(&recall_lock);
+       nfs4_unlock_state();
+
+       printk(KERN_INFO "NFSD: Recalled %u delegations\n", count); /* %u matches unsigned count */
+}
+
+#endif /* CONFIG_NFSD_FAULT_INJECTION */
+
 /* initialization to perform at module load time: */
 
-int
+void
 nfs4_state_init(void)
 {
-       int i, status;
+       int i;
 
-       status = nfsd4_init_slabs();
-       if (status)
-               return status;
        for (i = 0; i < CLIENT_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&conf_id_hashtbl[i]);
                INIT_LIST_HEAD(&conf_str_hashtbl[i]);
@@ -4416,18 +4566,15 @@ nfs4_state_init(void)
        for (i = 0; i < FILE_HASH_SIZE; i++) {
                INIT_LIST_HEAD(&file_hashtbl[i]);
        }
-       for (i = 0; i < OPEN_OWNER_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&open_ownerstr_hashtbl[i]);
-       }
-       for (i = 0; i < LOCK_HASH_SIZE; i++) {
-               INIT_LIST_HEAD(&lock_ownerstr_hashtbl[i]);
+       for (i = 0; i < OWNER_HASH_SIZE; i++) {
+               INIT_LIST_HEAD(&ownerstr_hashtbl[i]);
        }
-       memset(&onestateid, ~0, sizeof(stateid_t));
+       for (i = 0; i < LOCKOWNER_INO_HASH_SIZE; i++)
+               INIT_LIST_HEAD(&lockowner_ino_hashtbl[i]);
        INIT_LIST_HEAD(&close_lru);
        INIT_LIST_HEAD(&client_lru);
        INIT_LIST_HEAD(&del_recall_lru);
        reclaim_str_hashtbl_size = 0;
-       return 0;
 }
 
 static void
@@ -4526,7 +4673,6 @@ __nfs4_state_shutdown(void)
        spin_unlock(&recall_lock);
        list_for_each_safe(pos, next, &reaplist) {
                dp = list_entry (pos, struct nfs4_delegation, dl_recall_lru);
-               list_del_init(&dp->dl_recall_lru);
                unhash_delegation(dp);
        }
 
index b6fa792..0ec5a1b 100644 (file)
@@ -215,10 +215,9 @@ defer_free(struct nfsd4_compoundargs *argp,
 static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
 {
        if (p == argp->tmp) {
-               p = kmalloc(nbytes, GFP_KERNEL);
+               p = kmemdup(argp->tmp, nbytes, GFP_KERNEL);
                if (!p)
                        return NULL;
-               memcpy(p, argp->tmp, nbytes);
        } else {
                BUG_ON(p != argp->tmpp);
                argp->tmpp = NULL;
index bb4a11d..748eda9 100644 (file)
@@ -18,6 +18,7 @@
 #include "idmap.h"
 #include "nfsd.h"
 #include "cache.h"
+#include "fault_inject.h"
 
 /*
  *     We have a single directory with several nodes in it.
@@ -1128,9 +1129,13 @@ static int __init init_nfsd(void)
        int retval;
        printk(KERN_INFO "Installing knfsd (copyright (C) 1996 okir@monad.swb.de).\n");
 
-       retval = nfs4_state_init(); /* nfs4 locking state */
+       retval = nfsd4_init_slabs();
        if (retval)
                return retval;
+       nfs4_state_init();
+       retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
+       if (retval)
+               goto out_free_slabs;
        nfsd_stat_init();       /* Statistics */
        retval = nfsd_reply_cache_init();
        if (retval)
@@ -1161,6 +1166,8 @@ out_free_cache:
        nfsd_reply_cache_shutdown();
 out_free_stat:
        nfsd_stat_shutdown();
+       nfsd_fault_inject_cleanup();
+out_free_slabs:
        nfsd4_free_slabs();
        return retval;
 }
@@ -1175,6 +1182,7 @@ static void __exit exit_nfsd(void)
        nfsd_lockd_shutdown();
        nfsd_idmap_shutdown();
        nfsd4_free_slabs();
+       nfsd_fault_inject_cleanup();
        unregister_filesystem(&nfsd_fs_type);
 }
 
index 58134a2..1d1e858 100644 (file)
@@ -104,14 +104,16 @@ static inline int nfsd_v4client(struct svc_rqst *rq)
  */
 #ifdef CONFIG_NFSD_V4
 extern unsigned int max_delegations;
-int nfs4_state_init(void);
+void nfs4_state_init(void);
+int nfsd4_init_slabs(void);
 void nfsd4_free_slabs(void);
 int nfs4_state_start(void);
 void nfs4_state_shutdown(void);
 void nfs4_reset_lease(time_t leasetime);
 int nfs4_reset_recoverydir(char *recdir);
 #else
-static inline int nfs4_state_init(void) { return 0; }
+static inline void nfs4_state_init(void) { }
+static inline int nfsd4_init_slabs(void) { return 0; }
 static inline void nfsd4_free_slabs(void) { }
 static inline int nfs4_state_start(void) { return 0; }
 static inline void nfs4_state_shutdown(void) { }
@@ -338,15 +340,15 @@ static inline u32 nfsd_suppattrs2(u32 minorversion)
 }
 
 /* These will return ERR_INVAL if specified in GETATTR or READDIR. */
-#define NFSD_WRITEONLY_ATTRS_WORD1                                                         \
-(FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEONLY_ATTRS_WORD1 \
+       (FATTR4_WORD1_TIME_ACCESS_SET   | FATTR4_WORD1_TIME_MODIFY_SET)
 
 /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
-#define NFSD_WRITEABLE_ATTRS_WORD0                                                          \
-(FATTR4_WORD0_SIZE              | FATTR4_WORD0_ACL                                         )
-#define NFSD_WRITEABLE_ATTRS_WORD1                                                          \
-(FATTR4_WORD1_MODE              | FATTR4_WORD1_OWNER         | FATTR4_WORD1_OWNER_GROUP     \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+#define NFSD_WRITEABLE_ATTRS_WORD0 \
+       (FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
+#define NFSD_WRITEABLE_ATTRS_WORD1 \
+       (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+       | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
 #define NFSD_WRITEABLE_ATTRS_WORD2 0
 
 #define NFSD_SUPPATTR_EXCLCREAT_WORD0 \
index a3cf384..ffb5df1 100644 (file)
@@ -366,6 +366,7 @@ struct nfs4_openowner {
 
 struct nfs4_lockowner {
        struct nfs4_stateowner  lo_owner; /* must be first element */
+       struct list_head        lo_owner_ino_hash; /* hash by owner,file */
        struct list_head        lo_perstateid; /* for lockowners only */
        struct list_head        lo_list; /* for temporary uses */
 };
@@ -482,7 +483,7 @@ extern void nfsd4_shutdown_recdir(void);
 extern int nfs4_client_to_reclaim(const char *name);
 extern int nfs4_has_reclaimed_state(const char *name, bool use_exchange_id);
 extern void nfsd4_recdir_purge_old(void);
-extern int nfsd4_create_clid_dir(struct nfs4_client *clp);
+extern void nfsd4_create_clid_dir(struct nfs4_client *clp);
 extern void nfsd4_remove_clid_dir(struct nfs4_client *clp);
 extern void release_session_client(struct nfsd4_session *);
 extern __be32 nfs4_validate_stateid(struct nfs4_client *, stateid_t *);
index d25a723..edf6d3e 100644 (file)
@@ -594,8 +594,19 @@ nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_ac
        return error;
 }
 
-#define NFSD_XATTR_JUNCTION_PREFIX XATTR_TRUSTED_PREFIX "junction."
-#define NFSD_XATTR_JUNCTION_TYPE NFSD_XATTR_JUNCTION_PREFIX "type"
+/*
+ * NFS junction information is stored in an extended attribute.
+ */
+#define NFSD_JUNCTION_XATTR_NAME       XATTR_TRUSTED_PREFIX "junction.nfs"
+
+/**
+ * nfsd4_is_junction - Test if an object could be an NFS junction
+ *
+ * @dentry: object to test
+ *
+ * Returns 1 if "dentry" appears to contain NFS junction information.
+ * Otherwise 0 is returned.
+ */
 int nfsd4_is_junction(struct dentry *dentry)
 {
        struct inode *inode = dentry->d_inode;
@@ -606,7 +617,7 @@ int nfsd4_is_junction(struct dentry *dentry)
                return 0;
        if (!(inode->i_mode & S_ISVTX))
                return 0;
-       if (vfs_getxattr(dentry, NFSD_XATTR_JUNCTION_TYPE, NULL, 0) <= 0)
+       if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
                return 0;
        return 1;
 }
index 8620f79..dfa9009 100644 (file)
@@ -109,7 +109,7 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u
 
 int    svc_reg_xprt_class(struct svc_xprt_class *);
 void   svc_unreg_xprt_class(struct svc_xprt_class *);
-void   svc_xprt_init(struct svc_xprt_class *, struct svc_xprt *,
+void   svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *,
                      struct svc_serv *);
 int    svc_create_xprt(struct svc_serv *, const char *, struct net *,
                        const int, const unsigned short, int);
@@ -118,7 +118,6 @@ void        svc_xprt_received(struct svc_xprt *);
 void   svc_xprt_put(struct svc_xprt *xprt);
 void   svc_xprt_copy_addrs(struct svc_rqst *rqstp, struct svc_xprt *xprt);
 void   svc_close_xprt(struct svc_xprt *xprt);
-void   svc_delete_xprt(struct svc_xprt *xprt);
 int    svc_port_is_privileged(struct sockaddr *sin);
 int    svc_print_xprts(char *buf, int maxlen);
 struct svc_xprt *svc_find_xprt(struct svc_serv *serv, const char *xcl_name,
index 85c50b4..c84e974 100644 (file)
@@ -34,7 +34,7 @@ struct svc_sock {
 /*
  * Function prototypes.
  */
-void           svc_close_all(struct list_head *);
+void           svc_close_all(struct svc_serv *);
 int            svc_recv(struct svc_rqst *, long);
 int            svc_send(struct svc_rqst *);
 void           svc_drop(struct svc_rqst *);
index 03b56bc..465df9a 100644 (file)
@@ -1641,6 +1641,7 @@ int cache_register_net(struct cache_detail *cd, struct net *net)
                sunrpc_destroy_cache_detail(cd);
        return ret;
 }
+EXPORT_SYMBOL_GPL(cache_register_net);
 
 int cache_register(struct cache_detail *cd)
 {
@@ -1653,6 +1654,7 @@ void cache_unregister_net(struct cache_detail *cd, struct net *net)
        remove_cache_proc_entries(cd, net);
        sunrpc_destroy_cache_detail(cd);
 }
+EXPORT_SYMBOL_GPL(cache_unregister_net);
 
 void cache_unregister(struct cache_detail *cd)
 {
index 9d01d46..e4aabc0 100644 (file)
@@ -167,6 +167,7 @@ svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools)
 
 fail_free:
        kfree(m->to_pool);
+       m->to_pool = NULL;
 fail:
        return -ENOMEM;
 }
@@ -285,9 +286,10 @@ svc_pool_map_put(void)
        mutex_lock(&svc_pool_map_mutex);
 
        if (!--m->count) {
-               m->mode = SVC_POOL_DEFAULT;
                kfree(m->to_pool);
+               m->to_pool = NULL;
                kfree(m->pool_to);
+               m->pool_to = NULL;
                m->npools = 0;
        }
 
@@ -527,17 +529,20 @@ svc_destroy(struct svc_serv *serv)
                printk("svc_destroy: no threads for serv=%p!\n", serv);
 
        del_timer_sync(&serv->sv_temptimer);
-
-       svc_close_all(&serv->sv_tempsocks);
+       /*
+        * The set of xprts (contained in the sv_tempsocks and
+        * sv_permsocks lists) is now constant, since it is modified
+        * only by accepting new sockets (done by service threads in
+        * svc_recv) or aging old ones (done by sv_temptimer), or
+        * configuration changes (excluded by whatever locking the
+        * caller is using--nfsd_mutex in the case of nfsd).  So it's
+        * safe to traverse those lists and shut everything down:
+        */
+       svc_close_all(serv);
 
        if (serv->sv_shutdown)
                serv->sv_shutdown(serv);
 
-       svc_close_all(&serv->sv_permsocks);
-
-       BUG_ON(!list_empty(&serv->sv_permsocks));
-       BUG_ON(!list_empty(&serv->sv_tempsocks));
-
        cache_clean_deferred(serv);
 
        if (svc_serv_is_pooled(serv))
@@ -683,8 +688,8 @@ found_pool:
  * Create or destroy enough new threads to make the number
  * of threads the given number.  If `pool' is non-NULL, applies
  * only to threads in that pool, otherwise round-robins between
- * all pools.  Must be called with a svc_get() reference and
- * the BKL or another lock to protect access to svc_serv fields.
+ * all pools.  Caller must ensure mutual exclusion between this and
+ * server startup or shutdown.
  *
  * Destroying threads relies on the service threads filling in
  * rqstp->rq_task, which only the nfs ones do.  Assumes the serv
index 38649cf..74cb0d8 100644 (file)
@@ -22,6 +22,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
 static int svc_deferred_recv(struct svc_rqst *rqstp);
 static struct cache_deferred_req *svc_defer(struct cache_req *req);
 static void svc_age_temp_xprts(unsigned long closure);
+static void svc_delete_xprt(struct svc_xprt *xprt);
 
 /* apparently the "standard" is that clients close
  * idle connections after 5 minutes, servers after
@@ -147,8 +148,8 @@ EXPORT_SYMBOL_GPL(svc_xprt_put);
  * Called by transport drivers to initialize the transport independent
  * portion of the transport instance.
  */
-void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
-                  struct svc_serv *serv)
+void svc_xprt_init(struct net *net, struct svc_xprt_class *xcl,
+                  struct svc_xprt *xprt, struct svc_serv *serv)
 {
        memset(xprt, 0, sizeof(*xprt));
        xprt->xpt_class = xcl;
@@ -163,7 +164,7 @@ void svc_xprt_init(struct svc_xprt_class *xcl, struct svc_xprt *xprt,
        spin_lock_init(&xprt->xpt_lock);
        set_bit(XPT_BUSY, &xprt->xpt_flags);
        rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
-       xprt->xpt_net = get_net(&init_net);
+       xprt->xpt_net = get_net(net);
 }
 EXPORT_SYMBOL_GPL(svc_xprt_init);
 
@@ -878,7 +879,7 @@ static void call_xpt_users(struct svc_xprt *xprt)
 /*
  * Remove a dead transport
  */
-void svc_delete_xprt(struct svc_xprt *xprt)
+static void svc_delete_xprt(struct svc_xprt *xprt)
 {
        struct svc_serv *serv = xprt->xpt_server;
        struct svc_deferred_req *dr;
@@ -893,14 +894,7 @@ void svc_delete_xprt(struct svc_xprt *xprt)
        spin_lock_bh(&serv->sv_lock);
        if (!test_and_set_bit(XPT_DETACHED, &xprt->xpt_flags))
                list_del_init(&xprt->xpt_list);
-       /*
-        * The only time we're called while xpt_ready is still on a list
-        * is while the list itself is about to be destroyed (in
-        * svc_destroy).  BUT svc_xprt_enqueue could still be attempting
-        * to add new entries to the sp_sockets list, so we can't leave
-        * a freed xprt on it.
-        */
-       list_del_init(&xprt->xpt_ready);
+       BUG_ON(!list_empty(&xprt->xpt_ready));
        if (test_bit(XPT_TEMP, &xprt->xpt_flags))
                serv->sv_tmpcnt--;
        spin_unlock_bh(&serv->sv_lock);
@@ -928,22 +922,48 @@ void svc_close_xprt(struct svc_xprt *xprt)
 }
 EXPORT_SYMBOL_GPL(svc_close_xprt);
 
-void svc_close_all(struct list_head *xprt_list)
+static void svc_close_list(struct list_head *xprt_list)
+{
+       struct svc_xprt *xprt;
+
+       list_for_each_entry(xprt, xprt_list, xpt_list) { /* flags only; nothing is unlinked here */
+               set_bit(XPT_CLOSE, &xprt->xpt_flags); /* mark the transport for closing */
+               set_bit(XPT_BUSY, &xprt->xpt_flags); /* presumably stops it being re-queued -- TODO confirm in svc_xprt_enqueue */
+       }
+}
+
+void svc_close_all(struct svc_serv *serv)
 {
+       struct svc_pool *pool;
        struct svc_xprt *xprt;
        struct svc_xprt *tmp;
+       int i;
+
+       svc_close_list(&serv->sv_tempsocks);
+       svc_close_list(&serv->sv_permsocks);
 
+       for (i = 0; i < serv->sv_nrpools; i++) {
+               pool = &serv->sv_pools[i];
+
+               spin_lock_bh(&pool->sp_lock);
+               while (!list_empty(&pool->sp_sockets)) { /* empty this pool's sp_sockets list */
+                       xprt = list_first_entry(&pool->sp_sockets, struct svc_xprt, xpt_ready);
+                       list_del_init(&xprt->xpt_ready);
+               }
+               spin_unlock_bh(&pool->sp_lock);
+       }
        /*
-        * The server is shutting down, and no more threads are running.
-        * svc_xprt_enqueue() might still be running, but at worst it
-        * will re-add the xprt to sp_sockets, which will soon get
-        * freed.  So we don't bother with any more locking, and don't
-        * leave the close to the (nonexistent) server threads:
+        * At this point the sp_sockets lists will stay empty, since
+        * svc_xprt_enqueue will not add new entries without taking the
+        * sp_lock and checking XPT_BUSY.
         */
-       list_for_each_entry_safe(xprt, tmp, xprt_list, xpt_list) {
-               set_bit(XPT_CLOSE, &xprt->xpt_flags);
+       list_for_each_entry_safe(xprt, tmp, &serv->sv_tempsocks, xpt_list)
                svc_delete_xprt(xprt);
-       }
+       list_for_each_entry_safe(xprt, tmp, &serv->sv_permsocks, xpt_list)
+               svc_delete_xprt(xprt);
+
+       BUG_ON(!list_empty(&serv->sv_permsocks));
+       BUG_ON(!list_empty(&serv->sv_tempsocks));
 }
 
 /*
index 4653286..4645709 100644 (file)
@@ -739,7 +739,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
        int err, level, optname, one = 1;
 
-       svc_xprt_init(&svc_udp_class, &svsk->sk_xprt, serv);
+       svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
+                     &svsk->sk_xprt, serv);
        clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
        svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
        svsk->sk_sk->sk_write_space = svc_write_space;
@@ -1343,7 +1344,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
        struct sock     *sk = svsk->sk_sk;
 
-       svc_xprt_init(&svc_tcp_class, &svsk->sk_xprt, serv);
+       svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_tcp_class,
+                     &svsk->sk_xprt, serv);
        set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
        if (sk->sk_state == TCP_LISTEN) {
                dprintk("setting up TCP socket for listening\n");
@@ -1659,7 +1661,7 @@ static struct svc_xprt *svc_bc_create_socket(struct svc_serv *serv,
                return ERR_PTR(-ENOMEM);
 
        xprt = &svsk->sk_xprt;
-       svc_xprt_init(&svc_tcp_bc_class, xprt, serv);
+       svc_xprt_init(net, &svc_tcp_bc_class, xprt, serv);
 
        serv->sv_bc_xprt = xprt;
 
index ba1296d..894cb42 100644 (file)
@@ -453,7 +453,7 @@ static struct svcxprt_rdma *rdma_create_xprt(struct svc_serv *serv,
 
        if (!cma_xprt)
                return NULL;
-       svc_xprt_init(&svc_rdma_class, &cma_xprt->sc_xprt, serv);
+       svc_xprt_init(&init_net, &svc_rdma_class, &cma_xprt->sc_xprt, serv);
        INIT_LIST_HEAD(&cma_xprt->sc_accept_q);
        INIT_LIST_HEAD(&cma_xprt->sc_dto_q);
        INIT_LIST_HEAD(&cma_xprt->sc_rq_dto_q);
diff --git a/tools/nfsd/inject_fault.sh b/tools/nfsd/inject_fault.sh
new file mode 100755 (executable)
index 0000000..06a399a
--- /dev/null
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Copyright (c) 2011 Bryan Schumaker <bjschuma@netapp.com>
+#
+# Script for easier NFSD fault injection
+
+# Check that debugfs has been mounted
+DEBUGFS=`cat /proc/mounts | grep debugfs`
+if [ "$DEBUGFS" == "" ]; then
+       echo "debugfs does not appear to be mounted!"
+       echo "Please mount debugfs and try again"
+       exit 1
+fi
+
+# Check that the fault injection directory exists
+DEBUGDIR=`echo $DEBUGFS | awk '{print $2}'`/nfsd
+if [ ! -d "$DEBUGDIR" ]; then
+       echo "$DEBUGDIR does not exist"
+       echo "Check that your .config selects CONFIG_NFSD_FAULT_INJECTION"
+       exit 1
+fi
+
+function help()
+{
+       echo "Usage $0 injection_type [count]"
+       echo ""
+       echo "Injection types are:"
+       ls $DEBUGDIR
+       exit 1
+}
+
+if [ $# == 0 ]; then
+       help
+elif [ ! -f $DEBUGDIR/$1 ]; then
+       help
+elif [ $# != 2 ]; then
+       COUNT=0
+else
+       COUNT=$2
+fi
+
+BEFORE=`mktemp`
+AFTER=`mktemp`
+dmesg > $BEFORE
+echo $COUNT > $DEBUGDIR/$1
+dmesg > $AFTER
+# Capture lines that only exist in the $AFTER file
+diff $BEFORE $AFTER | grep ">"
+rm -f $BEFORE $AFTER