SUNRPC: Fix the problem of EADDRNOTAVAIL syslog floods on reconnect
Trond Myklebust [Tue, 21 Apr 2009 21:18:20 +0000 (17:18 -0400)]
See http://bugzilla.kernel.org/show_bug.cgi?id=13034

If the port gets into a TIME_WAIT state, then we cannot reconnect without
binding to a new port.

Tested-by: Petr Vandrovec <petr@vandrovec.name>
Tested-by: Jean Delvare <khali@linux-fr.org>
Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/sunrpc/xprt.h
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c

index 1758d9f..08afe43 100644 (file)
@@ -261,6 +261,7 @@ void                        xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
 #define XPRT_BINDING           (5)
 #define XPRT_CLOSING           (6)
 #define XPRT_CONNECTION_ABORT  (7)
+#define XPRT_CONNECTION_CLOSE  (8)
 
 static inline void xprt_set_connected(struct rpc_xprt *xprt)
 {
index a0bfe53..06ca058 100644 (file)
@@ -672,10 +672,8 @@ xprt_init_autodisconnect(unsigned long data)
        if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
                goto out_abort;
        spin_unlock(&xprt->transport_lock);
-       if (xprt_connecting(xprt))
-               xprt_release_write(xprt, NULL);
-       else
-               queue_work(rpciod_workqueue, &xprt->task_cleanup);
+       set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+       queue_work(rpciod_workqueue, &xprt->task_cleanup);
        return;
 out_abort:
        spin_unlock(&xprt->transport_lock);
index d40ff50..e185961 100644 (file)
@@ -807,6 +807,9 @@ static void xs_reset_transport(struct sock_xprt *transport)
  *
  * This is used when all requests are complete; ie, no DRC state remains
  * on the server we want to save.
+ *
+ * The caller _must_ be holding XPRT_LOCKED in order to avoid issues with
+ * xs_reset_transport() zeroing the socket from underneath a writer.
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
@@ -824,6 +827,14 @@ static void xs_close(struct rpc_xprt *xprt)
        xprt_disconnect_done(xprt);
 }
 
+static void xs_tcp_close(struct rpc_xprt *xprt)
+{
+       if (test_and_clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state))
+               xs_close(xprt);
+       else
+               xs_tcp_shutdown(xprt);
+}
+
 /**
  * xs_destroy - prepare to shutdown a transport
  * @xprt: doomed transport
@@ -1772,6 +1783,15 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
                        xprt, -status, xprt_connected(xprt),
                        sock->sk->sk_state);
        switch (status) {
+       default:
+               printk("%s: connect returned unhandled error %d\n",
+                       __func__, status);
+       case -EADDRNOTAVAIL:
+               /* We're probably in TIME_WAIT. Get rid of existing socket,
+                * and retry
+                */
+               set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
+               xprt_force_disconnect(xprt);
        case -ECONNREFUSED:
        case -ECONNRESET:
        case -ENETUNREACH:
@@ -1782,10 +1802,6 @@ static void xs_tcp_setup_socket(struct rpc_xprt *xprt,
                xprt_clear_connecting(xprt);
                return;
        }
-       /* get rid of existing socket, and retry */
-       xs_tcp_shutdown(xprt);
-       printk("%s: connect returned unhandled error %d\n",
-                       __func__, status);
 out_eagain:
        status = -EAGAIN;
 out:
@@ -1994,7 +2010,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .buf_free               = rpc_free,
        .send_request           = xs_tcp_send_request,
        .set_retrans_timeout    = xprt_set_retrans_timeout_def,
-       .close                  = xs_tcp_shutdown,
+       .close                  = xs_tcp_close,
        .destroy                = xs_destroy,
        .print_stats            = xs_tcp_print_stats,
 };