]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/dccp/proto.c
[DCCP]: Use snmp_mib_{init,free}().
[linux-2.6.git] / net / dccp / proto.c
index 63b3fa20e14bbb718d3baa9a7a16c2bcb1a503bd..d534cdf9603b1ae93032b5a9ccede33f0a32d71d 100644 (file)
@@ -26,6 +26,7 @@
 #include <net/sock.h>
 #include <net/xfrm.h>
 
+#include <asm/ioctls.h>
 #include <asm/semaphore.h>
 #include <linux/spinlock.h>
 #include <linux/timer.h>
@@ -59,8 +60,7 @@ void dccp_set_state(struct sock *sk, const int state)
 {
        const int oldstate = sk->sk_state;
 
-       dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
-                     dccp_role(sk), sk,
+       dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);
 
@@ -71,13 +71,14 @@ void dccp_set_state(struct sock *sk, const int state)
                break;
 
        case DCCP_CLOSED:
-               if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
+               if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
+                   oldstate == DCCP_CLOSING)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
 
                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
-                       inet_put_port(&dccp_hashinfo, sk);
+                       inet_put_port(sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
@@ -92,6 +93,24 @@ void dccp_set_state(struct sock *sk, const int state)
 
 EXPORT_SYMBOL_GPL(dccp_set_state);
 
+static void dccp_finish_passive_close(struct sock *sk)
+{
+       switch (sk->sk_state) {
+       case DCCP_PASSIVE_CLOSE:
+               /* Node (client or server) has received Close packet. */
+               dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+               dccp_set_state(sk, DCCP_CLOSED);
+               break;
+       case DCCP_PASSIVE_CLOSEREQ:
+               /*
+                * Client received CloseReq. We set the `active' flag so that
+                * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
+                */
+               dccp_send_close(sk, 1);
+               dccp_set_state(sk, DCCP_CLOSING);
+       }
+}
+
 void dccp_done(struct sock *sk)
 {
        dccp_set_state(sk, DCCP_CLOSED);
@@ -133,14 +152,17 @@ EXPORT_SYMBOL_GPL(dccp_packet_name);
 const char *dccp_state_name(const int state)
 {
        static char *dccp_state_names[] = {
-       [DCCP_OPEN]       = "OPEN",
-       [DCCP_REQUESTING] = "REQUESTING",
-       [DCCP_PARTOPEN]   = "PARTOPEN",
-       [DCCP_LISTEN]     = "LISTEN",
-       [DCCP_RESPOND]    = "RESPOND",
-       [DCCP_CLOSING]    = "CLOSING",
-       [DCCP_TIME_WAIT]  = "TIME_WAIT",
-       [DCCP_CLOSED]     = "CLOSED",
+       [DCCP_OPEN]             = "OPEN",
+       [DCCP_REQUESTING]       = "REQUESTING",
+       [DCCP_PARTOPEN]         = "PARTOPEN",
+       [DCCP_LISTEN]           = "LISTEN",
+       [DCCP_RESPOND]          = "RESPOND",
+       [DCCP_CLOSING]          = "CLOSING",
+       [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
+       [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
+       [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
+       [DCCP_TIME_WAIT]        = "TIME_WAIT",
+       [DCCP_CLOSED]           = "CLOSED",
        };
 
        if (state >= DCCP_MAX_STATES)
@@ -151,20 +173,6 @@ const char *dccp_state_name(const int state)
 
 EXPORT_SYMBOL_GPL(dccp_state_name);
 
-void dccp_hash(struct sock *sk)
-{
-       inet_hash(&dccp_hashinfo, sk);
-}
-
-EXPORT_SYMBOL_GPL(dccp_hash);
-
-void dccp_unhash(struct sock *sk)
-{
-       inet_unhash(&dccp_hashinfo, sk);
-}
-
-EXPORT_SYMBOL_GPL(dccp_unhash);
-
 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
 {
        struct dccp_sock *dp = dccp_sk(sk);
@@ -172,7 +180,19 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
        struct inet_connection_sock *icsk = inet_csk(sk);
 
        dccp_minisock_init(&dp->dccps_minisock);
-       do_gettimeofday(&dp->dccps_epoch);
+
+       icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
+       icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
+       sk->sk_state            = DCCP_CLOSED;
+       sk->sk_write_space      = dccp_write_space;
+       icsk->icsk_sync_mss     = dccp_sync_mss;
+       dp->dccps_mss_cache     = 536;
+       dp->dccps_rate_last     = jiffies;
+       dp->dccps_role          = DCCP_ROLE_UNDEFINED;
+       dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
+       dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
+
+       dccp_init_xmit_timers(sk);
 
        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
@@ -213,17 +233,6 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }
 
-       dccp_init_xmit_timers(sk);
-       icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
-       icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
-       sk->sk_state            = DCCP_CLOSED;
-       sk->sk_write_space      = dccp_write_space;
-       icsk->icsk_sync_mss     = dccp_sync_mss;
-       dp->dccps_mss_cache     = 536;
-       dp->dccps_role          = DCCP_ROLE_UNDEFINED;
-       dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
-       dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
-
        return 0;
 }
 
@@ -245,7 +254,7 @@ int dccp_destroy_sock(struct sock *sk)
 
        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
-               inet_put_port(&dccp_hashinfo, sk);
+               inet_put_port(sk);
 
        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;
@@ -274,6 +283,12 @@ static inline int dccp_listen_start(struct sock *sk, int backlog)
        return inet_csk_listen_start(sk, backlog);
 }
 
+static inline int dccp_need_reset(int state)
+{
+       return state != DCCP_CLOSED && state != DCCP_LISTEN &&
+              state != DCCP_REQUESTING;
+}
+
 int dccp_disconnect(struct sock *sk, int flags)
 {
        struct inet_connection_sock *icsk = inet_csk(sk);
@@ -284,10 +299,15 @@ int dccp_disconnect(struct sock *sk, int flags)
        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);
 
-       /* ABORT function of RFC793 */
+       /*
+        * This corresponds to the ABORT function of RFC793, sec. 3.8
+        * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
+        */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
-       /* FIXME: do the active reset thing */
+       } else if (dccp_need_reset(old_state)) {
+               dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+               sk->sk_err = ECONNRESET;
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;
 
@@ -378,8 +398,36 @@ EXPORT_SYMBOL_GPL(dccp_poll);
 
 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 {
-       dccp_pr_debug("entry\n");
-       return -ENOIOCTLCMD;
+       int rc = -ENOTCONN;
+
+       lock_sock(sk);
+
+       if (sk->sk_state == DCCP_LISTEN)
+               goto out;
+
+       switch (cmd) {
+       case SIOCINQ: {
+               struct sk_buff *skb;
+               unsigned long amount = 0;
+
+               skb = skb_peek(&sk->sk_receive_queue);
+               if (skb != NULL) {
+                       /*
+                        * We will only return the amount of this packet since
+                        * that is all that will be read.
+                        */
+                       amount = skb->len;
+               }
+               rc = put_user(amount, (int __user *)arg);
+       }
+               break;
+       default:
+               rc = -ENOIOCTLCMD;
+               break;
+       }
+out:
+       release_sock(sk);
+       return rc;
 }
 
 EXPORT_SYMBOL_GPL(dccp_ioctl);
@@ -478,7 +526,7 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
-                                                    (struct dccp_so_feat __user *)
+                                                    (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
@@ -489,6 +537,12 @@ static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
+       case DCCP_SOCKOPT_SERVER_TIMEWAIT:
+               if (dp->dccps_role != DCCP_ROLE_SERVER)
+                       err = -EOPNOTSUPP;
+               else
+                       dp->dccps_server_timewait = (val != 0);
+               break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
@@ -575,7 +629,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
        if (get_user(len, optlen))
                return -EFAULT;
 
-       if (len < sizeof(int))
+       if (len < (int)sizeof(int))
                return -EINVAL;
 
        dp = dccp_sk(sk);
@@ -587,6 +641,12 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
+       case DCCP_SOCKOPT_GET_CUR_MPS:
+               val = dp->dccps_mss_cache;
+               break;
+       case DCCP_SOCKOPT_SERVER_TIMEWAIT:
+               val = dp->dccps_server_timewait;
+               break;
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
@@ -603,6 +663,7 @@ static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                return -ENOPROTOOPT;
        }
 
+       len = sizeof(val);
        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;
 
@@ -662,7 +723,7 @@ int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
         * so that the trick in dccp_rcv_request_sent_state_process.
         */
        /* Wait for a connection to finish. */
-       if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
+       if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;
 
@@ -713,19 +774,26 @@ int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
                dh = dccp_hdr(skb);
 
-               if (dh->dccph_type == DCCP_PKT_DATA ||
-                   dh->dccph_type == DCCP_PKT_DATAACK)
+               switch (dh->dccph_type) {
+               case DCCP_PKT_DATA:
+               case DCCP_PKT_DATAACK:
                        goto found_ok_skb;
 
-               if (dh->dccph_type == DCCP_PKT_RESET ||
-                   dh->dccph_type == DCCP_PKT_CLOSE) {
-                       dccp_pr_debug("found fin ok!\n");
+               case DCCP_PKT_CLOSE:
+               case DCCP_PKT_CLOSEREQ:
+                       if (!(flags & MSG_PEEK))
+                               dccp_finish_passive_close(sk);
+                       /* fall through */
+               case DCCP_PKT_RESET:
+                       dccp_pr_debug("found fin (%s) ok!\n",
+                                     dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
+               default:
+                       dccp_pr_debug("packet_type=%s\n",
+                                     dccp_packet_name(dh->dccph_type));
+                       sk_eat_skb(sk, skb, 0);
                }
-               dccp_pr_debug("packet_type=%s\n",
-                             dccp_packet_name(dh->dccph_type));
-               sk_eat_skb(sk, skb, 0);
 verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
@@ -827,34 +895,38 @@ out:
 
 EXPORT_SYMBOL_GPL(inet_dccp_listen);
 
-static const unsigned char dccp_new_state[] = {
-       /* current state:   new state:      action:     */
-       [0]               = DCCP_CLOSED,
-       [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
-       [DCCP_REQUESTING] = DCCP_CLOSED,
-       [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
-       [DCCP_LISTEN]     = DCCP_CLOSED,
-       [DCCP_RESPOND]    = DCCP_CLOSED,
-       [DCCP_CLOSING]    = DCCP_CLOSED,
-       [DCCP_TIME_WAIT]  = DCCP_CLOSED,
-       [DCCP_CLOSED]     = DCCP_CLOSED,
-};
-
-static int dccp_close_state(struct sock *sk)
+static void dccp_terminate_connection(struct sock *sk)
 {
-       const int next = dccp_new_state[sk->sk_state];
-       const int ns = next & DCCP_STATE_MASK;
+       u8 next_state = DCCP_CLOSED;
 
-       if (ns != sk->sk_state)
-               dccp_set_state(sk, ns);
+       switch (sk->sk_state) {
+       case DCCP_PASSIVE_CLOSE:
+       case DCCP_PASSIVE_CLOSEREQ:
+               dccp_finish_passive_close(sk);
+               break;
+       case DCCP_PARTOPEN:
+               dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+               /* fall through */
+       case DCCP_OPEN:
+               dccp_send_close(sk, 1);
 
-       return next & DCCP_ACTION_FIN;
+               if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
+                   !dccp_sk(sk)->dccps_server_timewait)
+                       next_state = DCCP_ACTIVE_CLOSEREQ;
+               else
+                       next_state = DCCP_CLOSING;
+               /* fall through */
+       default:
+               dccp_set_state(sk, next_state);
+       }
 }
 
 void dccp_close(struct sock *sk, long timeout)
 {
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
+       u32 data_was_unread = 0;
        int state;
 
        lock_sock(sk);
@@ -877,16 +949,21 @@ void dccp_close(struct sock *sk, long timeout)
         * descriptor close, not protocol-sourced closes, because the
          *reader process may not have drained the data yet!
         */
-       /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               data_was_unread += skb->len;
                __kfree_skb(skb);
        }
 
-       if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+       if (data_was_unread) {
+               /* Unread data was tossed, send an appropriate Reset Code */
+               DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
+               dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+               dccp_set_state(sk, DCCP_CLOSED);
+       } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
-       } else if (dccp_close_state(sk)) {
-               dccp_send_close(sk, 1);
+       } else if (sk->sk_state != DCCP_CLOSED) {
+               dccp_terminate_connection(sk);
        }
 
        sk_stream_wait_close(sk, timeout);
@@ -913,24 +990,6 @@ adjudge_to_death:
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;
 
-       /*
-        * The last release_sock may have processed the CLOSE or RESET
-        * packet moving sock to CLOSED state, if not we have to fire
-        * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
-        * in draft-ietf-dccp-spec-11. -acme
-        */
-       if (sk->sk_state == DCCP_CLOSING) {
-               /* FIXME: should start at 2 * RTT */
-               /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
-               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-                                         inet_csk(sk)->icsk_rto,
-                                         DCCP_RTO_MAX);
-#if 0
-               /* Yeah, we should use sk->sk_prot->orphan_count, etc */
-               dccp_set_state(sk, DCCP_CLOSED);
-#endif
-       }
-
        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);
 
@@ -946,38 +1005,19 @@ EXPORT_SYMBOL_GPL(dccp_close);
 
 void dccp_shutdown(struct sock *sk, int how)
 {
-       dccp_pr_debug("entry\n");
+       dccp_pr_debug("called shutdown(%x)\n", how);
 }
 
 EXPORT_SYMBOL_GPL(dccp_shutdown);
 
-static int __init dccp_mib_init(void)
+static inline int dccp_mib_init(void)
 {
-       int rc = -ENOMEM;
-
-       dccp_statistics[0] = alloc_percpu(struct dccp_mib);
-       if (dccp_statistics[0] == NULL)
-               goto out;
-
-       dccp_statistics[1] = alloc_percpu(struct dccp_mib);
-       if (dccp_statistics[1] == NULL)
-               goto out_free_one;
-
-       rc = 0;
-out:
-       return rc;
-out_free_one:
-       free_percpu(dccp_statistics[0]);
-       dccp_statistics[0] = NULL;
-       goto out;
-
+       return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
 }
 
-static void dccp_mib_exit(void)
+static inline void dccp_mib_exit(void)
 {
-       free_percpu(dccp_statistics[0]);
-       free_percpu(dccp_statistics[1]);
-       dccp_statistics[0] = dccp_statistics[1] = NULL;
+       snmp_mib_free((void**)dccp_statistics);
 }
 
 static int thash_entries;
@@ -986,7 +1026,7 @@ MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
 
 #ifdef CONFIG_IP_DCCP_DEBUG
 int dccp_debug;
-module_param(dccp_debug, int, 0444);
+module_param(dccp_debug, bool, 0444);
 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
 
 EXPORT_SYMBOL_GPL(dccp_debug);
@@ -1001,7 +1041,7 @@ static int __init dccp_init(void)
        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
-                                 SLAB_HWCACHE_ALIGN, NULL, NULL);
+                                 SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;
 
@@ -1024,7 +1064,6 @@ static int __init dccp_init(void)
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
-               dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
@@ -1037,11 +1076,14 @@ static int __init dccp_init(void)
                goto out_free_bind_bucket_cachep;
        }
 
-       for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
-               rwlock_init(&dccp_hashinfo.ehash[i].lock);
+       for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
+               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }
 
+       if (inet_ehash_locks_alloc(&dccp_hashinfo))
+                       goto out_free_dccp_ehash;
+
        bhash_order = ehash_order;
 
        do {
@@ -1056,7 +1098,7 @@ static int __init dccp_init(void)
 
        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
-               goto out_free_dccp_ehash;
+               goto out_free_dccp_locks;
        }
 
        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
@@ -1075,6 +1117,8 @@ static int __init dccp_init(void)
        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;
+
+       dccp_timestamping_init();
 out:
        return rc;
 out_ackvec_exit:
@@ -1084,6 +1128,8 @@ out_free_dccp_mib:
 out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
+out_free_dccp_locks:
+       inet_ehash_locks_free(&dccp_hashinfo);
 out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
@@ -1102,6 +1148,7 @@ static void __exit dccp_fini(void)
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
+       inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();