]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/dccp/ipv4.c
[DCCP]: Simplified conditions due to use of enum:8 states
[linux-2.6.git] / net / dccp / ipv4.c
index 34d1b119b7208cc3c93ab9de7241a0b1c185c05d..ff81679c9f1743970455cf387141ca4fb50700d2 100644 (file)
@@ -10,7 +10,6 @@
  *     2 of the License, or (at your option) any later version.
  */
 
-#include <linux/config.h>
 #include <linux/dccp.h>
 #include <linux/icmp.h>
 #include <linux/module.h>
 #include <linux/random.h>
 
 #include <net/icmp.h>
+#include <net/inet_common.h>
 #include <net/inet_hashtables.h>
 #include <net/inet_sock.h>
+#include <net/protocol.h>
 #include <net/sock.h>
 #include <net/timewait_sock.h>
 #include <net/tcp_states.h>
 #include "dccp.h"
 #include "feat.h"
 
-struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
-       .lhash_lock     = RW_LOCK_UNLOCKED,
-       .lhash_users    = ATOMIC_INIT(0),
-       .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
-};
-
-EXPORT_SYMBOL_GPL(dccp_hashinfo);
+/*
+ * This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets. A control sock will be created
+ * for this socket at the initialization time.
+ */
+static struct socket *dccp_v4_ctl_socket;
 
 static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
 {
@@ -50,15 +50,12 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        struct dccp_sock *dp = dccp_sk(sk);
        const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
        struct rtable *rt;
-       u32 daddr, nexthop;
+       __be32 daddr, nexthop;
        int tmp;
        int err;
 
        dp->dccps_role = DCCP_ROLE_CLIENT;
 
-       if (dccp_service_not_initialized(sk))
-               return -EPROTO;
-
        if (addr_len < sizeof(struct sockaddr_in))
                return -EINVAL;
 
@@ -116,13 +113,8 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
        /* OK, now commit destination to socket.  */
        sk_setup_caps(sk, &rt->u.dst);
 
-       dp->dccps_gar =
-               dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
-                                                           inet->daddr,
-                                                           inet->sport,
-                                                           usin->sin_port);
-       dccp_update_gss(sk, dp->dccps_iss);
-
+       dp->dccps_iss = secure_dccp_sequence_number(inet->saddr, inet->daddr,
+                                                   inet->sport, inet->dport);
        inet->id = dp->dccps_iss ^ jiffies;
 
        err = dccp_connect(sk);
@@ -186,7 +178,7 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
                dccp_sync_mss(sk, mtu);
 
                /*
-                * From: draft-ietf-dccp-spec-11.txt
+                * From RFC 4340, sec. 14.1:
                 *
                 *      DCCP-Sync packets are the best choice for upward
                 *      probing, since DCCP-Sync probes do not risk application
@@ -196,88 +188,6 @@ static inline void dccp_do_pmtu_discovery(struct sock *sk,
        } /* else let the usual retransmit timer handle it */
 }
 
-static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
-{
-       int err;
-       struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
-       const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
-                                    sizeof(struct dccp_hdr_ext) +
-                                    sizeof(struct dccp_hdr_ack_bits);
-       struct sk_buff *skb;
-
-       if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
-               return;
-
-       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
-       if (skb == NULL)
-               return;
-
-       /* Reserve space for headers. */
-       skb_reserve(skb, MAX_DCCP_HEADER);
-
-       skb->dst = dst_clone(rxskb->dst);
-
-       skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
-       dh = dccp_hdr(skb);
-       memset(dh, 0, dccp_hdr_ack_len);
-
-       /* Build DCCP header and checksum it. */
-       dh->dccph_type     = DCCP_PKT_ACK;
-       dh->dccph_sport    = rxdh->dccph_dport;
-       dh->dccph_dport    = rxdh->dccph_sport;
-       dh->dccph_doff     = dccp_hdr_ack_len / 4;
-       dh->dccph_x        = 1;
-
-       dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
-       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
-                        DCCP_SKB_CB(rxskb)->dccpd_seq);
-
-       bh_lock_sock(dccp_ctl_socket->sk);
-       err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
-                                   rxskb->nh.iph->daddr,
-                                   rxskb->nh.iph->saddr, NULL);
-       bh_unlock_sock(dccp_ctl_socket->sk);
-
-       if (err == NET_XMIT_CN || err == 0) {
-               DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
-               DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
-       }
-}
-
-static void dccp_v4_reqsk_send_ack(struct sk_buff *skb,
-                                  struct request_sock *req)
-{
-       dccp_v4_ctl_send_ack(skb);
-}
-
-static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
-                                struct dst_entry *dst)
-{
-       int err = -1;
-       struct sk_buff *skb;
-
-       /* First, grab a route. */
-       
-       if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
-               goto out;
-
-       skb = dccp_make_response(sk, dst, req);
-       if (skb != NULL) {
-               const struct inet_request_sock *ireq = inet_rsk(req);
-
-               memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-               err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
-                                           ireq->rmt_addr,
-                                           ireq->opt);
-               if (err == NET_XMIT_CN)
-                       err = 0;
-       }
-
-out:
-       dst_release(dst);
-       return err;
-}
-
 /*
  * This routine is called by the ICMP module when it gets some sort of error
  * condition. If err < 0 then the socket should be closed and the error
@@ -290,7 +200,7 @@ out:
  * check at all. A more general error queue to queue errors for later handling
  * is probably better.
  */
-void dccp_v4_err(struct sk_buff *skb, u32 info)
+static void dccp_v4_err(struct sk_buff *skb, u32 info)
 {
        const struct iphdr *iph = (struct iphdr *)skb->data;
        const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data +
@@ -316,7 +226,7 @@ void dccp_v4_err(struct sk_buff *skb, u32 info)
        }
 
        if (sk->sk_state == DCCP_TIME_WAIT) {
-               inet_twsk_put((struct inet_timewait_sock *)sk);
+               inet_twsk_put(inet_twsk(sk));
                return;
        }
 
@@ -334,7 +244,7 @@ void dccp_v4_err(struct sk_buff *skb, u32 info)
        seq = dccp_hdr_seq(skb);
        if (sk->sk_state != DCCP_LISTEN &&
            !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
-               NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+               NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
                goto out;
        }
 
@@ -434,19 +344,24 @@ out:
        sock_put(sk);
 }
 
-/* This routine computes an IPv4 DCCP checksum. */
-void dccp_v4_send_check(struct sock *sk, int len, struct sk_buff *skb)
+static inline __sum16 dccp_v4_csum_finish(struct sk_buff *skb,
+                                     __be32 src, __be32 dst)
+{
+       return csum_tcpudp_magic(src, dst, skb->len, IPPROTO_DCCP, skb->csum);
+}
+
+void dccp_v4_send_check(struct sock *sk, int unused, struct sk_buff *skb)
 {
        const struct inet_sock *inet = inet_sk(sk);
        struct dccp_hdr *dh = dccp_hdr(skb);
 
-       dh->dccph_checksum = dccp_v4_checksum(skb, inet->saddr, inet->daddr);
+       dccp_csum_outgoing(skb);
+       dh->dccph_checksum = dccp_v4_csum_finish(skb, inet->saddr, inet->daddr);
 }
 
 EXPORT_SYMBOL_GPL(dccp_v4_send_check);
 
-static inline u64 dccp_v4_init_sequence(const struct sock *sk,
-                                       const struct sk_buff *skb)
+static inline u64 dccp_v4_init_sequence(const struct sk_buff *skb)
 {
        return secure_dccp_sequence_number(skb->nh.iph->daddr,
                                           skb->nh.iph->saddr,
@@ -454,91 +369,6 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
                                           dccp_hdr(skb)->dccph_sport);
 }
 
-int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
-{
-       struct inet_request_sock *ireq;
-       struct dccp_sock dp;
-       struct request_sock *req;
-       struct dccp_request_sock *dreq;
-       const __be32 saddr = skb->nh.iph->saddr;
-       const __be32 daddr = skb->nh.iph->daddr;
-       const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
-       struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
-       __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
-
-       /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
-       if (((struct rtable *)skb->dst)->rt_flags &
-           (RTCF_BROADCAST | RTCF_MULTICAST)) {
-               reset_code = DCCP_RESET_CODE_NO_CONNECTION;
-               goto drop;
-       }
-
-       if (dccp_bad_service_code(sk, service)) {
-               reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
-               goto drop;
-       }
-       /*
-        * TW buckets are converted to open requests without
-        * limitations, they conserve resources and peer is
-        * evidently real one.
-        */
-       if (inet_csk_reqsk_queue_is_full(sk))
-               goto drop;
-
-       /*
-        * Accept backlog is full. If we have already queued enough
-        * of warm entries in syn queue, drop request. It is better than
-        * clogging syn queue with openreqs with exponentially increasing
-        * timeout.
-        */
-       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
-               goto drop;
-
-       req = reqsk_alloc(sk->sk_prot->rsk_prot);
-       if (req == NULL)
-               goto drop;
-
-       if (dccp_parse_options(sk, skb))
-               goto drop;
-
-       dccp_openreq_init(req, &dp, skb);
-
-       ireq = inet_rsk(req);
-       ireq->loc_addr = daddr;
-       ireq->rmt_addr = saddr;
-       req->rcv_wnd    = 100; /* Fake, option parsing will get the
-                                 right value */
-       ireq->opt       = NULL;
-
-       /* 
-        * Step 3: Process LISTEN state
-        *
-        * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
-        *
-        * In fact we defer setting S.GSR, S.SWL, S.SWH to
-        * dccp_create_openreq_child.
-        */
-       dreq = dccp_rsk(req);
-       dreq->dreq_isr     = dcb->dccpd_seq;
-       dreq->dreq_iss     = dccp_v4_init_sequence(sk, skb);
-       dreq->dreq_service = service;
-
-       if (dccp_v4_send_response(sk, req, NULL))
-               goto drop_and_free;
-
-       inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
-       return 0;
-
-drop_and_free:
-       reqsk_free(req);
-drop:
-       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
-       dcb->dccpd_reset_code = reset_code;
-       return -1;
-}
-
-EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
-
 /*
  * The three way handshake has completed - we got a valid ACK or DATAACK -
  * now create the new socket.
@@ -608,61 +438,22 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
        if (req != NULL)
                return dccp_check_req(sk, skb, req, prev);
 
-       nsk = __inet_lookup_established(&dccp_hashinfo,
-                                       iph->saddr, dh->dccph_sport,
-                                       iph->daddr, ntohs(dh->dccph_dport),
-                                       inet_iif(skb));
+       nsk = inet_lookup_established(&dccp_hashinfo,
+                                     iph->saddr, dh->dccph_sport,
+                                     iph->daddr, dh->dccph_dport,
+                                     inet_iif(skb));
        if (nsk != NULL) {
                if (nsk->sk_state != DCCP_TIME_WAIT) {
                        bh_lock_sock(nsk);
                        return nsk;
                }
-               inet_twsk_put((struct inet_timewait_sock *)nsk);
+               inet_twsk_put(inet_twsk(nsk));
                return NULL;
        }
 
        return sk;
 }
 
-int dccp_v4_checksum(const struct sk_buff *skb, const __be32 saddr,
-                    const __be32 daddr)
-{
-       const struct dccp_hdr* dh = dccp_hdr(skb);
-       int checksum_len;
-       u32 tmp;
-
-       if (dh->dccph_cscov == 0)
-               checksum_len = skb->len;
-       else {
-               checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
-               checksum_len = checksum_len < skb->len ? checksum_len :
-                                                        skb->len;
-       }
-
-       tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
-       return csum_tcpudp_magic(saddr, daddr, checksum_len,
-                                IPPROTO_DCCP, tmp);
-}
-
-static int dccp_v4_verify_checksum(struct sk_buff *skb,
-                                  const __be32 saddr, const __be32 daddr)
-{
-       struct dccp_hdr *dh = dccp_hdr(skb);
-       int checksum_len;
-       u32 tmp;
-
-       if (dh->dccph_cscov == 0)
-               checksum_len = skb->len;
-       else {
-               checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
-               checksum_len = checksum_len < skb->len ? checksum_len :
-                                                        skb->len;
-       }
-       tmp = csum_partial((unsigned char *)dh, checksum_len, 0);
-       return csum_tcpudp_magic(saddr, daddr, checksum_len,
-                                IPPROTO_DCCP, tmp) == 0 ? 0 : -1;
-}
-
 static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
                                           struct sk_buff *skb)
 {
@@ -679,6 +470,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
                                     }
                          };
 
+       security_skb_classify_flow(skb, &fl);
        if (ip_route_output_flow(&rt, &fl, sk, 0)) {
                IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
                return NULL;
@@ -687,7 +479,37 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk,
        return &rt->u.dst;
 }
 
-static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+                                struct dst_entry *dst)
+{
+       int err = -1;
+       struct sk_buff *skb;
+
+       /* First, grab a route. */
+       
+       if (dst == NULL && (dst = inet_csk_route_req(sk, req)) == NULL)
+               goto out;
+
+       skb = dccp_make_response(sk, dst, req);
+       if (skb != NULL) {
+               const struct inet_request_sock *ireq = inet_rsk(req);
+               struct dccp_hdr *dh = dccp_hdr(skb);
+
+               dh->dccph_checksum = dccp_v4_csum_finish(skb, ireq->loc_addr,
+                                                             ireq->rmt_addr);
+               memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
+               err = ip_build_and_send_pkt(skb, sk, ireq->loc_addr,
+                                           ireq->rmt_addr,
+                                           ireq->opt);
+               err = net_xmit_eval(err);
+       }
+
+out:
+       dst_release(dst);
+       return err;
+}
+
+static void dccp_v4_ctl_send_reset(struct sock *sk, struct sk_buff *rxskb)
 {
        int err;
        struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
@@ -696,7 +518,7 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
                                       sizeof(struct dccp_hdr_reset);
        struct sk_buff *skb;
        struct dst_entry *dst;
-       u64 seqno;
+       u64 seqno = 0;
 
        /* Never send a reset in response to a reset. */
        if (rxdh->dccph_type == DCCP_PKT_RESET)
@@ -705,21 +527,20 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
        if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
                return;
 
-       dst = dccp_v4_route_skb(dccp_ctl_socket->sk, rxskb);
+       dst = dccp_v4_route_skb(dccp_v4_ctl_socket->sk, rxskb);
        if (dst == NULL)
                return;
 
-       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+       skb = alloc_skb(dccp_v4_ctl_socket->sk->sk_prot->max_header,
+                       GFP_ATOMIC);
        if (skb == NULL)
                goto out;
 
        /* Reserve space for headers. */
-       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb_reserve(skb, dccp_v4_ctl_socket->sk->sk_prot->max_header);
        skb->dst = dst_clone(dst);
 
-       skb->h.raw = skb_push(skb, dccp_hdr_reset_len);
-       dh = dccp_hdr(skb);
-       memset(dh, 0, dccp_hdr_reset_len);
+       dh = dccp_zeroed_hdr(skb, dccp_hdr_reset_len);
 
        /* Build DCCP header and checksum it. */
        dh->dccph_type     = DCCP_PKT_RESET;
@@ -730,25 +551,24 @@ static void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
        dccp_hdr_reset(skb)->dccph_reset_code =
                                DCCP_SKB_CB(rxskb)->dccpd_reset_code;
 
-       /* See "8.3.1. Abnormal Termination" in draft-ietf-dccp-spec-11 */
-       seqno = 0;
+       /* See "8.3.1. Abnormal Termination" in RFC 4340 */
        if (DCCP_SKB_CB(rxskb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
                dccp_set_seqno(&seqno, DCCP_SKB_CB(rxskb)->dccpd_ack_seq + 1);
 
        dccp_hdr_set_seq(dh, seqno);
-       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb),
-                        DCCP_SKB_CB(rxskb)->dccpd_seq);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
 
-       dh->dccph_checksum = dccp_v4_checksum(skb, rxskb->nh.iph->saddr,
-                                             rxskb->nh.iph->daddr);
+       dccp_csum_outgoing(skb);
+       dh->dccph_checksum = dccp_v4_csum_finish(skb, rxskb->nh.iph->saddr,
+                                                     rxskb->nh.iph->daddr);
 
-       bh_lock_sock(dccp_ctl_socket->sk);
-       err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+       bh_lock_sock(dccp_v4_ctl_socket->sk);
+       err = ip_build_and_send_pkt(skb, dccp_v4_ctl_socket->sk,
                                    rxskb->nh.iph->daddr,
                                    rxskb->nh.iph->saddr, NULL);
-       bh_unlock_sock(dccp_ctl_socket->sk);
+       bh_unlock_sock(dccp_v4_ctl_socket->sk);
 
-       if (err == NET_XMIT_CN || err == 0) {
+       if (net_xmit_eval(err) == 0) {
                DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
                DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
        }
@@ -756,6 +576,103 @@ out:
         dst_release(dst);
 }
 
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+       kfree(inet_rsk(req)->opt);
+}
+
+static struct request_sock_ops dccp_request_sock_ops __read_mostly = {
+       .family         = PF_INET,
+       .obj_size       = sizeof(struct dccp_request_sock),
+       .rtx_syn_ack    = dccp_v4_send_response,
+       .send_ack       = dccp_reqsk_send_ack,
+       .destructor     = dccp_v4_reqsk_destructor,
+       .send_reset     = dccp_v4_ctl_send_reset,
+};
+
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+       struct inet_request_sock *ireq;
+       struct request_sock *req;
+       struct dccp_request_sock *dreq;
+       const __be32 service = dccp_hdr_request(skb)->dccph_req_service;
+       struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+       __u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
+
+       /* Never answer to DCCP_PKT_REQUESTs sent to broadcast or multicast */
+       if (((struct rtable *)skb->dst)->rt_flags &
+           (RTCF_BROADCAST | RTCF_MULTICAST)) {
+               reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+               goto drop;
+       }
+
+       if (dccp_bad_service_code(sk, service)) {
+               reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
+               goto drop;
+       }
+       /*
+        * TW buckets are converted to open requests without
+        * limitations, they conserve resources and peer is
+        * evidently real one.
+        */
+       if (inet_csk_reqsk_queue_is_full(sk))
+               goto drop;
+
+       /*
+        * Accept backlog is full. If we have already queued enough
+        * of warm entries in syn queue, drop request. It is better than
+        * clogging syn queue with openreqs with exponentially increasing
+        * timeout.
+        */
+       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+               goto drop;
+
+       req = reqsk_alloc(&dccp_request_sock_ops);
+       if (req == NULL)
+               goto drop;
+
+       if (dccp_parse_options(sk, skb))
+               goto drop_and_free;
+
+       dccp_reqsk_init(req, skb);
+
+       if (security_inet_conn_request(sk, skb, req))
+               goto drop_and_free;
+
+       ireq = inet_rsk(req);
+       ireq->loc_addr = skb->nh.iph->daddr;
+       ireq->rmt_addr = skb->nh.iph->saddr;
+       ireq->opt       = NULL;
+
+       /* 
+        * Step 3: Process LISTEN state
+        *
+        * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *
+        * In fact we defer setting S.GSR, S.SWL, S.SWH to
+        * dccp_create_openreq_child.
+        */
+       dreq = dccp_rsk(req);
+       dreq->dreq_isr     = dcb->dccpd_seq;
+       dreq->dreq_iss     = dccp_v4_init_sequence(skb);
+       dreq->dreq_service = service;
+
+       if (dccp_v4_send_response(sk, req, NULL))
+               goto drop_and_free;
+
+       inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+       return 0;
+
+drop_and_free:
+       reqsk_free(req);
+drop:
+       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+       dcb->dccpd_reset_code = reset_code;
+       return -1;
+}
+
+EXPORT_SYMBOL_GPL(dccp_v4_conn_request);
+
 int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 {
        struct dccp_hdr *dh = dccp_hdr(skb);
@@ -768,24 +685,23 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
 
        /*
         *  Step 3: Process LISTEN state
-        *     If S.state == LISTEN,
-        *        If P.type == Request or P contains a valid Init Cookie
-        *              option,
-        *           * Must scan the packet's options to check for an Init
-        *              Cookie.  Only the Init Cookie is processed here,
-        *              however; other options are processed in Step 8.  This
-        *              scan need only be performed if the endpoint uses Init
-        *              Cookies *
-        *           * Generate a new socket and switch to that socket *
-        *           Set S := new socket for this port pair
-        *           S.state = RESPOND
-        *           Choose S.ISS (initial seqno) or set from Init Cookie
-        *           Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
-        *           Continue with S.state == RESPOND
-        *           * A Response packet will be generated in Step 11 *
-        *        Otherwise,
-        *           Generate Reset(No Connection) unless P.type == Reset
-        *           Drop packet and return
+        *       If P.type == Request or P contains a valid Init Cookie option,
+        *            (* Must scan the packet's options to check for Init
+        *               Cookies.  Only Init Cookies are processed here,
+        *               however; other options are processed in Step 8.  This
+        *               scan need only be performed if the endpoint uses Init
+        *               Cookies *)
+        *            (* Generate a new socket and switch to that socket *)
+        *            Set S := new socket for this port pair
+        *            S.state = RESPOND
+        *            Choose S.ISS (initial seqno) or set from Init Cookies
+        *            Initialize S.GAR := S.ISS
+        *            Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookies
+        *            Continue with S.state == RESPOND
+        *            (* A Response packet will be generated in Step 11 *)
+        *       Otherwise,
+        *            Generate Reset(No Connection) unless P.type == Reset
+        *            Drop packet and return
         *
         * NOTE: the check for the packet types is done in
         *       dccp_rcv_state_process
@@ -808,7 +724,7 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
        return 0;
 
 reset:
-       dccp_v4_ctl_send_reset(skb);
+       dccp_v4_ctl_send_reset(sk, skb);
 discard:
        kfree_skb(skb);
        return 0;
@@ -816,81 +732,94 @@ discard:
 
 EXPORT_SYMBOL_GPL(dccp_v4_do_rcv);
 
+/**
+ *     dccp_invalid_packet  -  RFC 4340, 8.5, Step 1: check header basics
+ *     Returns 1 if the packet is malformed (it is then dropped without a
+ *     Reset), 0 otherwise with skb->csum holding the partial checksum.
+ */
 int dccp_invalid_packet(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
+       unsigned int cscov;
 
        if (skb->pkt_type != PACKET_HOST)
                return 1;
 
+       /* If the packet is shorter than 12 bytes, drop packet and return */
        if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: pskb_may_pull failed\n");
+               DCCP_WARN("pskb_may_pull failed\n");
                return 1;
        }
 
        dh = dccp_hdr(skb);
 
-       /* If the packet type is not understood, drop packet and return */
+       /* If P.type is not understood, drop packet and return */
        if (dh->dccph_type >= DCCP_PKT_INVALID) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: invalid packet type\n");
+               DCCP_WARN("invalid packet type\n");
                return 1;
        }
 
        /*
-        * If P.Data Offset is too small for packet type, or too large for
-        * packet, drop packet and return
+        * If P.Data Offset is too small for packet type, drop packet and return
         */
        if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
-                                           "too small 1\n",
-                              dh->dccph_doff);
+               DCCP_WARN("P.Data Offset(%u) too small\n", dh->dccph_doff);
                return 1;
        }
-
+       /* If P.Data Offset is too large for packet, drop packet and return.
+        * pskb_may_pull() may reallocate skb->head, so dh is reloaded below. */
        if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.Data Offset(%u) "
-                                           "too small 2\n",
-                              dh->dccph_doff);
+               DCCP_WARN("P.Data Offset(%u) too large\n", dh->dccph_doff);
                return 1;
        }
 
        dh = dccp_hdr(skb);
-
        /*
         * If P.type is not Data, Ack, or DataAck and P.X == 0 (the packet
         * has short sequence numbers), drop packet and return
         */
-       if (dh->dccph_x == 0 &&
-           dh->dccph_type != DCCP_PKT_DATA &&
-           dh->dccph_type != DCCP_PKT_ACK &&
-           dh->dccph_type != DCCP_PKT_DATAACK) {
-               LIMIT_NETDEBUG(KERN_WARNING "DCCP: P.type (%s) not Data, Ack "
-                                           "nor DataAck and P.X == 0\n",
-                              dccp_packet_name(dh->dccph_type));
+       if ((dh->dccph_type < DCCP_PKT_DATA    ||
+           dh->dccph_type > DCCP_PKT_DATAACK) && dh->dccph_x == 0)  {
+               DCCP_WARN("P.type (%s) not Data || [Data]Ack, while P.X == 0\n",
+                         dccp_packet_name(dh->dccph_type));
+               return 1;
+       }
+
+       /*
+        * If P.CsCov is too large for the packet size, drop packet and return.
+        * This must come _before_ checksumming (not as RFC 4340 suggests).
+        */
+       cscov = dccp_csum_coverage(skb);
+       if (cscov > skb->len) {
+               DCCP_WARN("P.CsCov %u exceeds packet length %d\n",
+                         dh->dccph_cscov, skb->len);
                return 1;
        }
 
+       /* If header checksum is incorrect, drop packet and return.
+        * (This step is completed in the AF-dependent functions.) */
+       skb->csum = skb_checksum(skb, 0, cscov, 0);
+
        return 0;
 }
 
 EXPORT_SYMBOL_GPL(dccp_invalid_packet);
 
 /* this is called when real data arrives */
-int dccp_v4_rcv(struct sk_buff *skb)
+static int dccp_v4_rcv(struct sk_buff *skb)
 {
        const struct dccp_hdr *dh;
        struct sock *sk;
+       int min_cov;
 
-       /* Step 1: Check header basics: */
+       /* Step 1: Check header basics */
 
        if (dccp_invalid_packet(skb))
                goto discard_it;
 
-       /* If the header checksum is incorrect, drop packet and return */
-       if (dccp_v4_verify_checksum(skb, skb->nh.iph->saddr,
-                                   skb->nh.iph->daddr) < 0) {
-               LIMIT_NETDEBUG(KERN_WARNING "%s: incorrect header checksum\n",
-                              __FUNCTION__);
+       /* Step 1: If header checksum is incorrect, drop packet and return */
+       if (dccp_v4_csum_finish(skb, skb->nh.iph->saddr, skb->nh.iph->daddr)) {
+               DCCP_WARN("dropped packet with invalid checksum\n");
                goto discard_it;
        }
 
@@ -912,8 +841,7 @@ int dccp_v4_rcv(struct sk_buff *skb)
                dccp_pr_debug_cat("\n");
        } else {
                DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
-               dccp_pr_debug_cat(", ack=%llu\n",
-                                 (unsigned long long)
+               dccp_pr_debug_cat(", ack=%llu\n", (unsigned long long)
                                  DCCP_SKB_CB(skb)->dccpd_ack_seq);
        }
 
@@ -921,14 +849,12 @@ int dccp_v4_rcv(struct sk_buff *skb)
         *      Look up flow ID in table and get corresponding socket */
        sk = __inet_lookup(&dccp_hashinfo,
                           skb->nh.iph->saddr, dh->dccph_sport,
-                          skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+                          skb->nh.iph->daddr, dh->dccph_dport,
                           inet_iif(skb));
 
        /* 
         * Step 2:
         *      If no socket ...
-        *              Generate Reset(No Connection) unless P.type == Reset
-        *              Drop packet and return
         */
        if (sk == NULL) {
                dccp_pr_debug("failed to look up flow ID in table and "
@@ -942,88 +868,93 @@ int dccp_v4_rcv(struct sk_buff *skb)
         *              Generate Reset(No Connection) unless P.type == Reset
         *              Drop packet and return
         */
-              
        if (sk->sk_state == DCCP_TIME_WAIT) {
-               dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: "
-                             "do_time_wait\n");
-                goto do_time_wait;
+               dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: do_time_wait\n");
+               inet_twsk_put(inet_twsk(sk));
+               goto no_dccp_socket;
+       }
+
+       /*
+        * RFC 4340, sec. 9.2.1: Minimum Checksum Coverage
+        *      o if MinCsCov = 0, only packets with CsCov = 0 are accepted
+        *      o if MinCsCov > 0, also accept packets with CsCov >= MinCsCov
+        */
+       min_cov = dccp_sk(sk)->dccps_pcrlen;
+       if (dh->dccph_cscov && (min_cov == 0 || dh->dccph_cscov < min_cov))  {
+               dccp_pr_debug("Packet CsCov %d does not satisfy MinCsCov %d\n",
+                             dh->dccph_cscov, min_cov);
+               /* FIXME: "Such packets SHOULD be reported using Data Dropped
+                *         options (Section 11.7) with Drop Code 0, Protocol
+                *         Constraints."                                     */
+               goto discard_and_relse;
        }
 
        if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
                goto discard_and_relse;
        nf_reset(skb);
 
-       return sk_receive_skb(sk, skb);
+       return sk_receive_skb(sk, skb, 1);
 
 no_dccp_socket:
        if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
                goto discard_it;
        /*
         * Step 2:
+        *      If no socket ...
         *              Generate Reset(No Connection) unless P.type == Reset
         *              Drop packet and return
         */
        if (dh->dccph_type != DCCP_PKT_RESET) {
                DCCP_SKB_CB(skb)->dccpd_reset_code =
                                        DCCP_RESET_CODE_NO_CONNECTION;
-               dccp_v4_ctl_send_reset(skb);
+               dccp_v4_ctl_send_reset(sk, skb);
        }
 
 discard_it:
-       /* Discard frame. */
        kfree_skb(skb);
        return 0;
 
 discard_and_relse:
        sock_put(sk);
        goto discard_it;
-
-do_time_wait:
-       inet_twsk_put((struct inet_timewait_sock *)sk);
-       goto no_dccp_socket;
 }
 
-struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
-       .queue_xmit     = ip_queue_xmit,
-       .send_check     = dccp_v4_send_check,
-       .rebuild_header = inet_sk_rebuild_header,
-       .conn_request   = dccp_v4_conn_request,
-       .syn_recv_sock  = dccp_v4_request_recv_sock,
-       .net_header_len = sizeof(struct iphdr),
-       .setsockopt     = ip_setsockopt,
-       .getsockopt     = ip_getsockopt,
-       .addr2sockaddr  = inet_csk_addr2sockaddr,
-       .sockaddr_len   = sizeof(struct sockaddr_in),
+static struct inet_connection_sock_af_ops dccp_ipv4_af_ops = {
+       .queue_xmit        = ip_queue_xmit,
+       .send_check        = dccp_v4_send_check,
+       .rebuild_header    = inet_sk_rebuild_header,
+       .conn_request      = dccp_v4_conn_request,
+       .syn_recv_sock     = dccp_v4_request_recv_sock,
+       .net_header_len    = sizeof(struct iphdr),
+       .setsockopt        = ip_setsockopt,
+       .getsockopt        = ip_getsockopt,
+       .addr2sockaddr     = inet_csk_addr2sockaddr,
+       .sockaddr_len      = sizeof(struct sockaddr_in),
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_ip_setsockopt,
+       .compat_getsockopt = compat_ip_getsockopt,
+#endif
 };
 
 static int dccp_v4_init_sock(struct sock *sk)
 {
-       const int err = dccp_init_sock(sk);
+       static __u8 dccp_v4_ctl_sock_initialized;
+       int err = dccp_init_sock(sk, dccp_v4_ctl_sock_initialized);
 
-       if (err == 0)
+       if (err == 0) {
+               if (unlikely(!dccp_v4_ctl_sock_initialized))
+                       dccp_v4_ctl_sock_initialized = 1;
                inet_csk(sk)->icsk_af_ops = &dccp_ipv4_af_ops;
-       return err;
-}
+       }
 
-static void dccp_v4_reqsk_destructor(struct request_sock *req)
-{
-       kfree(inet_rsk(req)->opt);
+       return err;
 }
 
-static struct request_sock_ops dccp_request_sock_ops = {
-       .family         = PF_INET,
-       .obj_size       = sizeof(struct dccp_request_sock),
-       .rtx_syn_ack    = dccp_v4_send_response,
-       .send_ack       = dccp_v4_reqsk_send_ack,
-       .destructor     = dccp_v4_reqsk_destructor,
-       .send_reset     = dccp_v4_ctl_send_reset,
-};
-
 static struct timewait_sock_ops dccp_timewait_sock_ops = {
        .twsk_obj_size  = sizeof(struct inet_timewait_sock),
 };
 
-struct proto dccp_prot = {
+static struct proto dccp_v4_prot = {
        .name                   = "DCCP",
        .owner                  = THIS_MODULE,
        .close                  = dccp_close,
@@ -1047,6 +978,99 @@ struct proto dccp_prot = {
        .obj_size               = sizeof(struct dccp_sock),
        .rsk_prot               = &dccp_request_sock_ops,
        .twsk_prot              = &dccp_timewait_sock_ops,
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt      = compat_dccp_setsockopt,
+       .compat_getsockopt      = compat_dccp_getsockopt,
+#endif
 };
 
-EXPORT_SYMBOL_GPL(dccp_prot);
+static struct net_protocol dccp_v4_protocol = {
+       .handler        = dccp_v4_rcv,
+       .err_handler    = dccp_v4_err,
+       .no_policy      = 1,
+};
+
+static const struct proto_ops inet_dccp_ops = {
+       .family            = PF_INET,
+       .owner             = THIS_MODULE,
+       .release           = inet_release,
+       .bind              = inet_bind,
+       .connect           = inet_stream_connect,
+       .socketpair        = sock_no_socketpair,
+       .accept            = inet_accept,
+       .getname           = inet_getname,
+       /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
+       .poll              = dccp_poll,
+       .ioctl             = inet_ioctl,
+       /* FIXME: work on inet_listen to rename it to sock_common_listen */
+       .listen            = inet_dccp_listen,
+       .shutdown          = inet_shutdown,
+       .setsockopt        = sock_common_setsockopt,
+       .getsockopt        = sock_common_getsockopt,
+       .sendmsg           = inet_sendmsg,
+       .recvmsg           = sock_common_recvmsg,
+       .mmap              = sock_no_mmap,
+       .sendpage          = sock_no_sendpage,
+#ifdef CONFIG_COMPAT
+       .compat_setsockopt = compat_sock_common_setsockopt,
+       .compat_getsockopt = compat_sock_common_getsockopt,
+#endif
+};
+
+static struct inet_protosw dccp_v4_protosw = {
+       .type           = SOCK_DCCP,
+       .protocol       = IPPROTO_DCCP,
+       .prot           = &dccp_v4_prot,
+       .ops            = &inet_dccp_ops,
+       .capability     = -1,
+       .no_check       = 0,
+       .flags          = INET_PROTOSW_ICSK,
+};
+
+static int __init dccp_v4_init(void)
+{
+       int err = proto_register(&dccp_v4_prot, 1);
+
+       if (err != 0)
+               goto out;
+
+       err = inet_add_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+       if (err != 0)
+               goto out_proto_unregister;
+
+       inet_register_protosw(&dccp_v4_protosw);
+
+       err = inet_csk_ctl_sock_create(&dccp_v4_ctl_socket, PF_INET,
+                                      SOCK_DCCP, IPPROTO_DCCP);
+       if (err)
+               goto out_unregister_protosw;
+out:
+       return err;
+out_unregister_protosw:
+       inet_unregister_protosw(&dccp_v4_protosw);
+       inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+out_proto_unregister:
+       proto_unregister(&dccp_v4_prot);
+       goto out;
+}
+
+static void __exit dccp_v4_exit(void)
+{
+       inet_unregister_protosw(&dccp_v4_protosw);
+       inet_del_protocol(&dccp_v4_protocol, IPPROTO_DCCP);
+       proto_unregister(&dccp_v4_prot);
+}
+
+module_init(dccp_v4_init);
+module_exit(dccp_v4_exit);
+
+/*
+ * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
+ * values directly. Also cover the case where the protocol is not specified,
+ * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
+ */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@mandriva.com>");
+MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");