[DCCP]: Initial implementation
Arnaldo Carvalho de Melo [Wed, 10 Aug 2005 03:14:34 +0000 (20:14 -0700)]
Development to this point was done on a subversion repository at:

http://oops.ghostprotocols.net:81/cgi-bin/viewcvs.cgi/dccp-2.6/

This repository will be kept at this site for the foreseeable future,
so that interested parties can see the history of this code,
attributions, etc.

If I ever decide to take this offline I'll provide the full history at
some other suitable place.

Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>

22 files changed:
include/linux/dccp.h [new file with mode: 0644]
include/linux/in.h
include/linux/net.h
include/linux/socket.h
net/Kconfig
net/Makefile
net/dccp/Kconfig [new file with mode: 0644]
net/dccp/Makefile [new file with mode: 0644]
net/dccp/ccid.c [new file with mode: 0644]
net/dccp/ccid.h [new file with mode: 0644]
net/dccp/ccids/Kconfig [new file with mode: 0644]
net/dccp/ccids/Makefile [new file with mode: 0644]
net/dccp/ccids/ccid3.c [new file with mode: 0644]
net/dccp/ccids/ccid3.h [new file with mode: 0644]
net/dccp/dccp.h [new file with mode: 0644]
net/dccp/input.c [new file with mode: 0644]
net/dccp/ipv4.c [new file with mode: 0644]
net/dccp/minisocks.c [new file with mode: 0644]
net/dccp/options.c [new file with mode: 0644]
net/dccp/output.c [new file with mode: 0644]
net/dccp/proto.c [new file with mode: 0644]
net/dccp/timer.c [new file with mode: 0644]

diff --git a/include/linux/dccp.h b/include/linux/dccp.h
new file mode 100644 (file)
index 0000000..e3b4bf7
--- /dev/null
@@ -0,0 +1,432 @@
+#ifndef _LINUX_DCCP_H
+#define _LINUX_DCCP_H
+
+#include <linux/in.h>
+#include <linux/list.h>
+#include <linux/types.h>
+#include <linux/uio.h>
+#include <linux/workqueue.h>
+
+#include <net/inet_connection_sock.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/tcp.h>
+
+/* FIXME: this is utterly wrong */
+/*
+ * DCCP socket address as laid out here: a struct sockaddr_in followed by
+ * an unsigned int 'service' field.
+ * NOTE(review): 'service' presumably carries the DCCP service code (compare
+ * dccph_req_service in struct dccp_hdr_request) - confirm against users.
+ */
+struct sockaddr_dccp {
+       struct sockaddr_in      in;
+       unsigned int            service;
+};
+
+/*
+ * DCCP connection states.  Every enumerator is defined as the value of a
+ * TCP_* state constant; see the FIXME on DCCP_PARTOPEN about that mapping.
+ */
+enum dccp_state {
+       DCCP_OPEN       = TCP_ESTABLISHED,
+       DCCP_REQUESTING = TCP_SYN_SENT,
+       DCCP_PARTOPEN   = TCP_FIN_WAIT1, /* FIXME:
+                                           This mapping is horrible, but TCP has
+                                           no matching state for DCCP_PARTOPEN,
+                                           as TCP_SYN_RECV is already used by
+                                           DCCP_RESPOND, why don't stop using TCP
+                                           mapping of states? OK, now we don't use
+                                           sk_stream_sendmsg anymore, so doesn't
+                                           seem to exist any reason for us to
+                                           do the TCP mapping here */
+       DCCP_LISTEN     = TCP_LISTEN,
+       DCCP_RESPOND    = TCP_SYN_RECV,
+       DCCP_CLOSING    = TCP_CLOSING,
+       DCCP_TIME_WAIT  = TCP_TIME_WAIT,
+       DCCP_CLOSED     = TCP_CLOSE,
+       DCCP_MAX_STATES = TCP_MAX_STATES,
+};
+
+/*
+ * NOTE(review): no user of these two macros is visible in this header; they
+ * presumably mirror TCP's state mask / FIN action bit for a state-transition
+ * table - confirm against net/dccp/proto.c.
+ */
+#define DCCP_STATE_MASK 0xf
+#define DCCP_ACTION_FIN (1<<7)
+
+/*
+ * Flag counterparts of enum dccp_state: each DCCPF_<state> is defined as the
+ * TCPF_* constant of the TCP state that DCCP_<state> is mapped to.
+ */
+enum {
+       DCCPF_OPEN       = TCPF_ESTABLISHED,
+       DCCPF_REQUESTING = TCPF_SYN_SENT,
+       DCCPF_PARTOPEN   = TCPF_FIN_WAIT1,
+       DCCPF_LISTEN     = TCPF_LISTEN,
+       DCCPF_RESPOND    = TCPF_SYN_RECV,
+       DCCPF_CLOSING    = TCPF_CLOSING,
+       DCCPF_TIME_WAIT  = TCPF_TIME_WAIT,
+       DCCPF_CLOSED     = TCPF_CLOSE,
+};
+
+/**
+ * struct dccp_hdr - generic part of DCCP packet header
+ *
+ * @dccph_sport - Relevant port on the endpoint that sent this packet
+ * @dccph_dport - Relevant port on the other endpoint
+ * @dccph_doff - Data Offset from the start of the DCCP header, in 32-bit words
+ * @dccph_ccval - Used by the HC-Sender CCID
+ * @dccph_cscov - Parts of the packet that are covered by the Checksum field
+ * @dccph_checksum - Internet checksum, depends on dccph_cscov
+ * @dccph_x - 0 = 24 bit sequence number, 1 = 48
+ * @dccph_type - packet type, see DCCP_PKT_ prefixed macros
+ * @dccph_reserved - 3-bit field sharing a 32-bit word with dccph_x,
+ *                  dccph_type and dccph_seq
+ * @dccph_seq - sequence number high or low order 24 bits, depends on dccph_x
+ *
+ * The declaration order of the sub-byte bit-fields differs between
+ * __LITTLE_ENDIAN_BITFIELD and __BIG_ENDIAN_BITFIELD; the build stops with
+ * an #error when neither macro is defined.
+ */
+struct dccp_hdr {
+       __u16   dccph_sport,
+               dccph_dport;
+       __u8    dccph_doff;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u8    dccph_cscov:4,
+               dccph_ccval:4;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u8    dccph_ccval:4,
+               dccph_cscov:4;
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       __u16   dccph_checksum;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       __u32   dccph_x:1,
+               dccph_type:4,
+               dccph_reserved:3,
+               dccph_seq:24;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u32   dccph_reserved:3,
+               dccph_type:4,
+               dccph_x:1,
+               dccph_seq:24;
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+};
+
+/* View the header pointer stored in skb->h.raw as a struct dccp_hdr. */
+static inline struct dccp_hdr *dccp_hdr(const struct sk_buff *skb)
+{
+       struct dccp_hdr *dh = (struct dccp_hdr *)skb->h.raw;
+
+       return dh;
+}
+
+/**
+ * struct dccp_hdr_ext - extra sequence number bits of a 48 bit seq packet
+ *
+ * @dccph_seq_low - low order bits of the sequence number; a full 32-bit
+ *                 field that dccp_hdr_seq() adds below the bits taken
+ *                 from dccph_seq when dccph_x is set
+ *
+ * dccp_hdrx() locates this struct directly after struct dccp_hdr.
+ */
+struct dccp_hdr_ext {
+       __u32   dccph_seq_low;
+};
+
+/* Locate the sequence number extension placed right after struct dccp_hdr. */
+static inline struct dccp_hdr_ext *dccp_hdrx(const struct sk_buff *skb)
+{
+       struct dccp_hdr_ext *dhx;
+
+       dhx = (struct dccp_hdr_ext *)(skb->h.raw + sizeof(struct dccp_hdr));
+       return dhx;
+}
+
+/*
+ * Size of the generic header: struct dccp_hdr, plus struct dccp_hdr_ext
+ * when the packet has dccph_x set.
+ */
+static inline unsigned int dccp_basic_hdr_len(const struct sk_buff *skb)
+{
+       unsigned int len = sizeof(struct dccp_hdr);
+
+       if (dccp_hdr(skb)->dccph_x)
+               len += sizeof(struct dccp_hdr_ext);
+
+       return len;
+}
+
+/**
+ * dccp_hdr_seq - read the sequence number from a packet's DCCP header
+ * @skb - packet whose DCCP header is found through dccp_hdr()
+ *
+ * Returns the 24 bits of dccph_seq in host byte order.  When dccph_x is set
+ * those bits are shifted up by 32 and the ntohl()-converted dccph_seq_low
+ * of the extended header is added below them.
+ */
+static inline __u64 dccp_hdr_seq(const struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       /*
+        * A 24-bit bit-field is promoted to (signed) int, so shifting it
+        * left by 8 can overflow int; widen to __u32 before shifting.
+        */
+       __u64 seq_nr = ntohl((__u32)dh->dccph_seq << 8);
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       __u64 seq_nr = ntohl(dh->dccph_seq);
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+
+       if (dh->dccph_x != 0)
+               seq_nr = (seq_nr << 32) + ntohl(dccp_hdrx(skb)->dccph_seq_low);
+
+       return seq_nr;
+}
+
+/**
+ * struct dccp_hdr_request - Connection initiation request header
+ *
+ * @dccph_req_service - Service to which the client app wants to connect
+ * @dccph_req_options - list of options (must be a multiple of 32 bits);
+ *                     not a member of this struct
+ */
+struct dccp_hdr_request {
+       __u32   dccph_req_service;
+};
+
+/* The request header starts dccp_basic_hdr_len() bytes past skb->h.raw. */
+static inline struct dccp_hdr_request *dccp_hdr_request(struct sk_buff *skb)
+{
+       const unsigned int offset = dccp_basic_hdr_len(skb);
+
+       return (struct dccp_hdr_request *)(skb->h.raw + offset);
+}
+
+/**
+ * struct dccp_hdr_ack_bits - acknowledgment bits common to most packets
+ *
+ * @dccph_reserved1 - 8-bit field sharing a 32-bit word with dccph_ack_nr_high
+ * @dccph_ack_nr_high - 48 bit ack number high order bits, contains GSR
+ * @dccph_ack_nr_low - 48 bit ack number low order bits, contains GSR
+ */
+struct dccp_hdr_ack_bits {
+       __u32   dccph_reserved1:8,
+               dccph_ack_nr_high:24;
+       __u32   dccph_ack_nr_low;
+};
+
+/* The ack bits start dccp_basic_hdr_len() bytes past skb->h.raw. */
+static inline struct dccp_hdr_ack_bits *dccp_hdr_ack_bits(const struct sk_buff *skb)
+{
+       const unsigned int offset = dccp_basic_hdr_len(skb);
+
+       return (struct dccp_hdr_ack_bits *)(skb->h.raw + offset);
+}
+
+static inline u64 dccp_hdr_ack_seq(const struct sk_buff *skb)
+{
+       const struct dccp_hdr_ack_bits *dhack = dccp_hdr_ack_bits(skb);
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       return (((u64)ntohl(dhack->dccph_ack_nr_high << 8)) << 32) + ntohl(dhack->dccph_ack_nr_low);
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       return (((u64)ntohl(dhack->dccph_ack_nr_high)) << 32) + ntohl(dhack->dccph_ack_nr_low);
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+}
+
+/**
+ * struct dccp_hdr_response - Connection initiation response header
+ *
+ * @dccph_resp_ack - 48 bit ack number (high and low order bits, see
+ *                  struct dccp_hdr_ack_bits), contains GSR
+ * @dccph_resp_service - Echoes the Service Code on a received DCCP-Request
+ * @dccph_resp_options - list of options (must be a multiple of 32 bits);
+ *                      not a member of this struct
+ */
+struct dccp_hdr_response {
+       struct dccp_hdr_ack_bits        dccph_resp_ack;
+       __u32                           dccph_resp_service;
+};
+
+/* The response header starts dccp_basic_hdr_len() bytes past skb->h.raw. */
+static inline struct dccp_hdr_response *dccp_hdr_response(struct sk_buff *skb)
+{
+       const unsigned int offset = dccp_basic_hdr_len(skb);
+
+       return (struct dccp_hdr_response *)(skb->h.raw + offset);
+}
+
+/**
+ * struct dccp_hdr_reset - Unconditionally shut down a connection
+ *
+ * @dccph_reset_ack - acknowledgment number bits (struct dccp_hdr_ack_bits)
+ * @dccph_reset_code - one byte reset code (NOTE(review): presumably one of
+ *                    enum dccp_reset_codes - confirm)
+ * @dccph_reset_data - three bytes of data following the reset code
+ * @dccph_reset_options - list of options (must be a multiple of 32 bits);
+ *                       not a member of this struct
+ */
+struct dccp_hdr_reset {
+       struct dccp_hdr_ack_bits        dccph_reset_ack;
+       __u8                            dccph_reset_code,
+                                       dccph_reset_data[3];
+};
+
+/* The reset header starts dccp_basic_hdr_len() bytes past skb->h.raw. */
+static inline struct dccp_hdr_reset *dccp_hdr_reset(struct sk_buff *skb)
+{
+       const unsigned int offset = dccp_basic_hdr_len(skb);
+
+       return (struct dccp_hdr_reset *)(skb->h.raw + offset);
+}
+
+/*
+ * DCCP packet types, numbered consecutively from DCCP_PKT_REQUEST = 0.
+ * dccp_packet_hdr_len() compares these against a packet's dccph_type.
+ * DCCP_PKT_INVALID comes last, so DCCP_NR_PKT_TYPES equals the number of
+ * enumerators that precede it.
+ */
+enum dccp_pkt_type {
+       DCCP_PKT_REQUEST = 0,
+       DCCP_PKT_RESPONSE,
+       DCCP_PKT_DATA,
+       DCCP_PKT_ACK,
+       DCCP_PKT_DATAACK,
+       DCCP_PKT_CLOSEREQ,
+       DCCP_PKT_CLOSE,
+       DCCP_PKT_RESET,
+       DCCP_PKT_SYNC,
+       DCCP_PKT_SYNCACK,
+       DCCP_PKT_INVALID,
+};
+
+#define DCCP_NR_PKT_TYPES DCCP_PKT_INVALID
+
+/*
+ * Size of the packet type specific header that follows the generic header:
+ * nothing for Data, the ack bits for Ack/DataAck/CloseReq/Close/Sync/SyncAck,
+ * the request or response header for those two types, and the reset header
+ * for every other type value.
+ */
+static inline unsigned int dccp_packet_hdr_len(const __u8 type)
+{
+       switch (type) {
+       case DCCP_PKT_DATA:
+               return 0;
+       case DCCP_PKT_ACK:
+       case DCCP_PKT_DATAACK:
+       case DCCP_PKT_CLOSEREQ:
+       case DCCP_PKT_CLOSE:
+       case DCCP_PKT_SYNC:
+       case DCCP_PKT_SYNCACK:
+               return sizeof(struct dccp_hdr_ack_bits);
+       case DCCP_PKT_REQUEST:
+               return sizeof(struct dccp_hdr_request);
+       case DCCP_PKT_RESPONSE:
+               return sizeof(struct dccp_hdr_response);
+       default:
+               return sizeof(struct dccp_hdr_reset);
+       }
+}
+
+/* Generic header length plus the length of the type specific header. */
+static inline unsigned int dccp_hdr_len(const struct sk_buff *skb)
+{
+       const unsigned int basic_len = dccp_basic_hdr_len(skb);
+       const unsigned int type_len =
+               dccp_packet_hdr_len(dccp_hdr(skb)->dccph_type);
+
+       return basic_len + type_len;
+}
+
+/*
+ * Reset codes, numbered consecutively from DCCP_RESET_CODE_UNSPECIFIED = 0.
+ * NOTE(review): presumably the values carried in dccph_reset_code of
+ * struct dccp_hdr_reset - confirm against the code that builds resets.
+ */
+enum dccp_reset_codes {
+       DCCP_RESET_CODE_UNSPECIFIED = 0,
+       DCCP_RESET_CODE_CLOSED,
+       DCCP_RESET_CODE_ABORTED,
+       DCCP_RESET_CODE_NO_CONNECTION,
+       DCCP_RESET_CODE_PACKET_ERROR,
+       DCCP_RESET_CODE_OPTION_ERROR,
+       DCCP_RESET_CODE_MANDATORY_ERROR,
+       DCCP_RESET_CODE_CONNECTION_REFUSED,
+       DCCP_RESET_CODE_BAD_SERVICE_CODE,
+       DCCP_RESET_CODE_TOO_BUSY,
+       DCCP_RESET_CODE_BAD_INIT_COOKIE,
+       DCCP_RESET_CODE_AGGRESSION_PENALTY,
+};
+
+/*
+ * DCCP options
+ *
+ * Option type numbers.  The MIN/MAX pairs delimit ranges: 3..31 reserved,
+ * 128..255 CCID specific.
+ */
+enum {
+       DCCPO_PADDING = 0,
+       DCCPO_MANDATORY = 1,
+       DCCPO_MIN_RESERVED = 3,
+       DCCPO_MAX_RESERVED = 31,
+       DCCPO_NDP_COUNT = 37,
+       DCCPO_ACK_VECTOR_0 = 38,
+       DCCPO_ACK_VECTOR_1 = 39,
+       DCCPO_TIMESTAMP = 41,
+       DCCPO_TIMESTAMP_ECHO = 42,
+       DCCPO_ELAPSED_TIME = 43,
+       DCCPO_MAX = 45,
+       DCCPO_MIN_CCID_SPECIFIC = 128,
+       DCCPO_MAX_CCID_SPECIFIC = 255,
+};
+
+/*
+ * DCCP features
+ *
+ * Feature numbers.  These share the DCCPF_ prefix with the state flags
+ * defined earlier in this header but are a separate set of values;
+ * 128..255 is the CCID specific range.
+ */
+enum {
+       DCCPF_RESERVED = 0,
+       DCCPF_SEQUENCE_WINDOW = 3,
+       DCCPF_SEND_ACK_VECTOR = 6,
+       DCCPF_SEND_NDP_COUNT = 7,
+       /* 10-127 reserved */
+       DCCPF_MIN_CCID_SPECIFIC = 128,
+       DCCPF_MAX_CCID_SPECIFIC = 255,
+};
+
+/* initial values for each feature */
+#define DCCPF_INITIAL_SEQUENCE_WINDOW          100
+/* FIXME: for now we're using CCID 3 (TFRC) */
+#define DCCPF_INITIAL_CCID                     3
+#define DCCPF_INITIAL_SEND_ACK_VECTOR          0
+/* FIXME: for now we're default to 1 but it should really be 0 */
+#define DCCPF_INITIAL_SEND_NDP_COUNT           1
+
+/*
+ * 0xFFFFFF is the largest value a 24-bit field such as dccpor_ndp can hold.
+ * NOTE(review): presumably the cap applied to the NDP count - confirm
+ * against the option insertion/parsing code.
+ */
+#define DCCP_NDP_LIMIT 0xFFFFFF
+
+/**
+  * struct dccp_options - option values for a DCCP connection
+  *    @dccpo_sequence_window - Sequence Window Feature (section 7.5.2)
+  *    @dccpo_ccid - Congestion Control Id (CCID) (section 10)
+  *    @dccpo_send_ack_vector - Send Ack Vector Feature (section 11.5)
+  *    @dccpo_send_ndp_count - Send NDP Count Feature (7.7.2)
+  *
+  * NOTE(review): the initial values presumably come from the
+  * DCCPF_INITIAL_* macros via dccp_options_init() - confirm in options.c.
+  */
+struct dccp_options {
+       __u64   dccpo_sequence_window;
+       __u8    dccpo_ccid;
+       __u8    dccpo_send_ack_vector;
+       __u8    dccpo_send_ndp_count;
+};
+
+extern void __dccp_options_init(struct dccp_options *dccpo);
+extern void dccp_options_init(struct dccp_options *dccpo);
+extern int dccp_parse_options(struct sock *sk, struct sk_buff *skb);
+
+/*
+ * Per-request state; dccp_rsk() obtains it by casting a struct request_sock
+ * pointer, so dreq_inet_rsk has to remain the first member.
+ * NOTE(review): dreq_iss / dreq_isr presumably are the initial sequence
+ * numbers sent / received (compare dccps_iss / dccps_isr) and dreq_service
+ * the service code of the request - confirm against the users.
+ */
+struct dccp_request_sock {
+       struct inet_request_sock dreq_inet_rsk;
+       __u64                    dreq_iss;
+       __u64                    dreq_isr;
+       __u32                    dreq_service;
+};
+
+/* Reinterpret a request_sock pointer as the enclosing dccp_request_sock. */
+static inline struct dccp_request_sock *dccp_rsk(const struct request_sock *req)
+{
+       struct dccp_request_sock *dreq = (struct dccp_request_sock *)req;
+
+       return dreq;
+}
+
+/* Read about the ECN nonce to see why it is 253 */
+#define DCCP_MAX_ACK_VECTOR_LEN 253
+
+/*
+ * Option values kept per socket in dccp_sock::dccps_options_received.
+ * NOTE(review): presumably filled in by dccp_parse_options() from the
+ * options of a received packet - confirm in options.c.
+ */
+struct dccp_options_received {
+       u32     dccpor_ndp:24,
+               dccpor_ack_vector_len:8;
+       u32     dccpor_ack_vector_idx:10;
+       /* 22 bits hole, try to pack */
+       u32     dccpor_timestamp;
+       u32     dccpor_timestamp_echo;
+       u32     dccpor_elapsed_time;
+};
+
+struct ccid;
+
+/*
+ * Role of a socket.  Stored in the 2-bit dccps_role bit-field of
+ * struct dccp_sock and turned into a string by dccp_role().
+ */
+enum dccp_role {
+       DCCP_ROLE_UNDEFINED,
+       DCCP_ROLE_LISTEN,
+       DCCP_ROLE_CLIENT,
+       DCCP_ROLE_SERVER,
+};
+
+/**
+ * struct dccp_sock - DCCP socket state
+ *
+ * @dccps_inet_connection - embedded inet_connection_sock; kept first because
+ *                         dccp_sk() casts a struct sock pointer to this type
+ * @dccps_swl - sequence number window low
+ * @dccps_swh - sequence number window high
+ * @dccps_awl - acknowledgement number window low
+ * @dccps_awh - acknowledgement number window high
+ * @dccps_iss - initial sequence number sent
+ * @dccps_isr - initial sequence number received
+ * @dccps_osr - first OPEN sequence number received
+ * @dccps_gss - greatest sequence number sent
+ * @dccps_gsr - greatest valid sequence number received
+ * @dccps_gar - greatest valid ack number received on a non-Sync; initialized to %dccps_iss
+ * @dccps_timestamp_time - time of latest TIMESTAMP option
+ * @dccps_timestamp_echo - latest timestamp received on a TIMESTAMP option
+ * @dccps_ext_header_len - network protocol overhead (IP/IPv6 options)
+ * @dccps_pmtu_cookie - Last pmtu seen by socket
+ * @dccps_avg_packet_size - FIXME: has to be set by the app thru some setsockopt or ioctl, CCID3 uses it
+ * @dccps_role - Role of this sock, one of %dccp_role
+ * @dccps_ndp_count - number of Non Data Packets since last data packet
+ * @dccps_hc_rx_ackpkts - receiver half connection acked packets
+ * @dccps_options - option values of this connection (struct dccp_options)
+ * @dccps_options_received - see struct dccp_options_received
+ * @dccps_hc_rx_ccid, @dccps_hc_tx_ccid - struct ccid pointers
+ *     (NOTE(review): presumably the CCIDs of the receiver / sender half
+ *     connection, with the matching *_ccid_private pointers holding their
+ *     private state - confirm against the CCID users)
+ */
+struct dccp_sock {
+       /* inet_connection_sock has to be the first member of dccp_sock */
+       struct inet_connection_sock     dccps_inet_connection;
+       __u64                           dccps_swl;
+       __u64                           dccps_swh;
+       __u64                           dccps_awl;
+       __u64                           dccps_awh;
+       __u64                           dccps_iss;
+       __u64                           dccps_isr;
+       __u64                           dccps_osr;
+       __u64                           dccps_gss;
+       __u64                           dccps_gsr;
+       __u64                           dccps_gar;
+       unsigned long                   dccps_service;
+       unsigned long                   dccps_timestamp_time;
+       __u32                           dccps_timestamp_echo;
+       __u32                           dccps_avg_packet_size;
+       unsigned long                   dccps_ndp_count;
+       __u16                           dccps_ext_header_len;
+       __u32                           dccps_pmtu_cookie;
+       __u32                           dccps_mss_cache;
+       struct dccp_options             dccps_options;
+       struct dccp_ackpkts             *dccps_hc_rx_ackpkts;
+       void                            *dccps_hc_rx_ccid_private;
+       void                            *dccps_hc_tx_ccid_private;
+       struct ccid                     *dccps_hc_rx_ccid;
+       struct ccid                     *dccps_hc_tx_ccid;
+       struct dccp_options_received    dccps_options_received;
+       enum dccp_role                  dccps_role:2;
+};
+/* Reinterpret a struct sock pointer as the enclosing struct dccp_sock. */
+static inline struct dccp_sock *dccp_sk(const struct sock *sk)
+{
+       struct dccp_sock *dp = (struct dccp_sock *)sk;
+
+       return dp;
+}
+
+/*
+ * Name of the socket's role as a string literal; NULL when dccps_role holds
+ * none of the four enum dccp_role values.
+ */
+static inline const char *dccp_role(const struct sock *sk)
+{
+       const char *name = NULL;
+
+       switch (dccp_sk(sk)->dccps_role) {
+       case DCCP_ROLE_UNDEFINED:
+               name = "undefined";
+               break;
+       case DCCP_ROLE_LISTEN:
+               name = "listen";
+               break;
+       case DCCP_ROLE_SERVER:
+               name = "server";
+               break;
+       case DCCP_ROLE_CLIENT:
+               name = "client";
+               break;
+       }
+
+       return name;
+}
+
+#endif /* _LINUX_DCCP_H */
index fb88c66..ba35538 100644 (file)
@@ -32,6 +32,7 @@ enum {
   IPPROTO_PUP = 12,            /* PUP protocol                         */
   IPPROTO_UDP = 17,            /* User Datagram Protocol               */
   IPPROTO_IDP = 22,            /* XNS IDP protocol                     */
+  IPPROTO_DCCP = 33,           /* Datagram Congestion Control Protocol */
   IPPROTO_RSVP = 46,           /* RSVP protocol                        */
   IPPROTO_GRE = 47,            /* Cisco GRE tunnels (rfc 1701,1702)    */
 
index 3990661..5f8b632 100644 (file)
@@ -84,6 +84,7 @@ enum sock_type {
        SOCK_RAW        = 3,
        SOCK_RDM        = 4,
        SOCK_SEQPACKET  = 5,
+       SOCK_DCCP       = 6,
        SOCK_PACKET     = 10,
 };
 
index a5c7d96..ddf2255 100644 (file)
@@ -271,6 +271,7 @@ struct ucred {
 #define SOL_IRDA        266
 #define SOL_NETBEUI    267
 #define SOL_LLC                268
+#define SOL_DCCP       269
 
 /* IPX options */
 #define IPX_TYPE       1
index 02877ac..c07aafb 100644 (file)
@@ -147,6 +147,7 @@ source "net/bridge/netfilter/Kconfig"
 
 endif
 
+source "net/dccp/Kconfig"
 source "net/sctp/Kconfig"
 source "net/atm/Kconfig"
 source "net/bridge/Kconfig"
index 4a01be8..7e6eff2 100644 (file)
@@ -42,6 +42,7 @@ obj-$(CONFIG_ATM)             += atm/
 obj-$(CONFIG_DECNET)           += decnet/
 obj-$(CONFIG_ECONET)           += econet/
 obj-$(CONFIG_VLAN_8021Q)       += 8021q/
+obj-$(CONFIG_IP_DCCP)          += dccp/
 obj-$(CONFIG_IP_SCTP)          += sctp/
 
 ifeq ($(CONFIG_NET),y)
diff --git a/net/dccp/Kconfig b/net/dccp/Kconfig
new file mode 100644 (file)
index 0000000..90460bc
--- /dev/null
@@ -0,0 +1,24 @@
+menu "DCCP Configuration (EXPERIMENTAL)"
+       depends on INET && EXPERIMENTAL
+
+config IP_DCCP
+       tristate "The DCCP Protocol (EXPERIMENTAL)"
+       ---help---
+         Datagram Congestion Control Protocol
+
+         From draft-ietf-dccp-spec-11 <http://www.icir.org/kohler/dcp/draft-ietf-dccp-spec-11.txt>.
+
+         The Datagram Congestion Control Protocol (DCCP) is a transport
+         protocol that implements bidirectional, unicast connections of
+         congestion-controlled, unreliable datagrams. It should be suitable
+         for use by applications such as streaming media, Internet telephony,
+         and on-line games.
+
+         To compile this protocol support as a module, choose M here: the
+         module will be called dccp.
+
+         If in doubt, say N.
+
+source "net/dccp/ccids/Kconfig"
+
+endmenu
diff --git a/net/dccp/Makefile b/net/dccp/Makefile
new file mode 100644 (file)
index 0000000..c6e6ba5
--- /dev/null
@@ -0,0 +1,5 @@
+obj-$(CONFIG_IP_DCCP) += dccp.o
+
+dccp-y := ccid.o input.o ipv4.o minisocks.o options.o output.o proto.o timer.o
+
+obj-y += ccids/
diff --git a/net/dccp/ccid.c b/net/dccp/ccid.c
new file mode 100644 (file)
index 0000000..9d8fc0e
--- /dev/null
@@ -0,0 +1,139 @@
+/*
+ *  net/dccp/ccid.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include "ccid.h"
+
+/* Table of registered CCIDs, indexed by ccid_id; unused slots are NULL. */
+static struct ccid *ccids[CCID_MAX];
+#if defined(CONFIG_SMP) || defined(CONFIG_PREEMPT)
+/* Count of readers between ccids_read_lock() and ccids_read_unlock(). */
+static atomic_t ccids_lockct = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(ccids_lock);
+
+/*
+ * The strategy is: modifications to the ccids vector are short, do not sleep
+ * and are veeery rare, but read access should be free of any exclusive locks.
+ */
+/*
+ * Writer side: take ccids_lock and, for as long as the reader count is
+ * non-zero, drop the lock, yield() and take it again.  Returns with
+ * ccids_lock held.
+ */
+static void ccids_write_lock(void)
+{
+       spin_lock(&ccids_lock);
+       while (atomic_read(&ccids_lockct) != 0) {
+               spin_unlock(&ccids_lock);
+               yield();
+               spin_lock(&ccids_lock);
+       }
+}
+
+/* Writer side: release ccids_lock. */
+static inline void ccids_write_unlock(void)
+{
+       spin_unlock(&ccids_lock);
+}
+
+/*
+ * Reader side: announce the reader by bumping the counter first, then wait
+ * until ccids_lock is not held.
+ */
+static inline void ccids_read_lock(void)
+{
+       atomic_inc(&ccids_lockct);
+       spin_unlock_wait(&ccids_lock);
+}
+
+/* Reader side: drop the reader count taken by ccids_read_lock(). */
+static inline void ccids_read_unlock(void)
+{
+       atomic_dec(&ccids_lockct);
+}
+
+#else
+/* Neither CONFIG_SMP nor CONFIG_PREEMPT: all four helpers are no-ops. */
+#define ccids_write_lock() do { } while(0)
+#define ccids_write_unlock() do { } while(0)
+#define ccids_read_lock() do { } while(0)
+#define ccids_read_unlock() do { } while(0)
+#endif
+
+/**
+ * ccid_register - add a CCID to the table of available CCIDs
+ * @ccid - descriptor to register; ccid_init must be set and ccid_id must
+ *        be a valid index into ccids[]
+ *
+ * Returns 0 on success, -EINVAL if the descriptor has no ccid_init hook or
+ * its id does not fit the table, -EEXIST if the id is already taken.
+ */
+int ccid_register(struct ccid *ccid)
+{
+       int err;
+
+       /*
+        * ccid_id is an unsigned char (0..255) but ccids[] has only CCID_MAX
+        * slots, so an unchecked id of CCID_MAX would write past the array.
+        */
+       if (ccid->ccid_init == NULL || ccid->ccid_id >= CCID_MAX)
+               return -EINVAL;
+
+       ccids_write_lock();
+       err = -EEXIST;
+       if (ccids[ccid->ccid_id] == NULL) {
+               ccids[ccid->ccid_id] = ccid;
+               err = 0;
+       }
+       ccids_write_unlock();
+       if (err == 0)
+               pr_info("CCID: Registered CCID %d (%s)\n",
+                       ccid->ccid_id, ccid->ccid_name);
+       return err;
+}
+
+EXPORT_SYMBOL_GPL(ccid_register);
+
+/**
+ * ccid_unregister - remove a CCID from the table of available CCIDs
+ * @ccid - descriptor previously passed to ccid_register()
+ *
+ * The slot is cleared only when it actually holds @ccid, so a descriptor
+ * whose registration failed (e.g. with -EEXIST) cannot evict the CCID that
+ * owns the id.  Always returns 0.
+ */
+int ccid_unregister(struct ccid *ccid)
+{
+       int removed = 0;
+
+       /* ccids[] has CCID_MAX slots; an id of CCID_MAX is out of range */
+       if (ccid->ccid_id < CCID_MAX) {
+               ccids_write_lock();
+               if (ccids[ccid->ccid_id] == ccid) {
+                       ccids[ccid->ccid_id] = NULL;
+                       removed = 1;
+               }
+               ccids_write_unlock();
+       }
+       if (removed)
+               pr_info("CCID: Unregistered CCID %d (%s)\n",
+                       ccid->ccid_id, ccid->ccid_name);
+       return 0;
+}
+
+EXPORT_SYMBOL_GPL(ccid_unregister);
+
+/**
+ * ccid_init - look up a registered CCID and initialise it for a socket
+ * @id - CCID number, used as index into ccids[]
+ * @sk - socket handed to the CCID's ccid_init hook
+ *
+ * With CONFIG_KMOD, an empty slot first triggers a request for module
+ * "net-dccp-ccid-<id>".  On success a reference on the owning module is
+ * held and the descriptor is returned.  Returns NULL when @id is out of
+ * range, no CCID is registered under @id, the module reference cannot be
+ * taken, or the ccid_init hook returns non-zero.
+ */
+struct ccid *ccid_init(unsigned char id, struct sock *sk)
+{
+       struct ccid *ccid;
+
+       /* ccids[] has CCID_MAX slots; id == CCID_MAX would index past it */
+       if (id >= CCID_MAX)
+               return NULL;
+
+#ifdef CONFIG_KMOD
+       if (ccids[id] == NULL)
+               request_module("net-dccp-ccid-%d", id);
+#endif
+       ccids_read_lock();
+
+       ccid = ccids[id];
+       if (ccid == NULL)
+               goto out;
+
+       if (!try_module_get(ccid->ccid_owner))
+               goto out_err;
+
+       if (ccid->ccid_init(sk) != 0)
+               goto out_module_put;
+out:
+       ccids_read_unlock();
+       return ccid;
+out_module_put:
+       /* the hook failed: give back the module reference taken above */
+       module_put(ccid->ccid_owner);
+out_err:
+       ccid = NULL;
+       goto out;
+}
+
+EXPORT_SYMBOL_GPL(ccid_init);
+
+/*
+ * Tear down a CCID for a socket.  Does nothing for a NULL ccid.  While the
+ * id's slot in ccids[] is non-NULL, the optional ccid_exit hook is called
+ * and one module reference on ccid_owner is dropped.
+ */
+void ccid_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid != NULL) {
+               ccids_read_lock();
+
+               if (ccids[ccid->ccid_id] != NULL) {
+                       if (ccid->ccid_exit != NULL)
+                               ccid->ccid_exit(sk);
+                       module_put(ccid->ccid_owner);
+               }
+
+               ccids_read_unlock();
+       }
+}
+
+EXPORT_SYMBOL_GPL(ccid_exit);
diff --git a/net/dccp/ccid.h b/net/dccp/ccid.h
new file mode 100644 (file)
index 0000000..06105b2
--- /dev/null
@@ -0,0 +1,156 @@
+#ifndef _CCID_H
+#define _CCID_H
+/*
+ *  net/dccp/ccid.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  CCID infrastructure
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <net/sock.h>
+#include <linux/dccp.h>
+#include <linux/list.h>
+#include <linux/module.h>
+
+#define CCID_MAX 255
+
+/*
+ * Descriptor of one congestion control ID implementation.
+ *
+ * ccid_id indexes the table in ccid.c, ccid_name is printed on
+ * (un)registration and ccid_owner is the module that ccid_init()/ccid_exit()
+ * take and drop references on.  ccid_init is mandatory (ccid_register()
+ * rejects a NULL one); every other hook may be NULL, in which case
+ * ccid_exit() and the ccid_hc_* wrappers below skip the call.
+ */
+struct ccid {
+       unsigned char   ccid_id;
+       const char      *ccid_name;
+       struct module   *ccid_owner;
+       int             (*ccid_init)(struct sock *sk);
+       void            (*ccid_exit)(struct sock *sk);
+       int             (*ccid_hc_rx_init)(struct sock *sk);
+       int             (*ccid_hc_tx_init)(struct sock *sk);
+       void            (*ccid_hc_rx_exit)(struct sock *sk);
+       void            (*ccid_hc_tx_exit)(struct sock *sk);
+       void            (*ccid_hc_rx_packet_recv)(struct sock *sk, struct sk_buff *skb);
+       int             (*ccid_hc_rx_parse_options)(struct sock *sk,
+                                                   unsigned char option,
+                                                   unsigned char len, u16 idx,
+                                                   unsigned char* value);
+       void            (*ccid_hc_rx_insert_options)(struct sock *sk, struct sk_buff *skb);
+       void            (*ccid_hc_tx_insert_options)(struct sock *sk, struct sk_buff *skb);
+       void            (*ccid_hc_tx_packet_recv)(struct sock *sk, struct sk_buff *skb);
+       int             (*ccid_hc_tx_parse_options)(struct sock *sk,
+                                                   unsigned char option,
+                                                   unsigned char len, u16 idx,
+                                                   unsigned char* value);
+       int             (*ccid_hc_tx_send_packet)(struct sock *sk,
+                                                 struct sk_buff *skb, int len,
+                                                 long *delay);
+       void            (*ccid_hc_tx_packet_sent)(struct sock *sk, int more, int len);
+};
+
+extern int        ccid_register(struct ccid *ccid);
+extern int        ccid_unregister(struct ccid *ccid);
+
+extern struct ccid *ccid_init(unsigned char id, struct sock *sk);
+extern void       ccid_exit(struct ccid *ccid, struct sock *sk);
+
+/* Take an extra reference on the module that owns this CCID. */
+static inline void __ccid_get(struct ccid *ccid)
+{
+       struct module *owner = ccid->ccid_owner;
+
+       __module_get(owner);
+}
+
+/* Call the CCID's ccid_hc_tx_send_packet hook; 0 when the hook is unset. */
+static inline int ccid_hc_tx_send_packet(struct ccid *ccid, struct sock *sk,
+                                        struct sk_buff *skb, int len,
+                                        long *delay)
+{
+       if (ccid->ccid_hc_tx_send_packet == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_send_packet(sk, skb, len, delay);
+}
+
+/* Call the CCID's ccid_hc_tx_packet_sent hook, if it has one. */
+static inline void ccid_hc_tx_packet_sent(struct ccid *ccid, struct sock *sk,
+                                         int more, int len)
+{
+       if (ccid->ccid_hc_tx_packet_sent == NULL)
+               return;
+
+       ccid->ccid_hc_tx_packet_sent(sk, more, len);
+}
+
+/* Call the CCID's ccid_hc_rx_init hook; 0 when the hook is unset. */
+static inline int ccid_hc_rx_init(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_rx_init == NULL)
+               return 0;
+
+       return ccid->ccid_hc_rx_init(sk);
+}
+
+/* Call the CCID's ccid_hc_tx_init hook; 0 when the hook is unset. */
+static inline int ccid_hc_tx_init(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_tx_init == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_init(sk);
+}
+
+/* Call the CCID's ccid_hc_rx_exit hook, if it has one. */
+static inline void ccid_hc_rx_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_rx_exit == NULL)
+               return;
+
+       ccid->ccid_hc_rx_exit(sk);
+}
+
+/* Call the CCID's ccid_hc_tx_exit hook, if it has one. */
+static inline void ccid_hc_tx_exit(struct ccid *ccid, struct sock *sk)
+{
+       if (ccid->ccid_hc_tx_exit == NULL)
+               return;
+
+       ccid->ccid_hc_tx_exit(sk);
+}
+
+/* Call the CCID's ccid_hc_rx_packet_recv hook, if it has one. */
+static inline void ccid_hc_rx_packet_recv(struct ccid *ccid, struct sock *sk,
+                                         struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_rx_packet_recv == NULL)
+               return;
+
+       ccid->ccid_hc_rx_packet_recv(sk, skb);
+}
+
+/* Call the CCID's ccid_hc_tx_packet_recv hook, if it has one. */
+static inline void ccid_hc_tx_packet_recv(struct ccid *ccid, struct sock *sk,
+                                         struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_tx_packet_recv == NULL)
+               return;
+
+       ccid->ccid_hc_tx_packet_recv(sk, skb);
+}
+
+/* Call the CCID's ccid_hc_tx_parse_options hook; 0 when the hook is unset. */
+static inline int ccid_hc_tx_parse_options(struct ccid *ccid, struct sock *sk,
+                                          unsigned char option,
+                                          unsigned char len, u16 idx,
+                                          unsigned char* value)
+{
+       if (ccid->ccid_hc_tx_parse_options == NULL)
+               return 0;
+
+       return ccid->ccid_hc_tx_parse_options(sk, option, len, idx, value);
+}
+
+/* Call the CCID's ccid_hc_rx_parse_options hook; 0 when the hook is unset. */
+static inline int ccid_hc_rx_parse_options(struct ccid *ccid, struct sock *sk,
+                                          unsigned char option,
+                                          unsigned char len, u16 idx,
+                                          unsigned char* value)
+{
+       if (ccid->ccid_hc_rx_parse_options == NULL)
+               return 0;
+
+       return ccid->ccid_hc_rx_parse_options(sk, option, len, idx, value);
+}
+
+/* Call the CCID's ccid_hc_tx_insert_options hook, if it has one. */
+static inline void ccid_hc_tx_insert_options(struct ccid *ccid, struct sock *sk,
+                                            struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_tx_insert_options == NULL)
+               return;
+
+       ccid->ccid_hc_tx_insert_options(sk, skb);
+}
+
+/* Call the CCID's ccid_hc_rx_insert_options hook, if it has one. */
+static inline void ccid_hc_rx_insert_options(struct ccid *ccid, struct sock *sk,
+                                            struct sk_buff *skb)
+{
+       if (ccid->ccid_hc_rx_insert_options == NULL)
+               return;
+
+       ccid->ccid_hc_rx_insert_options(sk, skb);
+}
+#endif /* _CCID_H */
diff --git a/net/dccp/ccids/Kconfig b/net/dccp/ccids/Kconfig
new file mode 100644 (file)
index 0000000..67f9c06
--- /dev/null
@@ -0,0 +1,25 @@
+menu "DCCP CCIDs Configuration (EXPERIMENTAL)"
+       depends on IP_DCCP && EXPERIMENTAL
+
+config IP_DCCP_CCID3
+       tristate "CCID3 (TFRC) (EXPERIMENTAL)"
+       depends on IP_DCCP
+       ---help---
+         CCID 3 denotes TCP-Friendly Rate Control (TFRC), an equation-based
+         rate-controlled congestion control mechanism.  TFRC is designed to
+         be reasonably fair when competing for bandwidth with TCP-like flows,
+         where a flow is "reasonably fair" if its sending rate is generally
+         within a factor of two of the sending rate of a TCP flow under the
+         same conditions.  However, TFRC has a much lower variation of
+         throughput over time compared with TCP, which makes CCID 3 more
+         suitable than CCID 2 for applications such as streaming media where a
+         relatively smooth sending rate is of importance.
+
+         CCID 3 is further described in [CCID 3 PROFILE]. The TFRC
+         congestion control algorithms were initially described in RFC 3448.
+
+         This text was extracted from draft-ietf-dccp-spec-11.txt.
+         
+         If in doubt, say M.
+
+endmenu
diff --git a/net/dccp/ccids/Makefile b/net/dccp/ccids/Makefile
new file mode 100644 (file)
index 0000000..1c72013
--- /dev/null
@@ -0,0 +1,3 @@
+obj-$(CONFIG_IP_DCCP_CCID3) += dccp_ccid3.o
+
+dccp_ccid3-y := ccid3.o
diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c
new file mode 100644 (file)
index 0000000..4f45902
--- /dev/null
@@ -0,0 +1,2164 @@
+/*
+ *  net/dccp/ccids/ccid3.c
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include "../ccid.h"
+#include "../dccp.h"
+#include "ccid3.h"
+
+#ifdef CCID3_DEBUG
+/*
+ * Runtime switch for CCID3 debug output.
+ *
+ * Declared static to agree with the file-scope "static int ccid3_debug"
+ * definition further down in this file: an "extern" declaration followed
+ * by a "static" definition of the same identifier does not compile.
+ * Repeating a static tentative definition is valid C.
+ */
+static int ccid3_debug;
+
+/* printk() at KERN_DEBUG, prefixed with the calling function's name. */
+#define ccid3_pr_debug(format, a...) \
+       do { if (ccid3_debug) \
+               printk(KERN_DEBUG "%s: " format, __FUNCTION__, ##a); \
+       } while (0)
+#else
+/* Expands to an empty statement; the arguments are never evaluated. */
+#define ccid3_pr_debug(format, a...) do { } while (0)
+#endif
+
+/* Packet size limits (presumably in bytes - TODO confirm against users) */
+#define TFRC_MIN_PACKET_SIZE      16
+#define TFRC_STD_PACKET_SIZE     256
+#define TFRC_MAX_PACKET_SIZE   65535
+
+/* Microseconds per second; times in this file are mostly kept in usecs */
+#define USEC_IN_SEC                1000000
+
+#define TFRC_INITIAL_TIMEOUT      (2 * USEC_IN_SEC)
+/* two seconds (expressed in usecs) as per CCID3 spec 11 */
+
+#define TFRC_OPSYS_HALF_TIME_GRAN      (USEC_IN_SEC / (2 * HZ))
+/* above is in usecs - half the scheduling granularity as per RFC3448 4.6 */
+
+#define TFRC_WIN_COUNT_PER_RTT     4
+#define TFRC_WIN_COUNT_LIMIT      16
+
+#define TFRC_MAX_BACK_OFF_TIME    64
+/* above is in seconds */
+
+/*
+ * Loss event rates below this value (on the scale where 1,000,000 means
+ * 100% loss) are treated like p == 0 by ccid3_hc_tx_update_x().
+ */
+#define TFRC_SMALLEST_P                   40
+
+#define TFRC_RECV_IVAL_F_LENGTH            8          /* length(w[]) */
+
+/* Number of later packets received before one is considered lost */
+#define TFRC_RECV_NUM_LATE_LOSS        3
+
+/*
+ * Option type numbers used by CCID3.
+ * NOTE(review): their use is outside this part of the file; the names
+ * suggest the loss event rate, loss intervals and receive rate options.
+ */
+enum ccid3_options {
+       TFRC_OPT_LOSS_EVENT_RATE = 192,
+       TFRC_OPT_LOSS_INTERVALS  = 193,
+       TFRC_OPT_RECEIVE_RATE    = 194,
+};
+
+/* Tested by ccid3_pr_debug() when CCID3_DEBUG is defined; zero by default */
+static int ccid3_debug;
+
+/*
+ * Slab caches backing the TX history, RX history and loss interval history
+ * entries that the *_new()/*_delete() helpers below allocate and free.
+ */
+static kmem_cache_t *ccid3_tx_hist_slab;
+static kmem_cache_t *ccid3_rx_hist_slab;
+static kmem_cache_t *ccid3_loss_interval_hist_slab;
+
+/*
+ * Allocate a TX history entry from ccid3_tx_hist_slab with allocation
+ * flags @prio.  A freshly allocated entry has ccid3htx_sent cleared.
+ * Returns NULL if the allocation fails.
+ */
+static inline struct ccid3_tx_hist_entry *ccid3_tx_hist_entry_new(int prio)
+{
+       struct ccid3_tx_hist_entry *entry;
+
+       entry = kmem_cache_alloc(ccid3_tx_hist_slab, prio);
+       if (entry == NULL)
+               return NULL;
+
+       entry->ccid3htx_sent = 0;
+       return entry;
+}
+
+/* Give @entry back to ccid3_tx_hist_slab; a NULL @entry is ignored. */
+static inline void ccid3_tx_hist_entry_delete(struct ccid3_tx_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(ccid3_tx_hist_slab, entry);
+}
+
+/*
+ * Allocate an RX history entry from ccid3_rx_hist_slab (flags @prio) and
+ * fill it in from @skb and @sk: sequence number from the skb control
+ * block, window counter (ccval) and packet type from the DCCP header,
+ * ndp from the socket's received options, and the current time of day
+ * as timestamp.  Returns NULL if the allocation fails.
+ */
+static inline struct ccid3_rx_hist_entry *ccid3_rx_hist_entry_new(struct sock *sk,
+                                                                 struct sk_buff *skb,
+                                                                 int prio)
+{
+       const struct dccp_hdr *dh;
+       struct ccid3_rx_hist_entry *entry;
+
+       entry = kmem_cache_alloc(ccid3_rx_hist_slab, prio);
+       if (entry == NULL)
+               return NULL;
+
+       dh = dccp_hdr(skb);
+
+       entry->ccid3hrx_seqno     = DCCP_SKB_CB(skb)->dccpd_seq;
+       entry->ccid3hrx_win_count = dh->dccph_ccval;
+       entry->ccid3hrx_type      = dh->dccph_type;
+       entry->ccid3hrx_ndp       = dccp_sk(sk)->dccps_options_received.dccpor_ndp;
+       do_gettimeofday(&entry->ccid3hrx_tstamp);
+
+       return entry;
+}
+
+/* Give @entry back to ccid3_rx_hist_slab; a NULL @entry is ignored. */
+static inline void ccid3_rx_hist_entry_delete(struct ccid3_rx_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(ccid3_rx_hist_slab, entry);
+}
+
+/* Unlink every entry on the list @hist and free it to ccid3_rx_hist_slab. */
+static void ccid3_rx_history_delete(struct list_head *hist)
+{
+       struct ccid3_rx_hist_entry *cur, *tmp;
+
+       /* _safe variant: the current entry is freed while iterating */
+       list_for_each_entry_safe(cur, tmp, hist, ccid3hrx_node) {
+               list_del_init(&cur->ccid3hrx_node);
+               kmem_cache_free(ccid3_rx_hist_slab, cur);
+       }
+}
+
+/*
+ * Allocate an (uninitialised) loss interval history entry from
+ * ccid3_loss_interval_hist_slab with flags @prio; NULL on failure.
+ */
+static inline struct ccid3_loss_interval_hist_entry *ccid3_loss_interval_hist_entry_new(int prio)
+{
+       struct ccid3_loss_interval_hist_entry *entry;
+
+       entry = kmem_cache_alloc(ccid3_loss_interval_hist_slab, prio);
+       return entry;
+}
+
+/* Give @entry back to ccid3_loss_interval_hist_slab; NULL is ignored. */
+static inline void ccid3_loss_interval_hist_entry_delete(struct ccid3_loss_interval_hist_entry *entry)
+{
+       if (entry == NULL)
+               return;
+
+       kmem_cache_free(ccid3_loss_interval_hist_slab, entry);
+}
+
+/*
+ * Unlink every entry on the list @hist and free it to
+ * ccid3_loss_interval_hist_slab.
+ */
+static void ccid3_loss_interval_history_delete(struct list_head *hist)
+{
+       struct ccid3_loss_interval_hist_entry *cur, *tmp;
+
+       /* _safe variant: the current entry is freed while iterating */
+       list_for_each_entry_safe(cur, tmp, hist, ccid3lih_node) {
+               list_del_init(&cur->ccid3lih_node);
+               kmem_cache_free(ccid3_loss_interval_hist_slab, cur);
+       }
+}
+
+/*
+ * Per-socket init hook.  Only emits a debug line with the socket's role
+ * and address; always returns 0.
+ */
+static int ccid3_init(struct sock *sk)
+{
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+       return 0;
+}
+
+/*
+ * Per-socket exit hook.  Only emits a debug line with the socket's role
+ * and address; nothing is released here.
+ */
+static void ccid3_exit(struct sock *sk)
+{
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+}
+
+/*
+ * TFRC sender states.
+ * Numbering starts at 1, so a zeroed state field matches none of them.
+ */
+enum ccid3_hc_tx_states {
+       TFRC_SSTATE_NO_SENT = 1,
+       TFRC_SSTATE_NO_FBACK,
+       TFRC_SSTATE_FBACK,
+       TFRC_SSTATE_TERM,
+};
+
+#ifdef CCID3_DEBUG
+/*
+ * Map a TFRC sender state to a printable name for debug output.
+ *
+ * The table is const (it only points at string literals).  A value that
+ * has no table entry - including anything past TFRC_SSTATE_TERM - yields
+ * "UNKNOWN" instead of a NULL pointer or a read beyond the table.
+ */
+static const char *ccid3_tx_state_name(enum ccid3_hc_tx_states state)
+{
+       static const char * const ccid3_state_names[] = {
+               [TFRC_SSTATE_NO_SENT]  = "NO_SENT",
+               [TFRC_SSTATE_NO_FBACK] = "NO_FBACK",
+               [TFRC_SSTATE_FBACK]    = "FBACK",
+               [TFRC_SSTATE_TERM]     = "TERM",
+       };
+       /* unsigned, so a negative enum value also fails the range check */
+       const unsigned int idx = state;
+
+       if (idx >= sizeof(ccid3_state_names) / sizeof(ccid3_state_names[0]) ||
+           ccid3_state_names[idx] == NULL)
+               return "UNKNOWN";
+
+       return ccid3_state_names[idx];
+}
+#endif
+
+/*
+ * Move the TX half of @sk into @state.  Logs the transition when
+ * debugging is enabled and warns if the state does not actually change.
+ */
+static inline void ccid3_hc_tx_set_state(struct sock *sk, enum ccid3_hc_tx_states state)
+{
+       struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+       const enum ccid3_hc_tx_states oldstate = hctx->ccid3hctx_state;
+
+       ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+                      dccp_role(sk), sk,
+                      ccid3_tx_state_name(oldstate),
+                      ccid3_tx_state_name(state));
+
+       /* a transition to the current state points at a caller bug */
+       WARN_ON(state == oldstate);
+
+       hctx->ccid3hctx_state = state;
+}
+
+/*
+ * Store @large - @small in @result.  When the microsecond part of @large
+ * is the smaller one, a second is borrowed so that result->tv_usec stays
+ * non-negative.
+ */
+static void timeval_sub(struct timeval large, struct timeval small, struct timeval *result)
+{
+       const int borrow = large.tv_usec < small.tv_usec;
+
+       result->tv_sec  = large.tv_sec - small.tv_sec;
+       result->tv_usec = large.tv_usec - small.tv_usec;
+
+       if (borrow) {
+               result->tv_sec--;
+               result->tv_usec += USEC_IN_SEC;
+       }
+}
+
+/*
+ * Carry one second out of tv_usec when it has reached USEC_IN_SEC.
+ * At most a single second is carried per call.
+ */
+static inline void timeval_fix(struct timeval *tv)
+{
+       if (tv->tv_usec < USEC_IN_SEC)
+               return;
+
+       tv->tv_usec -= USEC_IN_SEC;
+       tv->tv_sec++;
+}
+
+/*
+ * Returns the difference in usecs between the current time of day and
+ * the timeval passed in (now - tv).
+ *
+ * The arithmetic is done in u32 so that it wraps modulo 2^32 instead of
+ * overflowing a signed long on 32 bit; the value returned is the same as
+ * the full-width difference truncated to u32.
+ */
+static inline u32 now_delta(struct timeval tv)
+{
+       struct timeval now;
+       u32 secs, usecs;
+
+       do_gettimeofday(&now);
+
+       secs  = now.tv_sec - tv.tv_sec;
+       usecs = now.tv_usec - tv.tv_usec;       /* may wrap; cancels out in the sum */
+
+       return secs * USEC_IN_SEC + usecs;
+}
+
+#define CALCX_ARRSIZE 500
+
+#define CALCX_SPLIT 50000
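+/*
+ * CALCX_SPLIT is equivalent to a loss event rate of 0.05 (rates are scaled
+ * by 1,000,000).  In calcx_lookup[] below, column 0 is read for rates at or
+ * above CALCX_SPLIT and column 1 for rates below it (see ccid3_calc_x()).
+ */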
+
+static const u32 calcx_lookup[CALCX_ARRSIZE][2] = {
+       { 37172 , 8172 },
+       { 53499 , 11567 },
+       { 66664 , 14180 },
+       { 78298 , 16388 },
+       { 89021 , 18339 },
+       { 99147 , 20108 },
+       { 108858 , 21738 },
+       { 118273 , 23260 },
+       { 127474 , 24693 },
+       { 136520 , 26052 },
+       { 145456 , 27348 },
+       { 154316 , 28589 },
+       { 163130 , 29783 },
+       { 171919 , 30935 },
+       { 180704 , 32049 },
+       { 189502 , 33130 },
+       { 198328 , 34180 },
+       { 207194 , 35202 },
+       { 216114 , 36198 },
+       { 225097 , 37172 },
+       { 234153 , 38123 },
+       { 243294 , 39055 },
+       { 252527 , 39968 },
+       { 261861 , 40864 },
+       { 271305 , 41743 },
+       { 280866 , 42607 },
+       { 290553 , 43457 },
+       { 300372 , 44293 },
+       { 310333 , 45117 },
+       { 320441 , 45929 },
+       { 330705 , 46729 },
+       { 341131 , 47518 },
+       { 351728 , 48297 },
+       { 362501 , 49066 },
+       { 373460 , 49826 },
+       { 384609 , 50577 },
+       { 395958 , 51320 },
+       { 407513 , 52054 },
+       { 419281 , 52780 },
+       { 431270 , 53499 },
+       { 443487 , 54211 },
+       { 455940 , 54916 },
+       { 468635 , 55614 },
+       { 481581 , 56306 },
+       { 494785 , 56991 },
+       { 508254 , 57671 },
+       { 521996 , 58345 },
+       { 536019 , 59014 },
+       { 550331 , 59677 },
+       { 564939 , 60335 },
+       { 579851 , 60988 },
+       { 595075 , 61636 },
+       { 610619 , 62279 },
+       { 626491 , 62918 },
+       { 642700 , 63553 },
+       { 659253 , 64183 },
+       { 676158 , 64809 },
+       { 693424 , 65431 },
+       { 711060 , 66050 },
+       { 729073 , 66664 },
+       { 747472 , 67275 },
+       { 766266 , 67882 },
+       { 785464 , 68486 },
+       { 805073 , 69087 },
+       { 825103 , 69684 },
+       { 845562 , 70278 },
+       { 866460 , 70868 },
+       { 887805 , 71456 },
+       { 909606 , 72041 },
+       { 931873 , 72623 },
+       { 954614 , 73202 },
+       { 977839 , 73778 },
+       { 1001557 , 74352 },
+       { 1025777 , 74923 },
+       { 1050508 , 75492 },
+       { 1075761 , 76058 },
+       { 1101544 , 76621 },
+       { 1127867 , 77183 },
+       { 1154739 , 77741 },
+       { 1182172 , 78298 },
+       { 1210173 , 78852 },
+       { 1238753 , 79405 },
+       { 1267922 , 79955 },
+       { 1297689 , 80503 },
+       { 1328066 , 81049 },
+       { 1359060 , 81593 },
+       { 1390684 , 82135 },
+       { 1422947 , 82675 },
+       { 1455859 , 83213 },
+       { 1489430 , 83750 },
+       { 1523671 , 84284 },
+       { 1558593 , 84817 },
+       { 1594205 , 85348 },
+       { 1630518 , 85878 },
+       { 1667543 , 86406 },
+       { 1705290 , 86932 },
+       { 1743770 , 87457 },
+       { 1782994 , 87980 },
+       { 1822973 , 88501 },
+       { 1863717 , 89021 },
+       { 1905237 , 89540 },
+       { 1947545 , 90057 },
+       { 1990650 , 90573 },
+       { 2034566 , 91087 },
+       { 2079301 , 91600 },
+       { 2124869 , 92111 },
+       { 2171279 , 92622 },
+       { 2218543 , 93131 },
+       { 2266673 , 93639 },
+       { 2315680 , 94145 },
+       { 2365575 , 94650 },
+       { 2416371 , 95154 },
+       { 2468077 , 95657 },
+       { 2520707 , 96159 },
+       { 2574271 , 96660 },
+       { 2628782 , 97159 },
+       { 2684250 , 97658 },
+       { 2740689 , 98155 },
+       { 2798110 , 98651 },
+       { 2856524 , 99147 },
+       { 2915944 , 99641 },
+       { 2976382 , 100134 },
+       { 3037850 , 100626 },
+       { 3100360 , 101117 },
+       { 3163924 , 101608 },
+       { 3228554 , 102097 },
+       { 3294263 , 102586 },
+       { 3361063 , 103073 },
+       { 3428966 , 103560 },
+       { 3497984 , 104045 },
+       { 3568131 , 104530 },
+       { 3639419 , 105014 },
+       { 3711860 , 105498 },
+       { 3785467 , 105980 },
+       { 3860253 , 106462 },
+       { 3936229 , 106942 },
+       { 4013410 , 107422 },
+       { 4091808 , 107902 },
+       { 4171435 , 108380 },
+       { 4252306 , 108858 },
+       { 4334431 , 109335 },
+       { 4417825 , 109811 },
+       { 4502501 , 110287 },
+       { 4588472 , 110762 },
+       { 4675750 , 111236 },
+       { 4764349 , 111709 },
+       { 4854283 , 112182 },
+       { 4945564 , 112654 },
+       { 5038206 , 113126 },
+       { 5132223 , 113597 },
+       { 5227627 , 114067 },
+       { 5324432 , 114537 },
+       { 5422652 , 115006 },
+       { 5522299 , 115474 },
+       { 5623389 , 115942 },
+       { 5725934 , 116409 },
+       { 5829948 , 116876 },
+       { 5935446 , 117342 },
+       { 6042439 , 117808 },
+       { 6150943 , 118273 },
+       { 6260972 , 118738 },
+       { 6372538 , 119202 },
+       { 6485657 , 119665 },
+       { 6600342 , 120128 },
+       { 6716607 , 120591 },
+       { 6834467 , 121053 },
+       { 6953935 , 121514 },
+       { 7075025 , 121976 },
+       { 7197752 , 122436 },
+       { 7322131 , 122896 },
+       { 7448175 , 123356 },
+       { 7575898 , 123815 },
+       { 7705316 , 124274 },
+       { 7836442 , 124733 },
+       { 7969291 , 125191 },
+       { 8103877 , 125648 },
+       { 8240216 , 126105 },
+       { 8378321 , 126562 },
+       { 8518208 , 127018 },
+       { 8659890 , 127474 },
+       { 8803384 , 127930 },
+       { 8948702 , 128385 },
+       { 9095861 , 128840 },
+       { 9244875 , 129294 },
+       { 9395760 , 129748 },
+       { 9548529 , 130202 },
+       { 9703198 , 130655 },
+       { 9859782 , 131108 },
+       { 10018296 , 131561 },
+       { 10178755 , 132014 },
+       { 10341174 , 132466 },
+       { 10505569 , 132917 },
+       { 10671954 , 133369 },
+       { 10840345 , 133820 },
+       { 11010757 , 134271 },
+       { 11183206 , 134721 },
+       { 11357706 , 135171 },
+       { 11534274 , 135621 },
+       { 11712924 , 136071 },
+       { 11893673 , 136520 },
+       { 12076536 , 136969 },
+       { 12261527 , 137418 },
+       { 12448664 , 137867 },
+       { 12637961 , 138315 },
+       { 12829435 , 138763 },
+       { 13023101 , 139211 },
+       { 13218974 , 139658 },
+       { 13417071 , 140106 },
+       { 13617407 , 140553 },
+       { 13819999 , 140999 },
+       { 14024862 , 141446 },
+       { 14232012 , 141892 },
+       { 14441465 , 142339 },
+       { 14653238 , 142785 },
+       { 14867346 , 143230 },
+       { 15083805 , 143676 },
+       { 15302632 , 144121 },
+       { 15523842 , 144566 },
+       { 15747453 , 145011 },
+       { 15973479 , 145456 },
+       { 16201939 , 145900 },
+       { 16432847 , 146345 },
+       { 16666221 , 146789 },
+       { 16902076 , 147233 },
+       { 17140429 , 147677 },
+       { 17381297 , 148121 },
+       { 17624696 , 148564 },
+       { 17870643 , 149007 },
+       { 18119154 , 149451 },
+       { 18370247 , 149894 },
+       { 18623936 , 150336 },
+       { 18880241 , 150779 },
+       { 19139176 , 151222 },
+       { 19400759 , 151664 },
+       { 19665007 , 152107 },
+       { 19931936 , 152549 },
+       { 20201564 , 152991 },
+       { 20473907 , 153433 },
+       { 20748982 , 153875 },
+       { 21026807 , 154316 },
+       { 21307399 , 154758 },
+       { 21590773 , 155199 },
+       { 21876949 , 155641 },
+       { 22165941 , 156082 },
+       { 22457769 , 156523 },
+       { 22752449 , 156964 },
+       { 23049999 , 157405 },
+       { 23350435 , 157846 },
+       { 23653774 , 158287 },
+       { 23960036 , 158727 },
+       { 24269236 , 159168 },
+       { 24581392 , 159608 },
+       { 24896521 , 160049 },
+       { 25214642 , 160489 },
+       { 25535772 , 160929 },
+       { 25859927 , 161370 },
+       { 26187127 , 161810 },
+       { 26517388 , 162250 },
+       { 26850728 , 162690 },
+       { 27187165 , 163130 },
+       { 27526716 , 163569 },
+       { 27869400 , 164009 },
+       { 28215234 , 164449 },
+       { 28564236 , 164889 },
+       { 28916423 , 165328 },
+       { 29271815 , 165768 },
+       { 29630428 , 166208 },
+       { 29992281 , 166647 },
+       { 30357392 , 167087 },
+       { 30725779 , 167526 },
+       { 31097459 , 167965 },
+       { 31472452 , 168405 },
+       { 31850774 , 168844 },
+       { 32232445 , 169283 },
+       { 32617482 , 169723 },
+       { 33005904 , 170162 },
+       { 33397730 , 170601 },
+       { 33792976 , 171041 },
+       { 34191663 , 171480 },
+       { 34593807 , 171919 },
+       { 34999428 , 172358 },
+       { 35408544 , 172797 },
+       { 35821174 , 173237 },
+       { 36237335 , 173676 },
+       { 36657047 , 174115 },
+       { 37080329 , 174554 },
+       { 37507197 , 174993 },
+       { 37937673 , 175433 },
+       { 38371773 , 175872 },
+       { 38809517 , 176311 },
+       { 39250924 , 176750 },
+       { 39696012 , 177190 },
+       { 40144800 , 177629 },
+       { 40597308 , 178068 },
+       { 41053553 , 178507 },
+       { 41513554 , 178947 },
+       { 41977332 , 179386 },
+       { 42444904 , 179825 },
+       { 42916290 , 180265 },
+       { 43391509 , 180704 },
+       { 43870579 , 181144 },
+       { 44353520 , 181583 },
+       { 44840352 , 182023 },
+       { 45331092 , 182462 },
+       { 45825761 , 182902 },
+       { 46324378 , 183342 },
+       { 46826961 , 183781 },
+       { 47333531 , 184221 },
+       { 47844106 , 184661 },
+       { 48358706 , 185101 },
+       { 48877350 , 185541 },
+       { 49400058 , 185981 },
+       { 49926849 , 186421 },
+       { 50457743 , 186861 },
+       { 50992759 , 187301 },
+       { 51531916 , 187741 },
+       { 52075235 , 188181 },
+       { 52622735 , 188622 },
+       { 53174435 , 189062 },
+       { 53730355 , 189502 },
+       { 54290515 , 189943 },
+       { 54854935 , 190383 },
+       { 55423634 , 190824 },
+       { 55996633 , 191265 },
+       { 56573950 , 191706 },
+       { 57155606 , 192146 },
+       { 57741621 , 192587 },
+       { 58332014 , 193028 },
+       { 58926806 , 193470 },
+       { 59526017 , 193911 },
+       { 60129666 , 194352 },
+       { 60737774 , 194793 },
+       { 61350361 , 195235 },
+       { 61967446 , 195677 },
+       { 62589050 , 196118 },
+       { 63215194 , 196560 },
+       { 63845897 , 197002 },
+       { 64481179 , 197444 },
+       { 65121061 , 197886 },
+       { 65765563 , 198328 },
+       { 66414705 , 198770 },
+       { 67068508 , 199213 },
+       { 67726992 , 199655 },
+       { 68390177 , 200098 },
+       { 69058085 , 200540 },
+       { 69730735 , 200983 },
+       { 70408147 , 201426 },
+       { 71090343 , 201869 },
+       { 71777343 , 202312 },
+       { 72469168 , 202755 },
+       { 73165837 , 203199 },
+       { 73867373 , 203642 },
+       { 74573795 , 204086 },
+       { 75285124 , 204529 },
+       { 76001380 , 204973 },
+       { 76722586 , 205417 },
+       { 77448761 , 205861 },
+       { 78179926 , 206306 },
+       { 78916102 , 206750 },
+       { 79657310 , 207194 },
+       { 80403571 , 207639 },
+       { 81154906 , 208084 },
+       { 81911335 , 208529 },
+       { 82672880 , 208974 },
+       { 83439562 , 209419 },
+       { 84211402 , 209864 },
+       { 84988421 , 210309 },
+       { 85770640 , 210755 },
+       { 86558080 , 211201 },
+       { 87350762 , 211647 },
+       { 88148708 , 212093 },
+       { 88951938 , 212539 },
+       { 89760475 , 212985 },
+       { 90574339 , 213432 },
+       { 91393551 , 213878 },
+       { 92218133 , 214325 },
+       { 93048107 , 214772 },
+       { 93883493 , 215219 },
+       { 94724314 , 215666 },
+       { 95570590 , 216114 },
+       { 96422343 , 216561 },
+       { 97279594 , 217009 },
+       { 98142366 , 217457 },
+       { 99010679 , 217905 },
+       { 99884556 , 218353 },
+       { 100764018 , 218801 },
+       { 101649086 , 219250 },
+       { 102539782 , 219698 },
+       { 103436128 , 220147 },
+       { 104338146 , 220596 },
+       { 105245857 , 221046 },
+       { 106159284 , 221495 },
+       { 107078448 , 221945 },
+       { 108003370 , 222394 },
+       { 108934074 , 222844 },
+       { 109870580 , 223294 },
+       { 110812910 , 223745 },
+       { 111761087 , 224195 },
+       { 112715133 , 224646 },
+       { 113675069 , 225097 },
+       { 114640918 , 225548 },
+       { 115612702 , 225999 },
+       { 116590442 , 226450 },
+       { 117574162 , 226902 },
+       { 118563882 , 227353 },
+       { 119559626 , 227805 },
+       { 120561415 , 228258 },
+       { 121569272 , 228710 },
+       { 122583219 , 229162 },
+       { 123603278 , 229615 },
+       { 124629471 , 230068 },
+       { 125661822 , 230521 },
+       { 126700352 , 230974 },
+       { 127745083 , 231428 },
+       { 128796039 , 231882 },
+       { 129853241 , 232336 },
+       { 130916713 , 232790 },
+       { 131986475 , 233244 },
+       { 133062553 , 233699 },
+       { 134144966 , 234153 },
+       { 135233739 , 234608 },
+       { 136328894 , 235064 },
+       { 137430453 , 235519 },
+       { 138538440 , 235975 },
+       { 139652876 , 236430 },
+       { 140773786 , 236886 },
+       { 141901190 , 237343 },
+       { 143035113 , 237799 },
+       { 144175576 , 238256 },
+       { 145322604 , 238713 },
+       { 146476218 , 239170 },
+       { 147636442 , 239627 },
+       { 148803298 , 240085 },
+       { 149976809 , 240542 },
+       { 151156999 , 241000 },
+       { 152343890 , 241459 },
+       { 153537506 , 241917 },
+       { 154737869 , 242376 },
+       { 155945002 , 242835 },
+       { 157158929 , 243294 },
+       { 158379673 , 243753 },
+       { 159607257 , 244213 },
+       { 160841704 , 244673 },
+       { 162083037 , 245133 },
+       { 163331279 , 245593 },
+       { 164586455 , 246054 },
+       { 165848586 , 246514 },
+       { 167117696 , 246975 },
+       { 168393810 , 247437 },
+       { 169676949 , 247898 },
+       { 170967138 , 248360 },
+       { 172264399 , 248822 },
+       { 173568757 , 249284 },
+       { 174880235 , 249747 },
+       { 176198856 , 250209 },
+       { 177524643 , 250672 },
+       { 178857621 , 251136 },
+       { 180197813 , 251599 },
+       { 181545242 , 252063 },
+       { 182899933 , 252527 },
+       { 184261908 , 252991 },
+       { 185631191 , 253456 },
+       { 187007807 , 253920 },
+       { 188391778 , 254385 },
+       { 189783129 , 254851 },
+       { 191181884 , 255316 },
+       { 192588065 , 255782 },
+       { 194001698 , 256248 },
+       { 195422805 , 256714 },
+       { 196851411 , 257181 },
+       { 198287540 , 257648 },
+       { 199731215 , 258115 },
+       { 201182461 , 258582 },
+       { 202641302 , 259050 },
+       { 204107760 , 259518 },
+       { 205581862 , 259986 },
+       { 207063630 , 260454 },
+       { 208553088 , 260923 },
+       { 210050262 , 261392 },
+       { 211555174 , 261861 },
+       { 213067849 , 262331 },
+       { 214588312 , 262800 },
+       { 216116586 , 263270 },
+       { 217652696 , 263741 },
+       { 219196666 , 264211 },
+       { 220748520 , 264682 },
+       { 222308282 , 265153 },
+       { 223875978 , 265625 },
+       { 225451630 , 266097 },
+       { 227035265 , 266569 },
+       { 228626905 , 267041 },
+       { 230226576 , 267514 },
+       { 231834302 , 267986 },
+       { 233450107 , 268460 },
+       { 235074016 , 268933 },
+       { 236706054 , 269407 },
+       { 238346244 , 269881 },
+       { 239994613 , 270355 },
+       { 241651183 , 270830 },
+       { 243315981 , 271305 }
+};
+
+/*
+ * Calculate the send rate as per section 3.1 of RFC3448.
+ * Returns the send rate in bytes per second.
+ *
+ * Integer maths and table lookups are used because floating point is not
+ * allowed in the kernel.
+ *
+ * The function for Xcalc as per section 3.1 of RFC3448 is:
+ *
+ * X =                            s
+ *      -------------------------------------------------------------
+ *      R*sqrt(2*b*p/3) + (t_RTO * (3*sqrt(3*b*p/8) * p * (1+32*p^2)))
+ *
+ * where
+ * X is the transmit rate in bytes/second
+ * s is the packet size in bytes
+ * R is the round trip time in seconds
+ * p is the loss event rate, between 0 and 1.0, of the number of loss events
+ *   as a fraction of the number of packets transmitted
+ * t_RTO is the TCP retransmission timeout value in seconds
+ * b is the number of packets acknowledged by a single TCP acknowledgement
+ *
+ * We can assume that b = 1 and t_RTO is 4 * R. With this the equation
+ * becomes:
+ *
+ * X =                            s
+ *      -----------------------------------------------------------------------
+ *      R * sqrt(2 * p / 3) + (12 * R * (sqrt(3 * p / 8) * p * (1 + 32 * p^2)))
+ *
+ * which we can break down into:
+ *
+ * X =     s
+ *      --------
+ *      R * f(p)
+ *
+ * where f(p) = sqrt(2 * p / 3) + (12 * sqrt(3 * p / 8) * p * (1 + 32 * p * p))
+ *
+ * f(p), scaled by 1,000,000, is read from calcx_lookup[].
+ *
+ * Function parameters:
+ * s - bytes
+ * R - RTT in usecs (0 is treated as 1)
+ * p - loss rate (decimal fraction multiplied by 1,000,000); values above
+ *     1,000,000 trip the BUG_ON() below
+ *
+ * Returns Xcalc in bytes per second, saturated at the largest u32 value.
+ *
+ * DON'T alter this code unless you run test cases against it as the code
+ * has been manipulated to stop underflow/overflow.
+ */
+static u32 ccid3_calc_x(u16 s, u32 R, u32 p)
+{
+       int index;
+       u32 f;
+       u64 tmp1, tmp2;
+
+       if (p < CALCX_SPLIT)
+               index = (p / (CALCX_SPLIT / CALCX_ARRSIZE)) - 1;
+       else
+               index = (p / (1000000 / CALCX_ARRSIZE)) - 1;
+
+       if (index < 0)
+               /* p should be 0 unless there is a bug in my code */
+               index = 0;
+
+       if (R == 0)
+               R = 1; /* RTT can't be zero or else divide by zero */
+
+       BUG_ON(index >= CALCX_ARRSIZE);
+
+       if (p >= CALCX_SPLIT)
+               f = calcx_lookup[index][0];
+       else
+               f = calcx_lookup[index][1];
+
+       tmp1 = ((u64)s * 100000000);
+       tmp2 = ((u64)R * (u64)f);
+       do_div(tmp2,10000);
+
+       /*
+        * R * f can be below 10000 (e.g. R == 1 with the smallest table
+        * entry), which leaves a zero divisor.
+        */
+       if (tmp2 == 0)
+               tmp2 = 1;
+
+       /*
+        * do_div() only takes a 32 bit divisor.  Scale dividend and divisor
+        * down together until the divisor fits; the quotient is unchanged
+        * apart from rounding in the lowest bits.
+        */
+       while (tmp2 > 0xffffffffULL) {
+               tmp1 >>= 1;
+               tmp2 >>= 1;
+       }
+
+       do_div(tmp1, (u32)tmp2);
+       /* don't alter above math unless you test due to overflow on 32 bit */
+
+       /* saturate instead of returning only the low 32 bits of a huge rate */
+       if (tmp1 > 0xffffffffULL)
+               return 0xffffffffU;
+
+       return (u32)tmp1;
+}
+
+/*
+ * Calculate new t_ipi (inter packet interval) by t_ipi = s / X_inst.
+ *
+ * Nothing is done in the NO_FBACK state: per the comment in the original
+ * code, the spec sets t_ipi to 1 second there (done elsewhere) and it is
+ * doubled after every no feedback timer expiry (separate function).
+ */
+static inline void ccid3_calc_new_t_ipi(struct ccid3_hc_tx_sock *hctx)
+{
+       if (hctx->ccid3hctx_state != TFRC_SSTATE_NO_FBACK) {
+               /* keep X at 10 or more so that X / 10 below is never zero */
+               if (hctx->ccid3hctx_x < 10) {
+                       ccid3_pr_debug("ccid3_calc_new_t_ipi - ccid3hctx_x < 10\n");
+                       hctx->ccid3hctx_x = 10;
+               }
+
+               /*
+                * Same as s * 1000000 / X; the split into 100000 and 10 is
+                * there to avoid 32 bit overflow for jumbo packets.
+                */
+               hctx->ccid3hctx_t_ipi = (hctx->ccid3hctx_s * 100000) /
+                                       (hctx->ccid3hctx_x / 10);
+       }
+}
+
+/*
+ * Calculate new delta by delta = min(t_ipi / 2, t_gran / 2), where
+ * t_gran / 2 is TFRC_OPSYS_HALF_TIME_GRAN.
+ */
+static inline void ccid3_calc_new_delta(struct ccid3_hc_tx_sock *hctx)
+{
+       const u32 half_t_ipi = hctx->ccid3hctx_t_ipi / 2;
+
+       hctx->ccid3hctx_delta = min_t(u32, half_t_ipi, TFRC_OPSYS_HALF_TIME_GRAN);
+}
+
+/*
+ * Recompute the sending rate X (ccid3hctx_x):
+ *
+ *    If (p > 0)
+ *       X_calc = calcX(s, R, p);
+ *       X = max(min(X_calc, 2 * X_recv), s / t_mbi);
+ *    Else If (now - tld >= R)
+ *       X = max(min(2 * X, 2 * X_recv), s / R);
+ *       tld = now;
+ *
+ * p counts as zero while it is below TFRC_SMALLEST_P, and X is never left
+ * at zero.
+ */
+static void ccid3_hc_tx_update_x(struct sock *sk)
+{
+       struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+
+       if (hctx->ccid3hctx_p >= TFRC_SMALLEST_P) {
+               /* smaller p would give a large error in calcX */
+               u32 capped, lowest;
+
+               hctx->ccid3hctx_x_calc = ccid3_calc_x(hctx->ccid3hctx_s,
+                                                     hctx->ccid3hctx_rtt,
+                                                     hctx->ccid3hctx_p);
+
+               capped = min_t(u32, hctx->ccid3hctx_x_calc, 2 * hctx->ccid3hctx_x_recv);
+               lowest = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;
+
+               hctx->ccid3hctx_x = max_t(u32, capped, lowest);
+       } else if (now_delta(hctx->ccid3hctx_t_ld) >= hctx->ccid3hctx_rtt) {
+               const u32 cur_rtt = hctx->ccid3hctx_rtt;
+               /* at least 10, so that rtt / 10 below cannot be zero */
+               const u32 rtt = cur_rtt < 10 ? 10 : cur_rtt;
+               u32 capped, lowest;
+
+               capped = min_t(u32, 2 * hctx->ccid3hctx_x_recv, 2 * hctx->ccid3hctx_x);
+               /* s / R; 100000 and 10 avoid 32 bit overflow for jumbo frames */
+               lowest = (hctx->ccid3hctx_s * 100000) / (rtt / 10);
+
+               hctx->ccid3hctx_x = max_t(u32, capped, lowest);
+               do_gettimeofday(&hctx->ccid3hctx_t_ld);
+       }
+
+       if (hctx->ccid3hctx_x != 0)
+               return;
+
+       ccid3_pr_debug("ccid3hctx_x = 0!\n");
+       hctx->ccid3hctx_x = 1;
+}
+
+/*
+ * No feedback timer handler; @data is the struct sock pointer.
+ *
+ * If the socket is currently owned by user context the timer is simply
+ * re-armed HZ / 5 jiffies from now.  Otherwise, depending on the sender
+ * state:
+ *   TERM     - nothing is done and the timer is not re-armed.
+ *   NO_FBACK - the send rate X is halved (but not below s / t_mbi).
+ *   FBACK    - unless the sender was idle with a low receive rate, X_recv
+ *              is reduced and X is recomputed.
+ * In the last two cases the timer is re-armed and the idle flag is set.
+ *
+ * The socket lock is held for the duration and one socket reference is
+ * dropped on every exit path.
+ *
+ * The timeout computations divide by X / 10.  X can legitimately be
+ * below 10 at those points (after halving, or after
+ * ccid3_hc_tx_update_x()), so the divisor is clamped to at least 1
+ * to avoid a division by zero.
+ */
+static void ccid3_hc_tx_no_feedback_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct dccp_sock *dp = dccp_sk(sk);
+       unsigned long next_tmout = 0;
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       u32 rtt;
+       u32 x_div;
+
+       bh_lock_sock(sk);
+       if (sock_owned_by_user(sk)) {
+               /* Try again later. */
+               /* XXX: set some sensible MIB */
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + HZ / 5);
+               goto out;
+       }
+
+       ccid3_pr_debug("%s, sk=%p, state=%s\n", dccp_role(sk), sk,
+                      ccid3_tx_state_name(hctx->ccid3hctx_state));
+
+       if (hctx->ccid3hctx_x < 10) {
+               ccid3_pr_debug("TFRC_SSTATE_NO_FBACK ccid3hctx_x < 10\n");
+               hctx->ccid3hctx_x = 10;
+       }
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_TERM:
+               goto out;
+       case TFRC_SSTATE_NO_FBACK:
+               /* Halve send rate */
+               hctx->ccid3hctx_x /= 2;
+               if (hctx->ccid3hctx_x < (hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME))
+                       hctx->ccid3hctx_x = hctx->ccid3hctx_s / TFRC_MAX_BACK_OFF_TIME;
+
+               ccid3_pr_debug("%s, sk=%p, state=%s, updated tx rate to %d bytes/s\n",
+                              dccp_role(sk), sk, ccid3_tx_state_name(hctx->ccid3hctx_state),
+                              hctx->ccid3hctx_x);
+
+               /* X may be as low as 5 here, which would make X / 10 zero */
+               x_div = max_t(u32, hctx->ccid3hctx_x / 10, 1);
+               next_tmout = max_t(u32, 2 * (hctx->ccid3hctx_s * 100000) / x_div,
+                                  TFRC_INITIAL_TIMEOUT);
+               /* do above maths with 100000 and 10 to prevent overflow on 32 bit */
+               /* FIXME - not sure above calculation is correct. See section 5 of CCID3 11
+                * should adjust tx_t_ipi and double that to achieve it really */
+               break;
+       case TFRC_SSTATE_FBACK:
+               /* Check if IDLE since last timeout and recv rate is less than 4 packets per RTT */
+               rtt = hctx->ccid3hctx_rtt;
+               if (rtt < 10)
+                       rtt = 10;
+               /* stop divide by zero below */
+               if (!hctx->ccid3hctx_idle || (hctx->ccid3hctx_x_recv >=
+                               4 * (hctx->ccid3hctx_s * 100000) / (rtt / 10))) {
+                       ccid3_pr_debug("%s, sk=%p, state=%s, not idle\n", dccp_role(sk), sk,
+                                      ccid3_tx_state_name(hctx->ccid3hctx_state));
+                       /* Halve sending rate */
+
+                       /*  If (X_calc > 2 * X_recv)
+                        *    X_recv = max(X_recv / 2, s / (2 * t_mbi));
+                        *  Else
+                        *    X_recv = X_calc / 4;
+                        */
+                       BUG_ON(hctx->ccid3hctx_p >= TFRC_SMALLEST_P && hctx->ccid3hctx_x_calc == 0);
+
+                       /* check also if p is zero -> x_calc is infinity? */
+                       if (hctx->ccid3hctx_p < TFRC_SMALLEST_P ||
+                           hctx->ccid3hctx_x_calc > 2 * hctx->ccid3hctx_x_recv)
+                               hctx->ccid3hctx_x_recv = max_t(u32, hctx->ccid3hctx_x_recv / 2,
+                                                                   hctx->ccid3hctx_s / (2 * TFRC_MAX_BACK_OFF_TIME));
+                       else
+                               hctx->ccid3hctx_x_recv = hctx->ccid3hctx_x_calc / 4;
+
+                       /* Update sending rate */
+                       ccid3_hc_tx_update_x(sk);
+               }
+               if (hctx->ccid3hctx_x == 0) {
+                       ccid3_pr_debug("TFRC_SSTATE_FBACK ccid3hctx_x = 0!\n");
+                       hctx->ccid3hctx_x = 10;
+               }
+               /*
+                * ccid3_hc_tx_update_x() may leave X anywhere from 1 upwards,
+                * so X / 10 can still be zero here.
+                */
+               x_div = max_t(u32, hctx->ccid3hctx_x / 10, 1);
+               /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
+               /* NOTE(review): icsk_rto is compared with a value in usecs and the
+                * result is passed to usecs_to_jiffies() - confirm its unit */
+               next_tmout = max_t(u32, inet_csk(sk)->icsk_rto,
+                                  2 * (hctx->ccid3hctx_s * 100000) / x_div);
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               goto out;
+       }
+
+       /* always re-arm at least one jiffy into the future */
+       sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer,
+                       jiffies + max_t(u32, 1, usecs_to_jiffies(next_tmout)));
+       hctx->ccid3hctx_idle = 1;
+out:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ * ccid3_hc_tx_send_packet - decide whether a data packet may be sent right now
+ * @sk:    socket owning the TX half connection
+ * @skb:   packet about to be sent; on success its dccpd_ccval is set to the
+ *         current window counter
+ * @len:   payload length; a zero length is refused
+ * @delay: only written in the NO_FBACK/FBACK states, where it receives the
+ *         scaled time (see the divide by -1000 below) the caller should wait
+ *
+ * A history entry for the packet is prepared here (reusing the head entry if
+ * it was allocated earlier but never marked as sent) and completed later by
+ * ccid3_hc_tx_packet_sent().
+ *
+ * Returns 0 if the packet may be sent now, -EAGAIN if the computed delay is
+ * positive, -ENOBUFS if no history entry could be allocated, -ENOTCONN if
+ * there is no TX state, @len is zero or the sender is terminating, and
+ * -EINVAL for an unknown sender state.
+ */
+static int ccid3_hc_tx_send_packet(struct sock *sk, struct sk_buff *skb,
+                                  int len, long *delay)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_tx_hist_entry *new_packet = NULL;
+       struct timeval now;
+       int rc = -ENOTCONN;
+
+//     ccid3_pr_debug("%s, sk=%p, skb=%p, len=%d\n", dccp_role(sk), sk, skb, len);
+       /*
+        * check if pure ACK or Terminating */
+       /* XXX: We only call this function for DATA and DATAACK, on, these packets can have
+        * zero length, but why the comment about "pure ACK"?
+        */
+       if (hctx == NULL || len == 0 || hctx->ccid3hctx_state == TFRC_SSTATE_TERM)
+               goto out;
+
+       /* See if last packet allocated was not sent */
+       if (!list_empty(&hctx->ccid3hctx_hist))
+               new_packet = list_entry(hctx->ccid3hctx_hist.next,
+                                       struct ccid3_tx_hist_entry, ccid3htx_node);
+
+       /* Only allocate when there is no pending (unsent) entry at the head */
+       if (new_packet == NULL || new_packet->ccid3htx_sent) {
+               new_packet = ccid3_tx_hist_entry_new(SLAB_ATOMIC);
+
+               rc = -ENOBUFS;
+               if (new_packet == NULL) {
+                       ccid3_pr_debug("%s, sk=%p, not enough mem to add "
+                                      "to history, send refused\n", dccp_role(sk), sk);
+                       goto out;
+               }
+
+               list_add(&new_packet->ccid3htx_node, &hctx->ccid3hctx_hist);
+       }
+
+       do_gettimeofday(&now);
+
+       /* Every case below overwrites rc, so the -ENOBUFS above never leaks out */
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               /* First packet: arm the no feedback timer and start the send clock */
+               ccid3_pr_debug("%s, sk=%p, first packet(%llu)\n", dccp_role(sk), sk,
+                              dp->dccps_gss);
+
+               hctx->ccid3hctx_no_feedback_timer.function = ccid3_hc_tx_no_feedback_timer;
+               hctx->ccid3hctx_no_feedback_timer.data     = (unsigned long)sk;
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, jiffies + usecs_to_jiffies(TFRC_INITIAL_TIMEOUT));
+               hctx->ccid3hctx_last_win_count   = 0;
+               hctx->ccid3hctx_t_last_win_count = now;
+               ccid3_hc_tx_set_state(sk, TFRC_SSTATE_NO_FBACK);
+               hctx->ccid3hctx_t_ipi = TFRC_INITIAL_TIMEOUT;
+
+               /* Set nominal send time for initial packet */
+               hctx->ccid3hctx_t_nom = now;
+               (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
+               timeval_fix(&(hctx->ccid3hctx_t_nom));
+               ccid3_calc_new_delta(hctx);
+               /* NOTE: *delay is left untouched on this path */
+               rc = 0;
+               break;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               /*
+                * NOTE(review): now_delta() is defined elsewhere; presumably it
+                * returns the usecs elapsed since its argument - confirm.
+                */
+               *delay = (now_delta(hctx->ccid3hctx_t_nom) - hctx->ccid3hctx_delta);
+               ccid3_pr_debug("send_packet delay=%ld\n",*delay);
+               *delay /= -1000;
+               /* divide by -1000 is to convert to ms and get sign right */
+               rc = *delay > 0 ? -EAGAIN : 0;
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               rc = -EINVAL;
+               break;
+       }
+
+       /* Can we send? if so add options and add to packet history */
+       if (rc == 0)
+               new_packet->ccid3htx_win_count = DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+out:
+       return rc;
+}
+
+/*
+ * ccid3_hc_tx_packet_sent - bookkeeping after a packet has left the socket
+ * @sk:   socket owning the TX half connection
+ * @more: not used by this function
+ * @len:  payload length of the packet that was sent; 0 means no data
+ *
+ * For data packets the pending history entry at the head of the list (set up
+ * by ccid3_hc_tx_send_packet()) is stamped with the send time and sequence
+ * number and marked as sent, the window counter is advanced if at least a
+ * quarter of an RTT has passed, and the idle flag is cleared. In the
+ * NO_FBACK/FBACK states the nominal send time of the next packet is then
+ * recomputed as now + t_ipi.
+ */
+static void ccid3_hc_tx_packet_sent(struct sock *sk, int more, int len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_tx_hist_entry *packet = NULL;
+       struct timeval now;
+
+//     ccid3_pr_debug("%s, sk=%p, more=%d, len=%d\n", dccp_role(sk), sk, more, len);
+       BUG_ON(hctx == NULL);
+
+       if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+               ccid3_pr_debug("%s, sk=%p, while state is TFRC_SSTATE_TERM!\n",
+                              dccp_role(sk), sk);
+               return;
+       }
+
+       do_gettimeofday(&now);
+
+       /* check if we have sent a data packet */
+       if (len > 0) {
+               unsigned long quarter_rtt;
+
+               if (list_empty(&hctx->ccid3hctx_hist)) {
+                       printk(KERN_CRIT "%s: packet doesn't exists in history!\n", __FUNCTION__);
+                       return;
+               }
+               /* The head entry must be the one prepared for this packet */
+               packet = list_entry(hctx->ccid3hctx_hist.next, struct ccid3_tx_hist_entry, ccid3htx_node);
+               if (packet->ccid3htx_sent) {
+                       printk(KERN_CRIT "%s: no unsent packet in history!\n", __FUNCTION__);
+                       return;
+               }
+               packet->ccid3htx_tstamp = now;
+               packet->ccid3htx_seqno  = dp->dccps_gss;
+               // ccid3_pr_debug("%s, sk=%p, seqno=%llu inserted!\n", dccp_role(sk), sk, packet->ccid3htx_seqno);
+
+               /*
+                * Check if win_count have changed */
+               /* COMPLIANCE_BEGIN
+                * Algorithm in "8.1. Window Counter Value" in draft-ietf-dccp-ccid3-11.txt
+                */
+               /*
+                * The divisor is rtt / 4, so this relies on ccid3hctx_rtt never
+                * being below 4 (init and packet_recv both enforce that floor).
+                */
+               quarter_rtt = now_delta(hctx->ccid3hctx_t_last_win_count) / (hctx->ccid3hctx_rtt / 4);
+               if (quarter_rtt > 0) {
+                       hctx->ccid3hctx_t_last_win_count = now;
+                       /* advance by at most 5 and wrap the counter modulo 16 */
+                       hctx->ccid3hctx_last_win_count   = (hctx->ccid3hctx_last_win_count +
+                                                           min_t(unsigned long, quarter_rtt, 5)) % 16;
+                       ccid3_pr_debug("%s, sk=%p, window changed from %u to %u!\n",
+                                      dccp_role(sk), sk,
+                                      packet->ccid3htx_win_count,
+                                      hctx->ccid3hctx_last_win_count);
+               }
+               /* COMPLIANCE_END */
+#if 0
+               ccid3_pr_debug("%s, sk=%p, packet sent (%llu,%u)\n",
+                              dccp_role(sk), sk,
+                              packet->ccid3htx_seqno,
+                              packet->ccid3htx_win_count);
+#endif
+               hctx->ccid3hctx_idle = 0;
+               packet->ccid3htx_sent = 1;
+       } else
+               ccid3_pr_debug("%s, sk=%p, seqno=%llu NOT inserted!\n",
+                              dccp_role(sk), sk, dp->dccps_gss);
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               /* if first wasn't pure ack */
+               if (len != 0)
+                       printk(KERN_CRIT "%s: %s, First packet sent is noted as a data packet\n",
+                              __FUNCTION__, dccp_role(sk));
+               return;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               if (len > 0) {
+                       /* Next nominal send time is now + the freshly computed t_ipi */
+                       hctx->ccid3hctx_t_nom = now;
+                       ccid3_calc_new_t_ipi(hctx);
+                       ccid3_calc_new_delta(hctx);
+                       (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
+                       timeval_fix(&(hctx->ccid3hctx_t_nom));
+               }
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               break;
+       }
+}
+
+/*
+ * ccid3_hc_tx_packet_recv - process receiver feedback on the TX half connection
+ * @sk:  socket owning the TX half connection
+ * @skb: received packet; only ACK and DATAACK packets are considered
+ *
+ * Nothing happens if there is no TX state, the sender is terminating, the
+ * packet is not an ACK/DATAACK, no packet has been sent yet, or the
+ * acknowledged sequence number is not found in the TX history.
+ *
+ * Otherwise the function takes an RTT sample from the history entry that
+ * matches the acknowledged sequence number, updates the RTT estimate, the
+ * timeout interval (icsk_rto), the receive rate and the loss event rate from
+ * the previously parsed options, recomputes the sending rate and the nominal
+ * send time, and re-arms the no feedback timer.
+ */
+static void ccid3_hc_tx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_options_received *opt_recv;
+       struct ccid3_tx_hist_entry *entry, *next, *packet;
+       unsigned long next_tmout; 
+       u16 t_elapsed;
+       u32 pinv;
+       u32 x_recv;
+       u32 r_sample;
+#if 0
+       ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
+                      dccp_role(sk), sk, dccp_state_name(sk->sk_state),
+                      skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+#endif
+       if (hctx == NULL)
+               return;
+
+       if (hctx->ccid3hctx_state == TFRC_SSTATE_TERM) {
+               ccid3_pr_debug("%s, sk=%p, received a packet when terminating!\n", dccp_role(sk), sk);
+               return;
+       }
+
+       /* we are only interested in ACKs */
+       if (!(DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK ||
+             DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_DATAACK))
+               return;
+
+       opt_recv = &hctx->ccid3hctx_options_received;
+
+       /* t_elapsed is read here but currently unused: see the FIXME below */
+       t_elapsed = dp->dccps_options_received.dccpor_elapsed_time;
+       x_recv = opt_recv->ccid3or_receive_rate;
+       pinv = opt_recv->ccid3or_loss_event_rate;
+
+       switch (hctx->ccid3hctx_state) {
+       case TFRC_SSTATE_NO_SENT:
+               /* FIXME: what to do here? */
+               return;
+       case TFRC_SSTATE_NO_FBACK:
+       case TFRC_SSTATE_FBACK:
+               /* Calculate new round trip sample by
+                * R_sample = (now - t_recvdata) - t_delay */
+               /* get t_recvdata from history */
+               packet = NULL;
+               list_for_each_entry_safe(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node)
+                       if (entry->ccid3htx_seqno == DCCP_SKB_CB(skb)->dccpd_ack_seq) {
+                               packet = entry;
+                               break;
+                       }
+
+               if (packet == NULL) {
+                       ccid3_pr_debug("%s, sk=%p, seqno %llu(%s) does't exist in history!\n",
+                                      dccp_role(sk), sk, DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                      dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+                       return;
+               }
+
+               /* Update RTT */
+               r_sample = now_delta(packet->ccid3htx_tstamp);
+               /* FIXME: the receiver's elapsed time (t_delay) is not subtracted yet */
+               // r_sample -= usecs_to_jiffies(t_elapsed * 10);
+
+               /* Update RTT estimate by
+                * If (No feedback recv)
+                *    R = R_sample;
+                * Else
+                *    R = q * R + (1 - q) * R_sample;
+                *
+                * q is a constant, RFC 3448 recommends 0.9
+                */
+               if (hctx->ccid3hctx_state == TFRC_SSTATE_NO_FBACK) {
+                       ccid3_hc_tx_set_state(sk, TFRC_SSTATE_FBACK);
+                       hctx->ccid3hctx_rtt = r_sample;
+               } else
+                       hctx->ccid3hctx_rtt = (hctx->ccid3hctx_rtt * 9) / 10 + r_sample / 10;
+
+               /*
+                * XXX: this is to avoid a division by zero in ccid3_hc_tx_packet_sent
+                *      implementation of the new window count.
+                */
+               if (hctx->ccid3hctx_rtt < 4)
+                       hctx->ccid3hctx_rtt = 4;
+
+               ccid3_pr_debug("%s, sk=%p, New RTT estimate=%uus, r_sample=%us\n",
+                              dccp_role(sk), sk,
+                              hctx->ccid3hctx_rtt,
+                              r_sample);
+
+               /* Update timeout interval: the larger of 4 * RTT and USEC_IN_SEC */
+               inet_csk(sk)->icsk_rto = max_t(u32, 4 * hctx->ccid3hctx_rtt, USEC_IN_SEC);
+
+               /* Update receive rate */
+               hctx->ccid3hctx_x_recv = x_recv;   /* x_recv in bytes per second */
+
+               /*
+                * Update loss event rate: pinv is the inverse loss event rate
+                * reported by the receiver; ~0 (and 0) are treated as "no loss".
+                * p is kept scaled by 1000000.
+                */
+               if (pinv == ~0 || pinv == 0)
+                       hctx->ccid3hctx_p = 0;
+               else {
+                       hctx->ccid3hctx_p = 1000000 / pinv;
+
+                       if (hctx->ccid3hctx_p < TFRC_SMALLEST_P) {
+                               hctx->ccid3hctx_p = TFRC_SMALLEST_P;
+                               ccid3_pr_debug("%s, sk=%p, Smallest p used!\n", dccp_role(sk), sk);
+                       }
+               }
+
+               /* unschedule no feedback timer */
+               sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+               /* Update sending rate */
+               ccid3_hc_tx_update_x(sk);
+
+               /*
+                * Update next send time: take the old t_ipi back out of t_nom
+                * (borrowing one second first if tv_usec would go negative),
+                * then add the newly computed t_ipi.
+                */
+               if (hctx->ccid3hctx_t_ipi > (hctx->ccid3hctx_t_nom).tv_usec) {
+                       (hctx->ccid3hctx_t_nom).tv_usec += USEC_IN_SEC;
+                       (hctx->ccid3hctx_t_nom).tv_sec--;
+               }
+               /* FIXME - if no feedback then t_ipi can go > 1 second */
+               (hctx->ccid3hctx_t_nom).tv_usec -= hctx->ccid3hctx_t_ipi;
+               ccid3_calc_new_t_ipi(hctx);
+               (hctx->ccid3hctx_t_nom).tv_usec += hctx->ccid3hctx_t_ipi;
+               timeval_fix(&(hctx->ccid3hctx_t_nom));
+               ccid3_calc_new_delta(hctx);
+
+               /* remove all packets older than the one acked from history */
+#if 0
+               FIXME!
+               list_for_each_entry_safe_continue(entry, next, &hctx->ccid3hctx_hist, ccid3htx_node) {
+                       list_del_init(&entry->ccid3htx_node);
+                       ccid3_tx_hist_entry_delete(entry);
+               }
+#endif
+               /* to prevent divide by zero below (x / 10 must be at least 1) */
+               if (hctx->ccid3hctx_x < 10) {
+                       ccid3_pr_debug("ccid3_hc_tx_packet_recv hctx->ccid3hctx_x < 10\n");
+                       hctx->ccid3hctx_x = 10;
+               }
+
+               /* Schedule no feedback timer to expire in max(4 * R, 2 * s / X) */
+               next_tmout = max(inet_csk(sk)->icsk_rto,
+                       2 * (hctx->ccid3hctx_s * 100000) / (hctx->ccid3hctx_x/10));
+               /* maths with 100000 and 10 is to prevent overflow with 32 bit */
+
+               ccid3_pr_debug("%s, sk=%p, Scheduled no feedback timer to expire in %lu jiffies (%luus)\n",
+                              dccp_role(sk), sk, usecs_to_jiffies(next_tmout), next_tmout); 
+
+               /* never arm the timer with a zero jiffies offset */
+               sk_reset_timer(sk, &hctx->ccid3hctx_no_feedback_timer, 
+                               jiffies + max_t(u32,1,usecs_to_jiffies(next_tmout)));
+
+               /* set idle flag */
+               hctx->ccid3hctx_idle = 1;   
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hctx->ccid3hctx_state);
+               dump_stack();
+               break;
+       }
+}
+
+/*
+ * Stamp an outgoing packet with the sender's current window counter.
+ * Does nothing when there is no TX state or the socket is neither in
+ * DCCP_OPEN nor in DCCP_PARTOPEN.
+ */
+static void ccid3_hc_tx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct ccid3_hc_tx_sock *hctx = dccp_sk(sk)->dccps_hc_tx_ccid_private;
+
+       if (hctx == NULL)
+               return;
+
+       if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_ccval = hctx->ccid3hctx_last_win_count;
+}
+
+/*
+ * ccid3_hc_tx_parse_options - record one CCID3 option found in a received packet
+ * @sk:     socket owning the TX half connection
+ * @option: option type
+ * @len:    length of the option value in bytes
+ * @idx:    stored as the loss intervals index for TFRC_OPT_LOSS_INTERVALS
+ * @value:  pointer to the option value inside the packet
+ *
+ * The values are only stored in ccid3hctx_options_received here; they are
+ * consumed later by ccid3_hc_tx_packet_recv(). The stored set is reset the
+ * first time an option is seen for a new dccps_gsr.
+ *
+ * Returns 0 on success (including unknown options and a missing TX state),
+ * or -EINVAL when a 4 byte option has the wrong length.
+ */
+static int ccid3_hc_tx_parse_options(struct sock *sk, unsigned char option,
+                                  unsigned char len, u16 idx, unsigned char *value)
+{
+       int rc = 0;
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct ccid3_options_received *opt_recv;
+       u32 opt_val;
+
+       if (hctx == NULL)
+               return 0;
+
+       opt_recv = &hctx->ccid3hctx_options_received;
+
+       /* First option of a new packet: forget what the previous one carried */
+       if (opt_recv->ccid3or_seqno != dp->dccps_gsr) {
+               opt_recv->ccid3or_seqno              = dp->dccps_gsr;
+               opt_recv->ccid3or_loss_event_rate    = ~0;
+               opt_recv->ccid3or_loss_intervals_idx = 0;
+               opt_recv->ccid3or_loss_intervals_len = 0;
+               opt_recv->ccid3or_receive_rate       = 0;
+       }
+
+       switch (option) {
+       case TFRC_OPT_LOSS_EVENT_RATE:
+               if (len != 4) {
+                       ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_LOSS_EVENT_RATE\n",
+                                      dccp_role(sk), sk);
+                       rc = -EINVAL;
+               } else {
+                       /*
+                        * Option values sit at arbitrary byte offsets inside
+                        * the packet, so copy instead of dereferencing a
+                        * possibly misaligned u32 pointer.
+                        */
+                       memcpy(&opt_val, value, sizeof(opt_val));
+                       opt_recv->ccid3or_loss_event_rate = ntohl(opt_val);
+                       ccid3_pr_debug("%s, sk=%p, LOSS_EVENT_RATE=%u\n",
+                                      dccp_role(sk), sk,
+                                      opt_recv->ccid3or_loss_event_rate);
+               }
+               break;
+       case TFRC_OPT_LOSS_INTERVALS:
+               /* Only the position and length are remembered, not the data */
+               opt_recv->ccid3or_loss_intervals_idx = idx;
+               opt_recv->ccid3or_loss_intervals_len = len;
+               ccid3_pr_debug("%s, sk=%p, LOSS_INTERVALS=(%u, %u)\n",
+                              dccp_role(sk), sk,
+                              opt_recv->ccid3or_loss_intervals_idx,
+                              opt_recv->ccid3or_loss_intervals_len);
+               break;
+       case TFRC_OPT_RECEIVE_RATE:
+               if (len != 4) {
+                       ccid3_pr_debug("%s, sk=%p, invalid len for TFRC_OPT_RECEIVE_RATE\n",
+                                      dccp_role(sk), sk);
+                       rc = -EINVAL;
+               } else {
+                       /* Same alignment concern as for the loss event rate */
+                       memcpy(&opt_val, value, sizeof(opt_val));
+                       opt_recv->ccid3or_receive_rate = ntohl(opt_val);
+                       ccid3_pr_debug("%s, sk=%p, RECEIVE_RATE=%u\n",
+                                      dccp_role(sk), sk,
+                                      opt_recv->ccid3or_receive_rate);
+               }
+               break;
+       default:
+               /* Options this CCID does not know about are silently ignored */
+               break;
+       }
+
+       return rc;
+}
+
+/*
+ * Allocate and initialise the TX half connection state of a socket.
+ *
+ * The packet size s is taken from dccps_avg_packet_size when that lies in
+ * [TFRC_MIN_PACKET_SIZE, TFRC_MAX_PACKET_SIZE], otherwise
+ * TFRC_STD_PACKET_SIZE is used. Returns 0 on success or -ENOMEM if the
+ * allocation fails (the private pointer is then left NULL).
+ */
+static int ccid3_hc_tx_init(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       hctx = kmalloc(sizeof(*hctx), gfp_any());
+       dp->dccps_hc_tx_ccid_private = hctx;
+       if (hctx == NULL)
+               return -ENOMEM;
+
+       memset(hctx, 0, sizeof(*hctx));
+
+       if (dp->dccps_avg_packet_size < TFRC_MIN_PACKET_SIZE ||
+           dp->dccps_avg_packet_size > TFRC_MAX_PACKET_SIZE)
+               hctx->ccid3hctx_s = TFRC_STD_PACKET_SIZE;
+       else
+               hctx->ccid3hctx_s = (u16)dp->dccps_avg_packet_size;
+
+       /* Start at one packet per second */
+       hctx->ccid3hctx_x = hctx->ccid3hctx_s;
+       /* Must never be below 4: ccid3_hc_tx_packet_sent divides by rtt / 4 */
+       hctx->ccid3hctx_rtt = 4;
+       inet_csk(sk)->icsk_rto = USEC_IN_SEC;
+       hctx->ccid3hctx_state = TFRC_SSTATE_NO_SENT;
+
+       INIT_LIST_HEAD(&hctx->ccid3hctx_hist);
+       init_timer(&hctx->ccid3hctx_no_feedback_timer);
+
+       return 0;
+}
+
+/*
+ * Tear down the TX half connection state: switch to the TERM state, stop the
+ * no feedback timer, release every packet history entry and free the state.
+ */
+static void ccid3_hc_tx_exit(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_tx_sock *hctx = dp->dccps_hc_tx_ccid_private;
+       struct list_head *hist;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+       BUG_ON(hctx == NULL);
+
+       ccid3_hc_tx_set_state(sk, TFRC_SSTATE_TERM);
+       sk_stop_timer(sk, &hctx->ccid3hctx_no_feedback_timer);
+
+       /* Drain the packet history from the head until nothing is left */
+       hist = &hctx->ccid3hctx_hist;
+       while (!list_empty(hist)) {
+               struct ccid3_tx_hist_entry *victim =
+                       list_entry(hist->next, struct ccid3_tx_hist_entry,
+                                  ccid3htx_node);
+
+               list_del_init(&victim->ccid3htx_node);
+               ccid3_tx_hist_entry_delete(victim);
+       }
+
+       kfree(hctx);
+       dp->dccps_hc_tx_ccid_private = NULL;
+}
+
+/*
+ * RX Half Connection methods
+ */
+
+/*
+ * TFRC receiver states, stored in ccid3hcrx_state and changed through
+ * ccid3_hc_rx_set_state().
+ */
+enum ccid3_hc_rx_states {
+               TFRC_RSTATE_NO_DATA = 1, /* feedback reports a receive rate of 0 */
+       TFRC_RSTATE_DATA,        /* feedback rate is derived from the bytes received */
+       TFRC_RSTATE_TERM    = 127, /* presumably "terminating", like TFRC_SSTATE_TERM - confirm */
+};
+
+#ifdef CCID3_DEBUG
+/* Map a receiver state to a printable name (debug builds only). */
+static const char *ccid3_rx_state_name(enum ccid3_hc_rx_states state)
+{
+       switch (state) {
+       case TFRC_RSTATE_NO_DATA:
+               return "NO_DATA";
+       case TFRC_RSTATE_DATA:
+               return "DATA";
+       case TFRC_RSTATE_TERM:
+               return "TERM";
+       default:
+               /* states without a name yield NULL */
+               return NULL;
+       }
+}
+#endif
+
+/*
+ * Move the RX half connection to a new state, logging the transition and
+ * warning if the state does not actually change.
+ */
+static inline void ccid3_hc_rx_set_state(struct sock *sk, enum ccid3_hc_rx_states state)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+       enum ccid3_hc_rx_states prev = hcrx->ccid3hcrx_state;
+
+       ccid3_pr_debug("%s(%p) %-8.8s -> %s\n",
+                      dccp_role(sk), sk,
+                      ccid3_rx_state_name(prev),
+                      ccid3_rx_state_name(state));
+
+       WARN_ON(state == prev);
+
+       hcrx->ccid3hcrx_state = state;
+}
+
+/*
+ * ccid3_hc_rx_add_hist - insert a received packet into the RX packet history
+ * @sk:     socket owning the RX half connection
+ * @packet: history entry describing the received packet
+ *
+ * The history list is kept ordered by sequence number with the newest packet
+ * at the head. While searching for the insertion point the data packets
+ * (DATA/DATAACK) that are newer than @packet are counted; once
+ * TFRC_RECV_NUM_LATE_LOSS of them have been seen, @packet is freed and not
+ * inserted.
+ *
+ * After insertion the history is trimmed, see the comments at trim_history.
+ *
+ * Returns 1 if @packet was dropped (and freed) as already lost, 0 otherwise.
+ */
+static int ccid3_hc_rx_add_hist(struct sock *sk, struct ccid3_rx_hist_entry *packet)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct ccid3_rx_hist_entry *entry, *next;
+       u8 num_later = 0;
+
+       if (list_empty(&hcrx->ccid3hcrx_hist))
+               list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
+       else {
+               u64 seqno = packet->ccid3hrx_seqno;
+               struct ccid3_rx_hist_entry *iter = list_entry(hcrx->ccid3hcrx_hist.next,
+                                                             struct ccid3_rx_hist_entry,
+                                                             ccid3hrx_node);
+               /* Common case: newer than everything we have, goes to the head */
+               if (after48(seqno, iter->ccid3hrx_seqno))
+                       list_add(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
+               else {
+                       /* The head entry itself counts if it is a data packet */
+                       if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
+                           iter->ccid3hrx_type == DCCP_PKT_DATAACK)
+                               num_later = 1;
+
+                       /* Walk the remaining entries, starting after the head */
+                       list_for_each_entry_continue(iter, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+                               if (after48(seqno, iter->ccid3hrx_seqno)) {
+                                       list_add(&packet->ccid3hrx_node, &iter->ccid3hrx_node);
+                                       goto trim_history;
+                               }
+
+                               if (iter->ccid3hrx_type == DCCP_PKT_DATA ||
+                                   iter->ccid3hrx_type == DCCP_PKT_DATAACK)
+                                       num_later++;
+
+                               if (num_later == TFRC_RECV_NUM_LATE_LOSS) {
+                                       ccid3_rx_hist_entry_delete(packet);
+                                       ccid3_pr_debug("%s, sk=%p, packet(%llu) already lost!\n",
+                                                      dccp_role(sk), sk, seqno);
+                                       return 1;
+                               }
+                       }
+
+                       /* Older than every entry in the list: append at the tail */
+                       if (num_later < TFRC_RECV_NUM_LATE_LOSS)
+                               list_add_tail(&packet->ccid3hrx_node, &hcrx->ccid3hcrx_hist);
+                       /* FIXME: else what? should we destroy the packet like above? */
+               }
+       }
+
+trim_history:
+       /* Trim history (remove all packets after the NUM_LATE_LOSS + 1 data packets) */
+       num_later = TFRC_RECV_NUM_LATE_LOSS + 1;
+
+       if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+               /*
+                * A loss interval history exists: skip entries until
+                * NUM_LATE_LOSS + 1 data packets have been passed, then
+                * delete everything that follows.
+                */
+               list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+                       if (num_later == 0) {
+                               list_del_init(&entry->ccid3hrx_node);
+                               ccid3_rx_hist_entry_delete(entry);
+                       } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                                  entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+                               --num_later;
+               }
+       } else {
+               /*
+                * step drives a small state machine over the list walk:
+                *   0, 1 - each time the data packet countdown hits zero,
+                *          restart it to find the next data packet (step 1
+                *          also records the reference win_count)
+                *   2    - compare win_count with the reference; stop keeping
+                *          entries once the difference exceeds
+                *          TFRC_WIN_COUNT_PER_RTT + 1
+                *   3    - delete every remaining entry
+                */
+               int step = 0;
+               u8 win_count = 0; /* Not needed, but lets shut up gcc */
+               int tmp;
+               /*
+                * We have no loss interval history so we need at least one
+                * rtt:s of data packets to approximate rtt.
+                */
+               list_for_each_entry_safe(entry, next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+                       if (num_later == 0) {
+                               switch (step) {
+                               case 0:
+                                       step = 1;
+                                       /* OK, find next data packet */
+                                       num_later = 1;
+                                       break;
+                               case 1:
+                                       step = 2;
+                                       /* OK, find next data packet */
+                                       num_later = 1;
+                                       win_count = entry->ccid3hrx_win_count;
+                                       break;
+                               case 2:
+                                       /* window counter distance, corrected for wrap-around */
+                                       tmp = win_count - entry->ccid3hrx_win_count;
+                                       if (tmp < 0)
+                                               tmp += TFRC_WIN_COUNT_LIMIT;
+                                       if (tmp > TFRC_WIN_COUNT_PER_RTT + 1) {
+                                               /* we have found a packet older than one rtt
+                                                * remove the rest */
+                                               step = 3;
+                                       } else /* OK, find next data packet */
+                                               num_later = 1;
+                                       break;
+                               case 3:
+                                       list_del_init(&entry->ccid3hrx_node);
+                                       ccid3_rx_hist_entry_delete(entry);
+                                       break;
+                               }
+                       } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                                  entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+                               --num_later;
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * ccid3_hc_rx_send_feedback - compute the feedback values and send an ACK
+ * @sk: socket owning the RX half connection
+ *
+ * Updates the receive rate (0 while no data has been seen, otherwise the
+ * bytes received since the last feedback scaled to bytes per second),
+ * remembers the window counter and sequence number of the newest data packet
+ * in the history, resets the byte counter, records the time elapsed since
+ * that packet arrived in units of 10us and the inverse loss event rate, and
+ * finally calls dccp_send_ack().
+ *
+ * Nothing is sent if the state is unknown or the history holds no data
+ * packet.
+ */
+static void ccid3_hc_rx_send_feedback(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct ccid3_rx_hist_entry *entry, *packet;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       switch (hcrx->ccid3hcrx_state) {
+       case TFRC_RSTATE_NO_DATA:
+               hcrx->ccid3hcrx_x_recv = 0;
+               break;
+       case TFRC_RSTATE_DATA: {
+               u32 delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
+               u64 rate;
+
+               if (delta == 0)
+                       delta = 1; /* to prevent divide by zero */
+               /*
+                * Scale in 64 bits: bytes_recv * USEC_IN_SEC no longer fits
+                * in 32 bits once more than about 4294 bytes were received
+                * since the last feedback.
+                */
+               rate = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC;
+               do_div(rate, delta);
+               hcrx->ccid3hcrx_x_recv = rate;
+               break;
+       }
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+               dump_stack();
+               return;
+       }
+
+       /* The newest data packet is the first DATA/DATAACK entry from the head */
+       packet = NULL;
+       list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node)
+               if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                   entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
+                       packet = entry;
+                       break;
+               }
+
+       if (packet == NULL) {
+               printk(KERN_CRIT "%s: %s, sk=%p, no data packet in history!\n",
+                      __FUNCTION__, dccp_role(sk), sk);
+               dump_stack();
+               return;
+       }
+
+       do_gettimeofday(&(hcrx->ccid3hcrx_tstamp_last_feedback));
+       hcrx->ccid3hcrx_last_counter         = packet->ccid3hrx_win_count;
+       hcrx->ccid3hcrx_seqno_last_counter   = packet->ccid3hrx_seqno;
+       hcrx->ccid3hcrx_bytes_recv           = 0;
+
+       /* Convert to multiples of 10us */
+       hcrx->ccid3hcrx_elapsed_time = now_delta(packet->ccid3hrx_tstamp) / 10;
+       /* p is scaled by 1000000; ~0 is sent when no loss has been seen */
+       if (hcrx->ccid3hcrx_p == 0)
+               hcrx->ccid3hcrx_pinv = ~0;
+       else
+               hcrx->ccid3hcrx_pinv = 1000000 / hcrx->ccid3hcrx_p;
+       dccp_send_ack(sk);
+}
+
+/*
+ * Add the receiver's feedback to an outgoing packet: the elapsed time option
+ * (when non-zero and the packet carries an ACK), the loss event rate and
+ * receive rate options (on everything but pure DATA packets), and the last
+ * window counter as CCVal. Does nothing without RX state or outside the
+ * DCCP_OPEN / DCCP_PARTOPEN states.
+ */
+static void ccid3_hc_rx_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+
+       if (hcrx == NULL)
+               return;
+
+       if (sk->sk_state != DCCP_OPEN && sk->sk_state != DCCP_PARTOPEN)
+               return;
+
+       if (hcrx->ccid3hcrx_elapsed_time != 0 && !dccp_packet_without_ack(skb))
+               dccp_insert_option_elapsed_time(sk, skb, hcrx->ccid3hcrx_elapsed_time);
+
+       if (DCCP_SKB_CB(skb)->dccpd_type != DCCP_PKT_DATA) {
+               /* both values travel in network byte order */
+               const u32 loss_event_rate = htonl(hcrx->ccid3hcrx_pinv);
+               const u32 receive_rate    = htonl(hcrx->ccid3hcrx_x_recv);
+
+               dccp_insert_option(sk, skb, TFRC_OPT_LOSS_EVENT_RATE,
+                                  &loss_event_rate, sizeof(loss_event_rate));
+               dccp_insert_option(sk, skb, TFRC_OPT_RECEIVE_RATE,
+                                  &receive_rate, sizeof(receive_rate));
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_ccval = hcrx->ccid3hcrx_last_counter;
+}
+
+/*
+ * Weights used to calculate the loss event rate.
+ *
+ * They are stored as integers scaled by 4 (so 4 stands for a weight of 1.0),
+ * which lets us avoid floating point; divide by 4 when using them.
+ * NOTE(review): RFC 3448 section 5.4 lists the weights as
+ * 1, 1, 1, 1, 0.8, 0.6, 0.4, 0.2 - the integers below look like a rounded
+ * approximation of those; confirm this is intended.
+ */
+const int ccid3_hc_rx_w[TFRC_RECV_IVAL_F_LENGTH] = { 4, 4, 4, 4, 3, 2, 1, 1, };
+
+/*
+ * calcx_reverse_lookup - find the loss event rate p that yields a given f(p)
+ * @fvalue: function value to match, multiplied by 1,000,000
+ *
+ * Column 1 of calcx_lookup is searched when @fvalue falls within its range,
+ * column 0 otherwise. Values below the table give 0, values above it give
+ * 1000000.
+ *
+ * Returns the p closest to @fvalue, multiplied by 1,000,000.
+ */
+u32 calcx_reverse_lookup(u32 fvalue)
+{
+       int column;
+       int idx;
+
+       if (fvalue < calcx_lookup[0][1])
+               return 0;
+
+       if (fvalue <= calcx_lookup[CALCX_ARRSIZE-1][1]) {
+               column = 1;
+       } else {
+               if (fvalue > calcx_lookup[CALCX_ARRSIZE-1][0])
+                       return 1000000;
+               column = 0;
+       }
+
+       /* the range checks above guarantee the scan stops inside the table */
+       for (idx = 0; fvalue > calcx_lookup[idx][column]; idx++)
+               ;
+
+       if (column)
+               return (CALCX_SPLIT * idx / CALCX_ARRSIZE);
+
+       return (1000000 * idx / CALCX_ARRSIZE);
+}
+
+/*
+ * ccid3_hc_rx_calc_first_li - calculate the first loss interval
+ * @sk: socket owning the RX half connection
+ *
+ * The RTT is approximated from the receive timestamps and window counters of
+ * the data packets in the RX history, the receive rate from the bytes
+ * received since the last feedback. From these the value
+ * f(p) = s / (x_recv * rtt) is formed and the matching loss event rate p is
+ * looked up with calcx_reverse_lookup().
+ *
+ * Returns 1000000 / p, i.e. the inverse of the loss event rate (p being
+ * scaled by 1000000), or ~0 when p is 0, when the history holds no data
+ * packet, or when x_recv * rtt is too small for the fixed point arithmetic.
+ * NOTE(review): the previous comment called the result "usecs"; the code
+ * returns an inverse loss rate, not a time.
+ */
+static u32 ccid3_hc_rx_calc_first_li(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct ccid3_rx_hist_entry *entry, *tail = NULL;
+       u32 rtt, delta, x_recv, fval, p, tmp2;
+       struct timeval tstamp, tmp_tv;
+       int interval = 0;
+       int win_count = 0;
+       int step = 0;
+       u64 tmp1;
+
+       /*
+        * Walk from the newest entry: remember the newest data packet, then
+        * look for an older data packet whose window counter is more than 4
+        * behind it (about one RTT).
+        */
+       list_for_each_entry(entry, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+               if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                   entry->ccid3hrx_type == DCCP_PKT_DATAACK) {
+                       tail = entry;
+
+                       switch (step) {
+                       case 0:
+                               tstamp    = entry->ccid3hrx_tstamp;
+                               win_count = entry->ccid3hrx_win_count;
+                               step = 1;
+                               break;
+                       case 1:
+                               interval = win_count - entry->ccid3hrx_win_count;
+                               if (interval < 0)
+                                       interval += TFRC_WIN_COUNT_LIMIT;
+                               if (interval > 4)
+                                       goto found;
+                               break;
+                       }
+               }
+       }
+
+       if (step == 0) {
+               printk(KERN_CRIT "%s: %s, sk=%p, packet history contains no data packets!\n",
+                      __FUNCTION__, dccp_role(sk), sk);
+               return ~0;
+       }
+
+       if (interval == 0) {
+               ccid3_pr_debug("%s, sk=%p, Could not find a win_count interval > 0. Defaulting to 1\n",
+                              dccp_role(sk), sk);
+               interval = 1;
+       }
+found:
+       /* interval counts quarter RTTs, hence the factor 4 */
+       timeval_sub(tstamp,tail->ccid3hrx_tstamp,&tmp_tv);
+       rtt = (tmp_tv.tv_sec * USEC_IN_SEC + tmp_tv.tv_usec) * 4 / interval;
+       ccid3_pr_debug("%s, sk=%p, approximated RTT to %uus\n",
+                      dccp_role(sk), sk, rtt);
+       if (rtt == 0)
+               rtt = 1;
+
+       delta = now_delta(hcrx->ccid3hcrx_tstamp_last_feedback);
+       if (delta == 0)
+               delta = 1;
+
+       /*
+        * Scale in 64 bits: bytes_recv * USEC_IN_SEC does not fit in 32 bits
+        * once more than about 4294 bytes have been received.
+        */
+       tmp1 = (u64)hcrx->ccid3hcrx_bytes_recv * USEC_IN_SEC;
+       do_div(tmp1, delta);
+       x_recv = (u32)tmp1;
+
+       tmp1 = (u64)x_recv * (u64)rtt;
+       do_div(tmp1,10000000);
+       tmp2 = (u32)tmp1;
+       /*
+        * x_recv * rtt below 10^7 truncates to zero here; bail out instead of
+        * dividing by zero. ~0 is the value already used above when no
+        * interval can be computed.
+        */
+       if (tmp2 == 0) {
+               ccid3_pr_debug("%s, sk=%p, x_recv=%u, rtt=%u too small to estimate loss rate\n",
+                              dccp_role(sk), sk, x_recv, rtt);
+               return ~0;
+       }
+       fval = (hcrx->ccid3hcrx_s * 100000) / tmp2;
+       /* do not alter order above or you will get overflow on 32 bit */
+       p = calcx_reverse_lookup(fval);
+       ccid3_pr_debug("%s, sk=%p, receive rate=%u bytes/s, implied loss rate=%u\n",
+                       dccp_role(sk), sk, x_recv, p);
+
+       if (p == 0)
+               return ~0;
+       else
+               return 1000000 / p;
+}
+
+/*
+ * Update the loss interval history after loss detection.
+ *
+ * seq_loss == DCCP_MAX_SEQNO + 1 means "no loss"; nothing is done then.
+ * On the first loss event (empty history) TFRC_RECV_IVAL_F_LENGTH + 1 entries
+ * are allocated and pushed onto the list head; the entry allocated last (the
+ * list head) records seq_loss/win_loss, the entry allocated first (the list
+ * tail) receives the interval computed by ccid3_hc_rx_calc_first_li().
+ * If an allocation fails the partially built history is deleted again.
+ */
+static void ccid3_hc_rx_update_li(struct sock *sk, u64 seq_loss, u8 win_loss)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+       struct list_head *li_hist = &hcrx->ccid3hcrx_loss_interval_hist;
+
+       if (seq_loss != DCCP_MAX_SEQNO + 1) {
+               ccid3_pr_debug("%s, sk=%p, seq_loss=%llu, win_loss=%u, packet loss detected\n",
+                              dccp_role(sk), sk, seq_loss, win_loss);
+
+               if (list_empty(li_hist)) {
+                       struct ccid3_loss_interval_hist_entry *newest = NULL;
+                       struct ccid3_loss_interval_hist_entry *oldest = NULL;
+                       int remaining;
+
+                       ccid3_pr_debug("%s, sk=%p, first loss event detected, creating history\n", dccp_role(sk), sk);
+
+                       for (remaining = TFRC_RECV_IVAL_F_LENGTH + 1; remaining > 0; --remaining) {
+                               newest = ccid3_loss_interval_hist_entry_new(SLAB_ATOMIC);
+                               if (newest == NULL) {
+                                       /* roll back whatever was queued so far */
+                                       ccid3_loss_interval_history_delete(li_hist);
+                                       ccid3_pr_debug("%s, sk=%p, not enough mem for creating history\n",
+                                                      dccp_role(sk), sk);
+                                       return;
+                               }
+                               /* the very first allocation ends up at the tail */
+                               if (oldest == NULL)
+                                       oldest = newest;
+                               list_add(&newest->ccid3lih_node, li_hist);
+                       }
+
+                       newest->ccid3lih_seqno     = seq_loss;
+                       newest->ccid3lih_win_count = win_loss;
+
+                       oldest->ccid3lih_interval  = ccid3_hc_rx_calc_first_li(sk);
+               }
+       }
+       /* FIXME: find end of interval */
+}
+
+static void ccid3_hc_rx_detect_loss(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct ccid3_rx_hist_entry *entry, *a_next, *b_next, *packet;
+       struct ccid3_rx_hist_entry *a_loss = NULL;
+       struct ccid3_rx_hist_entry *b_loss = NULL;
+       u64 seq_loss = DCCP_MAX_SEQNO + 1;
+       u8 win_loss = 0;
+       u8 num_later = TFRC_RECV_NUM_LATE_LOSS;
+
+       list_for_each_entry_safe(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+               if (num_later == 0) {
+                       b_loss = entry;
+                       break;
+               } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                          entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+                       --num_later;
+       }
+
+       if (b_loss == NULL)
+               goto out_update_li;
+
+       a_next = b_next;
+       num_later = 1;
+#if 0
+               FIXME MERGE GIT!
+       list_for_each_entry_safe_continue(entry, a_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+               if (num_later == 0) {
+                       a_loss = entry;
+                       break;
+               } else if (entry->ccid3hrx_type == DCCP_PKT_DATA ||
+                          entry->ccid3hrx_type == DCCP_PKT_DATAACK)
+                       --num_later;
+       }
+#endif
+
+       if (a_loss == NULL) {
+               if (list_empty(&hcrx->ccid3hcrx_loss_interval_hist)) {
+                       /* no loss event have occured yet */
+                       ccid3_pr_debug("%s, sk=%p, TODO: find a lost data "
+                                       "packet by comparing to initial seqno\n",
+                                      dccp_role(sk), sk);
+                       goto out_update_li;
+               } else {
+                       pr_info("%s: %s, sk=%p, ERROR! Less than 4 data packets in history",
+                               __FUNCTION__, dccp_role(sk), sk);
+                       return;
+               }
+       }
+
+       /* Locate a lost data packet */
+       entry = packet = b_loss;
+#if 0
+               FIXME MERGE GIT!
+       list_for_each_entry_safe_continue(entry, b_next, &hcrx->ccid3hcrx_hist, ccid3hrx_node) {
+               u64 delta = dccp_delta_seqno(entry->ccid3hrx_seqno, packet->ccid3hrx_seqno);
+
+               if (delta != 0) {
+                       if (packet->ccid3hrx_type == DCCP_PKT_DATA ||
+                           packet->ccid3hrx_type == DCCP_PKT_DATAACK)
+                               --delta;
+                       /*
+                        * FIXME: check this, probably this % usage is because
+                        * in earlier drafts the ndp count was just 8 bits
+                        * long, but now it cam be up to 24 bits long.
+                        */
+#if 0
+                       if (delta % DCCP_NDP_LIMIT !=
+                           (packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) % DCCP_NDP_LIMIT)
+#endif
+                       if (delta != packet->ccid3hrx_ndp - entry->ccid3hrx_ndp) {
+                               seq_loss = entry->ccid3hrx_seqno;
+                               dccp_inc_seqno(&seq_loss);
+                       }
+               }
+               packet = entry;
+               if (packet == a_loss)
+                       break;
+       }
+#endif
+
+       if (seq_loss != DCCP_MAX_SEQNO + 1)
+               win_loss = a_loss->ccid3hrx_win_count;
+
+out_update_li:
+       ccid3_hc_rx_update_li(sk, seq_loss, win_loss);
+}
+
+/*
+ * Weighted mean of the loss interval history.
+ *
+ * Two weighted sums are built over the list: one pairing entry k with weight
+ * ccid3_hc_rx_w[k] (entries 0 .. TFRC_RECV_IVAL_F_LENGTH - 1) and one pairing
+ * entry k with weight ccid3_hc_rx_w[k - 1] (entries 1 .. TFRC_RECV_IVAL_F_LENGTH).
+ * The larger sum, times 4, divided by the sum of the weights is returned.
+ *
+ * Returns 0 (after logging) when the number of entries walked is not exactly
+ * TFRC_RECV_IVAL_F_LENGTH.
+ * NOTE(review): ccid3_hc_rx_update_li() creates TFRC_RECV_IVAL_F_LENGTH + 1
+ * entries, which makes the counter end at TFRC_RECV_IVAL_F_LENGTH + 1 here -
+ * confirm the intended comparison.
+ */
+static u32 ccid3_hc_rx_calc_i_mean(struct sock *sk)
+{
+       struct ccid3_hc_rx_sock *hcrx = dccp_sk(sk)->dccps_hc_rx_ccid_private;
+       struct ccid3_loss_interval_hist_entry *cur, *tmp;
+       u32 sum_current = 0;
+       u32 sum_shifted = 0;
+       u32 weight_sum  = 0;
+       u32 best;
+       int idx = 0;
+
+       list_for_each_entry_safe(cur, tmp, &hcrx->ccid3hcrx_loss_interval_hist, ccid3lih_node) {
+               if (idx != 0)
+                       sum_shifted += cur->ccid3lih_interval * ccid3_hc_rx_w[idx - 1];
+
+               if (idx < TFRC_RECV_IVAL_F_LENGTH) {
+                       sum_current += cur->ccid3lih_interval * ccid3_hc_rx_w[idx];
+                       weight_sum  += ccid3_hc_rx_w[idx];
+               }
+
+               idx++;
+               if (idx > TFRC_RECV_IVAL_F_LENGTH)
+                       break;
+       }
+
+       if (idx != TFRC_RECV_IVAL_F_LENGTH) {
+               pr_info("%s: %s, sk=%p, ERROR! Missing entry in interval history!\n",
+                       __FUNCTION__, dccp_role(sk), sk);
+               return 0;
+       }
+
+       best = sum_current > sum_shifted ? sum_current : sum_shifted;
+
+       /* FIXME: Why do we do this? -Ian McDonald */
+       if (best * 4 < weight_sum)
+               best = weight_sum * 4;
+
+       return best * 4 / weight_sum;
+}
+
+static void ccid3_hc_rx_packet_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *hcrx = dp->dccps_hc_rx_ccid_private;
+       struct ccid3_rx_hist_entry *packet;
+       struct timeval now;
+       u8 win_count;
+       u32 p_prev;
+       int ins;
+#if 0
+       ccid3_pr_debug("%s, sk=%p(%s), skb=%p(%s)\n",
+                      dccp_role(sk), sk, dccp_state_name(sk->sk_state),
+                      skb, dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+#endif
+       if (hcrx == NULL)
+               return;
+
+       BUG_ON(!(hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA ||
+                hcrx->ccid3hcrx_state == TFRC_RSTATE_DATA));
+
+       switch (DCCP_SKB_CB(skb)->dccpd_type) {
+       case DCCP_PKT_ACK:
+               if (hcrx->ccid3hcrx_state == TFRC_RSTATE_NO_DATA)
+                       return;
+       case DCCP_PKT_DATAACK:
+               if (dp->dccps_options_received.dccpor_timestamp_echo == 0)
+                       break;
+               p_prev = hcrx->ccid3hcrx_rtt;
+               do_gettimeofday(&now);
+               /* hcrx->ccid3hcrx_rtt = now - dp->dccps_options_received.dccpor_timestamp_echo -
+                                     usecs_to_jiffies(dp->dccps_options_received.dccpor_elapsed_time * 10);
+               FIXME - I think above code is broken - have to look at options more, will also need
+               to fix pr_debug below */
+               if (p_prev != hcrx->ccid3hcrx_rtt)
+                       ccid3_pr_debug("%s, sk=%p, New RTT estimate=%lu jiffies, tstamp_echo=%u, elapsed time=%u\n",
+                                      dccp_role(sk), sk, hcrx->ccid3hcrx_rtt,
+                                      dp->dccps_options_received.dccpor_timestamp_echo,
+                                      dp->dccps_options_received.dccpor_elapsed_time);
+               break;
+       case DCCP_PKT_DATA:
+               break;
+       default:
+               ccid3_pr_debug("%s, sk=%p, not DATA/DATAACK/ACK packet(%s)\n",
+                              dccp_role(sk), sk,
+                              dccp_packet_name(DCCP_SKB_CB(skb)->dccpd_type));
+               return;
+       }
+
+       packet = ccid3_rx_hist_entry_new(sk, skb, SLAB_ATOMIC);
+       if (packet == NULL) {
+               ccid3_pr_debug("%s, sk=%p, Not enough mem to add rx packet to history (consider it lost)!",
+                              dccp_role(sk), sk);
+               return;
+       }
+
+       win_count = packet->ccid3hrx_win_count;
+
+       ins = ccid3_hc_rx_add_hist(sk, packet);
+
+       if (DCCP_SKB_CB(skb)->dccpd_type == DCCP_PKT_ACK)
+               return;
+
+       switch (hcrx->ccid3hcrx_state) {
+       case TFRC_RSTATE_NO_DATA:
+               ccid3_pr_debug("%s, sk=%p(%s), skb=%p, sending initial feedback\n",
+                              dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
+               ccid3_hc_rx_send_feedback(sk);
+               ccid3_hc_rx_set_state(sk, TFRC_RSTATE_DATA);
+               return;
+       case TFRC_RSTATE_DATA:
+               hcrx->ccid3hcrx_bytes_recv += skb->len - dccp_hdr(skb)->dccph_doff * 4;
+               if (ins == 0) {
+                       do_gettimeofday(&now);
+                       if ((now_delta(hcrx->ccid3hcrx_tstamp_last_ack)) >= hcrx->ccid3hcrx_rtt) {
+                               hcrx->ccid3hcrx_tstamp_last_ack = now;
+                               ccid3_hc_rx_send_feedback(sk);
+                       }
+                       return;
+               }
+               break;
+       default:
+               printk(KERN_CRIT "%s: %s, sk=%p, Illegal state (%d)!\n",
+                      __FUNCTION__, dccp_role(sk), sk, hcrx->ccid3hcrx_state);
+               dump_stack();
+               return;
+       }
+
+       /* Dealing with packet loss */
+       ccid3_pr_debug("%s, sk=%p(%s), skb=%p, data loss! Reacting...\n",
+                      dccp_role(sk), sk, dccp_state_name(sk->sk_state), skb);
+
+       ccid3_hc_rx_detect_loss(sk);
+       p_prev = hcrx->ccid3hcrx_p;
+       
+       /* Calculate loss event rate */
+       if (!list_empty(&hcrx->ccid3hcrx_loss_interval_hist))
+               /* Scaling up by 1000000 as fixed decimal */
+               hcrx->ccid3hcrx_p = 1000000 / ccid3_hc_rx_calc_i_mean(sk);
+
+       if (hcrx->ccid3hcrx_p > p_prev) {
+               ccid3_hc_rx_send_feedback(sk);
+               return;
+       }
+}
+
+/*
+ * Allocate and initialise the receiver half-connection control block.
+ *
+ * The block is zeroed, the packet size is taken from dccps_avg_packet_size
+ * when that lies within [TFRC_MIN_PACKET_SIZE, TFRC_MAX_PACKET_SIZE] and from
+ * TFRC_STD_PACKET_SIZE otherwise, the state is set to TFRC_RSTATE_NO_DATA and
+ * both history lists start out empty.
+ *
+ * Returns 0 on success, -ENOMEM if the allocation fails.
+ */
+static int ccid3_hc_rx_init(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *rx;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       rx = kmalloc(sizeof(*rx), gfp_any());
+       dp->dccps_hc_rx_ccid_private = rx;
+       if (rx == NULL)
+               return -ENOMEM;
+
+       memset(rx, 0, sizeof(*rx));
+
+       if (dp->dccps_avg_packet_size < TFRC_MIN_PACKET_SIZE ||
+           dp->dccps_avg_packet_size > TFRC_MAX_PACKET_SIZE)
+               rx->ccid3hcrx_s = TFRC_STD_PACKET_SIZE;
+       else
+               rx->ccid3hcrx_s = (u16)dp->dccps_avg_packet_size;
+
+       INIT_LIST_HEAD(&rx->ccid3hcrx_hist);
+       INIT_LIST_HEAD(&rx->ccid3hcrx_loss_interval_hist);
+       rx->ccid3hcrx_state = TFRC_RSTATE_NO_DATA;
+
+       return 0;
+}
+
+/*
+ * Tear down the receiver half-connection control block, if there is one:
+ * switch to TFRC_RSTATE_TERM, drop the packet and loss interval histories,
+ * free the block and clear the socket's pointer to it.
+ */
+static void ccid3_hc_rx_exit(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct ccid3_hc_rx_sock *rx = dp->dccps_hc_rx_ccid_private;
+
+       ccid3_pr_debug("%s, sk=%p\n", dccp_role(sk), sk);
+
+       if (rx != NULL) {
+               ccid3_hc_rx_set_state(sk, TFRC_RSTATE_TERM);
+
+               /* packet history first, then the loss interval history */
+               ccid3_rx_history_delete(&rx->ccid3hcrx_hist);
+               ccid3_loss_interval_history_delete(&rx->ccid3hcrx_loss_interval_hist);
+
+               kfree(dp->dccps_hc_rx_ccid_private);
+               dp->dccps_hc_rx_ccid_private = NULL;
+       }
+}
+
+static struct ccid ccid3 = {   /* operations table passed to ccid_register()/ccid_unregister() */
+       .ccid_id                   = 3, /* same number as in MODULE_ALIAS("net-dccp-ccid-3") */
+       .ccid_name                 = "ccid3",
+       .ccid_owner                = THIS_MODULE,
+       .ccid_init                 = ccid3_init,
+       .ccid_exit                 = ccid3_exit,
+       .ccid_hc_tx_init           = ccid3_hc_tx_init,  /* sender half-connection hooks */
+       .ccid_hc_tx_exit           = ccid3_hc_tx_exit,
+       .ccid_hc_tx_send_packet    = ccid3_hc_tx_send_packet,
+       .ccid_hc_tx_packet_sent    = ccid3_hc_tx_packet_sent,
+       .ccid_hc_tx_packet_recv    = ccid3_hc_tx_packet_recv,
+       .ccid_hc_tx_insert_options = ccid3_hc_tx_insert_options,
+       .ccid_hc_tx_parse_options  = ccid3_hc_tx_parse_options,
+       .ccid_hc_rx_init           = ccid3_hc_rx_init,  /* receiver half-connection hooks */
+       .ccid_hc_rx_exit           = ccid3_hc_rx_exit,
+       .ccid_hc_rx_insert_options = ccid3_hc_rx_insert_options,
+       .ccid_hc_rx_packet_recv    = ccid3_hc_rx_packet_recv,
+};
+module_param(ccid3_debug, int, 0444);
+MODULE_PARM_DESC(ccid3_debug, "Enable debug messages");
+
+/*
+ * Module entry point: create the three slab caches (TX history, RX history,
+ * loss interval history) and register the CCID. On any failure the caches
+ * created so far are destroyed in reverse order and their pointers reset.
+ *
+ * Returns 0 on success, -ENOMEM if a cache cannot be created, or the error
+ * returned by ccid_register().
+ */
+static __init int ccid3_module_init(void)
+{
+       int rc = -ENOMEM;
+
+       ccid3_tx_hist_slab = kmem_cache_create("dccp_ccid3_tx_history",
+                                              sizeof(struct ccid3_tx_hist_entry), 0,
+                                              SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (ccid3_tx_hist_slab == NULL)
+               goto out;
+
+       ccid3_rx_hist_slab = kmem_cache_create("dccp_ccid3_rx_history",
+                                              sizeof(struct ccid3_rx_hist_entry), 0,
+                                              SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (ccid3_rx_hist_slab == NULL)
+               goto out_free_tx_history;
+
+       ccid3_loss_interval_hist_slab = kmem_cache_create("dccp_ccid3_loss_interval_history",
+                                                         sizeof(struct ccid3_loss_interval_hist_entry), 0,
+                                                         SLAB_HWCACHE_ALIGN, NULL, NULL);
+       if (ccid3_loss_interval_hist_slab == NULL)
+               goto out_free_rx_history;
+
+       rc = ccid_register(&ccid3);
+       if (rc == 0)
+               return rc;
+
+       /* registration failed: unwind everything */
+       kmem_cache_destroy(ccid3_loss_interval_hist_slab);
+       ccid3_loss_interval_hist_slab = NULL;
+out_free_rx_history:
+       kmem_cache_destroy(ccid3_rx_hist_slab);
+       ccid3_rx_hist_slab = NULL;
+out_free_tx_history:
+       kmem_cache_destroy(ccid3_tx_hist_slab);
+       ccid3_tx_hist_slab = NULL;
+out:
+       return rc;
+}
+module_init(ccid3_module_init);
+
+/*
+ * Module exit point: unregister the CCID, then destroy each slab cache that
+ * exists and reset its pointer.
+ */
+static __exit void ccid3_module_exit(void)
+{
+       ccid_unregister(&ccid3);
+
+       /* TX packet history cache */
+       if (ccid3_tx_hist_slab) {
+               kmem_cache_destroy(ccid3_tx_hist_slab);
+               ccid3_tx_hist_slab = NULL;
+       }
+
+       /* RX packet history cache */
+       if (ccid3_rx_hist_slab) {
+               kmem_cache_destroy(ccid3_rx_hist_slab);
+               ccid3_rx_hist_slab = NULL;
+       }
+
+       /* loss interval history cache */
+       if (ccid3_loss_interval_hist_slab) {
+               kmem_cache_destroy(ccid3_loss_interval_hist_slab);
+               ccid3_loss_interval_hist_slab = NULL;
+       }
+}
+module_exit(ccid3_module_exit);
+
+MODULE_AUTHOR("Ian McDonald <iam4@cs.waikato.ac.nz> & Arnaldo Carvalho de Melo <acme@ghostprotocols.net>");
+MODULE_DESCRIPTION("DCCP TFRC CCID3 CCID");
+MODULE_LICENSE("GPL");
+MODULE_ALIAS("net-dccp-ccid-3");
diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h
new file mode 100644 (file)
index 0000000..5d6b623
--- /dev/null
@@ -0,0 +1,137 @@
+/*
+ *  net/dccp/ccids/ccid3.h
+ *
+ *  Copyright (c) 2005 The University of Waikato, Hamilton, New Zealand.
+ *
+ *  An implementation of the DCCP protocol
+ *
+ *  This code has been developed by the University of Waikato WAND
+ *  research group. For further information please see http://www.wand.net.nz/
+ *  or e-mail Ian McDonald - iam4@cs.waikato.ac.nz
+ *
+ *  This code also uses code from Lulea University, rereleased as GPL by its
+ *  authors:
+ *  Copyright (c) 2003 Nils-Erik Mattsson, Joacim Haggmark, Magnus Erixzon
+ *
+ *  Changes to meet Linux coding standards, to make it meet latest ccid3 draft
+ *  and to make it work as a loadable module in the DCCP stack written by
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>.
+ *
+ *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *  This program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2 of the License, or
+ *  (at your option) any later version.
+ *
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
+ *
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program; if not, write to the Free Software
+ *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+#ifndef _DCCP_CCID3_H_
+#define _DCCP_CCID3_H_
+
+#include <linux/types.h>
+#include <linux/list.h>
+#include <linux/timer.h>
+
+struct ccid3_tx_hist_entry {   /* objects of the "dccp_ccid3_tx_history" slab cache */
+       struct list_head        ccid3htx_node;          /* list linkage */
+       u64                     ccid3htx_seqno:48,      /* 48 bits wide, like DCCP_MAX_SEQNO */
+                               ccid3htx_win_count:8,
+                               ccid3htx_sent:1;        /* single-bit flag */
+       struct timeval          ccid3htx_tstamp;
+};
+
+struct ccid3_options_received {        /* embedded in struct ccid3_hc_tx_sock as ccid3hctx_options_received */
+       u64 ccid3or_seqno:48,           /* 48 bits wide, like DCCP_MAX_SEQNO */
+           ccid3or_loss_intervals_idx:16;
+       u16 ccid3or_loss_intervals_len;
+       u32 ccid3or_loss_event_rate;
+       u32 ccid3or_receive_rate;
+};
+
+/** struct ccid3_hc_tx_sock - CCID3 sender half connection congestion control block
+ *
+ * @ccid3hctx_state - Sender state
+ * @ccid3hctx_x - Current sending rate
+ * @ccid3hctx_x_recv - Receive rate
+ * @ccid3hctx_x_calc - Calculated send (?) rate
+ * @ccid3hctx_s - Packet size
+ * @ccid3hctx_rtt - Estimate of current round trip time in usecs
+ * @ccid3hctx_p - Current loss event rate (0-1) scaled by 1000000
+ * @ccid3hctx_last_win_count - Last window counter sent
+ * @ccid3hctx_t_last_win_count - Timestamp of earliest packet with last_win_count value sent
+ * @ccid3hctx_no_feedback_timer - Handle to no feedback timer
+ * @ccid3hctx_idle - FIXME
+ * @ccid3hctx_t_ld - Time last doubled during slow start
+ * @ccid3hctx_t_nom - Nominal send time of next packet
+ * @ccid3hctx_t_ipi - Interpacket (send) interval
+ * @ccid3hctx_delta - Send timer delta
+ * @ccid3hctx_hist - Packet history
+ * @ccid3hctx_options_received - A struct ccid3_options_received (presumably
+ *                               the CCID3 options parsed from the peer -
+ *                               TODO confirm)
+ */
+struct ccid3_hc_tx_sock {
+       u32                             ccid3hctx_x;
+       u32                             ccid3hctx_x_recv;
+       u32                             ccid3hctx_x_calc;
+       u16                             ccid3hctx_s;
+       u32                             ccid3hctx_rtt;
+       u32                             ccid3hctx_p;
+       u8                              ccid3hctx_state;
+       u8                              ccid3hctx_last_win_count;
+       u8                              ccid3hctx_idle;
+       struct timeval                  ccid3hctx_t_last_win_count;
+       struct timer_list               ccid3hctx_no_feedback_timer;
+       struct timeval                  ccid3hctx_t_ld;
+       struct timeval                  ccid3hctx_t_nom;
+       u32                             ccid3hctx_t_ipi;
+       u32                             ccid3hctx_delta;
+       struct list_head                ccid3hctx_hist;
+       struct ccid3_options_received   ccid3hctx_options_received;
+};
+
+struct ccid3_loss_interval_hist_entry {        /* element of ccid3_hc_rx_sock::ccid3hcrx_loss_interval_hist */
+       struct list_head        ccid3lih_node;          /* list linkage */
+       u64                     ccid3lih_seqno:48,      /* set from seq_loss in ccid3_hc_rx_update_li() */
+                               ccid3lih_win_count:4;   /* set from win_loss in ccid3_hc_rx_update_li() */
+       u32                     ccid3lih_interval;      /* weighted and summed by ccid3_hc_rx_calc_i_mean() */
+};
+
+struct ccid3_rx_hist_entry {   /* element of ccid3_hc_rx_sock::ccid3hcrx_hist */
+       struct list_head        ccid3hrx_node;          /* list linkage */
+       u64                     ccid3hrx_seqno:48,      /* 48 bits wide, like DCCP_MAX_SEQNO */
+                               ccid3hrx_win_count:4,   /* differences are wrapped by TFRC_WIN_COUNT_LIMIT in ccid3_hc_rx_calc_first_li() */
+                               ccid3hrx_type:4;        /* compared against DCCP_PKT_DATA / DCCP_PKT_DATAACK */
+       u32                     ccid3hrx_ndp; /* In fact it is from 8 to 24 bits */
+       struct timeval          ccid3hrx_tstamp;        /* used for the RTT approximation in ccid3_hc_rx_calc_first_li() */
+};
+
+struct ccid3_hc_rx_sock {      /* receiver half-connection block, allocated in ccid3_hc_rx_init() */
+       u64                     ccid3hcrx_seqno_last_counter:48,
+                               ccid3hcrx_state:8,      /* TFRC_RSTATE_* value */
+                               ccid3hcrx_last_counter:4;
+       unsigned long           ccid3hcrx_rtt;          /* minimum spacing between feedbacks in ccid3_hc_rx_packet_recv() */
+       u32                     ccid3hcrx_p;            /* loss event rate, scaled by 1000000 */
+       u32                     ccid3hcrx_bytes_recv;   /* payload bytes accumulated in ccid3_hc_rx_packet_recv() */
+       struct timeval          ccid3hcrx_tstamp_last_feedback;
+       struct timeval          ccid3hcrx_tstamp_last_ack;
+       struct list_head        ccid3hcrx_hist;         /* list of struct ccid3_rx_hist_entry */
+       struct list_head        ccid3hcrx_loss_interval_hist;   /* list of struct ccid3_loss_interval_hist_entry */
+       u16                     ccid3hcrx_s;            /* packet size chosen in ccid3_hc_rx_init() */
+       u32                     ccid3hcrx_pinv;
+       u32                     ccid3hcrx_elapsed_time;
+       u32                     ccid3hcrx_x_recv;
+};
+
+#define ccid3_hc_tx_field(s,field) (s->dccps_hc_tx_ccid_private == NULL ? 0 : \
+                                   ((struct ccid3_hc_tx_sock *)s->dccps_hc_tx_ccid_private)->ccid3hctx_##field)
+
+#define ccid3_hc_rx_field(s,field) (s->dccps_hc_rx_ccid_private == NULL ? 0 : \
+                                   ((struct ccid3_hc_rx_sock *)s->dccps_hc_rx_ccid_private)->ccid3hcrx_##field)
+
+#endif /* _DCCP_CCID3_H_ */
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
new file mode 100644 (file)
index 0000000..fb83454
--- /dev/null
@@ -0,0 +1,422 @@
+#ifndef _DCCP_H
+#define _DCCP_H
+/*
+ *  net/dccp/dccp.h
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <linux/dccp.h>
+#include <net/snmp.h>
+#include <net/sock.h>
+#include <net/tcp.h>
+
+#define DCCP_DEBUG /* debug support is always compiled in; output is still gated at run time by dccp_debug */
+
+#ifdef DCCP_DEBUG
+extern int dccp_debug;
+
+#define dccp_pr_debug(format, a...) \
+       do { if (dccp_debug) \
+               printk(KERN_DEBUG "%s: " format, __FUNCTION__ , ##a); \
+       } while (0)
+#define dccp_pr_debug_cat(format, a...) do { if (dccp_debug) printk(format, ##a); } while (0)
+#else
+#define dccp_pr_debug(format, a...)
+#define dccp_pr_debug_cat(format, a...)
+#endif
+
+extern struct inet_hashinfo dccp_hashinfo;
+
+extern atomic_t dccp_orphan_count;
+extern int dccp_tw_count;
+extern void dccp_tw_deschedule(struct inet_timewait_sock *tw);
+
+extern void dccp_time_wait(struct sock *sk, int state, int timeo);
+
+/* FIXME: Right size this */
+#define DCCP_MAX_OPT_LEN 128
+
+#define DCCP_MAX_PACKET_HDR 32
+
+#define MAX_DCCP_HEADER  (DCCP_MAX_PACKET_HDR + DCCP_MAX_OPT_LEN + MAX_HEADER)
+
+#define DCCP_TIMEWAIT_LEN (60 * HZ) /* how long to wait to destroy TIME-WAIT
+                                    * state, about 60 seconds */
+
+/* draft-ietf-dccp-spec-11.txt initial RTO value */
+#define DCCP_TIMEOUT_INIT ((unsigned)(3 * HZ))
+
+/* Maximal interval between probes for local resources.  */
+#define DCCP_RESOURCE_PROBE_INTERVAL ((unsigned)(HZ / 2U))
+
+#define DCCP_RTO_MAX ((unsigned)(120 * HZ)) /* FIXME: using TCP value */
+
+extern struct proto dccp_v4_prot;
+
+/*
+ * is seq1 < seq2 ?
+ *
+ * Both values are shifted left by 16 so that only their low 48 bits take
+ * part; the sign of the 64-bit difference then gives the ordering modulo 2^48.
+ */
+static inline int before48(const u64 seq1, const u64 seq2)
+{
+       return (s64)((seq1 << 16) - (seq2 << 16)) < 0;
+}
+
+/*
+ * is seq1 > seq2 ?
+ *
+ * Same 48-bit circular comparison as before48() with the operands swapped.
+ */
+static inline int after48(const u64 seq1, const u64 seq2)
+{
+       return (s64)((seq2 << 16) - (seq1 << 16)) < 0;
+}
+
+/*
+ * is seq2 <= seq1 <= seq3 ?
+ *
+ * Compares the unsigned distances seq3 - seq2 and seq1 - seq2, each taken
+ * after shifting left by 16 so that only the low 48 bits take part.
+ */
+static inline int between48(const u64 seq1, const u64 seq2, const u64 seq3)
+{
+       return (seq3 << 16) - (seq2 << 16) >= (seq1 << 16) - (seq2 << 16);
+}
+
+/* Return seq1 if after48() says it is later than seq2, otherwise seq2. */
+static inline u64 max48(const u64 seq1, const u64 seq2)
+{
+       if (after48(seq1, seq2))
+               return seq1;
+
+       return seq2;
+}
+
+enum { /* indices into dccp_mib.mibs[] */
+       DCCP_MIB_NUM = 0,
+       DCCP_MIB_ACTIVEOPENS,                   /* ActiveOpens */
+       DCCP_MIB_ESTABRESETS,                   /* EstabResets */
+       DCCP_MIB_CURRESTAB,                     /* CurrEstab */
+       DCCP_MIB_OUTSEGS,                       /* OutSegs */
+       DCCP_MIB_OUTRSTS,
+       DCCP_MIB_ABORTONTIMEOUT,
+       DCCP_MIB_TIMEOUTS,
+       DCCP_MIB_ABORTFAILED,
+       DCCP_MIB_PASSIVEOPENS,
+       DCCP_MIB_ATTEMPTFAILS,
+       DCCP_MIB_OUTDATAGRAMS,
+       DCCP_MIB_INERRS,
+       DCCP_MIB_OPTMANDATORYERROR,
+       DCCP_MIB_INVALIDOPT,
+       __DCCP_MIB_MAX                          /* number of entries; must stay last */
+};
+
+#define DCCP_MIB_MAX   __DCCP_MIB_MAX
+struct dccp_mib {      /* one counter per DCCP_MIB_* index */
+       unsigned long   mibs[DCCP_MIB_MAX];
+} __SNMP_MIB_ALIGN__;
+
+DECLARE_SNMP_STAT(struct dccp_mib, dccp_statistics); /* the wrappers below all operate on dccp_statistics */
+#define DCCP_INC_STATS(field)          SNMP_INC_STATS(dccp_statistics, field)
+#define DCCP_INC_STATS_BH(field)       SNMP_INC_STATS_BH(dccp_statistics, field)
+#define DCCP_INC_STATS_USER(field)     SNMP_INC_STATS_USER(dccp_statistics, field)
+#define DCCP_DEC_STATS(field)          SNMP_DEC_STATS(dccp_statistics, field)
+#define DCCP_ADD_STATS_BH(field, val)  SNMP_ADD_STATS_BH(dccp_statistics, field, val)
+#define DCCP_ADD_STATS_USER(field, val)        SNMP_ADD_STATS_USER(dccp_statistics, field, val)
+
+extern int  dccp_transmit_skb(struct sock *sk, struct sk_buff *skb);
+extern int  dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb);
+
+extern int dccp_send_response(struct sock *sk);
+extern void dccp_send_ack(struct sock *sk);
+extern void dccp_send_delayed_ack(struct sock *sk);
+extern void dccp_send_sync(struct sock *sk, u64 seq);
+
+extern void dccp_init_xmit_timers(struct sock *sk);
+static inline void dccp_clear_xmit_timers(struct sock *sk)
+{      /* thin wrapper: delegates to inet_csk_clear_xmit_timers() */
+       inet_csk_clear_xmit_timers(sk);
+}
+
+extern unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu);
+
+extern const char *dccp_packet_name(const int type);
+extern const char *dccp_state_name(const int state);
+
+/*
+ * Move @sk to @state, keeping the MIB counters in step:
+ *  - entering DCCP_OPEN from another state increments CURRESTAB;
+ *  - entering DCCP_CLOSED counts an ESTABRESETS when coming from DCCP_CLOSING
+ *    or DCCP_OPEN, unhashes the socket and releases its bound port unless
+ *    SOCK_BINDPORT_LOCK is set;
+ *  - leaving DCCP_OPEN for any other state decrements CURRESTAB.
+ * Warns if the new state equals the old one.
+ */
+static inline void dccp_set_state(struct sock *sk, const int state)
+{
+       const int prev = sk->sk_state;
+
+       dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
+                     dccp_role(sk), sk,
+                     dccp_state_name(prev), dccp_state_name(state));
+       WARN_ON(state == prev);
+
+       if (state == DCCP_OPEN) {
+               if (prev != DCCP_OPEN)
+                       DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
+       } else {
+               if (state == DCCP_CLOSED) {
+                       if (prev == DCCP_CLOSING || prev == DCCP_OPEN)
+                               DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
+
+                       sk->sk_prot->unhash(sk);
+                       if (inet_csk(sk)->icsk_bind_hash != NULL &&
+                           !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
+                               inet_put_port(&dccp_hashinfo, sk);
+               }
+
+               /* applies to DCCP_CLOSED as well as every other non-OPEN state */
+               if (prev == DCCP_OPEN)
+                       DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
+       }
+
+       /* Change state AFTER socket is unhashed to avoid closed
+        * socket sitting in hash tables.
+        */
+       sk->sk_state = state;
+}
+
+/*
+ * Finish off a connection: enter DCCP_CLOSED, stop the transmit timers and
+ * mark both directions shut down. A socket already flagged SOCK_DEAD is
+ * destroyed; otherwise its sk_state_change callback is invoked.
+ */
+static inline void dccp_done(struct sock *sk)
+{
+       dccp_set_state(sk, DCCP_CLOSED);
+       dccp_clear_xmit_timers(sk);
+
+       sk->sk_shutdown = SHUTDOWN_MASK;
+
+       if (sock_flag(sk, SOCK_DEAD))
+               inet_csk_destroy_sock(sk);
+       else
+               sk->sk_state_change(sk);
+}
+
+/*
+ * Initialise a connection request from the received packet: the remote port
+ * is taken from the DCCP header's source port, the acked flag and the receive
+ * window are cleared. @dp is not used yet.
+ */
+static inline void dccp_openreq_init(struct request_sock *req,
+                                    struct dccp_sock *dp,
+                                    struct sk_buff *skb)
+{
+       struct inet_request_sock *ireq = inet_rsk(req);
+
+       /*
+        * FIXME: fill in the other req fields from the DCCP options
+        * received
+        */
+       req->rcv_wnd   = 0;
+       ireq->acked    = 0;
+       ireq->rmt_port = dccp_hdr(skb)->dccph_sport;
+}
+
+extern void dccp_v4_send_check(struct sock *sk, struct dccp_hdr *dh, int len, 
+                              struct sk_buff *skb);
+extern int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
+
+extern struct sock *dccp_create_openreq_child(struct sock *sk,
+                                             const struct request_sock *req,
+                                             const struct sk_buff *skb);
+
+extern int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb);
+
+extern void dccp_v4_err(struct sk_buff *skb, u32);
+
+extern int dccp_v4_rcv(struct sk_buff *skb);
+
+extern struct sock *dccp_v4_request_recv_sock(struct sock *sk,
+                                             struct sk_buff *skb,
+                                             struct request_sock *req,
+                                             struct dst_entry *dst);
+extern struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+                                  struct request_sock *req,
+                                  struct request_sock **prev);
+
+extern int dccp_child_process(struct sock *parent, struct sock *child,
+                             struct sk_buff *skb);
+extern int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+                                 struct dccp_hdr *dh, unsigned len);
+extern int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+                               const struct dccp_hdr *dh, const unsigned len);
+
+extern void            dccp_close(struct sock *sk, long timeout);
+extern struct sk_buff  *dccp_make_response(struct sock *sk,
+                                           struct dst_entry *dst,
+                                           struct request_sock *req);
+
+extern int        dccp_connect(struct sock *sk);
+extern int        dccp_disconnect(struct sock *sk, int flags);
+extern int        dccp_getsockopt(struct sock *sk, int level, int optname,
+                                  char *optval, int *optlen);
+extern int        dccp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+extern int        dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+                               size_t size);
+extern int        dccp_recvmsg(struct kiocb *iocb, struct sock *sk,
+                               struct msghdr *msg, size_t len, int nonblock,
+                               int flags, int *addr_len);
+extern int        dccp_setsockopt(struct sock *sk, int level, int optname,
+                                  char *optval, int optlen);
+extern void       dccp_shutdown(struct sock *sk, int how);
+
+extern int        dccp_v4_checksum(struct sk_buff *skb);
+
+extern int        dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code);
+extern void       dccp_send_close(struct sock *sk);
+
+/**
+ * struct dccp_skb_cb - per-packet DCCP state kept in the skb control buffer
+ *
+ * @dccpd_type - packet type, compared against the DCCP_PKT_* constants
+ * @dccpd_reset_code - presumably the Reset code of a Reset packet; confirm
+ *                    against the code that fills it in
+ * @dccpd_service - presumably the service code; note it is only 8 bits wide
+ *                 here (TODO confirm the intended width)
+ * @dccpd_ccval - presumably the CCVal header field; confirm
+ * @dccpd_seq - sequence number of the packet
+ * @dccpd_ack_seq - acknowledgement number of the packet; compared against
+ *                 DCCP_PKT_WITHOUT_ACK_SEQ for packets that carry no ackno
+ * @dccpd_opt_len - presumably the length of the options area; confirm
+ */
+struct dccp_skb_cb {
+       __u8 dccpd_type;
+       __u8 dccpd_reset_code;
+       __u8 dccpd_service;
+       __u8 dccpd_ccval;
+       __u64 dccpd_seq;
+       __u64 dccpd_ack_seq;
+       int  dccpd_opt_len;
+};
+
+/* View the start of an skb's cb[] area as a struct dccp_skb_cb */
+#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
+
+/*
+ * dccp_non_data_packet - is the packet type one of the non-data types?
+ *
+ * Returns 1 when the type stored in the skb control block is Ack, Close,
+ * CloseReq, Reset, Sync or SyncAck, and 0 for any other type.
+ */
+static inline int dccp_non_data_packet(const struct sk_buff *skb)
+{
+       switch (DCCP_SKB_CB(skb)->dccpd_type) {
+       case DCCP_PKT_ACK:
+       case DCCP_PKT_CLOSE:
+       case DCCP_PKT_CLOSEREQ:
+       case DCCP_PKT_RESET:
+       case DCCP_PKT_SYNC:
+       case DCCP_PKT_SYNCACK:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+/*
+ * dccp_packet_without_ack - is the packet type one that carries no ackno?
+ *
+ * Returns 1 for Data and Request packets (as recorded in the skb control
+ * block) and 0 for every other type.
+ */
+static inline int dccp_packet_without_ack(const struct sk_buff *skb)
+{
+       switch (DCCP_SKB_CB(skb)->dccpd_type) {
+       case DCCP_PKT_DATA:
+       case DCCP_PKT_REQUEST:
+               return 1;
+       default:
+               return 0;
+       }
+}
+
+#define DCCP_MAX_SEQNO ((((u64)1) << 48) - 1)
+#define DCCP_PKT_WITHOUT_ACK_SEQ (DCCP_MAX_SEQNO << 2)
+
+/*
+ * dccp_set_seqno - store a value into a sequence number, modulo 2^48
+ * @seqno: destination sequence number
+ * @value: result of 64-bit arithmetic on sequence numbers
+ *
+ * Sequence numbers live in a 48-bit circular space, so the value is reduced by
+ * masking with DCCP_MAX_SEQNO (2^48 - 1).  Masking is used rather than a
+ * single subtraction of 2^48 because callers pass expressions such as
+ * "gsr + 1 - window / 4", which wrap around zero in u64 arithmetic; only the
+ * mask maps those back into the 48-bit space.  For values below 2^49 the
+ * result is the same as subtracting 2^48 once.
+ */
+static inline void dccp_set_seqno(u64 *seqno, u64 value)
+{
+       *seqno = value & DCCP_MAX_SEQNO;
+}
+
+/*
+ * dccp_delta_seqno - distance from @seqno1 to @seqno2 in the 48-bit space
+ *
+ * Both operands are moved into the top 48 bits of a 64-bit word so that the
+ * u64 subtraction wraps exactly at 2^48; shifting back yields
+ * (seqno2 - seqno1) modulo 2^48.
+ */
+static inline u64 dccp_delta_seqno(u64 seqno1, u64 seqno2)
+{
+       const u64 from = seqno1 << 16;
+       const u64 to = seqno2 << 16;
+
+       return (to - from) >> 16;
+}
+
+/*
+ * dccp_inc_seqno - advance a sequence number by one
+ *
+ * The successor of DCCP_MAX_SEQNO (or of anything already above it) is 0.
+ */
+static inline void dccp_inc_seqno(u64 *seqno)
+{
+       const u64 next = *seqno + 1;
+
+       *seqno = next > DCCP_MAX_SEQNO ? 0 : next;
+}
+
+/*
+ * dccp_hdr_set_seq - write the sequence number @gss into a DCCP header
+ * @dh: fixed header; the extended header is taken to start right after it
+ * @gss: sequence number to store
+ *
+ * The bits of @gss above bit 31 are stored in dh->dccph_seq and the low 32
+ * bits in dccph_seq_low of the extended header, each converted with htonl().
+ */
+static inline void dccp_hdr_set_seq(struct dccp_hdr *dh, const u64 gss)
+{
+       /* void-pointer arithmetic: dhx sits sizeof(*dh) bytes past dh */
+       struct dccp_hdr_ext *dhx = (struct dccp_hdr_ext *)((void *)dh + sizeof(*dh));
+
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       /*
+        * Presumably dccph_seq is a bitfield narrower than 32 bits, hence the
+        * extra shift after the byte swap -- confirm against struct dccp_hdr.
+        */
+       dh->dccph_seq      = htonl((gss >> 32)) >> 8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       dh->dccph_seq      = htonl((gss >> 32));
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       dhx->dccph_seq_low = htonl(gss & 0xffffffff);
+}
+
+/*
+ * dccp_hdr_set_ack - write the acknowledgement number @gsr into an ack header
+ * @dhack: acknowledgement sub-header to fill in
+ * @gsr: acknowledgement number to store
+ *
+ * The bits of @gsr above bit 31 are stored in dccph_ack_nr_high and the low
+ * 32 bits in dccph_ack_nr_low, each converted with htonl().
+ */
+static inline void dccp_hdr_set_ack(struct dccp_hdr_ack_bits *dhack, const u64 gsr)
+{
+#if defined(__LITTLE_ENDIAN_BITFIELD)
+       /*
+        * Presumably dccph_ack_nr_high is a bitfield narrower than 32 bits,
+        * hence the extra shift -- confirm against struct dccp_hdr_ack_bits.
+        */
+       dhack->dccph_ack_nr_high = htonl((gsr >> 32)) >> 8;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       dhack->dccph_ack_nr_high = htonl((gsr >> 32));
+#else
+#error  "Adjust your <asm/byteorder.h> defines"
+#endif
+       dhack->dccph_ack_nr_low  = htonl(gsr & 0xffffffff);
+}
+
+/*
+ * dccp_update_gsr - record a new Greatest Sequence number Received
+ * @sk: DCCP socket
+ * @seq: sequence number of the packet just accepted
+ *
+ * Stores @seq in GSR and re-derives the sequence-validity window from it:
+ *
+ *     SWL = max48(GSR + 1 - W/4, ISR)
+ *     SWH = GSR + 3W/4
+ *
+ * where W is the sequence window option.  GSR is written first so that both
+ * window edges are computed from the same, new value; deriving SWL from the
+ * previous GSR would leave the lower edge one update behind the upper edge.
+ */
+static inline void dccp_update_gsr(struct sock *sk, u64 seq)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       u64 tmp_gsr;
+
+       dp->dccps_gsr = seq;
+       dccp_set_seqno(&tmp_gsr, dp->dccps_gsr + 1 - (dp->dccps_options.dccpo_sequence_window / 4));
+       dccp_set_seqno(&dp->dccps_swl, max48(tmp_gsr, dp->dccps_isr));
+       dccp_set_seqno(&dp->dccps_swh,
+                      dp->dccps_gsr + (3 * dp->dccps_options.dccpo_sequence_window) / 4);
+}
+
+/*
+ * dccp_update_gss - record a new Greatest Sequence number Sent
+ * @sk: DCCP socket
+ * @seq: sequence number of the packet being sent
+ *
+ * Stores @seq in GSS and re-derives the acknowledgement-validity window:
+ *
+ *     AWH = GSS
+ *     AWL = max48(GSS - W + 1, ISS)
+ *
+ * where W is the sequence window option.  GSS is written first so that AWL is
+ * computed from the new value rather than from the previous one.
+ */
+static inline void dccp_update_gss(struct sock *sk, u64 seq)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       u64 tmp_gss;
+
+       dp->dccps_awh = dp->dccps_gss = seq;
+       dccp_set_seqno(&tmp_gss, dp->dccps_gss - dp->dccps_options.dccpo_sequence_window + 1);
+       dp->dccps_awl = max48(tmp_gss, dp->dccps_iss);
+}
+
+extern void dccp_insert_options(struct sock *sk, struct sk_buff *skb);
+extern void dccp_insert_option_elapsed_time(struct sock *sk,
+                                           struct sk_buff *skb,
+                                           u32 elapsed_time);
+extern void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+                              unsigned char option,
+                              const void *value, unsigned char len);
+
+extern struct socket *dccp_ctl_socket;
+
+#define DCCP_ACKPKTS_STATE_RECEIVED    0
+#define DCCP_ACKPKTS_STATE_ECN_MARKED  (1 << 6)
+#define DCCP_ACKPKTS_STATE_NOT_RECEIVED        (3 << 6)
+
+#define DCCP_ACKPKTS_STATE_MASK                0xC0 /* 11000000 */
+#define DCCP_ACKPKTS_LEN_MASK          0x3F /* 00111111 */
+
+/** struct dccp_ackpkts - acknowledgeable packets
+ *
+ * This data structure is the one defined in the DCCP draft
+ * Appendix A.
+ *
+ * @dccpap_buf_head - circular buffer head
+ * @dccpap_buf_tail - circular buffer tail
+ * @dccpap_buf_ackno - ack # of the most recent acknowledgeable packet in the
+ *                    buffer (i.e. %dccpap_buf_head)
+ * @dccpap_buf_nonce - the one-bit sum of the ECN Nonces on all packets acked
+ *                    by the buffer with State 0
+ *
+ * Additionally, the HC-Receiver must keep some information about the
+ * Ack Vectors it has recently sent. For each packet sent carrying an
+ * Ack Vector, it remembers four variables:
+ *
+ * @dccpap_ack_seqno - the Sequence Number used for the packet
+ *                    (HC-Receiver seqno)
+ * @dccpap_ack_ptr - the value of buf_head at the time of acknowledgement.
+ * @dccpap_ack_ackno - the Acknowledgement Number used for the packet
+ *                    (HC-Sender seqno)
+ * @dccpap_ack_nonce - the one-bit sum of the ECN Nonces for all State 0.
+ *
+ * @dccpap_buf_len - circular buffer length
+ * @dccpap_buf - circular buffer of acknowledgeable packets
+ *
+ * The remaining members are not part of the list above; their meaning
+ * should be confirmed against the ackpkts implementation:
+ *
+ * @dccpap_buf_vector_len - presumably the length of the vector currently held
+ *                         in the buffer
+ * @dccpap_ack_vector_len - presumably the length of the vector last sent
+ * @dccpap_time - presumably a timestamp associated with the buffer
+ */
+struct dccp_ackpkts {
+       unsigned int            dccpap_buf_head;
+       unsigned int            dccpap_buf_tail;
+       u64                     dccpap_buf_ackno;
+       u64                     dccpap_ack_seqno;
+       u64                     dccpap_ack_ackno;
+       unsigned int            dccpap_ack_ptr;
+       unsigned int            dccpap_buf_vector_len;
+       unsigned int            dccpap_ack_vector_len;
+       unsigned int            dccpap_buf_len;
+       unsigned long           dccpap_time;
+       u8                      dccpap_buf_nonce;
+       u8                      dccpap_ack_nonce;
+       /* trailing variable-length storage; must stay the last member */
+       u8                      dccpap_buf[0];
+};
+
+extern struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority);
+extern void dccp_ackpkts_free(struct dccp_ackpkts *ap);
+extern int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state);
+extern void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap,
+                                        struct sock *sk, u64 ackno);
+
+/*
+ * Debug printers for ack vectors and the acknowledgeable packets buffer.
+ * With DCCP_DEBUG defined they are real functions implemented elsewhere;
+ * otherwise they are empty inline stubs, so callers need no #ifdef.
+ */
+#ifdef DCCP_DEBUG
+extern void dccp_ackvector_print(const u64 ackno,
+                                const unsigned char *vector, int len);
+extern void dccp_ackpkts_print(const struct dccp_ackpkts *ap);
+#else
+static inline void dccp_ackvector_print(const u64 ackno,
+                                       const unsigned char *vector,
+                                       int len) { }
+static inline void dccp_ackpkts_print(const struct dccp_ackpkts *ap) { }
+#endif
+
+#endif /* _DCCP_H */
diff --git a/net/dccp/input.c b/net/dccp/input.c
new file mode 100644 (file)
index 0000000..622e976
--- /dev/null
@@ -0,0 +1,510 @@
+/*
+ *  net/dccp/input.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * dccp_fin - mark the receive side shut down and queue the final packet
+ *
+ * Sets RCV_SHUTDOWN and SOCK_DONE on the socket, strips the DCCP header
+ * (dccph_doff * 4 bytes) from @skb, appends it to the socket receive queue
+ * and signals the reader through sk_data_ready.  The skb is owned by the
+ * socket afterwards; the caller must not free it.
+ */
+static void dccp_fin(struct sock *sk, struct sk_buff *skb)
+{
+       const unsigned int hdr_len = dccp_hdr(skb)->dccph_doff * 4;
+
+       sk->sk_shutdown |= RCV_SHUTDOWN;
+       sock_set_flag(sk, SOCK_DONE);
+
+       __skb_pull(skb, hdr_len);
+       __skb_queue_tail(&sk->sk_receive_queue, skb);
+       skb_set_owner_r(skb, sk);
+
+       sk->sk_data_ready(sk, 0);
+}
+
+/*
+ * dccp_rcv_close - handle a received Close packet
+ *
+ * In PARTOPEN or OPEN state: answer with a Reset (code "Closed"), queue the
+ * skb through dccp_fin() and move the socket to CLOSED.  In any other state
+ * nothing is done, and in particular the skb is neither queued nor freed
+ * here.
+ */
+static void dccp_rcv_close(struct sock *sk, struct sk_buff *skb)
+{
+       const int state = sk->sk_state;
+
+       if (state != DCCP_PARTOPEN && state != DCCP_OPEN)
+               return;
+
+       dccp_v4_send_reset(sk, DCCP_RESET_CODE_CLOSED);
+       dccp_fin(sk, skb);
+       dccp_set_state(sk, DCCP_CLOSED);
+}
+
+/*
+ * dccp_rcv_closereq - handle a received CloseReq packet
+ *
+ * The skb is never consumed here; the caller remains responsible for it.
+ */
+static void dccp_rcv_closereq(struct sock *sk, struct sk_buff *skb)
+{
+       int state;
+
+       /*
+        *   Step 7: Check for unexpected packet types
+        *      If (S.is_server and P.type == CloseReq)
+        *        Send Sync packet acknowledging P.seqno
+        *        Drop packet and return
+        */
+       if (dccp_sk(sk)->dccps_role != DCCP_ROLE_CLIENT) {
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               return;
+       }
+
+       /* A client honours the request only while PARTOPEN or OPEN */
+       state = sk->sk_state;
+       if (state == DCCP_PARTOPEN || state == DCCP_OPEN) {
+               dccp_set_state(sk, DCCP_CLOSING);
+               dccp_send_close(sk);
+       }
+}
+
+/*
+ * dccp_event_ack_recv - react to the acknowledgement number of a packet
+ *
+ * When the socket sends ack vectors, pass the received ackno to the
+ * acknowledgeable packets buffer; otherwise do nothing.
+ */
+static inline void dccp_event_ack_recv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       if (!dp->dccps_options.dccpo_send_ack_vector)
+               return;
+
+       dccp_ackpkts_check_rcv_ackno(dp->dccps_hc_rx_ackpkts, sk,
+                                    DCCP_SKB_CB(skb)->dccpd_ack_seq);
+}
+
+/*
+ * dccp_check_seqno - validate the sequence and ack numbers of a packet
+ *
+ * Implements Steps 5 and 6 of the packet processing pseudocode quoted below.
+ * On success GSR/SWL/SWH (and GAR, where applicable) are updated.
+ *
+ * Returns 0 if the packet is acceptable, -1 if it must be dropped.  The skb
+ * is never freed here.  A Sync is sent only when the Step 6 test fails, not
+ * when a Sync/SyncAck is rejected in Step 5.
+ */
+static int dccp_check_seqno(struct sock *sk, struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       struct dccp_sock *dp = dccp_sk(sk);
+       /* Snapshots taken before Step 5 may move the window */
+       u64 lswl = dp->dccps_swl;
+       u64 lawl = dp->dccps_awl;
+
+       /*
+        *   Step 5: Prepare sequence numbers for Sync
+        *     If P.type == Sync or P.type == SyncAck,
+        *        If S.AWL <= P.ackno <= S.AWH and P.seqno >= S.SWL,
+        *           / * P is valid, so update sequence number variables
+        *               accordingly.  After this update, P will pass the tests
+        *               in Step 6.  A SyncAck is generated if necessary in
+        *               Step 15 * /
+        *           Update S.GSR, S.SWL, S.SWH
+        *        Otherwise,
+        *           Drop packet and return
+        */
+       if (dh->dccph_type == DCCP_PKT_SYNC || 
+           dh->dccph_type == DCCP_PKT_SYNCACK) {
+               if (between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh) &&
+                   !before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_swl))
+                       dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               else
+                       return -1;
+       /*
+        *   Step 6: Check sequence numbers
+        *      Let LSWL = S.SWL and LAWL = S.AWL
+        *      If P.type == CloseReq or P.type == Close or P.type == Reset,
+        *        LSWL := S.GSR + 1, LAWL := S.GAR
+        *      If LSWL <= P.seqno <= S.SWH
+        *           and (P.ackno does not exist or LAWL <= P.ackno <= S.AWH),
+        *        Update S.GSR, S.SWL, S.SWH
+        *        If P.type != Sync,
+        *           Update S.GAR
+        *      Otherwise,
+        *        Send Sync packet acknowledging P.seqno
+        *        Drop packet and return
+        */
+       } else if (dh->dccph_type == DCCP_PKT_CLOSEREQ ||
+                  dh->dccph_type == DCCP_PKT_CLOSE ||
+                  dh->dccph_type == DCCP_PKT_RESET) {
+               /* Tighter window for packets that end the connection */
+               lswl = dp->dccps_gsr;
+               dccp_inc_seqno(&lswl);
+               lawl = dp->dccps_gar;
+       }
+
+       /* DCCP_PKT_WITHOUT_ACK_SEQ marks "P.ackno does not exist" */
+       if (between48(DCCP_SKB_CB(skb)->dccpd_seq, lswl, dp->dccps_swh) &&
+           (DCCP_SKB_CB(skb)->dccpd_ack_seq == DCCP_PKT_WITHOUT_ACK_SEQ ||
+            between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, lawl, dp->dccps_awh))) {
+               dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+
+               if (dh->dccph_type != DCCP_PKT_SYNC &&
+                   DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+                       dp->dccps_gar = DCCP_SKB_CB(skb)->dccpd_ack_seq;
+       } else {
+               dccp_pr_debug("Step 6 failed, sending SYNC...\n");
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * dccp_rcv_established - process a packet on an established connection
+ * @sk: receiving socket
+ * @skb: the packet; consumed on every path except as noted below
+ * @dh: DCCP header of @skb
+ * @len: not used by this function
+ *
+ * Always returns 0.  The skb is either queued on sk_receive_queue (Data,
+ * DataAck, Reset, and Close via dccp_rcv_close()) or freed at the "discard"
+ * label.  NOTE(review): dccp_rcv_close() only queues the skb in PARTOPEN or
+ * OPEN state, so a Close seen in any other state would be neither queued nor
+ * freed -- confirm that this function cannot be reached in such a state.
+ */
+int dccp_rcv_established(struct sock *sk, struct sk_buff *skb,
+                        const struct dccp_hdr *dh, const unsigned len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       if (dccp_check_seqno(sk, skb))
+               goto discard;
+
+       if (dccp_parse_options(sk, skb))
+               goto discard;
+
+       if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+               dccp_event_ack_recv(sk, skb);
+
+       /*
+        * FIXME: check ECN to see if we should use
+        * DCCP_ACKPKTS_STATE_ECN_MARKED
+        */
+       if (dp->dccps_options.dccpo_send_ack_vector) {
+               struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+
+               if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+                                    DCCP_SKB_CB(skb)->dccpd_seq,
+                                    DCCP_ACKPKTS_STATE_RECEIVED)) {
+                       LIMIT_NETDEBUG(pr_info("DCCP: acknowledgeable packets buffer full!\n"));
+                       /*
+                        * DCCP_MAX_SEQNO + 1 is outside the valid seqno range;
+                        * dccp_rcv_state_process() tests for this same value.
+                        * The packet is dropped and an ack is scheduled with
+                        * the minimum delay.
+                        */
+                       ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
+                       goto discard;
+               }
+
+               /*
+                * FIXME: this activation is probably wrong, have to study more
+                * TCP delack machinery and how it fits into DCCP draft, but
+                * for now it kinda "works" 8)
+                */
+               if (!inet_csk_ack_scheduled(sk)) {
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, 5 * HZ, TCP_RTO_MAX);
+               }
+       }
+
+       /* Let both half-connection CCIDs see every accepted packet */
+       ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+       ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+       switch (dccp_hdr(skb)->dccph_type) {
+       case DCCP_PKT_DATAACK:
+       case DCCP_PKT_DATA:
+               /*
+                * FIXME: check if sk_receive_queue is full, schedule DATA_DROPPED option
+                * if it is.
+                */
+               __skb_pull(skb, dh->dccph_doff * 4);
+               __skb_queue_tail(&sk->sk_receive_queue, skb);
+               skb_set_owner_r(skb, sk);
+               sk->sk_data_ready(sk, 0);
+               return 0;
+       case DCCP_PKT_ACK:
+               goto discard;
+       case DCCP_PKT_RESET:
+               /*
+                *  Step 9: Process Reset
+                *      If P.type == Reset,
+                *              Tear down connection
+                *              S.state := TIMEWAIT
+                *              Set TIMEWAIT timer
+                *              Drop packet and return
+               */
+               /* dccp_fin() queues the skb, so it is not freed here */
+               dccp_fin(sk, skb);
+               dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+               return 0;
+       case DCCP_PKT_CLOSEREQ:
+               dccp_rcv_closereq(sk, skb);
+               goto discard;
+       case DCCP_PKT_CLOSE:
+               dccp_rcv_close(sk, skb);
+               return 0;
+       case DCCP_PKT_REQUEST:
+               /* Step 7 
+                *   or (S.is_server and P.type == Response)
+                *   or (S.is_client and P.type == Request)
+                *   or (S.state >= OPEN and P.type == Request
+                *      and P.seqno >= S.OSR)
+                *    or (S.state >= OPEN and P.type == Response
+                *      and P.seqno >= S.OSR)
+                *    or (S.state == RESPOND and P.type == Data),
+                *  Send Sync packet acknowledging P.seqno
+                *  Drop packet and return
+                */
+               if (dp->dccps_role != DCCP_ROLE_LISTEN)
+                       goto send_sync;
+               goto check_seq;
+       case DCCP_PKT_RESPONSE:
+               if (dp->dccps_role != DCCP_ROLE_CLIENT)
+                       goto send_sync;
+check_seq:
+               if (!before48(DCCP_SKB_CB(skb)->dccpd_seq, dp->dccps_osr)) {
+                       /* send_sync is also entered by goto from the cases above */
+send_sync:
+                       dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               }
+               break;
+       }
+
+       /*
+        * Reached after the Request/Response handling and for any packet type
+        * with no case above: count it as an input error, then drop it.
+        */
+       DCCP_INC_STATS_BH(DCCP_MIB_INERRS);
+discard:
+       __kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * dccp_rcv_request_sent_state_process - handle a packet in REQUESTING state
+ * @sk: client socket waiting for a Response
+ * @skb: received packet
+ * @dh: DCCP header of @skb
+ * @len: not used by this function
+ *
+ * Returns:
+ *    1 - the packet is not an acceptable Response (or CCID setup failed);
+ *        the skb is left untouched
+ *    0 - a valid Response was processed and the skb was freed here; no Ack
+ *        was sent because one is expected to go out with pending data
+ *   -1 - a valid Response was processed and an Ack was sent; the skb is NOT
+ *        freed here (dccp_rcv_state_process() frees it for negative returns)
+ *
+ * NOTE(review): the REQUEST timer is stopped and sk_send_head is freed before
+ * the acknowledgement number is validated, so a Response with a bad ackno
+ * still tears down the retransmission state -- confirm this is intended.
+ */
+static int dccp_rcv_request_sent_state_process(struct sock *sk,
+                                              struct sk_buff *skb,
+                                              const struct dccp_hdr *dh,
+                                              const unsigned len)
+{
+       /* 
+        *  Step 4: Prepare sequence numbers in REQUEST
+        *     If S.state == REQUEST,
+        *        If (P.type == Response or P.type == Reset)
+        *              and S.AWL <= P.ackno <= S.AWH,
+        *           / * Set sequence number variables corresponding to the
+        *              other endpoint, so P will pass the tests in Step 6 * /
+        *           Set S.GSR, S.ISR, S.SWL, S.SWH
+        *           / * Response processing continues in Step 10; Reset
+        *              processing continues in Step 9 * /
+       */
+       if (dh->dccph_type == DCCP_PKT_RESPONSE) {
+               const struct inet_connection_sock *icsk = inet_csk(sk);
+               struct dccp_sock *dp = dccp_sk(sk);
+
+               /* Stop the REQUEST timer */
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
+               BUG_TRAP(sk->sk_send_head != NULL);
+               __kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
+
+               if (!between48(DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awl, dp->dccps_awh)) {
+                       dccp_pr_debug("invalid ackno: S.AWL=%llu, P.ackno=%llu, S.AWH=%llu \n",
+                                     dp->dccps_awl, DCCP_SKB_CB(skb)->dccpd_ack_seq, dp->dccps_awh);
+                       goto out_invalid_packet;
+               }
+
+               /* The Response's seqno becomes both ISR and the first GSR */
+               dp->dccps_isr = DCCP_SKB_CB(skb)->dccpd_seq;
+               dccp_update_gsr(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+
+               /* If either half-connection fails to initialise, undo both */
+               if (ccid_hc_rx_init(dp->dccps_hc_rx_ccid, sk) != 0 ||
+                   ccid_hc_tx_init(dp->dccps_hc_tx_ccid, sk) != 0) {
+                       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+                       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+                       /* FIXME: send appropriate RESET code */
+                       goto out_invalid_packet;
+               }
+
+               dccp_sync_mss(sk, dp->dccps_pmtu_cookie);
+
+               /*
+                *    Step 10: Process REQUEST state (second part)
+                *       If S.state == REQUEST,
+                *        / * If we get here, P is a valid Response from the server (see
+                *           Step 4), and we should move to PARTOPEN state.  PARTOPEN
+                *           means send an Ack, don't send Data packets, retransmit
+                *           Acks periodically, and always include any Init Cookie from
+                *           the Response * /
+                *        S.state := PARTOPEN
+                *        Set PARTOPEN timer
+                *        Continue with S.state == PARTOPEN
+                *        / * Step 12 will send the Ack completing the three-way
+                *           handshake * /
+                */
+               dccp_set_state(sk, DCCP_PARTOPEN);
+
+               /* Make sure socket is routed, for correct metrics. */
+               inet_sk_rebuild_header(sk);
+
+               if (!sock_flag(sk, SOCK_DEAD)) {
+                       sk->sk_state_change(sk);
+                       sk_wake_async(sk, 0, POLL_OUT);
+               }
+
+               if (sk->sk_write_pending || icsk->icsk_ack.pingpong ||
+                   icsk->icsk_accept_queue.rskq_defer_accept) {
+                       /* Save one ACK. Data will be ready after
+                        * several ticks, if write_pending is set.
+                        *
+                        * It may be deleted, but with this feature tcpdumps
+                        * look so _wonderfully_ clever, that I was not able
+                        * to stand against the temptation 8)     --ANK
+                        */
+                       /*
+                        * OK, in DCCP we can as well do a similar trick, its
+                        * even in the draft, but there is no need for us to
+                        * schedule an ack here, as dccp_sendmsg does this for
+                        * us, also stated in the draft. -acme
+                        */
+                       __kfree_skb(skb);
+                       return 0;
+               } 
+               dccp_send_ack(sk);
+               return -1;
+       }
+
+       /* Any packet type other than Response also ends up here */
+out_invalid_packet:
+       return 1; /* dccp_v4_do_rcv will send a reset, but...
+                    FIXME: the reset code should be DCCP_RESET_CODE_PACKET_ERROR  */
+}
+
+/*
+ * dccp_rcv_respond_partopen_state_process - handle a packet in RESPOND or
+ * PARTOPEN state
+ * @sk: socket
+ * @skb: received packet
+ * @dh: DCCP header of @skb
+ * @len: passed through to dccp_rcv_established()
+ *
+ * An Ack or DataAck moves the socket to OPEN and records the packet's seqno
+ * as OSR.  A DataAck is additionally handed to dccp_rcv_established().
+ *
+ * Returns 1 if the skb was handed to dccp_rcv_established() (the caller must
+ * not free it), 0 otherwise.
+ */
+static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
+                                                  struct sk_buff *skb,
+                                                  const struct dccp_hdr *dh,
+                                                  const unsigned len)
+{
+       int queued = 0;
+
+       switch (dh->dccph_type) {
+       case DCCP_PKT_RESET:
+               inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+               break;
+       case DCCP_PKT_DATAACK:
+       case DCCP_PKT_ACK:
+               /*
+                * FIXME: we should be resetting the PARTOPEN (DELACK) timer
+                * here, but only if we haven't used the DELACK timer for
+                * something else, like sending a delayed ack for a TIMESTAMP
+                * echo, etc. For now we are not clearing it; sending an extra
+                * ACK when there is nothing else to do in DELACK is not a big
+                * deal after all.
+                */
+
+               /* Stop the PARTOPEN timer */
+               if (sk->sk_state == DCCP_PARTOPEN)
+                       inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
+
+               dccp_sk(sk)->dccps_osr = DCCP_SKB_CB(skb)->dccpd_seq;
+               dccp_set_state(sk, DCCP_OPEN);
+
+               if (dh->dccph_type == DCCP_PKT_DATAACK) {
+                       /*
+                        * NOTE(review): the caller has already run
+                        * dccp_check_seqno() and option parsing on this skb,
+                        * and dccp_rcv_established() runs them again --
+                        * confirm the repeat is harmless.
+                        */
+                       dccp_rcv_established(sk, skb, dh, len);
+                       queued = 1; /* packet was queued (by dccp_rcv_established) */
+               }
+               break;
+       }
+
+       return queued;
+}
+
+/*
+ * dccp_rcv_state_process - process a packet for a non-established socket
+ * @sk: receiving socket
+ * @skb: the packet
+ * @dh: DCCP header of @skb
+ * @len: passed through to the per-state handlers
+ *
+ * Returns 0 when the skb was consumed (queued or freed) and 1 when it was
+ * not: in CLOSED state, for an Ack/DataAck in LISTEN state, when
+ * dccp_v4_conn_request() fails, or when the REQUESTING handler rejects the
+ * packet.  Presumably the caller answers a return of 1 with a Reset -- see
+ * the comment at the end of dccp_rcv_request_sent_state_process().
+ */
+int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
+                          struct dccp_hdr *dh, unsigned len)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       /* sk_state may change below; remember where we started */
+       const int old_state = sk->sk_state;
+       int queued = 0;
+
+       /* LISTEN and REQUESTING have no sequence window to validate against yet */
+       if (sk->sk_state != DCCP_LISTEN && sk->sk_state != DCCP_REQUESTING) {
+               if (dccp_check_seqno(sk, skb))
+                       goto discard;
+
+               /*
+                * Step 8: Process options and mark acknowledgeable
+                */
+               if (dccp_parse_options(sk, skb))
+                       goto discard;
+
+               if (DCCP_SKB_CB(skb)->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
+                       dccp_event_ack_recv(sk, skb);
+
+               ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
+               ccid_hc_tx_packet_recv(dp->dccps_hc_tx_ccid, sk, skb);
+
+               /*
+                * FIXME: check ECN to see if we should use
+                * DCCP_ACKPKTS_STATE_ECN_MARKED
+                */
+               if (dp->dccps_options.dccpo_send_ack_vector) {
+                       if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts,
+                                            DCCP_SKB_CB(skb)->dccpd_seq,
+                                            DCCP_ACKPKTS_STATE_RECEIVED))
+                               goto discard;
+                       /*
+                        * FIXME: this activation is probably wrong, have to study more
+                        * TCP delack machinery and how it fits into DCCP draft, but
+                        * for now it kinda "works" 8)
+                        */
+                       if (dp->dccps_hc_rx_ackpkts->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1 &&
+                           !inet_csk_ack_scheduled(sk)) {
+                               inet_csk_schedule_ack(sk);
+                               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MIN, TCP_RTO_MAX);
+                       }
+               }
+       }
+
+       /*
+        *  Step 9: Process Reset
+        *      If P.type == Reset,
+        *              Tear down connection
+        *              S.state := TIMEWAIT
+        *              Set TIMEWAIT timer
+        *              Drop packet and return
+       */
+       if (dh->dccph_type == DCCP_PKT_RESET) {
+               /* Queue the equivalent of TCP fin so that dccp_recvmsg exits the loop */
+               dccp_fin(sk, skb);
+               dccp_time_wait(sk, DCCP_TIME_WAIT, 0);
+               return 0;
+               /*
+                *   Step 7: Check for unexpected packet types
+                *      If (S.is_server and P.type == CloseReq)
+                *          or (S.is_server and P.type == Response)
+                *          or (S.is_client and P.type == Request)
+                *          or (S.state == RESPOND and P.type == Data),
+                *        Send Sync packet acknowledging P.seqno
+                *        Drop packet and return
+                */
+       } else if ((dp->dccps_role != DCCP_ROLE_CLIENT &&
+                   (dh->dccph_type == DCCP_PKT_RESPONSE || dh->dccph_type == DCCP_PKT_CLOSEREQ)) ||
+                   (dp->dccps_role == DCCP_ROLE_CLIENT &&
+                    dh->dccph_type == DCCP_PKT_REQUEST) ||
+                   (sk->sk_state == DCCP_RESPOND && dh->dccph_type == DCCP_PKT_DATA)) {
+               dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq);
+               goto discard;
+       }
+
+       switch (sk->sk_state) {
+       case DCCP_CLOSED:
+               return 1;
+
+       case DCCP_LISTEN:
+               if (dh->dccph_type == DCCP_PKT_ACK ||
+                   dh->dccph_type == DCCP_PKT_DATAACK)
+                       return 1;
+
+               /* Cannot be true here: Reset packets already returned above */
+               if (dh->dccph_type == DCCP_PKT_RESET)
+                       goto discard;
+
+               if (dh->dccph_type == DCCP_PKT_REQUEST) {
+                       if (dccp_v4_conn_request(sk, skb) < 0)
+                               return 1;
+
+                       /* FIXME: do congestion control initialization */
+                       goto discard;
+               }
+               goto discard;
+
+       case DCCP_REQUESTING:
+               /* FIXME: do congestion control initialization */
+
+               /*
+                * >= 0: the handler either freed the skb (0) or rejected the
+                * packet (1); pass that on.  < 0: an Ack was sent and the skb
+                * still has to be freed here.
+                */
+               queued = dccp_rcv_request_sent_state_process(sk, skb, dh, len);
+               if (queued >= 0)
+                       return queued;
+
+               __kfree_skb(skb);
+               return 0;
+
+       case DCCP_RESPOND:
+       case DCCP_PARTOPEN:
+               queued = dccp_rcv_respond_partopen_state_process(sk, skb, dh, len);
+               break;
+       }
+
+       /* An Ack/DataAck received while PARTOPEN: notify waiters */
+       if (dh->dccph_type == DCCP_PKT_ACK || dh->dccph_type == DCCP_PKT_DATAACK) {
+               switch (old_state) {
+               case DCCP_PARTOPEN:
+                       sk->sk_state_change(sk);
+                       sk_wake_async(sk, 0, POLL_OUT);
+                       break;
+               }
+       }
+
+       /* The discard label sits inside the if so that jumps to it always free */
+       if (!queued) { 
+discard:
+               __kfree_skb(skb);
+       }
+       return 0;
+}
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
new file mode 100644 (file)
index 0000000..083baca
--- /dev/null
@@ -0,0 +1,1289 @@
+/*
+ *  net/dccp/ipv4.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/icmp.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <linux/random.h>
+
+#include <net/icmp.h>
+#include <net/inet_hashtables.h>
+#include <net/sock.h>
+#include <net/tcp_states.h>
+#include <net/xfrm.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Hash table state for DCCP sockets; a pointer to it is handed to the inet_*
+ * hashing helpers used throughout this file. Only the locks, the listening
+ * hash user count/wait queue and the local port search rover are initialised
+ * statically here.
+ */
+struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
+       .lhash_lock     = RW_LOCK_UNLOCKED,
+       .lhash_users    = ATOMIC_INIT(0),
+       .lhash_wait     = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
+       .portalloc_lock = SPIN_LOCK_UNLOCKED,
+       .port_rover     = 1024 - 1,
+};
+
+/*
+ * Bind @sk to local port @snum using the DCCP hash tables.
+ * Returns whatever inet_csk_get_port() returns.
+ */
+static int dccp_v4_get_port(struct sock *sk, const unsigned short snum)
+{
+       const int err = inet_csk_get_port(&dccp_hashinfo, sk, snum);
+
+       return err;
+}
+
+/* Insert @sk into the DCCP hash tables (thin wrapper around inet_hash()). */
+static void dccp_v4_hash(struct sock *sk)
+{
+       inet_hash(&dccp_hashinfo, sk);
+}
+
+/* Remove @sk from the DCCP hash tables (thin wrapper around inet_unhash()). */
+static void dccp_v4_unhash(struct sock *sk)
+{
+       inet_unhash(&dccp_hashinfo, sk);
+}
+
+/*
+ * Verify that the connection identity @sk would have with local port @lport
+ * is not already in use and, if it is free, commit it.
+ *
+ * Both the TIME-WAIT chain and the established chain of the matching ehash
+ * bucket are scanned under the bucket write lock. On success inet->num,
+ * inet->sport and sk->sk_hashent are set and @sk is added to the established
+ * chain.
+ *
+ * @sk:    socket being connected
+ * @lport: candidate local port (converted with htons() before being stored
+ *         in inet->sport)
+ * @twp:   optional out parameter for a TIME-WAIT socket to be recycled
+ *
+ * Returns 0 on success or -EADDRNOTAVAIL if a matching socket was found.
+ *
+ * NOTE(review): every TIME-WAIT match jumps to not_unique and tw is reset to
+ * NULL after that scan, so on the success path *twp is always written as NULL
+ * and the "else if (tw != NULL)" branch cannot be taken as the code stands;
+ * LINUX_MIB_TIMEWAITRECYCLED is nevertheless bumped whenever @twp is non-NULL.
+ *
+ * Called with local bh disabled.
+ */
+static int __dccp_v4_check_established(struct sock *sk, const __u16 lport,
+                                     struct inet_timewait_sock **twp)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       /*
+        * Note the swap: "daddr" is our own (receive) address and "saddr" is
+        * the peer's address - presumably named from the point of view of an
+        * incoming packet, which is what the match macros below compare.
+        */
+       const u32 daddr = inet->rcv_saddr;
+       const u32 saddr = inet->daddr;
+       const int dif = sk->sk_bound_dev_if;
+       INET_ADDR_COOKIE(acookie, saddr, daddr)
+       const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
+       const int hash = inet_ehashfn(daddr, lport, saddr, inet->dport, dccp_hashinfo.ehash_size);
+       struct inet_ehash_bucket *head = &dccp_hashinfo.ehash[hash];
+       const struct sock *sk2;
+       const struct hlist_node *node;
+       struct inet_timewait_sock *tw;
+
+       write_lock(&head->lock);
+
+       /*
+        * Check TIME-WAIT sockets first. Their chain is the bucket located
+        * ehash_size entries after the established one.
+        */
+       sk_for_each(sk2, node, &(head + dccp_hashinfo.ehash_size)->chain) {
+               tw = inet_twsk(sk2);
+
+               if (INET_TW_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+                       goto not_unique;
+       }
+       tw = NULL;
+
+       /* And established part... */
+       sk_for_each(sk2, node, &head->chain) {
+               if (INET_MATCH(sk2, acookie, saddr, daddr, ports, dif))
+                       goto not_unique;
+       }
+
+       /* Must record num and sport now. Otherwise we will see
+        * in hash table socket with a funny identity. */
+       inet->num = lport;
+       inet->sport = htons(lport);
+       sk->sk_hashent = hash;
+       BUG_TRAP(sk_unhashed(sk));
+       __sk_add_node(sk, &head->chain);
+       sock_prot_inc_use(sk->sk_prot);
+       write_unlock(&head->lock);
+
+       if (twp != NULL) {
+               /* Hand the (possibly NULL) TIME-WAIT socket to the caller. */
+               *twp = tw;
+               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+       } else if (tw != NULL) {
+               /* Silly. Should hash-dance instead... */
+               dccp_tw_deschedule(tw);
+               NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
+
+               inet_twsk_put(tw);
+       }
+
+       return 0;
+
+not_unique:
+       write_unlock(&head->lock);
+       return -EADDRNOTAVAIL;
+}
+
+/*
+ * Bind a port for a connect operation and hash it.
+ *
+ * If the socket has no local port yet (inet->num == 0) a port is searched in
+ * sysctl_local_port_range, starting just after dccp_hashinfo.port_rover and
+ * trying at most (high - low + 1) candidates. Otherwise the already bound
+ * port is used and only the uniqueness of the connection is verified.
+ *
+ * Returns 0 on success, -EADDRNOTAVAIL if no usable port/identity was found.
+ */
+static int dccp_v4_hash_connect(struct sock *sk)
+{
+       const unsigned short snum = inet_sk(sk)->num;
+       struct inet_bind_hashbucket *head;
+       struct inet_bind_bucket *tb;
+       int ret;
+
+       if (snum == 0) {
+               int rover;
+               int low = sysctl_local_port_range[0];
+               int high = sysctl_local_port_range[1];
+               int remaining = (high - low) + 1;
+               struct hlist_node *node;
+               struct inet_timewait_sock *tw = NULL;
+
+               /* bhs stay disabled until "out:" or the failure return below */
+               local_bh_disable();
+
+               /* TODO. Actually it is not so bad idea to remove
+                * dccp_hashinfo.portalloc_lock before next submission to Linus.
+                * As soon as we touch this place at all it is time to think.
+                *
+                * Now it protects single _advisory_ variable dccp_hashinfo.port_rover,
+                * hence it is mostly useless.
+                * Code will work nicely if we just delete it, but
+                * I am afraid in contented case it will work not better or
+                * even worse: another cpu just will hit the same bucket
+                * and spin there.
+                * So some cpu salt could remove both contention and
+                * memory pingpong. Any ideas how to do this in a nice way?
+                */
+               spin_lock(&dccp_hashinfo.portalloc_lock);
+               rover = dccp_hashinfo.port_rover;
+
+               do {
+                       rover++;
+                       /* wrap around to the start of the configured range */
+                       if ((rover < low) || (rover > high))
+                               rover = low;
+                       head = &dccp_hashinfo.bhash[inet_bhashfn(rover, dccp_hashinfo.bhash_size)];
+                       spin_lock(&head->lock);
+
+                       /* Does not bother with rcv_saddr checks,
+                        * because the established check is already
+                        * unique enough.
+                        */
+                       inet_bind_bucket_for_each(tb, node, &head->chain) {
+                               if (tb->port == rover) {
+                                       BUG_TRAP(!hlist_empty(&tb->owners));
+                                       /*
+                                        * Buckets created by this function
+                                        * are marked fastreuse = -1 (below);
+                                        * any other bucket is skipped.
+                                        */
+                                       if (tb->fastreuse >= 0)
+                                               goto next_port;
+                                       /* 0 means the identity is unique */
+                                       if (!__dccp_v4_check_established(sk,
+                                                                        rover,
+                                                                        &tw))
+                                               goto ok;
+                                       goto next_port;
+                               }
+                       }
+
+                       /* Nobody owns this port yet: create its bucket. */
+                       tb = inet_bind_bucket_create(dccp_hashinfo.bind_bucket_cachep, head, rover);
+                       if (tb == NULL) {
+                               /* allocation failed: give up the search */
+                               spin_unlock(&head->lock);
+                               break;
+                       }
+                       tb->fastreuse = -1;
+                       goto ok;
+
+               next_port:
+                       spin_unlock(&head->lock);
+               } while (--remaining > 0);
+               /* Failure: range exhausted or bucket allocation failed. */
+               dccp_hashinfo.port_rover = rover;
+               spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+               local_bh_enable();
+
+               return -EADDRNOTAVAIL;
+
+ok:
+               /* All locks still held and bhs disabled */
+               dccp_hashinfo.port_rover = rover;
+               spin_unlock(&dccp_hashinfo.portalloc_lock);
+
+               inet_bind_hash(sk, tb, rover);
+               /*
+                * The socket is already hashed when the port was accepted by
+                * __dccp_v4_check_established(); only the freshly created
+                * bucket case still needs hashing here.
+                */
+               if (sk_unhashed(sk)) {
+                       inet_sk(sk)->sport = htons(rover);
+                       __inet_hash(&dccp_hashinfo, sk, 0);
+               }
+               spin_unlock(&head->lock);
+
+               if (tw != NULL) {
+                       dccp_tw_deschedule(tw);
+                       inet_twsk_put(tw);
+               }
+
+               ret = 0;
+               /* bhs are still disabled; "out:" re-enables them */
+               goto out;
+       }
+
+       /* A local port is already bound: look at its bind bucket. */
+       head = &dccp_hashinfo.bhash[inet_bhashfn(snum, dccp_hashinfo.bhash_size)];
+       tb   = inet_csk(sk)->icsk_bind_hash;
+       spin_lock_bh(&head->lock);
+       /* We are the only owner of the bucket: no conflict is possible. */
+       if (sk_head(&tb->owners) == sk && sk->sk_bind_node.next == NULL) {
+               __inet_hash(&dccp_hashinfo, sk, 0);
+               spin_unlock_bh(&head->lock);
+               return 0;
+       } else {
+               /*
+                * Plain spin_unlock(): bhs deliberately stay disabled for
+                * __dccp_v4_check_established() and are re-enabled at "out:".
+                */
+               spin_unlock(&head->lock);
+               /* No definite answer... Walk to established hash table */
+               ret = __dccp_v4_check_established(sk, snum, NULL);
+out:
+               local_bh_enable();
+               return ret;
+       }
+}
+
+/*
+ * Start an active (client side) DCCP connection over IPv4.
+ *
+ * @sk:       socket to connect
+ * @uaddr:    destination, interpreted as a struct sockaddr_in
+ * @addr_len: size of @uaddr in bytes
+ *
+ * Routes to the destination, fills in the socket's addresses and ports,
+ * moves the socket to DCCP_REQUESTING, binds/hashes a local port, picks the
+ * initial sequence number and finally calls dccp_connect().
+ *
+ * Returns 0 on success or a negative errno: -EINVAL for a short address or a
+ * zero destination with source routing, -EAFNOSUPPORT for a non AF_INET
+ * address, -ENETUNREACH for multicast/broadcast routes, or whatever the
+ * routing, hashing and dccp_connect() steps return. On failure after the
+ * state change the socket is put back into DCCP_CLOSED.
+ *
+ * Note that dccps_role is set to DCCP_ROLE_CLIENT before any argument
+ * validation, i.e. also when the call fails early.
+ */
+static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
+                          int addr_len)
+{
+       struct inet_sock *inet = inet_sk(sk);
+       struct dccp_sock *dp = dccp_sk(sk);
+       const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr;
+       struct rtable *rt;
+       u32 daddr, nexthop;
+       int tmp;
+       int err;
+
+       dp->dccps_role = DCCP_ROLE_CLIENT;
+
+       if (addr_len < sizeof(struct sockaddr_in))
+               return -EINVAL;
+
+       if (usin->sin_family != AF_INET)
+               return -EAFNOSUPPORT;
+
+       nexthop = daddr = usin->sin_addr.s_addr;
+       /* With a source route option the first hop comes from the option. */
+       if (inet->opt != NULL && inet->opt->srr) {
+               if (daddr == 0)
+                       return -EINVAL;
+               nexthop = inet->opt->faddr;
+       }
+
+       tmp = ip_route_connect(&rt, nexthop, inet->saddr,
+                              RT_CONN_FLAGS(sk), sk->sk_bound_dev_if,
+                              IPPROTO_DCCP,
+                              inet->sport, usin->sin_port, sk);
+       if (tmp < 0)
+               return tmp;
+
+       if (rt->rt_flags & (RTCF_MULTICAST | RTCF_BROADCAST)) {
+               ip_rt_put(rt);
+               return -ENETUNREACH;
+       }
+
+       /* Without source routing use the destination the route resolved to. */
+       if (inet->opt == NULL || !inet->opt->srr)
+               daddr = rt->rt_dst;
+
+       /* Unbound source address: take the one chosen by the route. */
+       if (inet->saddr == 0)
+               inet->saddr = rt->rt_src;
+       inet->rcv_saddr = inet->saddr;
+
+       inet->dport = usin->sin_port;
+       inet->daddr = daddr;
+
+       dp->dccps_ext_header_len = 0;
+       if (inet->opt != NULL)
+               dp->dccps_ext_header_len = inet->opt->optlen;
+       /*
+        * Socket identity is still unknown (sport may be zero).
+        * However we set state to DCCP_REQUESTING and not releasing socket
+        * lock select source port, enter ourselves into the hash tables and
+        * complete initialization after this.
+        */
+       dccp_set_state(sk, DCCP_REQUESTING);
+       err = dccp_v4_hash_connect(sk);
+       if (err != 0)
+               goto failure;
+
+       /* The source port may only be known now: refresh the route with it. */
+       err = ip_route_newports(&rt, inet->sport, inet->dport, sk);
+       if (err != 0)
+               goto failure;
+
+       /* OK, now commit destination to socket.  */
+       sk_setup_caps(sk, &rt->u.dst);
+
+       /* Initial sequence number; GAR starts out equal to ISS. */
+       dp->dccps_gar =
+               dp->dccps_iss = secure_dccp_sequence_number(inet->saddr,
+                                                           inet->daddr,
+                                                           inet->sport,
+                                                           usin->sin_port);
+       dccp_update_gss(sk, dp->dccps_iss);
+
+       inet->id = dp->dccps_iss ^ jiffies;
+
+       err = dccp_connect(sk);
+       /*
+        * rt is cleared so that the failure path below calls ip_rt_put() with
+        * NULL - presumably because the route reference now belongs to the
+        * socket after sk_setup_caps(); confirm.
+        */
+       rt = NULL;
+       if (err != 0)
+               goto failure;
+out:
+       return err;
+failure:
+       /* This unhashes the socket and releases the local port, if necessary. */
+       dccp_set_state(sk, DCCP_CLOSED);
+       ip_rt_put(rt);
+       sk->sk_route_caps = 0;
+       inet->dport = 0;
+       goto out;
+}
+
+/*
+ * Path MTU discovery as defined in RFC1191.
+ *
+ * @sk:  socket the ICMP "fragmentation needed" error was matched to
+ * @iph: IP header quoted in the ICMP error (not used here)
+ * @mtu: MTU reported by the ICMP error
+ *
+ * Updates the cached route's PMTU and, if the socket's idea of the path MTU
+ * is now too large, resynchronises the MSS and sends a DCCP-Sync.
+ */
+static inline void dccp_do_pmtu_discovery(struct sock *sk,
+                                         const struct iphdr *iph,
+                                         u32 mtu)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       const struct inet_sock *inet = inet_sk(sk);
+       struct dst_entry *dst;
+
+       /*
+        * DCCP_LISTEN sockets and request_socks are of no interest here:
+        * RESPONSEs sent out by Linux are always < 576 bytes, so they should
+        * go through unfragmented.
+        */
+       if (sk->sk_state == DCCP_LISTEN)
+               return;
+
+       /*
+        * The dst entry is not consulted on whether pmtu discovery is
+        * forbidden for this route: it is simply assumed that no
+        * packet-too-big errors come back when pmtu discovery is inactive.
+        * There is a small race when the user changes this flag in the
+        * route, which is considered acceptable.
+        */
+       dst = __sk_dst_check(sk, 0);
+       if (dst == NULL)
+               return;
+
+       dst->ops->update_pmtu(dst, mtu);
+
+       /*
+        * Trouble ahead... remember a soft error in case this connection
+        * turns out to be unable to recover.
+        */
+       if (mtu < dst_mtu(dst) && ip_dont_fragment(sk, dst))
+               sk->sk_err_soft = EMSGSIZE;
+
+       mtu = dst_mtu(dst);
+
+       /* Nothing to shrink: let the usual retransmit timer handle it. */
+       if (inet->pmtudisc == IP_PMTUDISC_DONT ||
+           dp->dccps_pmtu_cookie <= mtu)
+               return;
+
+       dccp_sync_mss(sk, mtu);
+
+       /*
+        * From: draft-ietf-dccp-spec-11.txt
+        *
+        *      DCCP-Sync packets are the best choice for upward probing,
+        *      since DCCP-Sync probes do not risk application data loss.
+        */
+       dccp_send_sync(sk, dp->dccps_gsr);
+}
+
+/*
+ * Send a bare DCCP-Ack from the control socket in reply to @rxskb.
+ *
+ * The reply swaps the ports of the received packet, uses the received
+ * acknowledgement number as its sequence number and acknowledges the
+ * received sequence number. Nothing is sent unless the received packet was
+ * routed to a local address, and allocation failure is silently ignored.
+ *
+ * Fixes relative to the previous version:
+ *  - the header checksum is now actually filled in (the code only said so
+ *    in a comment, unlike dccp_v4_ctl_send_reset()), so the peer no longer
+ *    receives an Ack with a zero checksum field;
+ *  - a sent Ack is no longer accounted as a sent Reset (DCCP_MIB_OUTRSTS).
+ *
+ * NOTE(review): the reply is transmitted on a clone of the received packet's
+ * dst, whereas dccp_v4_ctl_send_reset() looks up an output route first;
+ * confirm which of the two is intended.
+ */
+static void dccp_v4_ctl_send_ack(struct sk_buff *rxskb)
+{
+       int err;
+       struct dccp_hdr *rxdh = dccp_hdr(rxskb), *dh;
+       const int dccp_hdr_ack_len = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_ack_bits);
+       struct sk_buff *skb;
+
+       if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+               return;
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+       if (skb == NULL)
+               return;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+
+       skb->dst = dst_clone(rxskb->dst);
+
+       skb->h.raw = skb_push(skb, dccp_hdr_ack_len);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_hdr_ack_len);
+
+       /* Build DCCP header and checksum it. */
+       dh->dccph_type     = DCCP_PKT_ACK;
+       dh->dccph_sport    = rxdh->dccph_dport;
+       dh->dccph_dport    = rxdh->dccph_sport;
+       dh->dccph_doff     = dccp_hdr_ack_len / 4;
+       dh->dccph_x        = 1;
+
+       dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+       /*
+        * The header is complete and the checksum field is still zero from
+        * the memset above, as the computation requires.
+        */
+       dh->dccph_checksum = dccp_v4_checksum(skb);
+
+       bh_lock_sock(dccp_ctl_socket->sk);
+       err = ip_build_and_send_pkt(skb, dccp_ctl_socket->sk,
+                                   rxskb->nh.iph->daddr, rxskb->nh.iph->saddr, NULL);
+       bh_unlock_sock(dccp_ctl_socket->sk);
+
+       /* Congestion notification still means the packet was queued. */
+       if (err == NET_XMIT_CN || err == 0)
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+}
+
+/*
+ * Acknowledge @skb on behalf of a pending connection request.
+ * @req is not used: the reply is built from @skb alone by
+ * dccp_v4_ctl_send_ack().
+ */
+static void dccp_v4_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
+{
+       dccp_v4_ctl_send_ack(skb);
+}
+
+/*
+ * Build and transmit the reply for connection request @req.
+ *
+ * @sk:  listening socket
+ * @req: pending connection request
+ * @dst: route to use, or NULL to look one up with inet_csk_route_req()
+ *
+ * The route reference (passed in or looked up) is always released before
+ * returning. Returns 0 on success (NET_XMIT_CN counts as success), -1 if no
+ * route or no packet could be obtained, or the error reported by
+ * ip_build_and_send_pkt().
+ */
+static int dccp_v4_send_response(struct sock *sk, struct request_sock *req,
+                                struct dst_entry *dst)
+{
+       const struct inet_request_sock *ireq;
+       struct sk_buff *response;
+       int err = -1;
+
+       /* First, grab a route. */
+       if (dst == NULL) {
+               dst = inet_csk_route_req(sk, req);
+               if (dst == NULL)
+                       goto out;
+       }
+
+       response = dccp_make_response(sk, dst, req);
+       if (response == NULL)
+               goto out;
+
+       ireq = inet_rsk(req);
+       err = ip_build_and_send_pkt(response, sk, ireq->loc_addr,
+                                   ireq->rmt_addr, ireq->opt);
+       if (err == NET_XMIT_CN)
+               err = 0;
+
+out:
+       dst_release(dst);
+       return err;
+}
+
+/*
+ * This routine is called by the ICMP module when it gets some sort of error
+ * condition. If err < 0 then the socket should be closed and the error
+ * returned to the user. If err > 0 it's just the icmp type << 8 | icmp code.
+ * After adjustment header points to the first 8 bytes of the transport
+ * (here: DCCP) header quoted in the ICMP error. We need to find the
+ * appropriate port.
+ *
+ * @skb:  ICMP error; skb->data points at the quoted IP header and the ICMP
+ *        type/code are read from skb->h.icmph
+ * @info: passed on as the new MTU for ICMP_FRAG_NEEDED errors
+ *
+ * The locking strategy used here is very "optimistic". When someone else
+ * accesses the socket the ICMP is just dropped and for some paths there is no
+ * check at all. A more general error queue to queue errors for later handling
+ * is probably better.
+ *
+ * NOTE(review): the length check only guarantees 8 bytes of quoted transport
+ * header, and the sequence number is fetched with dccp_hdr_seq(skb) rather
+ * than through dh; confirm that this reads the quoted DCCP header and that
+ * the sequence number lies within the validated bytes.
+ */
+void dccp_v4_err(struct sk_buff *skb, u32 info)
+{
+       const struct iphdr *iph = (struct iphdr *)skb->data;
+       const struct dccp_hdr *dh = (struct dccp_hdr *)(skb->data + (iph->ihl << 2));
+       struct dccp_sock *dp;
+       struct inet_sock *inet;
+       const int type = skb->h.icmph->type;
+       const int code = skb->h.icmph->code;
+       struct sock *sk;
+       __u64 seq;
+       int err;
+
+       /* Quoted packet too short to hold 8 bytes of transport header. */
+       if (skb->len < (iph->ihl << 2) + 8) {
+               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               return;
+       }
+
+       /* Find the socket that sent the offending packet. */
+       sk = inet_lookup(&dccp_hashinfo, iph->daddr, dh->dccph_dport,
+                        iph->saddr, dh->dccph_sport, inet_iif(skb));
+       if (sk == NULL) {
+               ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
+               return;
+       }
+
+       /* TIME-WAIT sockets only need their reference dropped. */
+       if (sk->sk_state == DCCP_TIME_WAIT) {
+               inet_twsk_put((struct inet_timewait_sock *)sk);
+               return;
+       }
+
+       bh_lock_sock(sk);
+       /* If too many ICMPs get dropped on busy
+        * servers this needs to be solved differently.
+        */
+       /* Only counted here; the individual paths below decide what to skip. */
+       if (sock_owned_by_user(sk))
+               NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
+
+       if (sk->sk_state == DCCP_CLOSED)
+               goto out;
+
+       dp = dccp_sk(sk);
+       seq = dccp_hdr_seq(skb);
+       /* Ignore errors quoting a sequence number outside [SWL, SWH]. */
+       if (sk->sk_state != DCCP_LISTEN &&
+           !between48(seq, dp->dccps_swl, dp->dccps_swh)) {
+               NET_INC_STATS(LINUX_MIB_OUTOFWINDOWICMPS);
+               goto out;
+       }
+
+       /* Translate the ICMP type/code into an errno value. */
+       switch (type) {
+       case ICMP_SOURCE_QUENCH:
+               /* Just silently ignore these. */
+               goto out;
+       case ICMP_PARAMETERPROB:
+               err = EPROTO;
+               break;
+       case ICMP_DEST_UNREACH:
+               if (code > NR_ICMP_UNREACH)
+                       goto out;
+
+               if (code == ICMP_FRAG_NEEDED) { /* PMTU discovery (RFC1191) */
+                       if (!sock_owned_by_user(sk))
+                               dccp_do_pmtu_discovery(sk, iph, info);
+                       goto out;
+               }
+
+               err = icmp_err_convert[code].errno;
+               break;
+       case ICMP_TIME_EXCEEDED:
+               err = EHOSTUNREACH;
+               break;
+       default:
+               goto out;
+       }
+
+       switch (sk->sk_state) {
+               struct request_sock *req , **prev;
+       case DCCP_LISTEN:
+               if (sock_owned_by_user(sk))
+                       goto out;
+               /* Look for the connection request the error refers to. */
+               req = inet_csk_search_req(sk, &prev, dh->dccph_dport,
+                                         iph->daddr, iph->saddr);
+               if (!req)
+                       goto out;
+
+               /*
+                * ICMPs are not backlogged, hence we cannot get an established
+                * socket here.
+                */
+               BUG_TRAP(!req->sk);
+
+               /* The error must quote the ISS we chose for this request. */
+               if (seq != dccp_rsk(req)->dreq_iss) {
+                       NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
+                       goto out;
+               }
+               /*
+                * Still in RESPOND, just remove it silently.
+                * There is no good way to pass the error to the newly
+                * created socket, and POSIX does not want network
+                * errors returned from accept().
+                */
+               inet_csk_reqsk_queue_drop(sk, req, prev);
+               goto out;
+
+       case DCCP_REQUESTING:
+       case DCCP_RESPOND:
+               /* Connection attempt failed: report a hard error and finish. */
+               if (!sock_owned_by_user(sk)) {
+                       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+                       sk->sk_err = err;
+
+                       sk->sk_error_report(sk);
+
+                       dccp_done(sk);
+               } else
+                       sk->sk_err_soft = err;
+               goto out;
+       }
+
+       /* If we've already connected we will keep trying
+        * until we time out, or the user gives up.
+        *
+        * rfc1122 4.2.3.9 allows to consider as hard errors
+        * only PROTO_UNREACH and PORT_UNREACH (well, FRAG_FAILED too,
+        * but it is obsoleted by pmtu discovery).
+        *
+        * Note, that in modern internet, where routing is unreliable
+        * and in each dark corner broken firewalls sit, sending random
+        * errors ordered by their masters even this two messages finally lose
+        * their original sense (even Linux sends invalid PORT_UNREACHs)
+        *
+        * Now we are in compliance with RFCs.
+        *                                                      --ANK (980905)
+        */
+
+       inet = inet_sk(sk);
+       if (!sock_owned_by_user(sk) && inet->recverr) {
+               sk->sk_err = err;
+               sk->sk_error_report(sk);
+       } else /* Only an error on timeout */
+               sk->sk_err_soft = err;
+out:
+       bh_unlock_sock(sk);
+       /* Drop the socket reference held since the lookup above. */
+       sock_put(sk);
+}
+
+extern struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst, enum dccp_reset_codes code);
+
+/*
+ * Send a DCCP-Reset carrying @code on connection @sk.
+ *
+ * The IP header information is rebuilt first; if that fails its error is
+ * returned and nothing is sent. After the packet has been handed to IP the
+ * receiver and sender CCIDs of the socket are shut down.
+ *
+ * Returns 0 on success (NET_XMIT_CN counts as success, and so does the case
+ * where no reset packet could be built) or an error code otherwise.
+ */
+int dccp_v4_send_reset(struct sock *sk, enum dccp_reset_codes code)
+{
+       const struct inet_sock *inet;
+       const struct dccp_sock *dp;
+       struct sk_buff *skb;
+       int err;
+
+       /*
+        * FIXME: what if rebuild_header fails?
+        * Should we be doing a rebuild_header here?
+        */
+       err = inet_sk_rebuild_header(sk);
+       if (err != 0)
+               return err;
+
+       skb = dccp_make_reset(sk, sk->sk_dst_cache, code);
+       if (skb == NULL)
+               return err;
+
+       inet = inet_sk(sk);
+       dp = dccp_sk(sk);
+
+       err = ip_build_and_send_pkt(skb, sk, inet->saddr, inet->daddr, NULL);
+       if (err == NET_XMIT_CN)
+               err = 0;
+
+       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+
+       return err;
+}
+
+/*
+ * Choose the initial sequence number for the reply to received packet @skb.
+ * Addresses and ports are passed from our point of view, i.e. the packet's
+ * destination comes first. @sk is not used.
+ */
+static inline u64 dccp_v4_init_sequence(const struct sock *sk,
+                                       const struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+
+       return secure_dccp_sequence_number(iph->daddr, iph->saddr,
+                                          dh->dccph_dport, dh->dccph_sport);
+}
+
+/*
+ * Handle a DCCP-Request received on listening socket @sk.
+ *
+ * Allocates a request sock, fills it in from @skb, sends the response and
+ * queues the request with DCCP_TIMEOUT_INIT.
+ *
+ * Returns 0 if the request was queued and -1 if it was dropped, in which
+ * case DCCP_MIB_ATTEMPTFAILS is incremented.
+ *
+ * NOTE(review): dp is an uninitialised on-stack struct dccp_sock that is only
+ * handed to dccp_openreq_init(); confirm that helper never reads it.
+ */
+int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct rtable *rt = (struct rtable *)skb->dst;
+       struct dst_entry *dst = NULL;
+       struct inet_request_sock *ireq;
+       struct dccp_request_sock *dreq;
+       struct request_sock *req;
+       struct dccp_sock dp;
+
+       /* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
+       if (rt->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
+               goto drop;
+
+       /*
+        * TW buckets are converted to open requests without
+        * limitations, they conserve resources and peer is
+        * evidently real one.
+        */
+       if (inet_csk_reqsk_queue_is_full(sk))
+               goto drop;
+
+       /*
+        * Accept backlog is full. If we have already queued enough
+        * of warm entries in syn queue, drop request. It is better than
+        * clogging syn queue with openreqs with exponentially increasing
+        * timeout.
+        */
+       if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
+               goto drop;
+
+       req = reqsk_alloc(sk->sk_prot->rsk_prot);
+       if (req == NULL)
+               goto drop;
+
+       /* FIXME: process options */
+
+       dccp_openreq_init(req, &dp, skb);
+
+       /* Our address is the packet's destination, the peer's its source. */
+       ireq = inet_rsk(req);
+       ireq->loc_addr = iph->daddr;
+       ireq->rmt_addr = iph->saddr;
+       ireq->opt      = NULL;
+       /* FIXME: Merge Aristeu's option parsing code when ready */
+       req->rcv_wnd   = 100; /* Fake, option parsing will get the right value */
+
+       /*
+        * Step 3: Process LISTEN state
+        *
+        * Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *
+        * In fact we defer setting S.GSR, S.SWL, S.SWH to
+        * dccp_create_openreq_child.
+        */
+       dreq = dccp_rsk(req);
+       dreq->dreq_isr     = DCCP_SKB_CB(skb)->dccpd_seq;
+       dreq->dreq_iss     = dccp_v4_init_sequence(sk, skb);
+       dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
+
+       if (dccp_v4_send_response(sk, req, dst) == 0) {
+               inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+               return 0;
+       }
+
+       /*
+        * FIXME: should be reqsk_free after implementing req->rsk_ops
+        */
+       __reqsk_free(req);
+drop:
+       DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
+       return -1;
+}
+
+/*
+ * The three way handshake has completed - we got a valid ACK or DATAACK -
+ * now create the new socket.
+ *
+ * @sk:  listening socket
+ * @skb: packet that completed the handshake
+ * @req: the pending connection request
+ * @dst: route for the new connection, or NULL to look one up
+ *
+ * Returns the new, hashed child socket, or NULL if the accept queue is full,
+ * no route is available or the child could not be created. On failure the
+ * route reference is released and the listen drop counters are updated.
+ *
+ * This is the equivalent of TCP's tcp_v4_syn_recv_sock
+ */
+struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb,
+                                      struct request_sock *req,
+                                      struct dst_entry *dst)
+{
+       struct inet_request_sock *ireq;
+       struct inet_sock *newinet;
+       struct dccp_sock *newdp;
+       struct sock *newsk;
+
+       if (sk_acceptq_is_full(sk)) {
+               NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
+               goto drop;
+       }
+
+       if (dst == NULL) {
+               dst = inet_csk_route_req(sk, req);
+               if (dst == NULL)
+                       goto drop;
+       }
+
+       newsk = dccp_create_openreq_child(sk, req, skb);
+       if (newsk == NULL)
+               goto drop;
+
+       sk_setup_caps(newsk, dst);
+
+       newdp   = dccp_sk(newsk);
+       newinet = inet_sk(newsk);
+       ireq    = inet_rsk(req);
+
+       /* Copy the connection identity from the request to the child. */
+       newinet->daddr     = ireq->rmt_addr;
+       newinet->rcv_saddr = ireq->loc_addr;
+       newinet->saddr     = ireq->loc_addr;
+
+       /* The IP options now belong to the child socket. */
+       newinet->opt = ireq->opt;
+       ireq->opt    = NULL;
+
+       newinet->mc_index = inet_iif(skb);
+       newinet->mc_ttl   = skb->nh.iph->ttl;
+       newinet->id       = jiffies;
+
+       dccp_sync_mss(newsk, dst_mtu(dst));
+
+       __inet_hash(&dccp_hashinfo, newsk, 0);
+       __inet_inherit_port(&dccp_hashinfo, sk, newsk);
+
+       return newsk;
+
+drop:
+       NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
+       dst_release(dst);
+       return NULL;
+}
+
+/*
+ * Find the socket that should process @skb, received on listening socket @sk.
+ *
+ * Returns:
+ *  - the result of dccp_check_req() if a pending connection request matches;
+ *  - a matching established socket, returned with bh_lock_sock() held;
+ *  - NULL if the match is a TIME-WAIT socket (its reference is dropped);
+ *  - @sk itself if nothing else matches.
+ */
+static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       struct request_sock **prev;
+       struct request_sock *req;
+       struct sock *nsk;
+
+       /* Find possible connection requests. */
+       req = inet_csk_search_req(sk, &prev, dh->dccph_sport,
+                                 iph->saddr, iph->daddr);
+       if (req != NULL)
+               return dccp_check_req(sk, skb, req, prev);
+
+       nsk = __inet_lookup_established(&dccp_hashinfo,
+                                       iph->saddr, dh->dccph_sport,
+                                       iph->daddr, ntohs(dh->dccph_dport),
+                                       inet_iif(skb));
+       if (nsk == NULL)
+               return sk;
+
+       if (nsk->sk_state == DCCP_TIME_WAIT) {
+               inet_twsk_put((struct inet_timewait_sock *)nsk);
+               return NULL;
+       }
+
+       bh_lock_sock(nsk);
+       return nsk;
+}
+
+/*
+ * Compute the folded checksum over the DCCP packet in @skb.
+ *
+ * With dccph_cscov == 0 the whole of skb->len is covered; otherwise
+ * (dccph_cscov + dccph_x) 32-bit words are covered, capped at skb->len.
+ * The checksum field itself is summed as found, so callers must zero it
+ * beforehand if it is to be excluded.
+ */
+int dccp_v4_checksum(struct sk_buff *skb)
+{
+       struct dccp_hdr *dh = dccp_hdr(skb);
+       int checksum_len = skb->len;
+
+       if (dh->dccph_cscov != 0) {
+               checksum_len = (dh->dccph_cscov + dh->dccph_x) * sizeof(u32);
+               if (checksum_len >= skb->len)
+                       checksum_len = skb->len;
+       }
+
+       return csum_fold(csum_partial((unsigned char *)dh, checksum_len, 0));
+}
+
+/*
+ * Check the checksum carried in the DCCP header of @skb.
+ *
+ * The checksum field is temporarily zeroed in place for the computation and
+ * restored afterwards. Returns 0 if the checksum matches, -1 otherwise.
+ */
+static int dccp_v4_verify_checksum(struct sk_buff *skb)
+{
+       struct dccp_hdr *dh = dccp_hdr(skb);
+       const u16 received = dh->dccph_checksum;
+       u16 computed;
+
+       /* FIXME: don't mess with skb payload */
+       dh->dccph_checksum = 0; /* zero it for computation */
+       computed = dccp_v4_checksum(skb);
+       /* FIXME: don't mess with skb payload */
+       dh->dccph_checksum = received; /* put it back */
+
+       if (received != computed)
+               return -1;
+
+       return 0;
+}
+
+/*
+ * Look up an output route for a reply to received packet @skb.
+ *
+ * The flow is the reverse of the received packet: addresses and ports are
+ * swapped and the interface the packet came in on is used as output
+ * interface. Returns the route's dst entry, or NULL (after counting
+ * IPSTATS_MIB_OUTNOROUTES) if no route is found.
+ */
+static struct dst_entry *dccp_v4_route_skb(struct sock *sk,
+                                          struct sk_buff *skb)
+{
+       const struct iphdr *iph = skb->nh.iph;
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       struct rtable *rt;
+       struct flowi fl = {
+               .oif = ((struct rtable *)skb->dst)->rt_iif,
+               .nl_u = {
+                       .ip4_u = {
+                               .daddr = iph->saddr,
+                               .saddr = iph->daddr,
+                               .tos = RT_CONN_FLAGS(sk),
+                       },
+               },
+               .proto = sk->sk_protocol,
+               .uli_u = {
+                       .ports = {
+                               .sport = dh->dccph_dport,
+                               .dport = dh->dccph_sport,
+                       },
+               },
+       };
+
+       if (ip_route_output_flow(&rt, &fl, sk, 0) != 0) {
+               IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
+               return NULL;
+       }
+
+       return &rt->u.dst;
+}
+
+/*
+ * Send a DCCP-Reset from the control socket in reply to @rxskb.
+ *
+ * Nothing is sent in response to a Reset or to a packet that was not routed
+ * to a local address. The reply swaps the received ports, carries the reset
+ * code stored in the received packet's control block, uses the received
+ * acknowledgement number as sequence number and acknowledges the received
+ * sequence number. Route lookup and allocation failures are silent.
+ */
+void dccp_v4_ctl_send_reset(struct sk_buff *rxskb)
+{
+       const int reset_len = sizeof(struct dccp_hdr) +
+                             sizeof(struct dccp_hdr_ext) +
+                             sizeof(struct dccp_hdr_reset);
+       struct dccp_hdr *rxdh = dccp_hdr(rxskb);
+       struct dccp_hdr *dh;
+       struct dst_entry *dst;
+       struct sk_buff *skb;
+       struct sock *ctl_sk;
+       int err;
+
+       /* Never send a reset in response to a reset. */
+       if (rxdh->dccph_type == DCCP_PKT_RESET)
+               return;
+
+       if (((struct rtable *)rxskb->dst)->rt_type != RTN_LOCAL)
+               return;
+
+       ctl_sk = dccp_ctl_socket->sk;
+
+       dst = dccp_v4_route_skb(ctl_sk, rxskb);
+       if (dst == NULL)
+               return;
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, GFP_ATOMIC);
+       if (skb == NULL)
+               goto out;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       /* The packet takes its own reference; ours is dropped at "out". */
+       skb->dst = dst_clone(dst);
+
+       skb->h.raw = skb_push(skb, reset_len);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, reset_len);
+
+       /* Build DCCP header and checksum it. */
+       dh->dccph_type  = DCCP_PKT_RESET;
+       dh->dccph_sport = rxdh->dccph_dport;
+       dh->dccph_dport = rxdh->dccph_sport;
+       dh->dccph_doff  = reset_len / 4;
+       dh->dccph_x     = 1;
+       dccp_hdr_reset(skb)->dccph_reset_code = DCCP_SKB_CB(rxskb)->dccpd_reset_code;
+
+       dccp_hdr_set_seq(dh, DCCP_SKB_CB(rxskb)->dccpd_ack_seq);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), DCCP_SKB_CB(rxskb)->dccpd_seq);
+
+       dh->dccph_checksum = dccp_v4_checksum(skb);
+
+       bh_lock_sock(ctl_sk);
+       err = ip_build_and_send_pkt(skb, ctl_sk, rxskb->nh.iph->daddr,
+                                   rxskb->nh.iph->saddr, NULL);
+       bh_unlock_sock(ctl_sk);
+
+       if (err == 0 || err == NET_XMIT_CN) {
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTSEGS);
+               DCCP_INC_STATS_BH(DCCP_MIB_OUTRSTS);
+       }
+out:
+       dst_release(dst);
+}
+
+/*
+ * Process one packet for @sk; also installed as the .backlog_rcv hook
+ * of dccp_v4_prot.
+ *
+ * Whenever the state specific handler reports a failure the packet is
+ * answered with Reset(No Connection) and freed here.  Always returns 0.
+ */
+int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_hdr *dh = dccp_hdr(skb);
+       int err;
+
+       /* Fast path: established connection */
+       if (sk->sk_state == DCCP_OPEN) {
+               err = dccp_rcv_established(sk, skb, dh, skb->len);
+               goto check;
+       }
+
+       /*
+        *  Step 3: Process LISTEN state
+        *     If S.state == LISTEN,
+        *        If P.type == Request or P contains a valid Init Cookie option,
+        *           * Must scan the packet's options to check for an Init
+        *              Cookie.  Only the Init Cookie is processed here,
+        *              however; other options are processed in Step 8.  This
+        *              scan need only be performed if the endpoint uses Init
+        *              Cookies *
+        *           * Generate a new socket and switch to that socket *
+        *           Set S := new socket for this port pair
+        *           S.state = RESPOND
+        *           Choose S.ISS (initial seqno) or set from Init Cookie
+        *           Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        *           Continue with S.state == RESPOND
+        *           * A Response packet will be generated in Step 11 *
+        *        Otherwise,
+        *           Generate Reset(No Connection) unless P.type == Reset
+        *           Drop packet and return
+        *
+        * NOTE: the check for the packet types is done in dccp_rcv_state_process
+        */
+       if (sk->sk_state == DCCP_LISTEN) {
+               struct sock *nsk = dccp_v4_hnd_req(sk, skb);
+
+               if (nsk == NULL)
+                       goto discard;
+
+               /* A different socket came back: hand the packet to it */
+               if (nsk != sk) {
+                       err = dccp_child_process(sk, nsk, skb);
+                       goto check;
+               }
+       }
+
+       err = dccp_rcv_state_process(sk, skb, dh, skb->len);
+check:
+       if (err == 0)
+               return 0;
+
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+       dccp_v4_ctl_send_reset(skb);
+discard:
+       kfree_skb(skb);
+       return 0;
+}
+
+/*
+ * Step 1 header sanity checks.
+ *
+ * Returns 1 when @skb has to be dropped (not addressed to this host,
+ * truncated, unknown packet type, bad data offset, short sequence
+ * numbers on a packet type that does not allow them, bad checksum),
+ * 0 when the header is acceptable.
+ */
+static inline int dccp_invalid_packet(struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh;
+
+       if (skb->pkt_type != PACKET_HOST)
+               return 1;
+
+       /* The generic header has to be in the linear area before we look */
+       if (!pskb_may_pull(skb, sizeof(struct dccp_hdr))) {
+               dccp_pr_debug("pskb_may_pull failed\n");
+               return 1;
+       }
+
+       dh = dccp_hdr(skb);
+
+       /* Packet types we do not understand are dropped */
+       if (dh->dccph_type >= DCCP_PKT_INVALID) {
+               dccp_pr_debug("invalid packet type\n");
+               return 1;
+       }
+
+       /* P.Data Offset must cover at least the header of this packet type */
+       if (dh->dccph_doff < dccp_hdr_len(skb) / sizeof(u32)) {
+               dccp_pr_debug("Offset(%u) too small 1\n", dh->dccph_doff);
+               return 1;
+       }
+
+       /* ... and must not reach beyond the data we actually received */
+       if (!pskb_may_pull(skb, dh->dccph_doff * sizeof(u32))) {
+               dccp_pr_debug("P.Data Offset(%u) too small 2\n", dh->dccph_doff);
+               return 1;
+       }
+
+       /* Fetch the header again after the second pull */
+       dh = dccp_hdr(skb);
+
+       /*
+        * Short sequence numbers (P.X == 0) are only acceptable on
+        * Data, Ack and DataAck packets
+        */
+       if (dh->dccph_x == 0) {
+               switch (dh->dccph_type) {
+               case DCCP_PKT_DATA:
+               case DCCP_PKT_ACK:
+               case DCCP_PKT_DATAACK:
+                       break;
+               default:
+                       dccp_pr_debug("P.type (%s) not Data, Ack nor DataAck and P.X == 0\n",
+                                     dccp_packet_name(dh->dccph_type));
+                       return 1;
+               }
+       }
+
+       /* Finally, the header checksum has to be correct */
+       if (dccp_v4_verify_checksum(skb) < 0) {
+               dccp_pr_debug("header checksum is incorrect\n");
+               return 1;
+       }
+
+       return 0;
+}
+
+/*
+ * Receive handler for incoming IPv4 DCCP packets.
+ *
+ * Packets rejected by dccp_invalid_packet() are dropped.  For the others
+ * the sequence number, the packet type and, when the packet carries one,
+ * the acknowledgement number are stored in the skb control block before
+ * the owning socket is looked up.  Without a socket a Reset(No Connection)
+ * is generated unless the packet is itself a Reset.  With a socket the
+ * packet goes to dccp_v4_do_rcv(), or onto the socket backlog while the
+ * socket is owned by a user context.
+ *
+ * Returns the result of dccp_v4_do_rcv() when that is called, 0 otherwise.
+ */
+int dccp_v4_rcv(struct sk_buff *skb)
+{
+       const struct dccp_hdr *dh;
+       struct sock *sk;
+       int rc;
+
+       /* Step 1: Check header basics: */
+
+       if (dccp_invalid_packet(skb))
+               goto discard_it;
+
+       dh = dccp_hdr(skb);
+#if 0
+       /*
+        * Use something like this to simulate some DATA/DATAACK loss to test
+        * dccp_ackpkts_add, you'll get something like this on a session that
+        * sends 10 DATA/DATAACK packets:
+        *
+        * dccp_ackpkts_print: 281473596467422 |0,0|3,0|0,0|3,0|0,0|3,0|0,0|3,0|0,1|
+        *
+        * 0, 0 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == just this packet
+        * 0, 1 means: DCCP_ACKPKTS_STATE_RECEIVED, RLE == two adjacent packets with the same state
+        * 3, 0 means: DCCP_ACKPKTS_STATE_NOT_RECEIVED, RLE == just this packet
+        *
+        * So...
+        *
+        * 281473596467422 was received
+        * 281473596467421 was not received
+        * 281473596467420 was received
+        * 281473596467419 was not received
+        * 281473596467418 was received
+        * 281473596467417 was not received
+        * 281473596467416 was received
+        * 281473596467415 was not received
+        * 281473596467414 was received
+        * 281473596467413 was received (this one was the 3way handshake RESPONSE)
+        *
+        */
+       if (dh->dccph_type == DCCP_PKT_DATA || dh->dccph_type == DCCP_PKT_DATAACK) {
+               static int discard = 0;
+
+               if (discard) {
+                       discard = 0;
+                       goto discard_it;
+               }
+               discard = 1;
+       }
+#endif
+       /* Cache seqno and type in the control block for the later steps */
+       DCCP_SKB_CB(skb)->dccpd_seq  = dccp_hdr_seq(skb);
+       DCCP_SKB_CB(skb)->dccpd_type = dh->dccph_type;
+
+       dccp_pr_debug("%8.8s "
+                     "src=%u.%u.%u.%u@%-5d "
+                     "dst=%u.%u.%u.%u@%-5d seq=%llu",
+                     dccp_packet_name(dh->dccph_type),
+                     NIPQUAD(skb->nh.iph->saddr), ntohs(dh->dccph_sport),
+                     NIPQUAD(skb->nh.iph->daddr), ntohs(dh->dccph_dport),
+                     DCCP_SKB_CB(skb)->dccpd_seq);
+
+       /* Packets without an ack field get a marker value instead of an ackno */
+       if (dccp_packet_without_ack(skb)) {
+               DCCP_SKB_CB(skb)->dccpd_ack_seq = DCCP_PKT_WITHOUT_ACK_SEQ;
+               dccp_pr_debug_cat("\n");
+       } else {
+               DCCP_SKB_CB(skb)->dccpd_ack_seq = dccp_hdr_ack_seq(skb);
+               dccp_pr_debug_cat(", ack=%llu\n", DCCP_SKB_CB(skb)->dccpd_ack_seq);
+       }
+
+       /* Step 2:
+        *      Look up flow ID in table and get corresponding socket */
+       sk = __inet_lookup(&dccp_hashinfo,
+                          skb->nh.iph->saddr, dh->dccph_sport,
+                          skb->nh.iph->daddr, ntohs(dh->dccph_dport),
+                          inet_iif(skb));
+
+       /*
+        * Step 2:
+        *      If no socket ...
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        */
+       if (sk == NULL) {
+               dccp_pr_debug("failed to look up flow ID in table and "
+                             "get corresponding socket\n");
+               goto no_dccp_socket;
+       }
+
+       /*
+        * Step 2:
+        *      ... or S.state == TIMEWAIT,
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        *
+        * NOTE(review): the code below only drops the packet and releases
+        * the socket, no Reset is generated on this path.
+        */
+              
+       if (sk->sk_state == DCCP_TIME_WAIT) {
+               dccp_pr_debug("sk->sk_state == DCCP_TIME_WAIT: discard_and_relse\n");
+                goto discard_and_relse;
+       }
+
+       if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
+               dccp_pr_debug("xfrm4_policy_check failed\n");
+               goto discard_and_relse;
+       }
+
+        if (sk_filter(sk, skb, 0)) {
+               dccp_pr_debug("sk_filter failed\n");
+                goto discard_and_relse;
+       }
+
+       skb->dev = NULL;
+
+       /*
+        * Process the packet right away unless a user context owns the
+        * socket, in which case it is queued on the backlog
+        */
+       bh_lock_sock(sk);
+       rc = 0;
+       if (!sock_owned_by_user(sk))
+               rc = dccp_v4_do_rcv(sk, skb);
+       else
+               sk_add_backlog(sk, skb);
+       bh_unlock_sock(sk);
+
+       /* presumably drops the reference obtained by __inet_lookup() */
+       sock_put(sk);
+       return rc;
+
+no_dccp_socket:
+       if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
+               goto discard_it;
+       /*
+        * Step 2:
+        *              Generate Reset(No Connection) unless P.type == Reset
+        *              Drop packet and return
+        */
+       if (dh->dccph_type != DCCP_PKT_RESET) {
+               DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
+               dccp_v4_ctl_send_reset(skb);
+       }
+
+discard_it:
+       /* Discard frame. */
+       kfree_skb(skb);
+       return 0;
+
+discard_and_relse:
+       sock_put(sk);
+       goto discard_it;
+}
+
+/*
+ * Socket initialisation hook (the .init member of dccp_v4_prot).
+ *
+ * Loads the default options, allocates the ack vector buffer when ack
+ * vectors are enabled and, for every socket but the very first one to be
+ * initialised (going by the flag name, the control socket), attaches the
+ * RX and TX CCIDs.
+ *
+ * Returns 0 on success or -ENOMEM when an allocation fails.
+ */
+static int dccp_v4_init_sock(struct sock *sk)
+{
+       /* Set only until the first socket went through this function */
+       static int dccp_ctl_socket_init = 1;
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dccp_options_init(&dp->dccps_options);
+
+       if (dp->dccps_options.dccpo_send_ack_vector) {
+               dp->dccps_hc_rx_ackpkts =
+                       dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN, GFP_KERNEL);
+               if (dp->dccps_hc_rx_ackpkts == NULL)
+                       return -ENOMEM;
+       }
+
+       /*
+        * FIXME: We're hardcoding the CCID, and doing this at this point makes
+        * the listening (master) sock get CCID control blocks, which is not
+        * necessary, but for now, to not mess with the test userspace apps,
+        * lets leave it here, later the real solution is to do this in a
+        * setsockopt(CCIDs-I-want/accept). -acme
+        */
+       if (likely(!dccp_ctl_socket_init)) {
+               dp->dccps_hc_rx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
+               dp->dccps_hc_tx_ccid = ccid_init(dp->dccps_options.dccpo_ccid, sk);
+               if (dp->dccps_hc_rx_ccid == NULL ||
+                   dp->dccps_hc_tx_ccid == NULL)
+                       goto out_no_ccid;
+       } else
+               dccp_ctl_socket_init = 0;
+
+       dccp_init_xmit_timers(sk);
+       dp->dccps_role = DCCP_ROLE_UNDEFINED;
+       dp->dccps_mss_cache = 536;
+       sk->sk_state = DCCP_CLOSED;
+
+       return 0;
+
+out_no_ccid:
+       /* Undo whatever was set up above before reporting the failure */
+       ccid_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_exit(dp->dccps_hc_tx_ccid, sk);
+       dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+       dp->dccps_hc_rx_ackpkts = NULL;
+       dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+       return -ENOMEM;
+}
+
+/*
+ * Socket teardown hook (the .destroy member of dccp_v4_prot): frees the
+ * pending retransmit skb, releases the bound port, and drops the ack
+ * vector buffer and both CCIDs.  Always returns 0.
+ */
+int dccp_v4_destroy_sock(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       /*
+        * DCCP doesn't use sk_write_queue, just sk_send_head
+        * for retransmissions
+        */
+       if (sk->sk_send_head != NULL) {
+               kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
+       }
+
+       /* Clean up a referenced DCCP bind bucket. */
+       if (inet_csk(sk)->icsk_bind_hash != NULL)
+               inet_put_port(&dccp_hashinfo, sk);
+
+       /* Ack vector state first ... */
+       dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
+       dp->dccps_hc_rx_ackpkts = NULL;
+
+       /* ... then both half-connection CCIDs */
+       ccid_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_exit(dp->dccps_hc_tx_ccid, sk);
+       dp->dccps_hc_tx_ccid = NULL;
+       dp->dccps_hc_rx_ccid = NULL;
+
+       return 0;
+}
+
+/*
+ * request_sock destructor (the .destructor member of
+ * dccp_request_sock_ops): frees the options buffer hanging off the
+ * inet part of @req.
+ */
+static void dccp_v4_reqsk_destructor(struct request_sock *req)
+{
+       kfree(inet_rsk(req)->opt);
+}
+
+/*
+ * Operations on pending IPv4 DCCP connection requests; hooked into
+ * dccp_v4_prot through .rsk_prot.
+ */
+static struct request_sock_ops dccp_request_sock_ops = {
+       .family         = PF_INET,
+       .obj_size       = sizeof(struct dccp_request_sock),
+       .rtx_syn_ack    = dccp_v4_send_response, /* used to resend the Response */
+       .send_ack       = dccp_v4_reqsk_send_ack,
+       .destructor     = dccp_v4_reqsk_destructor,
+       .send_reset     = dccp_v4_ctl_send_reset,
+};
+
+/*
+ * Protocol operations for IPv4 DCCP sockets.  Several hooks are defined
+ * in this file (init, destroy, backlog_rcv); the remaining dccp_* and
+ * inet_csk_* handlers are defined elsewhere.
+ */
+struct proto dccp_v4_prot = {
+       .name                   = "DCCP",
+       .owner                  = THIS_MODULE,
+       .close                  = dccp_close,
+       .connect                = dccp_v4_connect,
+       .disconnect             = dccp_disconnect,
+       .ioctl                  = dccp_ioctl,
+       .init                   = dccp_v4_init_sock,
+       .setsockopt             = dccp_setsockopt,
+       .getsockopt             = dccp_getsockopt,
+       .sendmsg                = dccp_sendmsg,
+       .recvmsg                = dccp_recvmsg,
+       .backlog_rcv            = dccp_v4_do_rcv,
+       .hash                   = dccp_v4_hash,
+       .unhash                 = dccp_v4_unhash,
+       .accept                 = inet_csk_accept,
+       .get_port               = dccp_v4_get_port,
+       .shutdown               = dccp_shutdown,
+       .destroy                = dccp_v4_destroy_sock,
+       .orphan_count           = &dccp_orphan_count,
+       .max_header             = MAX_DCCP_HEADER,
+       .obj_size               = sizeof(struct dccp_sock),
+       .rsk_prot               = &dccp_request_sock_ops,
+       .twsk_obj_size          = sizeof(struct inet_timewait_sock), /* FIXME! create dccp_timewait_sock */
+};
diff --git a/net/dccp/minisocks.c b/net/dccp/minisocks.c
new file mode 100644 (file)
index 0000000..810f0c2
--- /dev/null
@@ -0,0 +1,199 @@
+/*
+ *  net/dccp/minisocks.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+#include <linux/timer.h>
+
+#include <net/sock.h>
+#include <net/xfrm.h>
+#include <net/inet_timewait_sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Placeholder for moving @sk into TIME_WAIT handling.  For now it only
+ * logs a debug message and switches @sk to @state; @timeo is not used.
+ */
+void dccp_time_wait(struct sock *sk, int state, int timeo)
+{
+       /* FIXME: Implement */
+       dccp_pr_debug("Want to help? Start here\n");
+       dccp_set_state(sk, state);
+}
+
+/*
+ * This is for handling early-kills of TIME_WAIT sockets.
+ *
+ * Currently it just logs a debug message and kills @tw through
+ * __inet_twsk_kill() on the DCCP hash tables.
+ */
+void dccp_tw_deschedule(struct inet_timewait_sock *tw)
+{
+       dccp_pr_debug("Want to help? Start here\n");
+       __inet_twsk_kill(tw, &dccp_hashinfo);
+}
+
+/*
+ * Clone the listening socket @sk into a child socket for request @req.
+ *
+ * The child gets the server role, the initial retransmission timeout,
+ * its own ack vector buffer (when ack vectors are enabled), initialised
+ * RX/TX CCID state and the sequence numbers recorded in the request.
+ * @skb is not used.
+ *
+ * Returns the new socket, or NULL when the clone or one of the
+ * allocations/initialisations fails.
+ */
+struct sock *dccp_create_openreq_child(struct sock *sk,
+                                      const struct request_sock *req,
+                                      const struct sk_buff *skb)
+{
+       /*
+        * Step 3: Process LISTEN state
+        *
+        * // Generate a new socket and switch to that socket
+        * Set S := new socket for this port pair
+        */
+       struct sock *newsk = inet_csk_clone(sk, req, GFP_ATOMIC);
+       const struct dccp_request_sock *dreq;
+       struct inet_connection_sock *newicsk;
+       struct dccp_sock *newdp;
+
+       if (newsk == NULL)
+               return NULL;
+
+       dreq    = dccp_rsk(req);
+       /*
+        * This has to be the child's connection sock: taking it from the
+        * parent would set the RTO of the listening socket and leave the
+        * child with whatever value was copied by the clone.
+        */
+       newicsk = inet_csk(newsk);
+       newdp   = dccp_sk(newsk);
+
+       newdp->dccps_hc_rx_ackpkts = NULL;
+       newdp->dccps_role = DCCP_ROLE_SERVER;
+       newicsk->icsk_rto = TCP_TIMEOUT_INIT;
+
+       if (newdp->dccps_options.dccpo_send_ack_vector) {
+               newdp->dccps_hc_rx_ackpkts = dccp_ackpkts_alloc(DCCP_MAX_ACK_VECTOR_LEN,
+                                                               GFP_ATOMIC);
+               if (unlikely(newdp->dccps_hc_rx_ackpkts == NULL))
+                       goto out_free;
+       }
+
+       if (unlikely(ccid_hc_rx_init(newdp->dccps_hc_rx_ccid, newsk) != 0 ||
+                    ccid_hc_tx_init(newdp->dccps_hc_tx_ccid, newsk) != 0))
+               goto out_ccid_exit;
+
+       /*
+        * XXX: We're using the same CCIDs set on the parent, i.e. sk_clone
+        * copied the master sock and left the CCID pointers for this child,
+        * that is why we do the __ccid_get calls.
+        */
+       __ccid_get(newdp->dccps_hc_rx_ccid);
+       __ccid_get(newdp->dccps_hc_tx_ccid);
+
+       /*
+        * Step 3: Process LISTEN state
+        *
+        *      Choose S.ISS (initial seqno) or set from Init Cookie
+        *      Set S.ISR, S.GSR, S.SWL, S.SWH from packet or Init Cookie
+        */
+
+       /* See dccp_v4_conn_request */
+       newdp->dccps_options.dccpo_sequence_window = req->rcv_wnd;
+
+       newdp->dccps_gar = newdp->dccps_isr = dreq->dreq_isr;
+       dccp_update_gsr(newsk, dreq->dreq_isr);
+
+       newdp->dccps_iss = dreq->dreq_iss;
+       dccp_update_gss(newsk, dreq->dreq_iss);
+
+       dccp_init_xmit_timers(newsk);
+
+       DCCP_INC_STATS_BH(DCCP_MIB_PASSIVEOPENS);
+
+       return newsk;
+
+out_ccid_exit:
+       dccp_ackpkts_free(newdp->dccps_hc_rx_ackpkts);
+       ccid_hc_rx_exit(newdp->dccps_hc_rx_ccid, newsk);
+       ccid_hc_tx_exit(newdp->dccps_hc_tx_ccid, newsk);
+out_free:
+       /* It is still raw copy of parent, so invalidate
+        * destructor and make plain sk_free() */
+       newsk->sk_destruct = NULL;
+       sk_free(newsk);
+       return NULL;
+}
+
+/*
+ * Process an incoming packet for a RESPOND socket that is still
+ * represented by a request_sock.
+ *
+ * A retransmitted REQUEST triggers another RESPONSE; a matching ACK or
+ * DATAACK creates the child socket and moves @req onto the accept queue.
+ * Anything else is answered with a Reset (unless it is a Reset itself)
+ * and the request is dropped.
+ *
+ * Returns the child socket, or NULL when none was created.
+ */
+struct sock *dccp_check_req(struct sock *sk, struct sk_buff *skb,
+                           struct request_sock *req,
+                           struct request_sock **prev)
+{
+       struct dccp_request_sock *dreq = dccp_rsk(req);
+       struct sock *child;
+
+       if (dccp_hdr(skb)->dccph_type == DCCP_PKT_REQUEST) {
+               /*
+                * Only a REQUEST newer than the recorded ISR is answered
+                * with another RESPONSE, the rest are network duplicates
+                */
+               if (after48(DCCP_SKB_CB(skb)->dccpd_seq, dreq->dreq_isr)) {
+                       dccp_pr_debug("Retransmitted REQUEST\n");
+                       dccp_set_seqno(&dreq->dreq_iss, dreq->dreq_iss + 1);
+                       dccp_set_seqno(&dreq->dreq_isr, DCCP_SKB_CB(skb)->dccpd_seq);
+                       req->rsk_ops->rtx_syn_ack(sk, req, NULL);
+               }
+               return NULL;
+       }
+
+       /* Reset code used unless a more specific one is set further down */
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
+
+       /* Only an ACK or a DATAACK can complete the handshake */
+       switch (dccp_hdr(skb)->dccph_type) {
+       case DCCP_PKT_ACK:
+       case DCCP_PKT_DATAACK:
+               break;
+       default:
+               goto drop;
+       }
+
+       /* The packet has to acknowledge exactly our initial seqno */
+       if (DCCP_SKB_CB(skb)->dccpd_ack_seq != dreq->dreq_iss) {
+               dccp_pr_debug("Invalid ACK number: ack_seq=%llu, dreq_iss=%llu\n",
+                             DCCP_SKB_CB(skb)->dccpd_ack_seq, dccp_rsk(req)->dreq_iss);
+               goto drop;
+       }
+
+       child = dccp_v4_request_recv_sock(sk, skb, req, NULL);
+       if (child == NULL) {
+               dccp_pr_debug("listen_overflow!\n");
+               DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_TOO_BUSY;
+               goto drop;
+       }
+
+       /* FIXME: deal with options */
+
+       /* Move the request from the pending list to the accept queue */
+       inet_csk_reqsk_queue_unlink(sk, req, prev);
+       inet_csk_reqsk_queue_removed(sk, req);
+       inet_csk_reqsk_queue_add(sk, req, child);
+       return child;
+
+drop:
+       if (dccp_hdr(skb)->dccph_type != DCCP_PKT_RESET)
+               req->rsk_ops->send_reset(skb);
+
+       inet_csk_reqsk_queue_drop(sk, req, prev);
+       return NULL;
+}
+
+/*
+ *  Feed @skb to the freshly created @child: processed immediately when
+ *  no user context owns the child, queued on its backlog otherwise.
+ *  The child is unlocked and its reference dropped before returning.
+ *
+ *  Returns the result of dccp_rcv_state_process(), or 0 when the packet
+ *  was queued.
+ */
+int dccp_child_process(struct sock *parent, struct sock *child,
+                      struct sk_buff *skb)
+{
+       const int old_state = child->sk_state;
+       int ret = 0;
+
+       if (sock_owned_by_user(child)) {
+               /* Alas, it is possible again, because we do lookup
+                * in main socket hash table and lock on listening
+                * socket does not protect us more.
+                */
+               sk_add_backlog(child, skb);
+       } else {
+               ret = dccp_rcv_state_process(child, skb, dccp_hdr(skb), skb->len);
+
+               /* The child left RESPOND: wake up the parent, send SIGIO */
+               if (old_state == DCCP_RESPOND && child->sk_state != old_state)
+                       parent->sk_data_ready(parent, 0);
+       }
+
+       bh_unlock_sock(child);
+       sock_put(child);
+       return ret;
+}
diff --git a/net/dccp/options.c b/net/dccp/options.c
new file mode 100644 (file)
index 0000000..e186776
--- /dev/null
@@ -0,0 +1,763 @@
+/*
+ *  net/dccp/options.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Aristeu Sergio Rozanski Filho <aris@cathedrallabs.org>
+ *  Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/*
+ * Forward declaration: dccp_parse_options() below hands every received
+ * Ack Vector option to this helper before its definition is seen.
+ */
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+                                            struct sock *sk,
+                                            const u64 ackno,
+                                            const unsigned char len,
+                                            const unsigned char *vector);
+
+/*
+ * Default option values for new connections, copied into a socket by
+ * dccp_options_init().
+ * NOTE(review): the object is const, so as written the values cannot be
+ * changed at run time (e.g. through sysctl) - confirm intent.
+ */
+static const struct dccp_options dccpo_default_values = {
+       .dccpo_sequence_window    = DCCPF_INITIAL_SEQUENCE_WINDOW,
+       .dccpo_ccid               = DCCPF_INITIAL_CCID,
+       .dccpo_send_ack_vector    = DCCPF_INITIAL_SEND_ACK_VECTOR,
+       .dccpo_send_ndp_count     = DCCPF_INITIAL_SEND_NDP_COUNT,
+};
+
+/* Load @dccpo with the compiled-in defaults from dccpo_default_values. */
+void dccp_options_init(struct dccp_options *dccpo)
+{
+       *dccpo = dccpo_default_values;
+}
+
+/*
+ * Decode a big-endian, variable length option value.
+ *
+ * @bf:  first byte of the value
+ * @len: number of value bytes; at most four bytes are consumed
+ *
+ * Returns the decoded value, 0 when @len is 0.
+ */
+static u32 dccp_decode_value_var(const unsigned char *bf, const u8 len)
+{
+       u32 value = 0;
+
+       /*
+        * Widen each byte before shifting: an unsigned char is promoted
+        * to (signed) int, and shifting a byte >= 0x80 left by 24 would
+        * overflow it.
+        */
+       if (len > 3)
+               value += (u32)*bf++ << 24;
+       if (len > 2)
+               value += (u32)*bf++ << 16;
+       if (len > 1)
+               value += (u32)*bf++ << 8;
+       if (len > 0)
+               value += *bf;
+
+       return value;
+}
+
+/*
+ * Parse the options carried by @skb on behalf of @sk.
+ *
+ * The option area runs from the end of the packet type specific header
+ * (dccp_hdr_len()) up to the data offset (dccph_doff * 4).  Results are
+ * stored in dp->dccps_options_received, which is cleared first; option
+ * numbers 128..255 are handed to the RX/TX CCIDs.
+ *
+ * Returns 0 on success.  On a malformed option it returns -1 after
+ * bumping DCCP_MIB_INVALIDOPT and setting the reset code in the skb
+ * control block to DCCP_RESET_CODE_OPTION_ERROR.
+ */
+int dccp_parse_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx opt: " :
+                                                                       "server rx opt: ";
+#endif
+       const struct dccp_hdr *dh = dccp_hdr(skb);
+       const u8 pkt_type = DCCP_SKB_CB(skb)->dccpd_type;
+       unsigned char *options = (unsigned char *)dh + dccp_hdr_len(skb);
+       unsigned char *opt_ptr = options;
+       const unsigned char *opt_end = (unsigned char *)dh + (dh->dccph_doff * 4);
+       struct dccp_options_received *opt_recv = &dp->dccps_options_received;
+       unsigned char opt, len;
+       unsigned char *value;
+
+       memset(opt_recv, 0, sizeof(*opt_recv));
+
+       while (opt_ptr != opt_end) {
+               opt   = *opt_ptr++;
+               len   = 0;
+               value = NULL;
+
+               /* Check if this isn't a single byte option */
+               if (opt > DCCPO_MAX_RESERVED) {
+                       if (opt_ptr == opt_end)
+                               goto out_invalid_option;
+
+                       /* The length byte counts type, length and value */
+                       len = *opt_ptr++;
+                       if (len < 3)
+                               goto out_invalid_option;
+                       /*
+                        * Remove the type and len fields, leaving
+                        * just the value size
+                        */
+                       len     -= 2;
+                       value   = opt_ptr;
+                       opt_ptr += len;
+
+                       /* The value must not run past the option area */
+                       if (opt_ptr > opt_end)
+                               goto out_invalid_option;
+               }
+
+               switch (opt) {
+               case DCCPO_PADDING:
+                       break;
+               case DCCPO_NDP_COUNT:
+                       if (len > 3)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_ndp = dccp_decode_value_var(value, len);
+                       dccp_pr_debug("%sNDP count=%d\n", debug_prefix, opt_recv->dccpor_ndp);
+                       break;
+               case DCCPO_ACK_VECTOR_0:
+                       if (len > DCCP_MAX_ACK_VECTOR_LEN)
+                               goto out_invalid_option;
+
+                       /* Not processed on Data packets, go to the next option */
+                       if (pkt_type == DCCP_PKT_DATA)
+                               continue;
+
+                       /* Remember where the vector sits relative to the option area */
+                       opt_recv->dccpor_ack_vector_len = len;
+                       opt_recv->dccpor_ack_vector_idx = value - options;
+
+                       dccp_pr_debug("%sACK vector 0, len=%d, ack_ackno=%llu\n",
+                                     debug_prefix, len, DCCP_SKB_CB(skb)->dccpd_ack_seq);
+                       dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                            value, len);
+                       dccp_ackpkts_check_rcv_ackvector(dp->dccps_hc_rx_ackpkts, sk,
+                                                        DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                                        len, value);
+                       break;
+               case DCCPO_TIMESTAMP:
+                       if (len != 4)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_timestamp = ntohl(*(u32 *)value);
+
+                       /* Keep the value and its arrival time in the socket */
+                       dp->dccps_timestamp_echo = opt_recv->dccpor_timestamp;
+                       dp->dccps_timestamp_time = jiffies;
+
+                       dccp_pr_debug("%sTIMESTAMP=%u, ackno=%llu\n",
+                                     debug_prefix, opt_recv->dccpor_timestamp,
+                                     DCCP_SKB_CB(skb)->dccpd_ack_seq);
+                       break;
+               case DCCPO_TIMESTAMP_ECHO:
+                       if (len < 4 || len > 8)
+                               goto out_invalid_option;
+
+                       opt_recv->dccpor_timestamp_echo = ntohl(*(u32 *)value);
+
+                       dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, ackno=%llu, diff=%u\n",
+                                     debug_prefix, opt_recv->dccpor_timestamp_echo,
+                                     len + 2, DCCP_SKB_CB(skb)->dccpd_ack_seq,
+                                     tcp_time_stamp - opt_recv->dccpor_timestamp_echo);
+
+                       /* Whatever follows the 4 byte timestamp is the elapsed time */
+                       opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value + 4, len - 4);
+                       dccp_pr_debug("%sTIMESTAMP_ECHO ELAPSED_TIME=%d\n", debug_prefix,
+                                     opt_recv->dccpor_elapsed_time);
+                       break;
+               case DCCPO_ELAPSED_TIME:
+                       if (len > 4)
+                               goto out_invalid_option;
+
+                       /* Not processed on Data packets, go to the next option */
+                       if (pkt_type == DCCP_PKT_DATA)
+                               continue;
+                       opt_recv->dccpor_elapsed_time = dccp_decode_value_var(value, len);
+                       dccp_pr_debug("%sELAPSED_TIME=%d\n", debug_prefix,
+                                     opt_recv->dccpor_elapsed_time);
+                       break;
+                       /*
+                        * From draft-ietf-dccp-spec-11.txt:
+                        *
+                        *      Option numbers 128 through 191 are for options sent from the HC-
+                        *      Sender to the HC-Receiver; option numbers 192 through 255 are for
+                        *      options sent from the HC-Receiver to the HC-Sender.
+                        */
+               case 128 ... 191: {
+                       const u16 idx = value - options;
+
+                       if (ccid_hc_rx_parse_options(dp->dccps_hc_rx_ccid, sk, opt, len, idx, value) != 0)
+                               goto out_invalid_option;
+               }
+                       break;
+               case 192 ... 255: {
+                       const u16 idx = value - options;
+
+                       if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, opt, len, idx, value) != 0)
+                               goto out_invalid_option;
+               }
+                       break;
+               default:
+                       pr_info("DCCP(%p): option %d(len=%d) not implemented, ignoring\n",
+                               sk, opt, len);
+                       break;
+               }
+       }
+
+       return 0;
+
+out_invalid_option:
+       DCCP_INC_STATS_BH(DCCP_MIB_INVALIDOPT);
+       DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_OPTION_ERROR;
+       pr_info("DCCP(%p): invalid option %d, len=%d\n", sk, opt, len);
+       return -1;
+}
+
+/*
+ * Store the low @len bytes of @value at @to, most significant byte
+ * first.  At most four bytes are written, none when @len is 0.
+ */
+static void dccp_encode_value_var(const u32 value, unsigned char *to,
+                                 const unsigned int len)
+{
+       unsigned int left = len > 4 ? 4 : len;
+
+       /* 'left' counts the bytes still to be written after this one */
+       while (left-- > 0)
+               *to++ = (value >> (8 * left)) & 0xFF;
+}
+
+/* Number of bytes (1 to 3) used to encode the NDP count @ndp. */
+static inline int dccp_ndp_len(const int ndp)
+{
+       if (likely(ndp <= 0xFF))
+               return 1;
+
+       return ndp <= 0xFFFF ? 2 : 3;
+}
+
+/*
+ * Prepend option @option with @len bytes of @value to @skb.
+ *
+ * The option takes @len + 2 bytes (type, length, value).  When that would
+ * push the option area beyond DCCP_MAX_OPT_LEN nothing is inserted and
+ * only a rate limited message is logged.  @sk is not used.
+ */
+void dccp_insert_option(struct sock *sk, struct sk_buff *skb,
+                       const unsigned char option,
+                       const void *value, const unsigned char len)
+{
+       const int opt_size = len + 2;
+       unsigned char *to;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_size > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert %d option!\n", option));
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += opt_size;
+
+       to    = skb_push(skb, opt_size);
+       to[0] = option;
+       to[1] = opt_size;
+
+       memcpy(to + 2, value, len);
+}
+
+EXPORT_SYMBOL_GPL(dccp_insert_option);
+
+/*
+ * Maintain the non-data packet counter of @sk and, when it was non-zero
+ * before this packet, prepend an NDP Count option carrying that value.
+ * The option is silently left out when it does not fit.
+ */
+static void dccp_insert_option_ndp(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       /* The option reports the count as it was before this packet */
+       const int ndp = dp->dccps_ndp_count;
+       unsigned char *ptr;
+       int ndp_len, len;
+
+       if (dccp_non_data_packet(skb))
+               ++dp->dccps_ndp_count;
+       else
+               dp->dccps_ndp_count = 0;
+
+       if (ndp <= 0)
+               return;
+
+       ndp_len = dccp_ndp_len(ndp);
+       len     = ndp_len + 2;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+       ptr    = skb_push(skb, len);
+       ptr[0] = DCCPO_NDP_COUNT;
+       ptr[1] = len;
+       dccp_encode_value_var(ndp, ptr + 2, ndp_len);
+}
+
+/*
+ * Number of bytes (0 to 4) needed to encode @elapsed_time; 0 means the
+ * value is zero and takes no bytes at all.
+ */
+static inline int dccp_elapsed_time_len(const u32 elapsed_time)
+{
+       if (elapsed_time == 0)
+               return 0;
+       if (elapsed_time <= 0xFF)
+               return 1;
+       if (elapsed_time <= 0xFFFF)
+               return 2;
+
+       return elapsed_time <= 0xFFFFFF ? 3 : 4;
+}
+
+/*
+ * Prepend an Elapsed Time option carrying @elapsed_time to @skb.
+ *
+ * The value is encoded in the smallest number of bytes that holds it.
+ * Nothing is inserted when @elapsed_time is 0, or when the option would
+ * push the option area beyond DCCP_MAX_OPT_LEN (a rate limited message
+ * is logged in that case).
+ */
+void dccp_insert_option_elapsed_time(struct sock *sk,
+                                    struct sk_buff *skb,
+                                    u32 elapsed_time)
+{
+#ifdef DCCP_DEBUG
+       struct dccp_sock *dp = dccp_sk(sk);
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
+                                                                       "server TX opt: ";
+#endif
+       const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+       const int len = 2 + elapsed_time_len;
+       unsigned char *to;
+
+       /*
+        * If elapsed_time == 0 there is nothing to send.  The test has to
+        * be on the value length: an option without a value (total length
+        * 2) is rejected by dccp_parse_options(), and a value length of 2
+        * is a perfectly valid elapsed time that must not be dropped.
+        */
+       if (elapsed_time_len == 0)
+               return;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert elapsed time!\n"));
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += len;
+
+       to    = skb_push(skb, len);
+       *to++ = DCCPO_ELAPSED_TIME;
+       *to++ = len;
+
+       dccp_encode_value_var(elapsed_time, to, elapsed_time_len);
+
+       dccp_pr_debug("%sELAPSED_TIME=%u, len=%d, seqno=%llu\n",
+                     debug_prefix, elapsed_time,
+                     len, DCCP_SKB_CB(skb)->dccpd_seq);
+}
+
+EXPORT_SYMBOL(dccp_insert_option_elapsed_time);
+
+/*
+ * Insert an Ack Vector option built from the dccpap_buf circular buffer and
+ * remember what was sent in the (single) ack record.
+ *
+ * An Elapsed Time option is inserted first whenever time has passed since
+ * dccpap_time. The Ack Vector itself is skipped if it doesn't fit or if the
+ * previously sent one is still outstanding.
+ */
+static void dccp_insert_option_ack_vector(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
+                                                                       "server TX opt: ";
+#endif
+       struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+       int vector_len = ap->dccpap_buf_vector_len;
+       const int opt_len = vector_len + 2;
+       const u32 elapsed_time = jiffies_to_usecs(jiffies - ap->dccpap_time) / 10;
+       unsigned char *dst, *src;
+
+       if (elapsed_time != 0)
+               dccp_insert_option_elapsed_time(sk, skb, elapsed_time);
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert ACK Vector!\n"));
+               return;
+       }
+
+       /*
+        * XXX: only one "ack vector sent" record is kept for now, so a new
+        * vector can't go out until the previous one has been cleared.
+        *
+        * Not acceptable in the long run, but good enough for basic testing.
+        */
+       if (ap->dccpap_ack_seqno != DCCP_MAX_SEQNO + 1)
+               return;
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;
+
+       dst    = skb_push(skb, opt_len);
+       *dst++ = DCCPO_ACK_VECTOR_0;
+       *dst++ = opt_len;
+
+       src = ap->dccpap_buf + ap->dccpap_buf_head;
+
+       /* The vector may wrap past the end of the circular buffer */
+       if (ap->dccpap_buf_head + vector_len > ap->dccpap_buf_len) {
+               const unsigned int tailsize = ap->dccpap_buf_len - ap->dccpap_buf_head;
+
+               /* First the chunk from buf_head to the end of the buffer... */
+               memcpy(dst, src, tailsize);
+               dst        += tailsize;
+               vector_len -= tailsize;
+               /* ...then carry on from its start */
+               src = ap->dccpap_buf;
+       }
+
+       memcpy(dst, src, vector_len);
+       /*
+        *      From draft-ietf-dccp-spec-11.txt:
+        *
+        *      For each acknowledgement it sends, the HC-Receiver will add an
+        *      acknowledgement record.  ack_seqno will equal the HC-Receiver
+        *      sequence number it used for the ack packet; ack_ptr will equal
+        *      buf_head; ack_ackno will equal buf_ackno; and ack_nonce will equal
+        *      buf_nonce.
+        *
+        * This implementation uses just one ack record for now.
+        */
+       ap->dccpap_ack_seqno      = DCCP_SKB_CB(skb)->dccpd_seq;
+       ap->dccpap_ack_ptr        = ap->dccpap_buf_head;
+       ap->dccpap_ack_ackno      = ap->dccpap_buf_ackno;
+       ap->dccpap_ack_nonce      = ap->dccpap_buf_nonce;
+       ap->dccpap_ack_vector_len = ap->dccpap_buf_vector_len;
+
+       dccp_pr_debug("%sACK Vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu\n",
+                     debug_prefix, ap->dccpap_ack_vector_len,
+                     ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
+}
+
+/* Insert a Timestamp option carrying tcp_time_stamp in network byte order. */
+static inline void dccp_insert_option_timestamp(struct sock *sk, struct sk_buff *skb)
+{
+       const u32 tstamp = htonl(tcp_time_stamp);
+
+       dccp_insert_option(sk, skb, DCCPO_TIMESTAMP, &tstamp, sizeof(tstamp));
+}
+
+/*
+ * Insert a Timestamp Echo option: the stored dccps_timestamp_echo value
+ * followed by the time elapsed since dccps_timestamp_time, the latter encoded
+ * in as few bytes as possible. Both stored values are cleared once the option
+ * has been inserted.
+ */
+static void dccp_insert_option_timestamp_echo(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+#ifdef DCCP_DEBUG
+       const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT TX opt: " :
+                                                                       "server TX opt: ";
+#endif
+       const u32 elapsed_time = jiffies_to_usecs(jiffies - dp->dccps_timestamp_time) / 10;
+       const int elapsed_time_len = dccp_elapsed_time_len(elapsed_time);
+       /* type + length + 4 bytes of timestamp + elapsed time */
+       const int opt_len = 6 + elapsed_time_len;
+       const u32 echo = htonl(dp->dccps_timestamp_echo);
+       unsigned char *opt;
+
+       if (DCCP_SKB_CB(skb)->dccpd_opt_len + opt_len > DCCP_MAX_OPT_LEN) {
+               LIMIT_NETDEBUG(pr_info("DCCP: packet too small to insert timestamp echo!\n"));
+               return;
+       }
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len += opt_len;
+
+       opt    = skb_push(skb, opt_len);
+       opt[0] = DCCPO_TIMESTAMP_ECHO;
+       opt[1] = opt_len;
+       memcpy(opt + 2, &echo, sizeof(echo));
+       dccp_encode_value_var(elapsed_time, opt + 2 + sizeof(echo), elapsed_time_len);
+
+       dccp_pr_debug("%sTIMESTAMP_ECHO=%u, len=%d, seqno=%llu\n",
+                     debug_prefix, dp->dccps_timestamp_echo,
+                     opt_len, DCCP_SKB_CB(skb)->dccpd_seq);
+
+       /* The echo has been sent, nothing is pending anymore */
+       dp->dccps_timestamp_echo = 0;
+       dp->dccps_timestamp_time = 0;
+}
+
+/*
+ * Build the options area of an outgoing packet.
+ *
+ * dccpd_opt_len is restarted from zero, the individual options are pushed in
+ * front of the packet data and the area is finally zero padded so that its
+ * length is a multiple of 4.
+ */
+void dccp_insert_options(struct sock *sk, struct sk_buff *skb)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       int remainder;
+
+       DCCP_SKB_CB(skb)->dccpd_opt_len = 0;
+
+       if (dp->dccps_options.dccpo_send_ndp_count)
+               dccp_insert_option_ndp(sk, skb);
+
+       /* These only go out on packets that carry an acknowledgement */
+       if (!dccp_packet_without_ack(skb)) {
+               if (dp->dccps_options.dccpo_send_ack_vector) {
+                       /* DCCP_MAX_SEQNO + 1 means nothing was recorded yet */
+                       if (dp->dccps_hc_rx_ackpkts->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1)
+                               dccp_insert_option_ack_vector(sk, skb);
+               }
+
+               dccp_insert_option_timestamp(sk, skb);
+
+               if (dp->dccps_timestamp_echo != 0)
+                       dccp_insert_option_timestamp_echo(sk, skb);
+       }
+
+       /* Let both half connection CCIDs add whatever they need */
+       ccid_hc_rx_insert_options(dp->dccps_hc_rx_ccid, sk, skb);
+       ccid_hc_tx_insert_options(dp->dccps_hc_tx_ccid, sk, skb);
+
+       /* XXX: insert other options when appropriate */
+
+       /* The length of all options has to be a multiple of 4 */
+       remainder = DCCP_SKB_CB(skb)->dccpd_opt_len % 4;
+       if (remainder != 0) {
+               const int padding = 4 - remainder;
+
+               memset(skb_push(skb, padding), 0, padding);
+               DCCP_SKB_CB(skb)->dccpd_opt_len += padding;
+       }
+}
+
+/*
+ * Allocate and initialise the state used to track received packets and to
+ * build Ack Vectors out of them.
+ *
+ * @len:      size, in bytes, of the circular buffer (dccpap_buf) that follows
+ *            the struct in the same allocation
+ * @priority: allocation flags handed to kmalloc()
+ *
+ * Returns the new ackpkts or NULL if the allocation failed. Release it with
+ * dccp_ackpkts_free().
+ */
+struct dccp_ackpkts *dccp_ackpkts_alloc(unsigned int len, int priority)
+{
+       struct dccp_ackpkts *ap = kmalloc(sizeof(*ap) + len, priority);
+
+       if (ap != NULL) {
+#ifdef DCCP_DEBUG
+               memset(ap->dccpap_buf, 0xFF, len);
+#endif
+               ap->dccpap_buf_len        = len;
+               /* The buffer is filled from its end towards its start */
+               ap->dccpap_buf_head       = ap->dccpap_buf_tail = ap->dccpap_buf_len - 1;
+               /* DCCP_MAX_SEQNO + 1 is used as the "not set" marker */
+               ap->dccpap_buf_ackno      = ap->dccpap_ack_ackno = ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+               /* kmalloc() doesn't zero the memory, so both nonces are set here */
+               ap->dccpap_buf_nonce      = ap->dccpap_ack_nonce = 0;
+               ap->dccpap_ack_ptr        = 0;
+               ap->dccpap_time           = 0;
+               ap->dccpap_buf_vector_len = ap->dccpap_ack_vector_len = 0;
+       }
+
+       return ap;
+}
+
+/* Release an ackpkts obtained from dccp_ackpkts_alloc(), NULL is accepted. */
+void dccp_ackpkts_free(struct dccp_ackpkts *ap)
+{
+       if (ap == NULL)
+               return;
+
+#ifdef DCCP_DEBUG
+       /* Debug builds overwrite the struct and its buffer before freeing */
+       memset(ap, 0xFF, sizeof(*ap) + ap->dccpap_buf_len);
+#endif
+       kfree(ap);
+}
+
+/* State bits (still in place, not shifted down) of the entry at 'index'. */
+static inline u8 dccp_ackpkts_state(const struct dccp_ackpkts *ap,
+                                   const unsigned int index)
+{
+       const u8 entry = ap->dccpap_buf[index];
+
+       return entry & DCCP_ACKPKTS_STATE_MASK;
+}
+
+/* Run length bits of the entry at 'index'. */
+static inline u8 dccp_ackpkts_len(const struct dccp_ackpkts *ap,
+                                 const unsigned int index)
+{
+       const u8 entry = ap->dccpap_buf[index];
+
+       return entry & DCCP_ACKPKTS_LEN_MASK;
+}
+
+/*
+ * Move buf_head back by 'packets' entries, wrapping around the start of the
+ * circular buffer if needed, and store 'state' in the new head. The
+ * 'packets' - 1 entries between the new and the old head are marked as not
+ * received.
+ *
+ * If several packets are missing, the HC-Receiver may prefer to enter multiple
+ * bytes with run length 0, rather than a single byte with a larger run length;
+ * this simplifies table updates if one of the missing packets arrives.
+ *
+ * Returns 0 on success or -ENOBUFS if the buffer can't take 'packets' more
+ * entries.
+ */
+static inline int dccp_ackpkts_set_buf_head_state(struct dccp_ackpkts *ap,
+                                                 const unsigned int packets,
+                                                 const unsigned char state)
+{
+       unsigned int gap;
+       signed long new_head;
+
+       if (ap->dccpap_buf_vector_len + packets > ap->dccpap_buf_len)
+               return -ENOBUFS;
+
+       gap      = packets - 1;
+       /*
+        * 'packets' is unsigned, so subtract signed operands: otherwise
+        * going below index 0 would wrap in unsigned arithmetic and, where
+        * long is wider than unsigned int, never show up as negative below.
+        */
+       new_head = (signed long)ap->dccpap_buf_head - (signed long)packets;
+
+       if (new_head < 0) {
+               if (gap > 0) {
+                       /*
+                        * The gap is split in two. This is the part at the
+                        * start of the buffer, i.e. everything in front of
+                        * the old head (gap + new_head + 1 == old buf_head).
+                        */
+                       memset(ap->dccpap_buf, DCCP_ACKPKTS_STATE_NOT_RECEIVED,
+                              ap->dccpap_buf_head);
+                       /*
+                        * What is left goes between the new head and the end
+                        * of the buffer: -new_head entries were taken from
+                        * the end, one of which is the new head itself.
+                        */
+                       gap = -new_head - 1;
+               }
+               new_head += ap->dccpap_buf_len;
+       }
+
+       ap->dccpap_buf_head = new_head;
+
+       if (gap > 0)
+               memset(ap->dccpap_buf + ap->dccpap_buf_head + 1,
+                      DCCP_ACKPKTS_STATE_NOT_RECEIVED, gap);
+
+       ap->dccpap_buf[ap->dccpap_buf_head] = state;
+       ap->dccpap_buf_vector_len += packets;
+       return 0;
+}
+
+/*
+ * Implements the draft-ietf-dccp-spec-11.txt Appendix A
+ *
+ * Record in the ack vector buffer that the packet with sequence number
+ * 'ackno' was seen in the given 'state'.
+ *
+ * Returns 0 on success, -ENOBUFS if there is no room left in the buffer and
+ * -EILSEQ if the packet is a duplicate or was already considered lost.
+ */
+int dccp_ackpkts_add(struct dccp_ackpkts *ap, u64 ackno, u8 state)
+{
+       /*
+        * Check at the right places if the buffer is full, if it is, tell the
+        * caller to start dropping packets till the HC-Sender acks our ACK
+        * vectors, when we will free up space in dccpap_buf.
+        *
+        * We may well decide to do buffer compression, etc, but for now lets
+        * just drop.
+        *
+        * From Appendix A:
+        *
+        *      Of course, the circular buffer may overflow, either when the HC-
+        *      Sender is sending data at a very high rate, when the HC-Receiver's
+        *      acknowledgements are not reaching the HC-Sender, or when the HC-
+        *      Sender is forgetting to acknowledge those acks (so the HC-Receiver
+        *      is unable to clean up old state).  In this case, the HC-Receiver
+        *      should either compress the buffer (by increasing run lengths when
+        *      possible), transfer its state to a larger buffer, or, as a last
+        *      resort, drop all received packets, without processing them
+        *      whatsoever, until its buffer shrinks again.
+        */
+
+       /* See if this is the first ackno being inserted */
+       if (ap->dccpap_buf_vector_len == 0) {
+               ap->dccpap_buf[ap->dccpap_buf_head] = state;
+               ap->dccpap_buf_vector_len = 1;
+       } else if (after48(ackno, ap->dccpap_buf_ackno)) {
+               const u64 delta = dccp_delta_seqno(ap->dccpap_buf_ackno, ackno);
+
+               /*
+                * Look if the state of this packet is the same as the previous ackno
+                * and if so if we can bump the head len.
+                */
+               if (delta == 1 &&
+                   dccp_ackpkts_state(ap, ap->dccpap_buf_head) == state &&
+                   dccp_ackpkts_len(ap, ap->dccpap_buf_head) < DCCP_ACKPKTS_LEN_MASK)
+                       ap->dccpap_buf[ap->dccpap_buf_head]++;
+               else if (dccp_ackpkts_set_buf_head_state(ap, delta, state))
+                       return -ENOBUFS;
+       } else {
+               /*
+                * A.1.2.  Old Packets
+                *
+                *      When a packet with Sequence Number S arrives, and S <= buf_ackno,
+                *      the HC-Receiver will scan the table for the byte corresponding to S.
+                *      (Indexing structures could reduce the complexity of this scan.)
+                *
+                * NOTE(review): the scan below is not limited to
+                * dccpap_buf_vector_len entries, so for an ackno older than
+                * everything recorded it walks over bytes that are not part
+                * of the vector - confirm whether it needs a bound.
+                */
+               u64 delta = dccp_delta_seqno(ackno, ap->dccpap_buf_ackno);
+               unsigned int index = ap->dccpap_buf_head;
+
+               while (1) {
+                       /*
+                        * Named so that they don't hide the 'state' parameter,
+                        * which is what has to be stored once the entry for
+                        * this packet is found.
+                        */
+                       const u8 cur_len = dccp_ackpkts_len(ap, index);
+                       const u8 cur_state = dccp_ackpkts_state(ap, index);
+                       /*
+                        * valid packets not yet in dccpap_buf have a reserved entry, with
+                        * a len equal to 0
+                        */
+                       if (cur_state == DCCP_ACKPKTS_STATE_NOT_RECEIVED &&
+                           cur_len == 0 && delta == 0) { /* Found our reserved seat! */
+                               dccp_pr_debug("Found %llu reserved seat!\n", ackno);
+                               ap->dccpap_buf[index] = state;
+                               goto out;
+                       }
+                       /* len == 0 means one packet */
+                       if (delta < cur_len + 1)
+                               goto out_duplicate;
+
+                       delta -= cur_len + 1;
+                       if (++index == ap->dccpap_buf_len)
+                               index = 0;
+               }
+       }
+
+       /* Only reached for a new highest ackno, old packets leave via 'out' */
+       ap->dccpap_buf_ackno = ackno;
+       ap->dccpap_time = jiffies;
+out:
+       dccp_pr_debug("");
+       dccp_ackpkts_print(ap);
+       return 0;
+
+out_duplicate:
+       /* Duplicate packet */
+       dccp_pr_debug("Received a dup or already considered lost packet: %llu\n", ackno);
+       return -EILSEQ;
+}
+
+#ifdef DCCP_DEBUG
+/*
+ * Debug helper: print 'len' ack vector bytes as "state,run length|" pairs.
+ * Does nothing unless dccp_debug is set.
+ */
+void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
+{
+       if (!dccp_debug)
+               return;
+
+       printk("ACK vector len=%d, ackno=%llu |", len, ackno);
+
+       for (; len != 0; --len, ++vector) {
+               const u8 entry_state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
+               const u8 run_len = (*vector & DCCP_ACKPKTS_LEN_MASK);
+
+               printk("%d,%d|", entry_state, run_len);
+       }
+
+       printk("\n");
+}
+
+/*
+ * Debug helper: print the current ack vector, i.e. the dccpap_buf_vector_len
+ * entries that start at dccpap_buf_head. Does nothing unless dccp_debug is
+ * set.
+ *
+ * dccpap_buf is a circular buffer, so the entries are walked with an index
+ * that wraps at dccpap_buf_len. Reading them as one linear run would go past
+ * the end of the buffer whenever the vector wraps.
+ */
+void dccp_ackpkts_print(const struct dccp_ackpkts *ap)
+{
+       unsigned int index = ap->dccpap_buf_head;
+       unsigned int left  = ap->dccpap_buf_vector_len;
+
+       if (!dccp_debug)
+               return;
+
+       /* Same output format as dccp_ackvector_print() */
+       printk("ACK vector len=%d, ackno=%llu |", (int)ap->dccpap_buf_vector_len,
+              (unsigned long long)ap->dccpap_buf_ackno);
+
+       while (left--) {
+               const u8 state = dccp_ackpkts_state(ap, index) >> 6;
+               const u8 rl = dccp_ackpkts_len(ap, index);
+
+               printk("%d,%d|", state, rl);
+
+               if (++index == ap->dccpap_buf_len)
+                       index = 0;
+       }
+
+       printk("\n");
+}
+#endif
+
+static void dccp_ackpkts_trow_away_ack_record(struct dccp_ackpkts *ap)
+{
+       /*
+        * Drop the (single) ack record by shrinking the ack vector by the
+        * length that was recorded when the Ack Vector option was sent
+        * (dccpap_ack_vector_len).
+        *
+        * As we're keeping track of the ack vector size
+        * (dccpap_buf_vector_len) and the sent ack vector size
+        * (dccpap_ack_vector_len) we don't need dccpap_buf_tail at all, but
+        * keep this code here as in the future we'll implement a vector of ack
+        * records, as suggested in draft-ietf-dccp-spec-11.txt Appendix A. -acme
+        */
+#if 0
+       ap->dccpap_buf_tail = ap->dccpap_ack_ptr + 1;
+       if (ap->dccpap_buf_tail >= ap->dccpap_buf_len)
+               ap->dccpap_buf_tail -= ap->dccpap_buf_len;
+#endif
+       ap->dccpap_buf_vector_len -= ap->dccpap_ack_vector_len;
+}
+
+void dccp_ackpkts_check_rcv_ackno(struct dccp_ackpkts *ap, struct sock *sk,
+                                u64 ackno)
+{
+       /* Check if we actually sent an ACK vector */
+       if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+               return;
+
+       if (ackno == ap->dccpap_ack_seqno) {
+#ifdef DCCP_DEBUG
+               struct dccp_sock *dp = dccp_sk(sk);
+               const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
+                                                                               "server rx ack: ";
+#endif
+               dccp_pr_debug("%sACK packet 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
+                             debug_prefix, 1,
+                             ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
+               dccp_ackpkts_trow_away_ack_record(ap);
+               ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+       }
+}
+
+static void dccp_ackpkts_check_rcv_ackvector(struct dccp_ackpkts *ap,
+                                            struct sock *sk, u64 ackno,
+                                            const unsigned char len,
+                                            const unsigned char *vector)
+{
+       unsigned char i;
+
+       /* Check if we actually sent an ACK vector */
+       if (ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1)
+               return;
+       /*
+        * We're in the receiver half connection, so if the received an ACK vector
+        * ackno (e.g. 50) before dccpap_ack_seqno (e.g. 52), we're not interested.
+        *
+        * Extra explanation with example:
+        * 
+        * if we received an ACK vector with ackno 50, it can only be acking
+        * 50, 49, 48, etc, not 52 (the seqno for the ACK vector we sent).
+        */
+       // dccp_pr_debug("is %llu < %llu? ", ackno, ap->dccpap_ack_seqno);
+       if (before48(ackno, ap->dccpap_ack_seqno)) {
+               // dccp_pr_debug_cat("yes\n");
+               return;
+       }
+       // dccp_pr_debug_cat("no\n");
+
+       i = len;
+       while (i--) {
+               const u8 rl = (*vector & DCCP_ACKPKTS_LEN_MASK);
+               u64 ackno_end_rl;
+
+               dccp_set_seqno(&ackno_end_rl, ackno - rl);
+
+               // dccp_pr_debug("is %llu <= %llu <= %llu? ", ackno_end_rl, ap->dccpap_ack_seqno, ackno);
+               if (between48(ap->dccpap_ack_seqno, ackno_end_rl, ackno)) {
+                       const u8 state = (*vector & DCCP_ACKPKTS_STATE_MASK) >> 6;
+                       // dccp_pr_debug_cat("yes\n");
+
+                       if (state != DCCP_ACKPKTS_STATE_NOT_RECEIVED) {
+#ifdef DCCP_DEBUG
+                               struct dccp_sock *dp = dccp_sk(sk);
+                               const char *debug_prefix = dp->dccps_role == DCCP_ROLE_CLIENT ? "CLIENT rx ack: " :
+                                                                                               "server rx ack: ";
+#endif
+                               dccp_pr_debug("%sACK vector 0, len=%d, ack_seqno=%llu, ack_ackno=%llu, ACKED!\n",
+                                             debug_prefix, len,
+                                             ap->dccpap_ack_seqno, ap->dccpap_ack_ackno);
+                               dccp_ackpkts_trow_away_ack_record(ap);
+                       }
+                       /*
+                        * If dccpap_ack_seqno was not received, no problem we'll
+                        * send another ACK vector.
+                        */
+                       ap->dccpap_ack_seqno = DCCP_MAX_SEQNO + 1;
+                       break;
+               }
+               // dccp_pr_debug_cat("no\n");
+
+               dccp_set_seqno(&ackno, ackno_end_rl - 1);
+               ++vector;
+       }
+}
diff --git a/net/dccp/output.c b/net/dccp/output.c
new file mode 100644 (file)
index 0000000..22ca291
--- /dev/null
@@ -0,0 +1,406 @@
+/*
+ *  net/dccp/output.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include <net/sock.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+static inline void dccp_event_ack_sent(struct sock *sk)
+{
+       inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);  /* an ACK just went out, so the pending ICSK_TIME_DACK timer is cleared */
+}
+
+/*
+ * All SKB's seen here are completely headerless. It is our
+ * job to build the DCCP header, and pass the packet down to
+ * IP so it can do the same plus pass the packet off to the
+ * device.
+ *
+ * The packet type and its type specific data are taken from DCCP_SKB_CB(skb).
+ * For SYNC/SYNCACK packets the caller passes the sequence number to be
+ * acknowledged in dccpd_seq (see dccp_send_sync()). On return dccpd_seq
+ * holds the sequence number the packet was sent with.
+ *
+ * Returns -ENOBUFS if skb is NULL, otherwise the result of ip_queue_xmit()
+ * with NET_XMIT_CN mapped to 0.
+ */
+int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+       if (likely(skb != NULL)) {
+               const struct inet_sock *inet = inet_sk(sk);
+               struct dccp_sock *dp = dccp_sk(sk);
+               struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+               struct dccp_hdr *dh;
+               /* XXX For now we're using only 48 bits sequence numbers */
+               const int dccp_header_size = sizeof(*dh) +
+                                            sizeof(struct dccp_hdr_ext) +
+                                            dccp_packet_hdr_len(dcb->dccpd_type);
+               int err, set_ack = 1;
+               u64 ackno = dp->dccps_gsr;
+
+               /*
+                * FIXME: study DCCP_PKT_SYNC[ACK] to see what is the right thing
+                * to do here...
+                */
+               dccp_inc_seqno(&dp->dccps_gss);
+
+               /*
+                * This has to be done before dccpd_seq is overwritten with
+                * our own sequence number below: for SYNC/SYNCACK it still
+                * holds the sequence number the caller wants acknowledged.
+                */
+               switch (dcb->dccpd_type) {
+               case DCCP_PKT_DATA:
+                       set_ack = 0;
+                       break;
+               case DCCP_PKT_SYNC:
+               case DCCP_PKT_SYNCACK:
+                       ackno = dcb->dccpd_seq;
+                       break;
+               }
+
+               dcb->dccpd_seq = dp->dccps_gss;
+               dccp_insert_options(sk, skb);
+
+               skb->h.raw = skb_push(skb, dccp_header_size);
+               dh = dccp_hdr(skb);
+               /* Data packets are not cloned as they are never retransmitted */
+               if (skb_cloned(skb))
+                       skb_set_owner_w(skb, sk);
+
+               /* Build DCCP header and checksum it. */
+               memset(dh, 0, dccp_header_size);
+               dh->dccph_type  = dcb->dccpd_type;
+               dh->dccph_sport = inet->sport;
+               dh->dccph_dport = inet->dport;
+               /* Data offset is expressed in 32 bit words */
+               dh->dccph_doff  = (dccp_header_size + dcb->dccpd_opt_len) / 4;
+               dh->dccph_ccval = dcb->dccpd_ccval;
+               /* XXX For now we're using only 48 bits sequence numbers */
+               dh->dccph_x     = 1;
+
+               dp->dccps_awh = dp->dccps_gss;
+               dccp_hdr_set_seq(dh, dp->dccps_gss);
+               if (set_ack)
+                       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), ackno);
+
+               /* Type specific header fields */
+               switch (dcb->dccpd_type) {
+               case DCCP_PKT_REQUEST:
+                       dccp_hdr_request(skb)->dccph_req_service = dcb->dccpd_service;
+                       break;
+               case DCCP_PKT_RESET:
+                       dccp_hdr_reset(skb)->dccph_reset_code = dcb->dccpd_reset_code;
+                       break;
+               }
+
+               dh->dccph_checksum = dccp_v4_checksum(skb);
+
+               if (dcb->dccpd_type == DCCP_PKT_ACK ||
+                   dcb->dccpd_type == DCCP_PKT_DATAACK)
+                       dccp_event_ack_sent(sk);
+
+               DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+
+               err = ip_queue_xmit(skb, 0);
+               if (err <= 0)
+                       return err;
+
+               /* NET_XMIT_CN is special. It does not guarantee,
+                * that this packet is lost. It tells that device
+                * is about to start to drop packets or already
+                * drops some packets of the same priority and
+                * invokes us to send less aggressively.
+                */
+               return err == NET_XMIT_CN ? 0 : err;
+       }
+       return -ENOBUFS;
+}
+
+/*
+ * Work out the MSS for the given path MTU, cache it in the socket together
+ * with the MTU it was derived from, and return it.
+ */
+unsigned int dccp_sync_mss(struct sock *sk, u32 pmtu)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       int mss_now = pmtu;
+
+       /*
+        * FIXME: we really should be using the af_specific thing to support IPv6.
+        * mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct dccp_hdr) - sizeof(struct dccp_hdr_ext);
+        */
+       mss_now -= sizeof(struct iphdr);
+       mss_now -= sizeof(struct dccp_hdr) + sizeof(struct dccp_hdr_ext);
+
+       /* Now subtract optional transport overhead */
+       mss_now -= dp->dccps_ext_header_len;
+
+       /*
+        * FIXME: this should come from the CCID infrastructure, where, say,
+        * TFRC will say it wants TIMESTAMPS, ELAPSED time, etc, for now lets
+        * put a rough estimate for NDP + TIMESTAMP + TIMESTAMP_ECHO + ELAPSED
+        * TIME + TFRC_OPT_LOSS_EVENT_RATE + TFRC_OPT_RECEIVE_RATE + padding to
+        * make it a multiple of 4
+        */
+       mss_now -= ((5 + 6 + 10 + 6 + 6 + 6 + 3) / 4) * 4;
+
+       /* And store cached results */
+       dp->dccps_pmtu_cookie = pmtu;
+       dp->dccps_mss_cache   = mss_now;
+
+       return mss_now;
+}
+
+/*
+ * Send a copy of 'skb' again, the original is left untouched.
+ *
+ * Returns -EHOSTUNREACH if the header can't be rebuilt, otherwise whatever
+ * dccp_transmit_skb() returns (-ENOBUFS if the copy could not be allocated).
+ */
+int dccp_retransmit_skb(struct sock *sk, struct sk_buff *skb)
+{
+       struct sk_buff *copy;
+
+       if (inet_sk_rebuild_header(sk) != 0)
+               return -EHOSTUNREACH; /* Routing failure or similar. */
+
+       if (skb_cloned(skb))
+               copy = pskb_copy(skb, GFP_ATOMIC);
+       else
+               copy = skb_clone(skb, GFP_ATOMIC);
+
+       return dccp_transmit_skb(sk, copy);
+}
+
+/*
+ * Build a complete RESPONSE packet (options, DCCP header and checksum) for
+ * the connection request 'req'. Sequence and acknowledgement numbers come
+ * from the request sock (dreq_iss and dreq_isr).
+ *
+ * Returns the packet, or NULL if it could not be allocated.
+ */
+struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
+                                  struct request_sock *req)
+{
+       const int dccp_header_size = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_response);
+       const int headroom = MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size;
+       struct sk_buff *skb = sock_wmalloc(sk, headroom, 1, GFP_ATOMIC);
+       struct dccp_hdr *dh;
+
+       if (skb == NULL)
+               return NULL;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, headroom);
+
+       skb->dst  = dst_clone(dst);
+       skb->csum = 0;
+
+       DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
+       DCCP_SKB_CB(skb)->dccpd_seq  = dccp_rsk(req)->dreq_iss;
+       dccp_insert_options(sk, skb);
+
+       skb->h.raw = skb_push(skb, dccp_header_size);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_header_size);
+
+       dh->dccph_type  = DCCP_PKT_RESPONSE;
+       /* 48 bits sequence numbers */
+       dh->dccph_x     = 1;
+       dh->dccph_sport = inet_sk(sk)->sport;
+       dh->dccph_dport = inet_rsk(req)->rmt_port;
+       dh->dccph_doff  = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+
+       dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
+
+       dh->dccph_checksum = dccp_v4_checksum(skb);
+
+       DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+       return skb;
+}
+
+/*
+ * Build a complete RESET packet (options, DCCP header and checksum) carrying
+ * the given reset code. GSS is incremented and used as the sequence number,
+ * GSR is what gets acknowledged.
+ *
+ * Returns the packet, or NULL if it could not be allocated.
+ */
+struct sk_buff *dccp_make_reset(struct sock *sk, struct dst_entry *dst,
+                               const enum dccp_reset_codes code)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       const int dccp_header_size = sizeof(struct dccp_hdr) +
+                                    sizeof(struct dccp_hdr_ext) +
+                                    sizeof(struct dccp_hdr_reset);
+       const int headroom = MAX_HEADER + DCCP_MAX_OPT_LEN + dccp_header_size;
+       struct sk_buff *skb = sock_wmalloc(sk, headroom, 1, GFP_ATOMIC);
+       struct dccp_hdr *dh;
+
+       if (skb == NULL)
+               return NULL;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, headroom);
+
+       skb->dst  = dst_clone(dst);
+       skb->csum = 0;
+
+       dccp_inc_seqno(&dp->dccps_gss);
+
+       DCCP_SKB_CB(skb)->dccpd_type       = DCCP_PKT_RESET;
+       DCCP_SKB_CB(skb)->dccpd_reset_code = code;
+       DCCP_SKB_CB(skb)->dccpd_seq        = dp->dccps_gss;
+       dccp_insert_options(sk, skb);
+
+       skb->h.raw = skb_push(skb, dccp_header_size);
+       dh = dccp_hdr(skb);
+       memset(dh, 0, dccp_header_size);
+
+       dh->dccph_type  = DCCP_PKT_RESET;
+       /* 48 bits sequence numbers */
+       dh->dccph_x     = 1;
+       dh->dccph_sport = inet_sk(sk)->sport;
+       dh->dccph_dport = inet_sk(sk)->dport;
+       dh->dccph_doff  = (dccp_header_size + DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
+
+       dccp_hdr_set_seq(dh, dp->dccps_gss);
+       dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dp->dccps_gsr);
+
+       dccp_hdr_reset(skb)->dccph_reset_code = code;
+
+       dh->dccph_checksum = dccp_v4_checksum(skb);
+
+       DCCP_INC_STATS(DCCP_MIB_OUTSEGS);
+       return skb;
+}
+
+/*
+ * Address family independent part of the socket setup done on connect:
+ * clear the pending error and the SOCK_DONE flag, compute the MSS from the
+ * MTU of the socket's route and restart the retransmit counter.
+ */
+static inline void dccp_connect_init(struct sock *sk)
+{
+       struct dst_entry *route = __sk_dst_get(sk);
+
+       sk->sk_err = 0;
+       sock_reset_flag(sk, SOCK_DONE);
+
+       dccp_sync_mss(sk, dst_mtu(route));
+
+       /*
+        * FIXME: set dp->{dccps_swh,dccps_swl}, with
+        * something like dccp_inc_seq
+        */
+
+       inet_csk(sk)->icsk_retransmits = 0;
+}
+
+/*
+ * Start an active open: build the REQUEST packet, keep it in sk_send_head,
+ * send a clone of it and arm the retransmit timer.
+ *
+ * Returns 0, or -ENOBUFS if the packet could not be allocated.
+ */
+int dccp_connect(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct dccp_skb_cb *dcb;
+       struct sk_buff *skb;
+
+       dccp_connect_init(sk);
+
+       skb = alloc_skb(MAX_DCCP_HEADER + 15, sk->sk_allocation);
+       if (unlikely(skb == NULL))
+               return -ENOBUFS;
+
+       /* Reserve space for headers. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb->csum = 0;
+
+       dcb = DCCP_SKB_CB(skb);
+       dcb->dccpd_type    = DCCP_PKT_REQUEST;
+       /* FIXME: set service to something meaningful, coming
+        * from userspace*/
+       dcb->dccpd_service = 0;
+
+       skb_set_owner_w(skb, sk);
+
+       /* The original stays in sk_send_head, only a clone is transmitted */
+       BUG_TRAP(sk->sk_send_head == NULL);
+       sk->sk_send_head = skb;
+       dccp_transmit_skb(sk, skb_clone(skb, GFP_KERNEL));
+       DCCP_INC_STATS(DCCP_MIB_ACTIVEOPENS);
+
+       /* Timer for repeating the REQUEST until an answer. */
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
+       return 0;
+}
+
+/*
+ * Send a pure ACK packet right away. If no memory is available for it an
+ * ACK is scheduled and the ICSK_TIME_DACK timer armed instead.
+ */
+void dccp_send_ack(struct sock *sk)
+{
+       struct sk_buff *skb;
+
+       /* If we have been reset, we may not send again. */
+       if (sk->sk_state == DCCP_CLOSED)
+               return;
+
+       skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+       if (skb == NULL) {
+               inet_csk_schedule_ack(sk);
+               inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, TCP_DELACK_MAX, TCP_RTO_MAX);
+               return;
+       }
+
+       /* Reserve space for headers */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb->csum = 0;
+       DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_ACK;
+
+       skb_set_owner_w(skb, sk);
+       dccp_transmit_skb(sk, skb);
+}
+
+EXPORT_SYMBOL_GPL(dccp_send_ack);
+
+/*
+ * Schedule an ACK through the delack timer, or send it right away if a
+ * timer was already pending and found blocked.
+ */
+void dccp_send_delayed_ack(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       /*
+        * FIXME: tune this timer. elapsed time fixes the skew, so no problem
+        * with using 2s, and active senders also piggyback the ACK into a
+        * DATAACK packet, so this is really for quiescent senders.
+        */
+       unsigned long expires = jiffies + 2 * HZ;
+       const int timer_pending = icsk->icsk_ack.pending & ICSK_ACK_TIMER;
+
+       /* If delack timer was blocked or is about to expire,
+        * send ACK now.
+        *
+        * FIXME: check the "about to expire" part
+        */
+       if (timer_pending && icsk->icsk_ack.blocked) {
+               dccp_send_ack(sk);
+               return;
+       }
+
+       /* Use new timeout only if there wasn't a older one earlier. */
+       if (timer_pending && !time_before(expires, icsk->icsk_ack.timeout))
+               expires = icsk->icsk_ack.timeout;
+
+       icsk->icsk_ack.pending |= ICSK_ACK_SCHED | ICSK_ACK_TIMER;
+       icsk->icsk_ack.timeout = expires;
+       sk_reset_timer(sk, &icsk->icsk_delack_timer, expires);
+}
+
+/*
+ * Send a SYNC packet. 'seq' is stored in the packet's control block
+ * (dccpd_seq) for dccp_transmit_skb() to pick up. Nothing is sent if the
+ * packet can't be allocated.
+ */
+void dccp_send_sync(struct sock *sk, u64 seq)
+{
+       /*
+        * This packet is not put on the write queue: it is charged to the
+        * sock below and handed straight to dccp_transmit_skb().
+        */
+       struct sk_buff *skb = alloc_skb(MAX_DCCP_HEADER, GFP_ATOMIC);
+       struct dccp_skb_cb *dcb;
+
+       if (skb == NULL)
+               /* FIXME: how to make sure the sync is sent? */
+               return;
+
+       /* Reserve space for headers and prepare control bits. */
+       skb_reserve(skb, MAX_DCCP_HEADER);
+       skb->csum = 0;
+
+       dcb = DCCP_SKB_CB(skb);
+       dcb->dccpd_type = DCCP_PKT_SYNC;
+       dcb->dccpd_seq  = seq;
+
+       skb_set_owner_w(skb, sk);
+       dccp_transmit_skb(sk, skb);
+}
+
+/* Send a DCCP_PKT_CLOSE/CLOSEREQ.  The caller locks the socket for us.  This cannot be
+ * allowed to fail queueing a DCCP_PKT_CLOSE/CLOSEREQ frame under any circumstances.
+ */
+void dccp_send_close(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+       struct sk_buff *skb;
+
+       /* Socket is locked, keep trying until memory is available. */
+       while ((skb = alloc_skb(sk->sk_prot->max_header, GFP_KERNEL)) == NULL)
+               yield();
+
+       /* Reserve space for headers and prepare control bits. */
+       skb_reserve(skb, sk->sk_prot->max_header);
+       skb->csum = 0;
+
+       /* Clients send CLOSE, the other side sends CLOSEREQ */
+       if (dp->dccps_role == DCCP_ROLE_CLIENT)
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSE;
+       else
+               DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_CLOSEREQ;
+
+       skb_set_owner_w(skb, sk);
+       dccp_transmit_skb(sk, skb);
+
+       /* The packet is out, shut both half connection CCIDs down */
+       ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
+       ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
+}
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
new file mode 100644 (file)
index 0000000..70284e6
--- /dev/null
@@ -0,0 +1,818 @@
+/*
+ *  net/dccp/proto.c
+ *
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or modify it
+ *     under the terms of the GNU General Public License version 2 as
+ *     published by the Free Software Foundation.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/skbuff.h>
+#include <linux/netdevice.h>
+#include <linux/in.h>
+#include <linux/if_arp.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <net/checksum.h>
+
+#include <net/inet_common.h>
+#include <net/ip.h>
+#include <net/protocol.h>
+#include <net/sock.h>
+#include <net/xfrm.h>
+
+#include <asm/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+#include <linux/delay.h>
+#include <linux/poll.h>
+#include <linux/dccp.h>
+
+#include "ccid.h"
+#include "dccp.h"
+
+/* DCCP SNMP counters; the per-cpu storage is allocated in init_dccp_v4_mibs(). */
+DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics);
+
+/* Incremented in dccp_close() each time a socket is orphaned. */
+atomic_t dccp_orphan_count = ATOMIC_INIT(0);
+
+/* IPv4 protocol hooks, registered for IPPROTO_DCCP in dccp_init(). */
+static struct net_protocol dccp_protocol = {
+       .handler        = dccp_v4_rcv,
+       .err_handler    = dccp_v4_err,
+};
+
+/*
+ * dccp_packet_name - printable name of a DCCP packet type
+ * @type: packet type, normally one of the DCCP_PKT_* values
+ *
+ * Returns a pointer to a static string.  "INVALID" is returned for every
+ * value that has no entry in the table, including negative values.
+ */
+const char *dccp_packet_name(const int type)
+{
+       static const char *const dccp_packet_names[] = {
+               [DCCP_PKT_REQUEST]  = "REQUEST",
+               [DCCP_PKT_RESPONSE] = "RESPONSE",
+               [DCCP_PKT_DATA]     = "DATA",
+               [DCCP_PKT_ACK]      = "ACK",
+               [DCCP_PKT_DATAACK]  = "DATAACK",
+               [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
+               [DCCP_PKT_CLOSE]    = "CLOSE",
+               [DCCP_PKT_RESET]    = "RESET",
+               [DCCP_PKT_SYNC]     = "SYNC",
+               [DCCP_PKT_SYNCACK]  = "SYNCACK",
+       };
+
+       /*
+        * @type is signed, so a negative value has to be rejected as well.
+        * The lookup is also bounded by the table itself and guarded
+        * against unlisted (NULL) slots so that a caller never gets NULL.
+        */
+       if (type < 0 || type >= DCCP_NR_PKT_TYPES ||
+           (size_t)type >= ARRAY_SIZE(dccp_packet_names) ||
+           dccp_packet_names[type] == NULL)
+               return "INVALID";
+
+       return dccp_packet_names[type];
+}
+
+EXPORT_SYMBOL_GPL(dccp_packet_name);
+
+/*
+ * dccp_state_name - printable name of a DCCP socket state
+ * @state: socket state, normally one of the DCCP_* state values
+ *
+ * Returns a pointer to a static string.  "INVALID STATE!" is returned for
+ * every value that has no entry in the table, including negative values
+ * and slots the table leaves unnamed.
+ */
+const char *dccp_state_name(const int state)
+{
+       static const char *const dccp_state_names[] = {
+       [DCCP_OPEN]       = "OPEN",
+       [DCCP_REQUESTING] = "REQUESTING",
+       [DCCP_PARTOPEN]   = "PARTOPEN",
+       [DCCP_LISTEN]     = "LISTEN",
+       [DCCP_RESPOND]    = "RESPOND",
+       [DCCP_CLOSING]    = "CLOSING",
+       [DCCP_TIME_WAIT]  = "TIME_WAIT",
+       [DCCP_CLOSED]     = "CLOSED",
+       };
+
+       /*
+        * @state is signed, so reject negative values too.  Slots without
+        * a designated initializer are NULL; report those as invalid
+        * instead of handing NULL back to a "%s" format.
+        */
+       if (state < 0 || state >= DCCP_MAX_STATES ||
+           (size_t)state >= ARRAY_SIZE(dccp_state_names) ||
+           dccp_state_names[state] == NULL)
+               return "INVALID STATE!";
+
+       return dccp_state_names[state];
+}
+
+EXPORT_SYMBOL_GPL(dccp_state_name);
+
+/*
+ * Record the listen role on the DCCP socket and start listening through
+ * inet_csk_listen_start(), whose return value is passed back unchanged.
+ */
+static inline int dccp_listen_start(struct sock *sk)
+{
+       struct dccp_sock *dp = dccp_sk(sk);
+
+       dp->dccps_role = DCCP_ROLE_LISTEN;
+
+       return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
+}
+
+/*
+ * dccp_disconnect - abort the connection and reset the socket
+ * @sk:    socket to reset
+ * @flags: not used by this function
+ *
+ * Moves the socket to DCCP_CLOSED, drops queued data and clears the
+ * per-connection fields.  Always returns 0.
+ */
+int dccp_disconnect(struct sock *sk, int flags)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       struct inet_sock *inet = inet_sk(sk);
+       const int prev_state = sk->sk_state;
+
+       if (prev_state != DCCP_CLOSED)
+               dccp_set_state(sk, DCCP_CLOSED);
+
+       /* ABORT function of RFC793 */
+       switch (prev_state) {
+       case DCCP_LISTEN:
+               inet_csk_listen_stop(sk);
+               break;
+       case DCCP_REQUESTING:
+               /* FIXME: do the active reset thing */
+               sk->sk_err = ECONNRESET;
+               break;
+       default:
+               break;
+       }
+
+       /* Clear the transmit timers and drop whatever is still queued. */
+       dccp_clear_xmit_timers(sk);
+       __skb_queue_purge(&sk->sk_receive_queue);
+       if (sk->sk_send_head) {
+               __kfree_skb(sk->sk_send_head);
+               sk->sk_send_head = NULL;
+       }
+
+       inet->dport = 0;
+
+       /* Reset the source address unless SOCK_BINDADDR_LOCK is set. */
+       if ((sk->sk_userlocks & SOCK_BINDADDR_LOCK) == 0)
+               inet_reset_saddr(sk);
+
+       sk->sk_shutdown = 0;
+       sock_reset_flag(sk, SOCK_DONE);
+
+       icsk->icsk_backoff = 0;
+       inet_csk_delack_init(sk);
+       __sk_dst_reset(sk);
+
+       BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
+
+       sk->sk_error_report(sk);
+       return 0;
+}
+
+/*
+ * dccp_ioctl - protocol ioctl hook
+ *
+ * No DCCP ioctl is implemented: the call is logged through dccp_pr_debug()
+ * and -ENOIOCTLCMD is returned for every @cmd.
+ */
+int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
+{
+       dccp_pr_debug("entry\n");
+       return -ENOIOCTLCMD;
+}
+
+/*
+ * dccp_setsockopt - set a socket option
+ *
+ * SOL_DCCP options are not implemented and yield -EOPNOTSUPP; every other
+ * level is handed to ip_setsockopt() and its result returned.
+ */
+int dccp_setsockopt(struct sock *sk, int level, int optname,
+                   char *optval, int optlen)
+{
+       dccp_pr_debug("entry\n");
+
+       if (level == SOL_DCCP)
+               return -EOPNOTSUPP;
+
+       return ip_setsockopt(sk, level, optname, optval, optlen);
+}
+
+/*
+ * dccp_getsockopt - read a socket option
+ *
+ * SOL_DCCP options are not implemented and yield -EOPNOTSUPP; every other
+ * level is handed to ip_getsockopt() and its result returned.
+ */
+int dccp_getsockopt(struct sock *sk, int level, int optname,
+                   char *optval, int *optlen)
+{
+       dccp_pr_debug("entry\n");
+
+       if (level == SOL_DCCP)
+               return -EOPNOTSUPP;
+
+       return ip_getsockopt(sk, level, optname, optval, optlen);
+}
+
+/*
+ * dccp_sendmsg - send one user message as a single DCCP packet
+ * @iocb: not used by this function
+ * @sk:   sending socket
+ * @msg:  user message; msg_flags and msg_iov are read
+ * @len:  payload length in bytes
+ *
+ * Returns @len on success or a negative error: -EMSGSIZE when @len is
+ * larger than dccps_mss_cache, otherwise the error reported by the connect
+ * wait, the skb allocation, the copy from user space or the TX CCID.
+ */
+int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+                size_t len)
+{
+       const struct dccp_sock *dp = dccp_sk(sk);
+       const int flags = msg->msg_flags;
+       const int noblock = flags & MSG_DONTWAIT;
+       struct sk_buff *skb;
+       int rc, size;
+       long timeo;
+
+       /* The message must fit in one packet; checked before taking the lock. */
+       if (len > dp->dccps_mss_cache)
+               return -EMSGSIZE;
+
+       lock_sock(sk);
+
+       timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT);
+
+       /*
+        * We have to use sk_stream_wait_connect here to set sk_write_pending,
+        * so that the trick in dccp_rcv_request_sent_state_process works.
+        */
+       /* Wait for a connection to finish. */
+       if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
+               if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
+                       goto out_err;
+
+       /* The socket lock is dropped around the skb allocation. */
+       size = sk->sk_prot->max_header + len;
+       release_sock(sk);
+       skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+       lock_sock(sk);
+
+       /* On failure rc was filled in by sock_alloc_send_skb(). */
+       if (skb == NULL)
+               goto out_release;
+
+       skb_reserve(skb, sk->sk_prot->max_header);
+       rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
+       if (rc == 0) {
+               struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
+               const struct dccp_ackpkts *ap = dp->dccps_hc_rx_ackpkts;
+               long delay; 
+
+               /*
+                * XXX: This is just to match the Waikato tree CA interaction
+                * points, after the CCID3 code is stable and I have a better
+                * understanding of behaviour I'll change this to look more like
+                * TCP.
+                */
+               /*
+                * Ask the TX CCID for permission to send.  -EAGAIN means
+                * "retry after delay"; any other error discards the packet.
+                *
+                * NOTE(review): the task state is not changed in this function
+                * before schedule_timeout(), and the value subtracted from
+                * timeo is schedule_timeout()'s return value (documented as
+                * the time left, not the time slept) - confirm both are
+                * intended.
+                */
+               while (1) {
+                       rc = ccid_hc_tx_send_packet(dp->dccps_hc_tx_ccid, sk,
+                                                   skb, len, &delay);
+                       if (rc == 0)
+                               break;
+                       if (rc != -EAGAIN)
+                               goto out_discard;
+                       /* The requested delay does not fit in the send timeout. */
+                       if (delay > timeo)
+                               goto out_discard;
+                       release_sock(sk);
+                       delay = schedule_timeout(delay);
+                       lock_sock(sk);
+                       timeo -= delay;
+                       if (signal_pending(current))
+                               goto out_interrupted;
+                       /* The state may have changed while the lock was dropped. */
+                       rc = -EPIPE;
+                       if (!(sk->sk_state == DCCP_PARTOPEN || sk->sk_state == DCCP_OPEN))
+                               goto out_discard;
+               }
+
+               /* Pick the packet type: DataAck when an ack is due, else Data. */
+               if (sk->sk_state == DCCP_PARTOPEN) {
+                       /* See 8.1.5.  Handshake Completion */
+                       inet_csk_schedule_ack(sk);
+                       inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+                       dcb->dccpd_type = DCCP_PKT_DATAACK;
+                       /* FIXME: we really should have a dccps_ack_pending or use icsk */
+               } else if (inet_csk_ack_scheduled(sk) ||
+                          (dp->dccps_options.dccpo_send_ack_vector &&
+                           ap->dccpap_buf_ackno != DCCP_MAX_SEQNO + 1 &&
+                           ap->dccpap_ack_seqno == DCCP_MAX_SEQNO + 1))
+                       dcb->dccpd_type = DCCP_PKT_DATAACK;
+               else
+                       dcb->dccpd_type = DCCP_PKT_DATA;
+               dccp_transmit_skb(sk, skb);
+               ccid_hc_tx_packet_sent(dp->dccps_hc_tx_ccid, sk, 0, len);
+       } else {
+               /*
+                * The label lives inside this branch so that the error paths
+                * above share the kfree_skb() with a failed user copy.
+                */
+out_discard:
+               kfree_skb(skb);
+       }
+out_release:
+       release_sock(sk);
+       /* rc is 0 only on the success path, in which case len is returned. */
+       return rc ? : len;
+out_err:
+       rc = sk_stream_error(sk, flags, rc);
+       goto out_release;
+out_interrupted:
+       rc = sock_intr_errno(timeo);
+       goto out_discard;
+}
+
+EXPORT_SYMBOL(dccp_sendmsg);
+
+/*
+ * dccp_recvmsg - copy queued DCCP payload to user space
+ * @iocb:     not used by this function
+ * @sk:       receiving socket
+ * @msg:      destination; msg_iov is filled
+ * @len:      maximum number of bytes to copy
+ * @nonblock: passed to sock_rcvtimeo() to pick the receive timeout
+ * @flags:    MSG_* flags (MSG_PEEK, MSG_TRUNC, MSG_WAITALL, MSG_OOB are tested)
+ * @addr_len: not used by this function
+ *
+ * Returns the number of bytes copied, or a negative error.  A listening
+ * socket gets -ENOTCONN.
+ *
+ * NOTE(review): the MSG_OOB path jumps to recv_urg and from there to out,
+ * so it returns the -ENOTCONN that was stored in err for the listen check.
+ */
+int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+                size_t len, int nonblock, int flags, int *addr_len)
+{
+       const struct dccp_hdr *dh;
+       int copied = 0;
+       unsigned long used;
+       int err;
+       int target;             /* Read at least this many bytes */
+       long timeo;
+
+       lock_sock(sk);
+
+       err = -ENOTCONN;
+       if (sk->sk_state == DCCP_LISTEN)
+               goto out;
+
+       timeo = sock_rcvtimeo(sk, nonblock);
+
+       /* Urgent data needs to be handled specially. */
+       if (flags & MSG_OOB)
+               goto recv_urg;
+
+       /* FIXME */
+#if 0
+       seq = &tp->copied_seq;
+       if (flags & MSG_PEEK) {
+               peek_seq = tp->copied_seq;
+               seq = &peek_seq;
+       }
+#endif
+
+       target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
+
+       do {
+               struct sk_buff *skb;
+               u32 offset;
+
+       /* FIXME */
+#if 0
+               /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */
+               if (tp->urg_data && tp->urg_seq == *seq) {
+                       if (copied)
+                               break;
+                       if (signal_pending(current)) {
+                               copied = timeo ? sock_intr_errno(timeo) : -EAGAIN;
+                               break;
+                       }
+               }
+#endif
+
+               /* Next get a buffer. */
+
+               /*
+                * Walk the receive queue from its head: Data/DataAck packets
+                * are copied out, Reset/Close packets end the read, anything
+                * else is skipped.
+                */
+               skb = skb_peek(&sk->sk_receive_queue);
+               do {
+                       if (!skb)
+                               break;
+
+                       offset = 0;
+                       dh = dccp_hdr(skb);
+
+                       if (dh->dccph_type == DCCP_PKT_DATA ||
+                           dh->dccph_type == DCCP_PKT_DATAACK)
+                               goto found_ok_skb;
+
+                       if (dh->dccph_type == DCCP_PKT_RESET ||
+                           dh->dccph_type == DCCP_PKT_CLOSE) {
+                               dccp_pr_debug("found fin ok!\n");
+                               goto found_fin_ok;
+                       }
+                       dccp_pr_debug("packet_type=%s\n", dccp_packet_name(dh->dccph_type));
+                       BUG_TRAP(flags & MSG_PEEK);
+                       skb = skb->next;
+               } while (skb != (struct sk_buff *)&sk->sk_receive_queue);
+
+               /* Well, if we have backlog, try to process it now yet. */
+               if (copied >= target && !sk->sk_backlog.tail)
+                       break;
+
+               /*
+                * Nothing usable is queued.  Decide whether to stop: with
+                * data already copied any terminating condition just ends the
+                * loop; without data the condition becomes the return value.
+                */
+               if (copied) {
+                       if (sk->sk_err ||
+                           sk->sk_state == DCCP_CLOSED ||
+                           (sk->sk_shutdown & RCV_SHUTDOWN) ||
+                           !timeo ||
+                           signal_pending(current) ||
+                           (flags & MSG_PEEK))
+                               break;
+               } else {
+                       if (sock_flag(sk, SOCK_DONE))
+                               break;
+
+                       if (sk->sk_err) {
+                               copied = sock_error(sk);
+                               break;
+                       }
+
+                       if (sk->sk_shutdown & RCV_SHUTDOWN)
+                               break;
+
+                       if (sk->sk_state == DCCP_CLOSED) {
+                               if (!sock_flag(sk, SOCK_DONE)) {
+                                       /* This occurs when user tries to read
+                                        * from never connected socket.
+                                        */
+                                       copied = -ENOTCONN;
+                                       break;
+                               }
+                               break;
+                       }
+
+                       if (!timeo) {
+                               copied = -EAGAIN;
+                               break;
+                       }
+
+                       if (signal_pending(current)) {
+                               copied = sock_intr_errno(timeo);
+                               break;
+                       }
+               }
+
+               /* FIXME: cleanup_rbuf(sk, copied); */
+
+               if (copied >= target) {
+                       /* Do not sleep, just process backlog. */
+                       release_sock(sk);
+                       lock_sock(sk);
+               } else
+                       sk_wait_data(sk, &timeo);
+
+               continue;
+
+       found_ok_skb:
+               /* Ok so how much can we use? */
+               used = skb->len - offset;
+               if (len < used)
+                       used = len;
+
+               /* With MSG_TRUNC the bytes are counted but not copied. */
+               if (!(flags & MSG_TRUNC)) {
+                       err = skb_copy_datagram_iovec(skb, offset,
+                                                     msg->msg_iov, used);
+                       if (err) {
+                               /* Exception. Bailout! */
+                               if (!copied)
+                                       copied = -EFAULT;
+                               break;
+                       }
+               }
+
+               copied += used;
+               len -= used;
+
+               /* FIXME: tcp_rcv_space_adjust(sk); */
+
+//skip_copy:
+               /* A partially read skb stays on the queue. */
+               if (used + offset < skb->len)
+                       continue;
+
+               /*
+                * NOTE(review): with MSG_PEEK the skb is left at the head of
+                * the queue, so the next pass of the outer loop appears to
+                * find and copy the same packet again - confirm.
+                */
+               if (!(flags & MSG_PEEK))
+                       sk_eat_skb(sk, skb);
+               continue;
+       found_fin_ok:
+               if (!(flags & MSG_PEEK))
+                       sk_eat_skb(sk, skb);
+               break;
+               
+       } while (len > 0);
+
+       /* According to UNIX98, msg_name/msg_namelen are ignored
+        * on connected socket. I was just happy when found this 8) --ANK
+        */
+
+       /* Clean up data we have read: This will do ACK frames. */
+       /* FIXME: cleanup_rbuf(sk, copied); */
+
+       release_sock(sk);
+       return copied;
+
+out:
+       release_sock(sk);
+       return err;
+
+recv_urg:
+       /* FIXME: err = tcp_recv_urg(sk, timeo, msg, len, flags, addr_len); */
+       goto out;
+}
+
+/*
+ * inet_dccp_listen - listen() handler for DCCP sockets
+ * @sock:    socket being put into the listening state
+ * @backlog: new value for sk_max_ack_backlog
+ *
+ * Returns 0 on success, -EINVAL when the socket is not an unconnected
+ * SOCK_DCCP socket in the CLOSED or LISTEN state, or the error returned by
+ * dccp_listen_start().
+ */
+static int inet_dccp_listen(struct socket *sock, int backlog)
+{
+       struct sock *sk = sock->sk;
+       unsigned char state;
+       int err = -EINVAL;
+
+       lock_sock(sk);
+
+       if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
+               goto unlock;
+
+       state = sk->sk_state;
+       if (((1 << state) & (DCCPF_CLOSED | DCCPF_LISTEN)) == 0)
+               goto unlock;
+
+       /*
+        * A socket that is already listening only gets its backlog
+        * adjusted; anything else has to be started first.
+        *
+        * FIXME: here it probably should be sk->sk_prot->listen_start
+        * see tcp_listen_start
+        */
+       if (state != DCCP_LISTEN) {
+               err = dccp_listen_start(sk);
+               if (err != 0)
+                       goto unlock;
+       }
+
+       sk->sk_max_ack_backlog = backlog;
+       err = 0;
+
+unlock:
+       release_sock(sk);
+       return err;
+}
+
+/*
+ * Close-time state transitions, indexed by the current sk_state.  Each
+ * entry is the state to move to, optionally OR-ed with DCCP_ACTION_FIN;
+ * dccp_close_state() separates the two with DCCP_STATE_MASK.
+ */
+static const unsigned char dccp_new_state[] = {
+       /* current state:        new state:      action:        */
+       [0]                     = DCCP_CLOSED,
+       [DCCP_OPEN]             = DCCP_CLOSING | DCCP_ACTION_FIN,
+       [DCCP_REQUESTING]       = DCCP_CLOSED,
+       [DCCP_PARTOPEN] = DCCP_CLOSING | DCCP_ACTION_FIN,
+       [DCCP_LISTEN]           = DCCP_CLOSED,
+       [DCCP_RESPOND]  = DCCP_CLOSED,
+       [DCCP_CLOSING]  = DCCP_CLOSED,
+       [DCCP_TIME_WAIT]        = DCCP_CLOSED,
+       [DCCP_CLOSED]   = DCCP_CLOSED,
+};
+
+/*
+ * Apply the close-time transition from dccp_new_state[] to the socket.
+ * Returns non-zero (DCCP_ACTION_FIN) when the table entry carries that
+ * action flag, 0 otherwise.
+ */
+static int dccp_close_state(struct sock *sk)
+{
+       const int entry = dccp_new_state[sk->sk_state];
+       const int new_state = entry & DCCP_STATE_MASK;
+
+       if (sk->sk_state != new_state)
+               dccp_set_state(sk, new_state);
+
+       return entry & DCCP_ACTION_FIN;
+}
+
+/*
+ * dccp_close - close a DCCP socket
+ * @sk:      socket being closed
+ * @timeout: passed to sk_stream_wait_close()
+ *
+ * Shuts both directions down, drops unread data, then either disconnects
+ * (zero linger) or sends a Close/CloseReq when dccp_close_state() asks for
+ * it.  Finally the socket is orphaned and destroyed.
+ */
+void dccp_close(struct sock *sk, long timeout)
+{
+       struct sk_buff *skb;
+
+       lock_sock(sk);
+
+       sk->sk_shutdown = SHUTDOWN_MASK;
+
+       if (sk->sk_state == DCCP_LISTEN) {
+               dccp_set_state(sk, DCCP_CLOSED);
+
+               /* Special case. */
+               inet_csk_listen_stop(sk);
+
+               goto adjudge_to_death;
+       }
+
+       /*
+        * We need to flush the recv. buffs.  We do this only on the
+        * descriptor close, not protocol-sourced closes, because the
+        * reader process may not have drained the data yet!
+        */
+       /* FIXME: check for unread data */
+       while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
+               __kfree_skb(skb);
+       }
+
+       if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
+               /* Check zero linger _after_ checking for unread data. */
+               sk->sk_prot->disconnect(sk, 0);
+       } else if (dccp_close_state(sk)) {
+               dccp_send_close(sk);
+       }
+
+       sk_stream_wait_close(sk, timeout);
+
+adjudge_to_death:
+       release_sock(sk);
+       /*
+        * Now socket is owned by kernel and we acquire BH lock
+        * to finish close. No need to check for user refs.
+        */
+       local_bh_disable();
+       bh_lock_sock(sk);
+       BUG_TRAP(!sock_owned_by_user(sk));
+
+       /* Hold a reference across the teardown; dropped by sock_put() below. */
+       sock_hold(sk);
+       sock_orphan(sk);
+                                               
+       /*
+        * The state is forced to DCCP_CLOSED here, so the check below is
+        * always true at this point and the socket is always destroyed.
+        */
+       if (sk->sk_state != DCCP_CLOSED)
+               dccp_set_state(sk, DCCP_CLOSED);
+
+       atomic_inc(&dccp_orphan_count);
+       if (sk->sk_state == DCCP_CLOSED)
+               inet_csk_destroy_sock(sk);
+
+       /* Otherwise, socket is reprieved until protocol close. */
+
+       bh_unlock_sock(sk);
+       local_bh_enable();
+       sock_put(sk);
+}
+
+/*
+ * dccp_shutdown - protocol shutdown hook
+ *
+ * Not implemented: the call is only logged through dccp_pr_debug(); @how
+ * is ignored and the socket is left untouched.
+ */
+void dccp_shutdown(struct sock *sk, int how)
+{
+       dccp_pr_debug("entry\n");
+}
+
+/*
+ * Socket-layer operations for PF_INET DCCP sockets.  Only listen is
+ * DCCP-specific here; the rest are the generic inet_, sock_common_ and
+ * sock_no_ helpers.  poll, socketpair, mmap and sendpage use the sock_no_
+ * stubs.
+ */
+struct proto_ops inet_dccp_ops = {
+       .family         = PF_INET,
+       .owner          = THIS_MODULE,
+       .release        = inet_release,
+       .bind           = inet_bind,
+       .connect        = inet_stream_connect,
+       .socketpair     = sock_no_socketpair,
+       .accept         = inet_accept,
+       .getname        = inet_getname,
+       .poll           = sock_no_poll,
+       .ioctl          = inet_ioctl,
+       .listen         = inet_dccp_listen, /* FIXME: work on inet_listen to rename it to sock_common_listen */
+       .shutdown       = inet_shutdown,
+       .setsockopt     = sock_common_setsockopt,
+       .getsockopt     = sock_common_getsockopt,
+       .sendmsg        = inet_sendmsg,
+       .recvmsg        = sock_common_recvmsg,
+       .mmap           = sock_no_mmap,
+       .sendpage       = sock_no_sendpage,
+};
+
+extern struct net_proto_family inet_family_ops;
+
+/*
+ * Ties (SOCK_DCCP, IPPROTO_DCCP) to dccp_v4_prot and inet_dccp_ops.
+ * Registered with inet_register_protosw() in dccp_init().
+ */
+static struct inet_protosw dccp_v4_protosw = {
+       .type           = SOCK_DCCP,
+       .protocol       = IPPROTO_DCCP,
+       .prot           = &dccp_v4_prot,
+       .ops            = &inet_dccp_ops,
+       .capability     = -1,
+       .no_check       = 0,
+       .flags          = 0,
+};
+
+/*
+ * This is the global socket data structure used for responding to
+ * the Out-of-the-blue (OOTB) packets. A control sock will be created
+ * for this socket at the initialization time.
+ */
+struct socket *dccp_ctl_socket;
+
+static char dccp_ctl_socket_err_msg[] __initdata =
+       KERN_ERR "DCCP: Failed to create the control socket.\n";
+
+/*
+ * Create the kernel-internal control socket (dccp_ctl_socket).
+ * Returns the sock_create_kern() result: negative on failure, in which
+ * case an error message is printed.
+ */
+static int __init dccp_ctl_sock_init(void)
+{
+       struct sock *ctl_sk;
+       int rc;
+
+       rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
+                             &dccp_ctl_socket);
+       if (rc < 0) {
+               printk(dccp_ctl_socket_err_msg);
+               return rc;
+       }
+
+       ctl_sk = dccp_ctl_socket->sk;
+       ctl_sk->sk_allocation = GFP_ATOMIC;
+       inet_sk(ctl_sk)->uc_ttl = -1;
+
+       /*
+        * Unhash it so that IP input processing does not even see it, we
+        * do not wish this socket to see incoming packets.
+        */
+       ctl_sk->sk_prot->unhash(ctl_sk);
+
+       return rc;
+}
+
+/* Release the control socket, if one was created. */
+static void __exit dccp_ctl_sock_exit(void)
+{
+       if (dccp_ctl_socket == NULL)
+               return;
+
+       sock_release(dccp_ctl_socket);
+}
+
+/*
+ * Allocate both per-cpu halves of dccp_statistics.
+ * Returns 0 on success or -ENOMEM, in which case nothing stays allocated.
+ */
+static int __init init_dccp_v4_mibs(void)
+{
+       dccp_statistics[0] = alloc_percpu(struct dccp_mib);
+       if (dccp_statistics[0] == NULL)
+               return -ENOMEM;
+
+       dccp_statistics[1] = alloc_percpu(struct dccp_mib);
+       if (dccp_statistics[1] == NULL) {
+               /* Undo the first allocation so the caller sees all-or-nothing. */
+               free_percpu(dccp_statistics[0]);
+               dccp_statistics[0] = NULL;
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+/*
+ * When non-zero, replaces the memory-size based goal that dccp_init() uses
+ * to size the established hash table.  Read-only in sysfs (mode 0444).
+ */
+static int thash_entries;
+module_param(thash_entries, int, 0444);
+MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
+
+/* Debug message switch, exposed as a read-only (0444) module parameter. */
+int dccp_debug;
+module_param(dccp_debug, int, 0444);
+MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
+
+/*
+ * dccp_init - module initialisation
+ *
+ * Registers the protocol, creates the bind-bucket slab cache, allocates
+ * and initialises the established and bind hash tables, allocates the
+ * MIBs, hooks DCCP into the IPv4 stack and creates the control socket.
+ *
+ * Returns 0 on success or a negative errno.  On failure everything that
+ * was set up so far is undone in reverse order.
+ */
+static int __init dccp_init(void)
+{
+       unsigned long goal;
+       int ehash_order, bhash_order, i;
+       int rc = proto_register(&dccp_v4_prot, 1);
+
+       if (rc)
+               goto out;
+
+       /*
+        * proto_register() left rc at 0.  Every allocation failure below
+        * must report an error, otherwise the unwind path would return
+        * "success" for a module that has just torn itself down.
+        */
+       rc = -ENOMEM;
+       dccp_hashinfo.bind_bucket_cachep = kmem_cache_create("dccp_bind_bucket",
+                                              sizeof(struct inet_bind_bucket),
+                                              0, SLAB_HWCACHE_ALIGN,
+                                              NULL, NULL);
+       if (!dccp_hashinfo.bind_bucket_cachep)
+               goto out_proto_unregister;
+
+       /*
+        * Size and allocate the main established and bind bucket
+        * hash tables.
+        *
+        * The methodology is similar to that of the buffer cache.
+        */
+       if (num_physpages >= (128 * 1024))
+               goal = num_physpages >> (21 - PAGE_SHIFT);
+       else
+               goal = num_physpages >> (23 - PAGE_SHIFT);
+
+       /* An explicit thash_entries overrides the memory-based goal. */
+       if (thash_entries)
+               goal = (thash_entries * sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
+       for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
+               ;
+       /*
+        * Try progressively smaller orders until an allocation succeeds.
+        * ehash_size is half the number of buckets that fit, rounded down
+        * to a power of two.
+        */
+       do {
+               dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
+                                       sizeof(struct inet_ehash_bucket);
+               dccp_hashinfo.ehash_size >>= 1;
+               while (dccp_hashinfo.ehash_size & (dccp_hashinfo.ehash_size - 1))
+                       dccp_hashinfo.ehash_size--;
+               dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
+                       __get_free_pages(GFP_ATOMIC, ehash_order);
+       } while (!dccp_hashinfo.ehash && --ehash_order > 0);
+
+       if (!dccp_hashinfo.ehash) {
+               printk(KERN_CRIT "Failed to allocate DCCP "
+                                "established hash table\n");
+               goto out_free_bind_bucket_cachep;
+       }
+
+       /* Twice ehash_size buckets are initialised. */
+       for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
+               rwlock_init(&dccp_hashinfo.ehash[i].lock);
+               INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
+       }
+
+       bhash_order = ehash_order;
+
+       /* Same shrinking strategy; sizes above 64K buckets are skipped. */
+       do {
+               dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
+                                       sizeof(struct inet_bind_hashbucket);
+               if ((dccp_hashinfo.bhash_size > (64 * 1024)) && bhash_order > 0)
+                       continue;
+               dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
+                       __get_free_pages(GFP_ATOMIC, bhash_order);
+       } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
+
+       if (!dccp_hashinfo.bhash) {
+               printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
+               goto out_free_dccp_ehash;
+       }
+
+       for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
+               spin_lock_init(&dccp_hashinfo.bhash[i].lock);
+               INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
+       }
+
+       /* Propagate the MIB allocation error instead of dropping it. */
+       rc = init_dccp_v4_mibs();
+       if (rc)
+               goto out_free_dccp_bhash;
+
+       rc = -EAGAIN;
+       if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
+               goto out_free_dccp_v4_mibs;
+
+       inet_register_protosw(&dccp_v4_protosw);
+
+       rc = dccp_ctl_sock_init();
+       if (rc)
+               goto out_unregister_protosw;
+out:
+       return rc;
+out_unregister_protosw:
+       inet_unregister_protosw(&dccp_v4_protosw);
+       inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
+out_free_dccp_v4_mibs:
+       free_percpu(dccp_statistics[0]);
+       free_percpu(dccp_statistics[1]);
+       dccp_statistics[0] = dccp_statistics[1] = NULL;
+out_free_dccp_bhash:
+       free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
+       dccp_hashinfo.bhash = NULL;
+out_free_dccp_ehash:
+       free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
+       dccp_hashinfo.ehash = NULL;
+out_free_bind_bucket_cachep:
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+       dccp_hashinfo.bind_bucket_cachep = NULL;
+out_proto_unregister:
+       proto_unregister(&dccp_v4_prot);
+       goto out;
+}
+
+static const char dccp_del_proto_err_msg[] __exitdata =
+       KERN_ERR "can't remove dccp net_protocol\n";
+
+/*
+ * dccp_fini - module teardown
+ *
+ * Releases the control socket, unhooks DCCP from the IPv4 stack, frees the
+ * per-cpu MIBs and unregisters the protocol and its slab cache.
+ *
+ * NOTE(review): the ehash/bhash pages allocated in dccp_init() are still
+ * not freed here; the allocation orders are local to dccp_init() and
+ * would have to be kept (or recomputed) to do so.
+ */
+static void __exit dccp_fini(void)
+{
+       /*
+        * Free the control endpoint.  This is the only release: calling
+        * sock_release() on dccp_ctl_socket a second time would operate on
+        * an already freed socket.
+        */
+       dccp_ctl_sock_exit();
+
+       inet_unregister_protosw(&dccp_v4_protosw);
+
+       if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
+               printk(dccp_del_proto_err_msg);
+
+       /* Give back the per-cpu counters allocated by init_dccp_v4_mibs(). */
+       free_percpu(dccp_statistics[0]);
+       free_percpu(dccp_statistics[1]);
+       dccp_statistics[0] = dccp_statistics[1] = NULL;
+
+       proto_unregister(&dccp_v4_prot);
+
+       kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
+}
+
+module_init(dccp_init);
+module_exit(dccp_fini);
+
+/* __stringify doesn't like enums, so use the SOCK_DCCP value (6) directly */
+MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-6");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
+MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
new file mode 100644 (file)
index 0000000..8c396ee
--- /dev/null
@@ -0,0 +1,249 @@
+/*
+ *  net/dccp/timer.c
+ * 
+ *  An implementation of the DCCP protocol
+ *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
+ *
+ *     This program is free software; you can redistribute it and/or
+ *     modify it under the terms of the GNU General Public License
+ *     as published by the Free Software Foundation; either version
+ *     2 of the License, or (at your option) any later version.
+ */
+
+#include <linux/config.h>
+#include <linux/dccp.h>
+#include <linux/skbuff.h>
+
+#include "dccp.h"
+
+/*
+ * Timer handlers, defined further down in this file; declared here so that
+ * dccp_init_xmit_timers() can reference them.
+ */
+static void dccp_write_timer(unsigned long data);
+static void dccp_keepalive_timer(unsigned long data);
+static void dccp_delack_timer(unsigned long data);
+
+/*
+ * Hook the DCCP write, delayed-ACK and keepalive timer handlers up to @sk
+ * through the generic inet_csk_init_xmit_timers() helper.
+ */
+void dccp_init_xmit_timers(struct sock *sk)
+{
+       inet_csk_init_xmit_timers(sk,
+                                 dccp_write_timer,
+                                 dccp_delack_timer,
+                                 dccp_keepalive_timer);
+}
+
+/*
+ * Abort @sk after too many write timeouts.
+ *
+ * Records the error on the socket and reports it, sends a RESET with code
+ * "aborted", finishes the socket with dccp_done() and bumps the
+ * abort-on-timeout MIB counter.
+ */
+static void dccp_write_err(struct sock *sk)
+{
+       /* A pending soft error takes precedence over the generic timeout. */
+       if (sk->sk_err_soft)
+               sk->sk_err = sk->sk_err_soft;
+       else
+               sk->sk_err = ETIMEDOUT;
+
+       sk->sk_error_report(sk);
+
+       dccp_v4_send_reset(sk, DCCP_RESET_CODE_ABORTED);
+       dccp_done(sk);
+       DCCP_INC_STATS_BH(DCCP_MIB_ABORTONTIMEOUT);
+}
+
+/*
+ * A write timeout has occurred on @sk; deal with the after effects.
+ *
+ * Returns 1 when the retransmit budget is exhausted and the socket has been
+ * aborted through dccp_write_err(), 0 when the caller may keep retrying.
+ */
+static int dccp_write_timeout(struct sock *sk)
+{
+       const struct inet_connection_sock *icsk = inet_csk(sk);
+       const int handshaking = sk->sk_state == DCCP_REQUESTING ||
+                               sk->sk_state == DCCP_PARTOPEN;
+       int max_retries;
+
+       if (handshaking) {
+               /* Already retransmitted at least once: distrust the route. */
+               if (icsk->icsk_retransmits != 0)
+                       dst_negative_advice(&sk->sk_dst_cache);
+
+               /* FIXME: 3 stands in for sysctl_tcp_syn_retries */
+               max_retries = icsk->icsk_syn_retries;
+               if (max_retries == 0)
+                       max_retries = 3;
+       } else {
+               /*
+                * FIXME: 5 stands in for sysctl_tcp_retries1.
+                *
+                * NOTE: draft-ietf-tcpimpl-pmtud-01.txt requires PMTU black
+                * hole detection at this point.  It is deliberately not
+                * implemented.  The draft itself notes that ICMP black holes
+                * are often caused by over-zealous security administrators
+                * who block all ICMP messages, and that it would be far
+                * nicer to have all of the black holes fixed rather than
+                * fixing all of the TCP implementations.
+                */
+               if (icsk->icsk_retransmits >= 5)
+                       dst_negative_advice(&sk->sk_dst_cache);
+
+               /*
+                * FIXME: 15 stands in for sysctl_tcp_retries2; see
+                * tcp_write_timeout and tcp_out_of_resources.
+                */
+               max_retries = 15;
+       }
+
+       if (icsk->icsk_retransmits < max_retries)
+               return 0;
+
+       /* Has it gone just too far? */
+       dccp_write_err(sk);
+       return 1;
+}
+
+/*
+ * Delayed-ACK timer handler.  Same as tcp_delack_timer, minus the prequeue
+ * and mem_reclaim handling.
+ *
+ * @data is the socket pointer cast to unsigned long.  The socket is
+ * bh-locked for the duration and one reference is dropped with sock_put()
+ * on every exit path.
+ */
+static void dccp_delack_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       bh_lock_sock(sk);
+
+       if (sock_owned_by_user(sk)) {
+               /* Socket is owned by a user: mark blocked and retry later. */
+               icsk->icsk_ack.blocked = 1;
+               NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKLOCKED);
+               sk_reset_timer(sk, &icsk->icsk_delack_timer,
+                              jiffies + TCP_DELACK_MIN);
+               goto unlock;
+       }
+
+       /* Closed socket, or no delayed-ACK timer pending: nothing to do. */
+       if (sk->sk_state == DCCP_CLOSED)
+               goto unlock;
+       if (!(icsk->icsk_ack.pending & ICSK_ACK_TIMER))
+               goto unlock;
+
+       /* Deadline not reached yet: re-arm for the recorded timeout. */
+       if (time_after(icsk->icsk_ack.timeout, jiffies)) {
+               sk_reset_timer(sk, &icsk->icsk_delack_timer,
+                              icsk->icsk_ack.timeout);
+               goto unlock;
+       }
+
+       icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER;
+
+       if (!inet_csk_ack_scheduled(sk))
+               goto unlock;
+
+       if (icsk->icsk_ack.pingpong) {
+               /*
+                * Delayed ACK missed: leave pingpong mode and
+                * deflate ATO.
+                */
+               icsk->icsk_ack.pingpong = 0;
+               icsk->icsk_ack.ato = TCP_ATO_MIN;
+       } else {
+               /* Delayed ACK missed: inflate ATO, capped at the RTO. */
+               icsk->icsk_ack.ato = min(icsk->icsk_ack.ato << 1,
+                                        icsk->icsk_rto);
+       }
+
+       dccp_send_ack(sk);
+       NET_INC_STATS_BH(LINUX_MIB_DELAYEDACKS);
+unlock:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ *     The DCCP retransmit timer.
+ *
+ *     Retransmits sk->sk_send_head, doubles the RTO (capped at
+ *     DCCP_RTO_MAX) and re-arms the retransmit timer, unless
+ *     dccp_write_timeout() reports that the socket has been aborted.
+ */
+static void dccp_retransmit_timer(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+
+       /*
+        * sk->sk_send_head has to have one skb with
+        * DCCP_SKB_CB(skb)->dccpd_type set to one of the retransmittable DCCP
+        * packet types (REQUEST, RESPONSE, the ACK in the 3-way handshake
+        * (PARTOPEN timer), etc).
+        */
+       BUG_TRAP(sk->sk_send_head != NULL);
+
+       /*
+        * More than 4MSL (8 minutes) has passed, a RESET(aborted) was
+        * sent, no need to retransmit, this sock is dead.
+        */
+       if (dccp_write_timeout(sk))
+               return;
+
+       /*
+        * Count packets that hit a timeout, not every retransmission of a
+        * clone of the same original packet.
+        */
+       if (icsk->icsk_retransmits == 0)
+               DCCP_INC_STATS_BH(DCCP_MIB_TIMEOUTS);
+
+       if (dccp_retransmit_skb(sk, sk->sk_send_head) < 0) {
+               /*
+                * Retransmission failed because of local congestion,
+                * do not backoff.
+                */
+               if (icsk->icsk_retransmits == 0)
+                       icsk->icsk_retransmits = 1;
+               inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+                                         min(icsk->icsk_rto,
+                                             TCP_RESOURCE_PROBE_INTERVAL),
+                                         TCP_RTO_MAX);
+               return;
+       }
+
+       icsk->icsk_backoff++;
+       icsk->icsk_retransmits++;
+
+       /*
+        * NOTE(review): the RTO is capped with DCCP_RTO_MAX but the timer is
+        * armed with TCP_RTO_MAX as its ceiling -- confirm both are meant.
+        */
+       icsk->icsk_rto = min(icsk->icsk_rto << 1, DCCP_RTO_MAX);
+       inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto,
+                                 TCP_RTO_MAX);
+
+       /* FIXME: 3 stands in for sysctl_dccp_retries1 */
+       if (icsk->icsk_retransmits > 3)
+               __sk_dst_reset(sk);
+}
+
+/*
+ * Write (retransmit) timer handler.
+ *
+ * @data is the socket pointer cast to unsigned long.  Dispatches the pending
+ * icsk event; only ICSK_TIME_RETRANS is handled.  The socket is bh-locked
+ * for the duration and one reference is dropped with sock_put() on every
+ * exit path.
+ */
+static void dccp_write_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       int pending;
+
+       bh_lock_sock(sk);
+
+       if (sock_owned_by_user(sk)) {
+               /* Try again later */
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+                              jiffies + (HZ / 20));
+               goto unlock;
+       }
+
+       /* Closed socket or no event pending: nothing to do. */
+       if (sk->sk_state == DCCP_CLOSED)
+               goto unlock;
+       if (!icsk->icsk_pending)
+               goto unlock;
+
+       /* Deadline not reached yet: re-arm for the recorded timeout. */
+       if (time_after(icsk->icsk_timeout, jiffies)) {
+               sk_reset_timer(sk, &icsk->icsk_retransmit_timer,
+                              icsk->icsk_timeout);
+               goto unlock;
+       }
+
+       /* Consume the pending event before acting on it. */
+       pending = icsk->icsk_pending;
+       icsk->icsk_pending = 0;
+
+       if (pending == ICSK_TIME_RETRANS)
+               dccp_retransmit_timer(sk);
+unlock:
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}
+
+/*
+ *     Timer for listening sockets: prunes the accept queue of @sk via
+ *     reqsk_queue_prune().
+ */
+static void dccp_response_timer(struct sock *sk)
+{
+       struct inet_connection_sock *icsk = inet_csk(sk);
+       /* FIXME: the fallback should be sysctl_tcp_synack_retries */
+       int max_retries = TCP_SYNACK_RETRIES;
+
+       /* A per-socket retry limit, when set, overrides the default. */
+       if (icsk->icsk_syn_retries)
+               max_retries = icsk->icsk_syn_retries;
+
+       reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
+                         DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
+}
+
+/*
+ * Keepalive timer handler.
+ *
+ * @data is the socket pointer cast to unsigned long.  The only work done
+ * here is running dccp_response_timer() for sockets in DCCP_LISTEN.  The
+ * socket is bh-locked for the duration and one reference is dropped with
+ * sock_put() before returning.
+ */
+static void dccp_keepalive_timer(unsigned long data)
+{
+       struct sock *sk = (struct sock *)data;
+
+       bh_lock_sock(sk);
+
+       /* Only process if socket is not in use; otherwise try again later. */
+       if (sock_owned_by_user(sk))
+               inet_csk_reset_keepalive_timer(sk, HZ / 20);
+       else if (sk->sk_state == DCCP_LISTEN)
+               dccp_response_timer(sk);
+
+       bh_unlock_sock(sk);
+       sock_put(sk);
+}