nv-tegra.nvidia Code Review - linux-3.10.git/blobdiff - net/ipv4/tcp.c
Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-3.10.git] / net / ipv4 / tcp.c
index e2b7b80550371dd836d3b32635c999b2a46f6fcc..804458712d881dee4065f2e4cbd7e7c99296d545 100644 (file)
  *     TCP_CLOSE               socket is finished
  */
 
-#include <linux/config.h>
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/fcntl.h>
 #include <linux/random.h>
 #include <linux/bootmem.h>
 #include <linux/cache.h>
+#include <linux/err.h>
 
 #include <net/icmp.h>
 #include <net/tcp.h>
 #include <net/xfrm.h>
 #include <net/ip.h>
-
+#include <net/netdma.h>
 
 #include <asm/uaccess.h>
 #include <asm/ioctls.h>
@@ -571,7 +571,7 @@ new_segment:
                skb->ip_summed = CHECKSUM_HW;
                tp->write_seq += copy;
                TCP_SKB_CB(skb)->end_seq += copy;
-               skb_shinfo(skb)->tso_segs = 0;
+               skb_shinfo(skb)->gso_segs = 0;
 
                if (!copied)
                        TCP_SKB_CB(skb)->flags &= ~TCPCB_FLAG_PSH;
@@ -622,14 +622,10 @@ ssize_t tcp_sendpage(struct socket *sock, struct page *page, int offset,
        ssize_t res;
        struct sock *sk = sock->sk;
 
-#define TCP_ZC_CSUM_FLAGS (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)
-
        if (!(sk->sk_route_caps & NETIF_F_SG) ||
-           !(sk->sk_route_caps & TCP_ZC_CSUM_FLAGS))
+           !(sk->sk_route_caps & NETIF_F_ALL_CSUM))
                return sock_no_sendpage(sock, page, offset, size, flags);
 
-#undef TCP_ZC_CSUM_FLAGS
-
        lock_sock(sk);
        TCP_CHECK_TIMER(sk);
        res = do_tcp_sendpages(sk, &page, offset, size, flags);
@@ -646,7 +642,7 @@ static inline int select_size(struct sock *sk, struct tcp_sock *tp)
        int tmp = tp->mss_cache;
 
        if (sk->sk_route_caps & NETIF_F_SG) {
-               if (sk->sk_route_caps & NETIF_F_TSO)
+               if (sk_can_gso(sk))
                        tmp = 0;
                else {
                        int pgbreak = SKB_MAX_HEAD(MAX_TCP_HEADER);
@@ -726,9 +722,7 @@ new_segment:
                                /*
                                 * Check whether we can use HW checksum.
                                 */
-                               if (sk->sk_route_caps &
-                                   (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM |
-                                    NETIF_F_HW_CSUM))
+                               if (sk->sk_route_caps & NETIF_F_ALL_CSUM)
                                        skb->ip_summed = CHECKSUM_HW;
 
                                skb_entail(sk, tp, skb);
@@ -824,7 +818,7 @@ new_segment:
 
                        tp->write_seq += copy;
                        TCP_SKB_CB(skb)->end_seq += copy;
-                       skb_shinfo(skb)->tso_segs = 0;
+                       skb_shinfo(skb)->gso_segs = 0;
 
                        from += copy;
                        copied += copy;
@@ -937,7 +931,7 @@ static int tcp_recv_urg(struct sock *sk, long timeo,
  * calculation of whether or not we must ACK for the sake of
  * a window update.
  */
-static void cleanup_rbuf(struct sock *sk, int copied)
+void tcp_cleanup_rbuf(struct sock *sk, int copied)
 {
        struct tcp_sock *tp = tcp_sk(sk);
        int time_to_ack = 0;
@@ -1072,11 +1066,11 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
                                break;
                }
                if (skb->h.th->fin) {
-                       sk_eat_skb(sk, skb);
+                       sk_eat_skb(sk, skb, 0);
                        ++seq;
                        break;
                }
-               sk_eat_skb(sk, skb);
+               sk_eat_skb(sk, skb, 0);
                if (!desc->count)
                        break;
        }
@@ -1086,7 +1080,7 @@ int tcp_read_sock(struct sock *sk, read_descriptor_t *desc,
 
        /* Clean up data we have read: This will do ACK frames. */
        if (copied)
-               cleanup_rbuf(sk, copied);
+               tcp_cleanup_rbuf(sk, copied);
        return copied;
 }
 
@@ -1110,6 +1104,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
        int target;             /* Read at least this many bytes */
        long timeo;
        struct task_struct *user_recv = NULL;
+       int copied_early = 0;
 
        lock_sock(sk);
 
@@ -1133,6 +1128,17 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
 
        target = sock_rcvlowat(sk, flags & MSG_WAITALL, len);
 
+#ifdef CONFIG_NET_DMA
+       tp->ucopy.dma_chan = NULL;
+       preempt_disable();
+       if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) &&
+           !sysctl_tcp_low_latency && __get_cpu_var(softnet_data.net_dma)) {
+               preempt_enable_no_resched();
+               tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len);
+       } else
+               preempt_enable_no_resched();
+#endif
+
        do {
                struct sk_buff *skb;
                u32 offset;
@@ -1220,7 +1226,7 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                        }
                }
 
-               cleanup_rbuf(sk, copied);
+               tcp_cleanup_rbuf(sk, copied);
 
                if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) {
                        /* Install new reader */
@@ -1274,6 +1280,10 @@ int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                } else
                        sk_wait_data(sk, &timeo);
 
+#ifdef CONFIG_NET_DMA
+               tp->ucopy.wakeup = 0;
+#endif
+
                if (user_recv) {
                        int chunk;
 
@@ -1329,13 +1339,39 @@ do_prequeue:
                }
 
                if (!(flags & MSG_TRUNC)) {
-                       err = skb_copy_datagram_iovec(skb, offset,
-                                                     msg->msg_iov, used);
-                       if (err) {
-                               /* Exception. Bailout! */
-                               if (!copied)
-                                       copied = -EFAULT;
-                               break;
+#ifdef CONFIG_NET_DMA
+                       if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
+                               tp->ucopy.dma_chan = get_softnet_dma();
+
+                       if (tp->ucopy.dma_chan) {
+                               tp->ucopy.dma_cookie = dma_skb_copy_datagram_iovec(
+                                       tp->ucopy.dma_chan, skb, offset,
+                                       msg->msg_iov, used,
+                                       tp->ucopy.pinned_list);
+
+                               if (tp->ucopy.dma_cookie < 0) {
+
+                                       printk(KERN_ALERT "dma_cookie < 0\n");
+
+                                       /* Exception. Bailout! */
+                                       if (!copied)
+                                               copied = -EFAULT;
+                                       break;
+                               }
+                               if ((offset + used) == skb->len)
+                                       copied_early = 1;
+
+                       } else
+#endif
+                       {
+                               err = skb_copy_datagram_iovec(skb, offset,
+                                               msg->msg_iov, used);
+                               if (err) {
+                                       /* Exception. Bailout! */
+                                       if (!copied)
+                                               copied = -EFAULT;
+                                       break;
+                               }
                        }
                }
 
@@ -1355,15 +1391,19 @@ skip_copy:
 
                if (skb->h.th->fin)
                        goto found_fin_ok;
-               if (!(flags & MSG_PEEK))
-                       sk_eat_skb(sk, skb);
+               if (!(flags & MSG_PEEK)) {
+                       sk_eat_skb(sk, skb, copied_early);
+                       copied_early = 0;
+               }
                continue;
 
        found_fin_ok:
                /* Process the FIN. */
                ++*seq;
-               if (!(flags & MSG_PEEK))
-                       sk_eat_skb(sk, skb);
+               if (!(flags & MSG_PEEK)) {
+                       sk_eat_skb(sk, skb, copied_early);
+                       copied_early = 0;
+               }
                break;
        } while (len > 0);
 
@@ -1386,12 +1426,42 @@ skip_copy:
                tp->ucopy.len = 0;
        }
 
+#ifdef CONFIG_NET_DMA
+       if (tp->ucopy.dma_chan) {
+               struct sk_buff *skb;
+               dma_cookie_t done, used;
+
+               dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
+
+               while (dma_async_memcpy_complete(tp->ucopy.dma_chan,
+                                                tp->ucopy.dma_cookie, &done,
+                                                &used) == DMA_IN_PROGRESS) {
+                       /* do partial cleanup of sk_async_wait_queue */
+                       while ((skb = skb_peek(&sk->sk_async_wait_queue)) &&
+                              (dma_async_is_complete(skb->dma_cookie, done,
+                                                     used) == DMA_SUCCESS)) {
+                               __skb_dequeue(&sk->sk_async_wait_queue);
+                               kfree_skb(skb);
+                       }
+               }
+
+               /* Safe to free early-copied skbs now */
+               __skb_queue_purge(&sk->sk_async_wait_queue);
+               dma_chan_put(tp->ucopy.dma_chan);
+               tp->ucopy.dma_chan = NULL;
+       }
+       if (tp->ucopy.pinned_list) {
+               dma_unpin_iovec_pages(tp->ucopy.pinned_list);
+               tp->ucopy.pinned_list = NULL;
+       }
+#endif
+
        /* According to UNIX98, msg_name/msg_namelen are ignored
         * on connected socket. I was just happy when found this 8) --ANK
         */
 
        /* Clean up data we have read: This will do ACK frames. */
-       cleanup_rbuf(sk, copied);
+       tcp_cleanup_rbuf(sk, copied);
 
        TCP_CHECK_TIMER(sk);
        release_sock(sk);
@@ -1658,6 +1728,9 @@ int tcp_disconnect(struct sock *sk, int flags)
        __skb_queue_purge(&sk->sk_receive_queue);
        sk_stream_writequeue_purge(sk);
        __skb_queue_purge(&tp->out_of_order_queue);
+#ifdef CONFIG_NET_DMA
+       __skb_queue_purge(&sk->sk_async_wait_queue);
+#endif
 
        inet->dport = 0;
 
@@ -1858,7 +1931,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
                            (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
                            inet_csk_ack_scheduled(sk)) {
                                icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-                               cleanup_rbuf(sk, 1);
+                               tcp_cleanup_rbuf(sk, 1);
                                if (!(val & 1))
                                        icsk->icsk_ack.pingpong = 1;
                        }
@@ -2071,6 +2144,78 @@ int compat_tcp_getsockopt(struct sock *sk, int level, int optname,
 EXPORT_SYMBOL(compat_tcp_getsockopt);
 #endif
 
+/*
+ * tcp_tso_segment - segment a TCP GSO packet in software.
+ * @skb:      packet to split; skb->h.th is read as its TCP header
+ * @features: feature flags handed to skb_gso_ok() and skb_segment()
+ *
+ * Returns the first skb of the list produced by skb_segment(), NULL when
+ * skb_gso_ok() accepts the packet for @features plus NETIF_F_GSO_ROBUST
+ * (in that case only gso_segs is recomputed), or an ERR_PTR() value on
+ * failure.  Note that the TCP header has already been pulled off @skb
+ * on every path past the header validation.
+ */
+struct sk_buff *tcp_tso_segment(struct sk_buff *skb, int features)
+{
+       struct sk_buff *segs = ERR_PTR(-EINVAL);
+       struct tcphdr *th;
+       unsigned thlen;
+       unsigned int seq;
+       unsigned int delta;
+       unsigned int oldlen;
+       unsigned int mss;
+
+       /* The fixed part of the TCP header must be readable first ... */
+       if (!pskb_may_pull(skb, sizeof(*th)))
+               goto out;
+
+       th = skb->h.th;
+       thlen = th->doff * 4;
+       if (thlen < sizeof(*th))
+               goto out;
+
+       /* ... and then the full header including options. */
+       if (!pskb_may_pull(skb, thlen))
+               goto out;
+
+       /* 16-bit complement of the original length, for checksum fix-ups. */
+       oldlen = (u16)~skb->len;
+       __skb_pull(skb, thlen);
+
+       /*
+        * Refuse packets that cannot be split into at least two segments:
+        *  - a zero gso_size would divide by zero in the gso_segs
+        *    computation below;
+        *  - a payload that fits in one segment yields a one-element
+        *    list, and the fix-up loop below dereferences skb->next
+        *    before testing it.
+        */
+       mss = skb_shinfo(skb)->gso_size;
+       if (unlikely(!mss || skb->len <= mss))
+               goto out;
+
+       if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) {
+               /* Packet is from an untrusted source, reset gso_segs. */
+               skb_shinfo(skb)->gso_segs = (skb->len + mss - 1) / mss;
+
+               segs = NULL;
+               goto out;
+       }
+
+       segs = skb_segment(skb, features);
+       if (IS_ERR(segs))
+               goto out;
+
+       /* Length adjustment for every full-sized segment: old out, new in. */
+       delta = htonl(oldlen + (thlen + mss));
+
+       skb = segs;
+       th = skb->h.th;
+       seq = ntohl(th->seq);
+
+       /*
+        * Fix up every segment except the last one.  The guard above
+        * ensures the list has at least two entries, so skb->next is
+        * non-NULL on the first pass.
+        */
+       do {
+               /* FIN and PSH are cleared on all but the final segment. */
+               th->fin = th->psh = 0;
+
+               th->check = ~csum_fold(th->check + delta);
+               if (skb->ip_summed != CHECKSUM_HW)
+                       th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+                                                          skb->csum));
+
+               seq += mss;
+               skb = skb->next;
+               th = skb->h.th;
+
+               /* Following segments get their own sequence number, no CWR. */
+               th->seq = htonl(seq);
+               th->cwr = 0;
+       } while (skb->next);
+
+       /* The last segment may be short: use its actual length instead. */
+       delta = htonl(oldlen + (skb->tail - skb->h.raw) + skb->data_len);
+       th->check = ~csum_fold(th->check + delta);
+       if (skb->ip_summed != CHECKSUM_HW)
+               th->check = csum_fold(csum_partial(skb->h.raw, thlen,
+                                                  skb->csum));
+
+out:
+       return segs;
+}
+EXPORT_SYMBOL(tcp_tso_segment);
+
 extern void __skb_cb_too_small_for_tcp(int, int);
 extern struct tcp_congestion_ops tcp_reno;