]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/core/datagram.c
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net
[linux-2.6.git] / net / core / datagram.c
index e5a05a046fef185790dbb136551f1307e537b50e..68bbf9f65cb08fac1ffc5a81cb6d69e76d631fae 100644 (file)
@@ -9,7 +9,7 @@
  *     identical recvmsg() code. So we share it here. The poll was
  *     shared before but buried in udp.c so I moved it.
  *
- *     Authors:        Alan Cox <alan@redhat.com>. (datagram_poll() from old
+ *     Authors:        Alan Cox <alan@lxorguk.ukuu.org.uk>. (datagram_poll() from old
  *                                                  udp.c code)
  *
  *     Fixes:
@@ -48,6 +48,7 @@
 #include <linux/poll.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
+#include <linux/slab.h>
 
 #include <net/protocol.h>
 #include <linux/skbuff.h>
@@ -55,6 +56,7 @@
 #include <net/checksum.h>
 #include <net/sock.h>
 #include <net/tcp_states.h>
+#include <trace/events/skb.h>
 
 /*
  *     Is a socket 'connection oriented' ?
@@ -64,15 +66,27 @@ static inline int connection_based(struct sock *sk)
        return sk->sk_type == SOCK_SEQPACKET || sk->sk_type == SOCK_STREAM;
 }
 
+static int receiver_wake_function(wait_queue_t *wait, unsigned mode, int sync,
+                                 void *key)
+{
+       unsigned long bits = (unsigned long)key;
+
+       /*
+        * Avoid a wakeup if event not interesting for us
+        */
+       if (bits && !(bits & (POLLIN | POLLERR)))
+               return 0;
+       return autoremove_wake_function(wait, mode, sync, key);
+}
 /*
  * Wait for a packet..
  */
 static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
 {
        int error;
-       DEFINE_WAIT(wait);
+       DEFINE_WAIT_FUNC(wait, receiver_wake_function);
 
-       prepare_to_wait_exclusive(sk->sk_sleep, &wait, TASK_INTERRUPTIBLE);
+       prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
 
        /* Socket errors? */
        error = sock_error(sk);
@@ -101,7 +115,7 @@ static int wait_for_packet(struct sock *sk, int *err, long *timeo_p)
        error = 0;
        *timeo_p = schedule_timeout(*timeo_p);
 out:
-       finish_wait(sk->sk_sleep, &wait);
+       finish_wait(sk_sleep(sk), &wait);
        return error;
 interrupted:
        error = sock_intr_errno(*timeo_p);
@@ -115,10 +129,10 @@ out_noerr:
 }
 
 /**
- *     skb_recv_datagram - Receive a datagram skbuff
+ *     __skb_recv_datagram - Receive a datagram skbuff
  *     @sk: socket
  *     @flags: MSG_ flags
- *     @noblock: blocking operation?
+ *     @peeked: returns non-zero if this packet has been seen before
  *     @err: error code returned
  *
  *     Get a datagram skbuff, understands the peeking, nonblocking wakeups
@@ -143,8 +157,8 @@ out_noerr:
  *     quite explicitly by POSIX 1003.1g, don't change them without having
  *     the standard around please.
  */
-struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
-                                 int noblock, int *err)
+struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
+                                   int *peeked, int *err)
 {
        struct sk_buff *skb;
        long timeo;
@@ -156,27 +170,28 @@ struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
        if (error)
                goto no_packet;
 
-       timeo = sock_rcvtimeo(sk, noblock);
+       timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
 
        do {
                /* Again only user level code calls this function, so nothing
                 * interrupt level will suddenly eat the receive_queue.
                 *
                 * Look at current nfs client by the way...
-                * However, this function was corrent in any case. 8)
+                * However, this function was correct in any case. 8)
                 */
-               if (flags & MSG_PEEK) {
-                       unsigned long cpu_flags;
-
-                       spin_lock_irqsave(&sk->sk_receive_queue.lock,
-                                         cpu_flags);
-                       skb = skb_peek(&sk->sk_receive_queue);
-                       if (skb)
+               unsigned long cpu_flags;
+
+               spin_lock_irqsave(&sk->sk_receive_queue.lock, cpu_flags);
+               skb = skb_peek(&sk->sk_receive_queue);
+               if (skb) {
+                       *peeked = skb->peeked;
+                       if (flags & MSG_PEEK) {
+                               skb->peeked = 1;
                                atomic_inc(&skb->users);
-                       spin_unlock_irqrestore(&sk->sk_receive_queue.lock,
-                                              cpu_flags);
-               } else
-                       skb = skb_dequeue(&sk->sk_receive_queue);
+                       } else
+                               __skb_unlink(skb, &sk->sk_receive_queue);
+               }
+               spin_unlock_irqrestore(&sk->sk_receive_queue.lock, cpu_flags);
 
                if (skb)
                        return skb;
@@ -194,11 +209,44 @@ no_packet:
        *err = error;
        return NULL;
 }
+EXPORT_SYMBOL(__skb_recv_datagram);
+
+struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags,
+                                 int noblock, int *err)
+{
+       int peeked;
+
+       return __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
+                                  &peeked, err);
+}
+EXPORT_SYMBOL(skb_recv_datagram);
 
 void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 {
-       kfree_skb(skb);
+       consume_skb(skb);
+       sk_mem_reclaim_partial(sk);
+}
+EXPORT_SYMBOL(skb_free_datagram);
+
+void skb_free_datagram_locked(struct sock *sk, struct sk_buff *skb)
+{
+       bool slow;
+
+       if (likely(atomic_read(&skb->users) == 1))
+               smp_rmb();
+       else if (likely(!atomic_dec_and_test(&skb->users)))
+               return;
+
+       slow = lock_sock_fast(sk);
+       skb_orphan(skb);
+       sk_mem_reclaim_partial(sk);
+       unlock_sock_fast(sk, slow);
+
+       /* skb is now orphaned, can be freed outside of locked section */
+       trace_kfree_skb(skb, skb_free_datagram_locked);
+       __kfree_skb(skb);
 }
+EXPORT_SYMBOL(skb_free_datagram_locked);
 
 /**
  *     skb_kill_datagram - Free a datagram skbuff forcibly
@@ -217,22 +265,31 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  *     This function currently only disables BH when acquiring the
  *     sk_receive_queue lock.  Therefore it must not be used in a
  *     context where that lock is acquired in an IRQ context.
+ *
+ *     It returns 0 if the packet was removed by us.
  */
 
-void skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
+int skb_kill_datagram(struct sock *sk, struct sk_buff *skb, unsigned int flags)
 {
+       int err = 0;
+
        if (flags & MSG_PEEK) {
+               err = -ENOENT;
                spin_lock_bh(&sk->sk_receive_queue.lock);
                if (skb == skb_peek(&sk->sk_receive_queue)) {
                        __skb_unlink(skb, &sk->sk_receive_queue);
                        atomic_dec(&skb->users);
+                       err = 0;
                }
                spin_unlock_bh(&sk->sk_receive_queue.lock);
        }
 
        kfree_skb(skb);
-}
+       atomic_inc(&sk->sk_drops);
+       sk_mem_reclaim_partial(sk);
 
+       return err;
+}
 EXPORT_SYMBOL(skb_kill_datagram);
 
 /**
@@ -249,6 +306,9 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 {
        int start = skb_headlen(skb);
        int i, copy = start - offset;
+       struct sk_buff *frag_iter;
+
+       trace_skb_copy_datagram_iovec(skb, len);
 
        /* Copy header. */
        if (copy > 0) {
@@ -264,15 +324,15 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
        /* Copy paged appendix. Hmm... why does this look so complicated? */
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-               BUG_TRAP(start <= offset + len);
+               WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
                        int err;
                        u8  *vaddr;
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-                       struct page *page = frag->page;
+                       struct page *page = skb_frag_page(frag);
 
                        if (copy > len)
                                copy = len;
@@ -289,28 +349,24 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
                start = end;
        }
 
-       if (skb_shinfo(skb)->frag_list) {
-               struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-               for (; list; list = list->next) {
-                       int end;
-
-                       BUG_TRAP(start <= offset + len);
-
-                       end = start + list->len;
-                       if ((copy = end - offset) > 0) {
-                               if (copy > len)
-                                       copy = len;
-                               if (skb_copy_datagram_iovec(list,
-                                                           offset - start,
-                                                           to, copy))
-                                       goto fault;
-                               if ((len -= copy) == 0)
-                                       return 0;
-                               offset += copy;
-                       }
-                       start = end;
+       skb_walk_frags(skb, frag_iter) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               end = start + frag_iter->len;
+               if ((copy = end - offset) > 0) {
+                       if (copy > len)
+                               copy = len;
+                       if (skb_copy_datagram_iovec(frag_iter,
+                                                   offset - start,
+                                                   to, copy))
+                               goto fault;
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
                }
+               start = end;
        }
        if (!len)
                return 0;
@@ -318,14 +374,198 @@ int skb_copy_datagram_iovec(const struct sk_buff *skb, int offset,
 fault:
        return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_datagram_iovec);
+
+/**
+ *     skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ *     @skb: buffer to copy
+ *     @offset: offset in the buffer to start copying from
+ *     @to: io vector to copy to
+ *     @to_offset: offset in the io vector to start copying to
+ *     @len: amount of data to copy from buffer to iovec
+ *
+ *     Returns 0 or -EFAULT.
+ *     Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_const_iovec(const struct sk_buff *skb, int offset,
+                                 const struct iovec *to, int to_offset,
+                                 int len)
+{
+       int start = skb_headlen(skb);
+       int i, copy = start - offset;
+       struct sk_buff *frag_iter;
+
+       /* Copy header. */
+       if (copy > 0) {
+               if (copy > len)
+                       copy = len;
+               if (memcpy_toiovecend(to, skb->data + offset, to_offset, copy))
+                       goto fault;
+               if ((len -= copy) == 0)
+                       return 0;
+               offset += copy;
+               to_offset += copy;
+       }
+
+       /* Copy paged appendix. Hmm... why does this look so complicated? */
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               WARN_ON(start > offset + len);
+
+               end = start + skb_frag_size(frag);
+               if ((copy = end - offset) > 0) {
+                       int err;
+                       u8  *vaddr;
+                       struct page *page = skb_frag_page(frag);
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap(page);
+                       err = memcpy_toiovecend(to, vaddr + frag->page_offset +
+                                               offset - start, to_offset, copy);
+                       kunmap(page);
+                       if (err)
+                               goto fault;
+                       if (!(len -= copy))
+                               return 0;
+                       offset += copy;
+                       to_offset += copy;
+               }
+               start = end;
+       }
+
+       skb_walk_frags(skb, frag_iter) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               end = start + frag_iter->len;
+               if ((copy = end - offset) > 0) {
+                       if (copy > len)
+                               copy = len;
+                       if (skb_copy_datagram_const_iovec(frag_iter,
+                                                         offset - start,
+                                                         to, to_offset,
+                                                         copy))
+                               goto fault;
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
+                       to_offset += copy;
+               }
+               start = end;
+       }
+       if (!len)
+               return 0;
+
+fault:
+       return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_const_iovec);
+
+/**
+ *     skb_copy_datagram_from_iovec - Copy a datagram from an iovec.
+ *     @skb: buffer to copy
+ *     @offset: offset in the buffer to start copying to
+ *     @from: io vector to copy to
+ *     @from_offset: offset in the io vector to start copying from
+ *     @len: amount of data to copy to buffer from iovec
+ *
+ *     Returns 0 or -EFAULT.
+ *     Note: the iovec is not modified during the copy.
+ */
+int skb_copy_datagram_from_iovec(struct sk_buff *skb, int offset,
+                                const struct iovec *from, int from_offset,
+                                int len)
+{
+       int start = skb_headlen(skb);
+       int i, copy = start - offset;
+       struct sk_buff *frag_iter;
+
+       /* Copy header. */
+       if (copy > 0) {
+               if (copy > len)
+                       copy = len;
+               if (memcpy_fromiovecend(skb->data + offset, from, from_offset,
+                                       copy))
+                       goto fault;
+               if ((len -= copy) == 0)
+                       return 0;
+               offset += copy;
+               from_offset += copy;
+       }
+
+       /* Copy paged appendix. Hmm... why does this look so complicated? */
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+
+               WARN_ON(start > offset + len);
+
+               end = start + skb_frag_size(frag);
+               if ((copy = end - offset) > 0) {
+                       int err;
+                       u8  *vaddr;
+                       struct page *page = skb_frag_page(frag);
+
+                       if (copy > len)
+                               copy = len;
+                       vaddr = kmap(page);
+                       err = memcpy_fromiovecend(vaddr + frag->page_offset +
+                                                 offset - start,
+                                                 from, from_offset, copy);
+                       kunmap(page);
+                       if (err)
+                               goto fault;
+
+                       if (!(len -= copy))
+                               return 0;
+                       offset += copy;
+                       from_offset += copy;
+               }
+               start = end;
+       }
+
+       skb_walk_frags(skb, frag_iter) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               end = start + frag_iter->len;
+               if ((copy = end - offset) > 0) {
+                       if (copy > len)
+                               copy = len;
+                       if (skb_copy_datagram_from_iovec(frag_iter,
+                                                        offset - start,
+                                                        from,
+                                                        from_offset,
+                                                        copy))
+                               goto fault;
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
+                       from_offset += copy;
+               }
+               start = end;
+       }
+       if (!len)
+               return 0;
+
+fault:
+       return -EFAULT;
+}
+EXPORT_SYMBOL(skb_copy_datagram_from_iovec);
 
 static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                                      u8 __user *to, int len,
-                                     unsigned int *csump)
+                                     __wsum *csump)
 {
        int start = skb_headlen(skb);
-       int pos = 0;
        int i, copy = start - offset;
+       struct sk_buff *frag_iter;
+       int pos = 0;
 
        /* Copy header. */
        if (copy > 0) {
@@ -345,16 +585,16 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
 
        for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
                int end;
+               const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
 
-               BUG_TRAP(start <= offset + len);
+               WARN_ON(start > offset + len);
 
-               end = start + skb_shinfo(skb)->frags[i].size;
+               end = start + skb_frag_size(frag);
                if ((copy = end - offset) > 0) {
-                       unsigned int csum2;
+                       __wsum csum2;
                        int err = 0;
                        u8  *vaddr;
-                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
-                       struct page *page = frag->page;
+                       struct page *page = skb_frag_page(frag);
 
                        if (copy > len)
                                copy = len;
@@ -376,33 +616,29 @@ static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset,
                start = end;
        }
 
-       if (skb_shinfo(skb)->frag_list) {
-               struct sk_buff *list = skb_shinfo(skb)->frag_list;
-
-               for (; list; list=list->next) {
-                       int end;
-
-                       BUG_TRAP(start <= offset + len);
-
-                       end = start + list->len;
-                       if ((copy = end - offset) > 0) {
-                               unsigned int csum2 = 0;
-                               if (copy > len)
-                                       copy = len;
-                               if (skb_copy_and_csum_datagram(list,
-                                                              offset - start,
-                                                              to, copy,
-                                                              &csum2))
-                                       goto fault;
-                               *csump = csum_block_add(*csump, csum2, pos);
-                               if ((len -= copy) == 0)
-                                       return 0;
-                               offset += copy;
-                               to += copy;
-                               pos += copy;
-                       }
-                       start = end;
+       skb_walk_frags(skb, frag_iter) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               end = start + frag_iter->len;
+               if ((copy = end - offset) > 0) {
+                       __wsum csum2 = 0;
+                       if (copy > len)
+                               copy = len;
+                       if (skb_copy_and_csum_datagram(frag_iter,
+                                                      offset - start,
+                                                      to, copy,
+                                                      &csum2))
+                               goto fault;
+                       *csump = csum_block_add(*csump, csum2, pos);
+                       if ((len -= copy) == 0)
+                               return 0;
+                       offset += copy;
+                       to += copy;
+                       pos += copy;
                }
+               start = end;
        }
        if (!len)
                return 0;
@@ -411,11 +647,11 @@ fault:
        return -EFAULT;
 }
 
-unsigned int __skb_checksum_complete(struct sk_buff *skb)
+__sum16 __skb_checksum_complete_head(struct sk_buff *skb, int len)
 {
        __sum16 sum;
 
-       sum = csum_fold(skb_checksum(skb, 0, skb->len, skb->csum));
+       sum = csum_fold(skb_checksum(skb, 0, len, skb->csum));
        if (likely(!sum)) {
                if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
                        netdev_rx_csum_fault(skb->dev);
@@ -423,6 +659,12 @@ unsigned int __skb_checksum_complete(struct sk_buff *skb)
        }
        return sum;
 }
+EXPORT_SYMBOL(__skb_checksum_complete_head);
+
+__sum16 __skb_checksum_complete(struct sk_buff *skb)
+{
+       return __skb_checksum_complete_head(skb, skb->len);
+}
 EXPORT_SYMBOL(__skb_checksum_complete);
 
 /**
@@ -430,7 +672,7 @@ EXPORT_SYMBOL(__skb_checksum_complete);
  *     @skb: skbuff
  *     @hlen: hardware length
  *     @iov: io vector
- * 
+ *
  *     Caller _must_ check that skb will fit to this iovec.
  *
  *     Returns: 0       - success.
@@ -444,6 +686,9 @@ int skb_copy_and_csum_datagram_iovec(struct sk_buff *skb,
        __wsum csum;
        int chunk = skb->len - hlen;
 
+       if (!chunk)
+               return 0;
+
        /* Skip filled elements.
         * Pretty silly, look at memcpy_toiovec, though 8)
         */
@@ -473,6 +718,7 @@ csum_error:
 fault:
        return -EFAULT;
 }
+EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
 
 /**
  *     datagram_poll - generic datagram poll
@@ -494,20 +740,19 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
        struct sock *sk = sock->sk;
        unsigned int mask;
 
-       poll_wait(file, sk->sk_sleep, wait);
+       sock_poll_wait(file, sk_sleep(sk), wait);
        mask = 0;
 
        /* exceptional events? */
        if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
                mask |= POLLERR;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
-               mask |= POLLRDHUP;
+               mask |= POLLRDHUP | POLLIN | POLLRDNORM;
        if (sk->sk_shutdown == SHUTDOWN_MASK)
                mask |= POLLHUP;
 
        /* readable? */
-       if (!skb_queue_empty(&sk->sk_receive_queue) ||
-           (sk->sk_shutdown & RCV_SHUTDOWN))
+       if (!skb_queue_empty(&sk->sk_receive_queue))
                mask |= POLLIN | POLLRDNORM;
 
        /* Connection-based need to check for termination and startup */
@@ -527,9 +772,4 @@ unsigned int datagram_poll(struct file *file, struct socket *sock,
 
        return mask;
 }
-
 EXPORT_SYMBOL(datagram_poll);
-EXPORT_SYMBOL(skb_copy_and_csum_datagram_iovec);
-EXPORT_SYMBOL(skb_copy_datagram_iovec);
-EXPORT_SYMBOL(skb_free_datagram);
-EXPORT_SYMBOL(skb_recv_datagram);