packet_mmap: expose hw packet timestamps to network packet capture utilities
Scott McMillan [Wed, 2 Jun 2010 12:53:56 +0000 (05:53 -0700)]
This patch adds a setting, PACKET_TIMESTAMP, to specify the packet
timestamp source that is exported to capture utilities like tcpdump by
packet_mmap.

PACKET_TIMESTAMP accepts the same integer bit field as
SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE and
SOF_TIMESTAMPING_RAW_HARDWARE values are currently recognized by
PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.

If PACKET_TIMESTAMP is not set, a software timestamp generated inside
the networking stack is used (the behavior before this setting was
added).

Signed-off-by: Scott McMillan <scott.a.mcmillan@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Documentation/networking/packet_mmap.txt
include/linux/if_packet.h
net/packet/af_packet.c

index 98f71a5..2546aa4 100644 (file)
@@ -493,6 +493,32 @@ The user can also use poll() to check if a buffer is available:
     pfd.events = POLLOUT;
     retval = poll(&pfd, 1, timeout);
 
+-------------------------------------------------------------------------------
++ PACKET_TIMESTAMP
+-------------------------------------------------------------------------------
+
+The PACKET_TIMESTAMP setting determines the source of the timestamp in
+the packet meta information.  If your NIC is capable of timestamping
+packets in hardware, you can request those hardware timestamps to used.
+Note: you may need to enable the generation of hardware timestamps with
+SIOCSHWTSTAMP.
+
+PACKET_TIMESTAMP accepts the same integer bit field as
+SO_TIMESTAMPING.  However, only the SOF_TIMESTAMPING_SYS_HARDWARE
+and SOF_TIMESTAMPING_RAW_HARDWARE values are recognized by
+PACKET_TIMESTAMP.  SOF_TIMESTAMPING_SYS_HARDWARE takes precedence over
+SOF_TIMESTAMPING_RAW_HARDWARE if both bits are set.
+
+    int req = 0;
+    req |= SOF_TIMESTAMPING_SYS_HARDWARE;
+    setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
+
+If PACKET_TIMESTAMP is not set, a software timestamp generated inside
+the networking stack is used (the behavior before this setting was added).
+
+See include/linux/net_tstamp.h and Documentation/networking/timestamping
+for more information on hardware timestamps.
+
 --------------------------------------------------------------------------------
 + THANKS
 --------------------------------------------------------------------------------
index 6ac23ef..72bfa5a 100644 (file)
@@ -48,6 +48,7 @@ struct sockaddr_ll {
 #define PACKET_LOSS                    14
 #define PACKET_VNET_HDR                        15
 #define PACKET_TX_TIMESTAMP            16
+#define PACKET_TIMESTAMP               17
 
 struct tpacket_stats {
        unsigned int    tp_packets;
index 2078a27..9a17f28 100644 (file)
@@ -83,6 +83,7 @@
 #include <linux/if_vlan.h>
 #include <linux/virtio_net.h>
 #include <linux/errqueue.h>
+#include <linux/net_tstamp.h>
 
 #ifdef CONFIG_INET
 #include <net/inet_common.h>
@@ -202,6 +203,7 @@ struct packet_sock {
        unsigned int            tp_hdrlen;
        unsigned int            tp_reserve;
        unsigned int            tp_loss:1;
+       unsigned int            tp_tstamp;
        struct packet_type      prot_hook ____cacheline_aligned_in_smp;
 };
 
@@ -656,6 +658,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
        struct sk_buff *copy_skb = NULL;
        struct timeval tv;
        struct timespec ts;
+       struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
 
        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;
@@ -737,7 +740,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
                h.h1->tp_snaplen = snaplen;
                h.h1->tp_mac = macoff;
                h.h1->tp_net = netoff;
-               if (skb->tstamp.tv64)
+               if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+                               && shhwtstamps->syststamp.tv64)
+                       tv = ktime_to_timeval(shhwtstamps->syststamp);
+               else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+                               && shhwtstamps->hwtstamp.tv64)
+                       tv = ktime_to_timeval(shhwtstamps->hwtstamp);
+               else if (skb->tstamp.tv64)
                        tv = ktime_to_timeval(skb->tstamp);
                else
                        do_gettimeofday(&tv);
@@ -750,7 +759,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
                h.h2->tp_snaplen = snaplen;
                h.h2->tp_mac = macoff;
                h.h2->tp_net = netoff;
-               if (skb->tstamp.tv64)
+               if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
+                               && shhwtstamps->syststamp.tv64)
+                       ts = ktime_to_timespec(shhwtstamps->syststamp);
+               else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
+                               && shhwtstamps->hwtstamp.tv64)
+                       ts = ktime_to_timespec(shhwtstamps->hwtstamp);
+               else if (skb->tstamp.tv64)
                        ts = ktime_to_timespec(skb->tstamp);
                else
                        getnstimeofday(&ts);
@@ -2027,6 +2042,18 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
                po->has_vnet_hdr = !!val;
                return 0;
        }
+       case PACKET_TIMESTAMP:
+       {
+               int val;
+
+               if (optlen != sizeof(val))
+                       return -EINVAL;
+               if (copy_from_user(&val, optval, sizeof(val)))
+                       return -EFAULT;
+
+               po->tp_tstamp = val;
+               return 0;
+       }
        default:
                return -ENOPROTOOPT;
        }
@@ -2119,6 +2146,12 @@ static int packet_getsockopt(struct socket *sock, int level, int optname,
                val = po->tp_loss;
                data = &val;
                break;
+       case PACKET_TIMESTAMP:
+               if (len > sizeof(int))
+                       len = sizeof(int);
+               val = po->tp_tstamp;
+               data = &val;
+               break;
        default:
                return -ENOPROTOOPT;
        }