[NET]: convert network timestamps to ktime_t
Eric Dumazet [Thu, 19 Apr 2007 23:16:32 +0000 (16:16 -0700)]
We currently use a special structure (struct skb_timeval) and plain
'struct timeval' to store packet timestamps in sk_buffs and struct
sock.

This has some drawbacks:
- Fixed resolution of one microsecond.
- Waste of space on 64-bit platforms, where sizeof(struct timeval)=16

I suggest using ktime_t that is a nice abstraction of high resolution
time services, currently capable of nanosecond resolution.

As sizeof(ktime_t) is 8 bytes, using ktime_t in 'struct sock' permits
an 8-byte shrink of this structure on 64-bit architectures. Some other
structures also benefit from this size reduction (struct ipq in
ipv4/ip_fragment.c, struct frag_queue in ipv6/reassembly.c, ...)

Once this ktime infrastructure is adopted, we can more easily provide
nanosecond resolution on top of it. (ioctl SIOCGSTAMPNS and/or
SO_TIMESTAMPNS/SCM_TIMESTAMPNS)

Note: this patch includes a bug fix in
compat_sock_get_timestamp(), where an "err = 0;" was missing (so this
syscall returned -ENOENT instead of 0)

Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
CC: Stephen Hemminger <shemminger@linux-foundation.org>
CC: John find <linux.kernel@free.fr>
Signed-off-by: David S. Miller <davem@davemloft.net>

20 files changed:
include/linux/skbuff.h
include/net/sock.h
kernel/time.c
net/bridge/netfilter/ebt_ulog.c
net/compat.c
net/core/dev.c
net/core/sock.c
net/econet/af_econet.c
net/ipv4/ip_fragment.c
net/ipv4/netfilter/ip_queue.c
net/ipv4/netfilter/ipt_ULOG.c
net/ipv6/exthdrs.c
net/ipv6/netfilter/ip6_queue.c
net/ipv6/netfilter/nf_conntrack_reasm.c
net/ipv6/reassembly.c
net/ipx/af_ipx.c
net/netfilter/nfnetlink_log.c
net/netfilter/nfnetlink_queue.c
net/packet/af_packet.c
net/sunrpc/svcsock.c

index 5992f65..f9441b5 100644 (file)
@@ -27,6 +27,7 @@
 #include <net/checksum.h>
 #include <linux/rcupdate.h>
 #include <linux/dmaengine.h>
+#include <linux/hrtimer.h>
 
 #define HAVE_ALLOC_SKB         /* For the drivers to know */
 #define HAVE_ALIGNABLE_SKB     /* Ditto 8)                */
@@ -156,11 +157,6 @@ struct skb_shared_info {
 #define SKB_DATAREF_SHIFT 16
 #define SKB_DATAREF_MASK ((1 << SKB_DATAREF_SHIFT) - 1)
 
-struct skb_timeval {
-       u32     off_sec;
-       u32     off_usec;
-};
-
 
 enum {
        SKB_FCLONE_UNAVAILABLE,
@@ -233,7 +229,7 @@ struct sk_buff {
        struct sk_buff          *prev;
 
        struct sock             *sk;
-       struct skb_timeval      tstamp;
+       ktime_t                 tstamp;
        struct net_device       *dev;
        int                     iif;
        /* 4 byte hole on 64 bit*/
@@ -1365,26 +1361,14 @@ extern void skb_add_mtu(int mtu);
  */
 static inline void skb_get_timestamp(const struct sk_buff *skb, struct timeval *stamp)
 {
-       stamp->tv_sec  = skb->tstamp.off_sec;
-       stamp->tv_usec = skb->tstamp.off_usec;
+       *stamp = ktime_to_timeval(skb->tstamp);
 }
 
-/**
- *     skb_set_timestamp - set timestamp of a skb
- *     @skb: skb to set stamp of
- *     @stamp: pointer to struct timeval to get stamp from
- *
- *     Timestamps are stored in the skb as offsets to a base timestamp.
- *     This function converts a struct timeval to an offset and stores
- *     it in the skb.
- */
-static inline void skb_set_timestamp(struct sk_buff *skb, const struct timeval *stamp)
+static inline void __net_timestamp(struct sk_buff *skb)
 {
-       skb->tstamp.off_sec  = stamp->tv_sec;
-       skb->tstamp.off_usec = stamp->tv_usec;
+       skb->tstamp = ktime_get_real();
 }
 
-extern void __net_timestamp(struct sk_buff *skb);
 
 extern __sum16 __skb_checksum_complete(struct sk_buff *skb);
 
index a3366c3..9583639 100644 (file)
@@ -244,7 +244,7 @@ struct sock {
        struct sk_filter        *sk_filter;
        void                    *sk_protinfo;
        struct timer_list       sk_timer;
-       struct timeval          sk_stamp;
+       ktime_t                 sk_stamp;
        struct socket           *sk_socket;
        void                    *sk_user_data;
        struct page             *sk_sndmsg_page;
@@ -1307,19 +1307,19 @@ static inline int sock_intr_errno(long timeo)
 static __inline__ void
 sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 {
-       struct timeval stamp;
+       ktime_t kt = skb->tstamp;
 
-       skb_get_timestamp(skb, &stamp);
        if (sock_flag(sk, SOCK_RCVTSTAMP)) {
+               struct timeval tv;
                /* Race occurred between timestamp enabling and packet
                   receiving.  Fill in the current time for now. */
-               if (stamp.tv_sec == 0)
-                       do_gettimeofday(&stamp);
-               skb_set_timestamp(skb, &stamp);
-               put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(struct timeval),
-                        &stamp);
+               if (kt.tv64 == 0)
+                       kt = ktime_get_real();
+               skb->tstamp = kt;
+               tv = ktime_to_timeval(kt);
+               put_cmsg(msg, SOL_SOCKET, SO_TIMESTAMP, sizeof(tv), &tv);
        } else
-               sk->sk_stamp = stamp;
+               sk->sk_stamp = kt;
 }
 
 /**
index 2f47888..a1439f4 100644 (file)
@@ -469,6 +469,7 @@ struct timeval ns_to_timeval(const s64 nsec)
 
        return tv;
 }
+EXPORT_SYMBOL(ns_to_timeval);
 
 /*
  * Convert jiffies to milliseconds and back.
index 8e15cc4..259f5c3 100644 (file)
@@ -130,6 +130,7 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
        unsigned int group = uloginfo->nlgroup;
        ebt_ulog_buff_t *ub = &ulog_buffers[group];
        spinlock_t *lock = &ub->lock;
+       ktime_t kt;
 
        if ((uloginfo->cprange == 0) ||
            (uloginfo->cprange > skb->len + ETH_HLEN))
@@ -164,9 +165,10 @@ static void ebt_ulog_packet(unsigned int hooknr, const struct sk_buff *skb,
 
        /* Fill in the ulog data */
        pm->version = EBT_ULOG_VERSION;
-       do_gettimeofday(&pm->stamp);
+       kt = ktime_get_real();
+       pm->stamp = ktime_to_timeval(kt);
        if (ub->qlen == 1)
-               skb_set_timestamp(ub->skb, &pm->stamp);
+               ub->skb->tstamp = kt;
        pm->data_len = copy_len;
        pm->mark = skb->mark;
        pm->hook = hooknr;
index 1f32866..17c2710 100644 (file)
@@ -545,15 +545,20 @@ int compat_sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
        struct compat_timeval __user *ctv =
                        (struct compat_timeval __user*) userstamp;
        int err = -ENOENT;
+       struct timeval tv;
 
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
-       if (sk->sk_stamp.tv_sec == -1)
+       tv = ktime_to_timeval(sk->sk_stamp);
+       if (tv.tv_sec == -1)
                return err;
-       if (sk->sk_stamp.tv_sec == 0)
-               do_gettimeofday(&sk->sk_stamp);
-       if (put_user(sk->sk_stamp.tv_sec, &ctv->tv_sec) ||
-                       put_user(sk->sk_stamp.tv_usec, &ctv->tv_usec))
+       if (tv.tv_sec == 0) {
+               sk->sk_stamp = ktime_get_real();
+               tv = ktime_to_timeval(sk->sk_stamp);
+       }
+       err = 0;
+       if (put_user(tv.tv_sec, &ctv->tv_sec) ||
+                       put_user(tv.tv_usec, &ctv->tv_usec))
                err = -EFAULT;
        return err;
 }
index 4dc93cc..582db64 100644 (file)
@@ -1031,23 +1031,12 @@ void net_disable_timestamp(void)
        atomic_dec(&netstamp_needed);
 }
 
-void __net_timestamp(struct sk_buff *skb)
-{
-       struct timeval tv;
-
-       do_gettimeofday(&tv);
-       skb_set_timestamp(skb, &tv);
-}
-EXPORT_SYMBOL(__net_timestamp);
-
 static inline void net_timestamp(struct sk_buff *skb)
 {
        if (atomic_read(&netstamp_needed))
                __net_timestamp(skb);
-       else {
-               skb->tstamp.off_sec = 0;
-               skb->tstamp.off_usec = 0;
-       }
+       else
+               skb->tstamp.tv64 = 0;
 }
 
 /*
@@ -1577,7 +1566,7 @@ int netif_rx(struct sk_buff *skb)
        if (netpoll_rx(skb))
                return NET_RX_DROP;
 
-       if (!skb->tstamp.off_sec)
+       if (!skb->tstamp.tv64)
                net_timestamp(skb);
 
        /*
@@ -1769,7 +1758,7 @@ int netif_receive_skb(struct sk_buff *skb)
        if (skb->dev->poll && netpoll_rx(skb))
                return NET_RX_DROP;
 
-       if (!skb->tstamp.off_sec)
+       if (!skb->tstamp.tv64)
                net_timestamp(skb);
 
        if (!skb->iif)
index 6d35d57..6ddb366 100644 (file)
@@ -1512,8 +1512,7 @@ void sock_init_data(struct socket *sock, struct sock *sk)
        sk->sk_rcvtimeo         =       MAX_SCHEDULE_TIMEOUT;
        sk->sk_sndtimeo         =       MAX_SCHEDULE_TIMEOUT;
 
-       sk->sk_stamp.tv_sec     = -1L;
-       sk->sk_stamp.tv_usec    = -1L;
+       sk->sk_stamp = ktime_set(-1L, -1L);
 
        atomic_set(&sk->sk_refcnt, 1);
 }
@@ -1554,14 +1553,17 @@ EXPORT_SYMBOL(release_sock);
 
 int sock_get_timestamp(struct sock *sk, struct timeval __user *userstamp)
 {
+       struct timeval tv;
        if (!sock_flag(sk, SOCK_TIMESTAMP))
                sock_enable_timestamp(sk);
-       if (sk->sk_stamp.tv_sec == -1)
+       tv = ktime_to_timeval(sk->sk_stamp);
+       if (tv.tv_sec == -1)
                return -ENOENT;
-       if (sk->sk_stamp.tv_sec == 0)
-               do_gettimeofday(&sk->sk_stamp);
-       return copy_to_user(userstamp, &sk->sk_stamp, sizeof(struct timeval)) ?
-               -EFAULT : 0;
+       if (tv.tv_sec == 0) {
+               sk->sk_stamp = ktime_get_real();
+               tv = ktime_to_timeval(sk->sk_stamp);
+       }
+       return copy_to_user(userstamp, &tv, sizeof(tv)) ? -EFAULT : 0;
 }
 EXPORT_SYMBOL(sock_get_timestamp);
 
index bc12e36..f573edd 100644 (file)
@@ -162,7 +162,7 @@ static int econet_recvmsg(struct kiocb *iocb, struct socket *sock,
        err = memcpy_toiovec(msg->msg_iov, skb->data, copied);
        if (err)
                goto out_free;
-       skb_get_timestamp(skb, &sk->sk_stamp);
+       sk->sk_stamp = skb->tstamp;
 
        if (msg->msg_name)
                memcpy(msg->msg_name, skb->cb, msg->msg_namelen);
index b6f0553..e10be7d 100644 (file)
@@ -92,7 +92,7 @@ struct ipq {
        spinlock_t      lock;
        atomic_t        refcnt;
        struct timer_list timer;        /* when will this queue expire?         */
-       struct timeval  stamp;
+       ktime_t         stamp;
        int             iif;
        unsigned int    rid;
        struct inet_peer *peer;
@@ -592,7 +592,7 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
        if (skb->dev)
                qp->iif = skb->dev->ifindex;
        skb->dev = NULL;
-       skb_get_timestamp(skb, &qp->stamp);
+       qp->stamp = skb->tstamp;
        qp->meat += skb->len;
        atomic_add(skb->truesize, &ip_frag_mem);
        if (offset == 0)
@@ -674,7 +674,7 @@ static struct sk_buff *ip_frag_reasm(struct ipq *qp, struct net_device *dev)
 
        head->next = NULL;
        head->dev = dev;
-       skb_set_timestamp(head, &qp->stamp);
+       head->tstamp = qp->stamp;
 
        iph = head->nh.iph;
        iph->frag_off = 0;
@@ -734,7 +734,7 @@ struct sk_buff *ip_defrag(struct sk_buff *skb, u32 user)
        return NULL;
 }
 
-void ipfrag_init(void)
+void __init ipfrag_init(void)
 {
        ipfrag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
                                 (jiffies ^ (jiffies >> 6)));
index a14798a..5842f1a 100644 (file)
@@ -197,6 +197,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
        struct sk_buff *skb;
        struct ipq_packet_msg *pmsg;
        struct nlmsghdr *nlh;
+       struct timeval tv;
 
        read_lock_bh(&queue_lock);
 
@@ -241,8 +242,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 
        pmsg->packet_id       = (unsigned long )entry;
        pmsg->data_len        = data_len;
-       pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-       pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+       tv = ktime_to_timeval(entry->skb->tstamp);
+       pmsg->timestamp_sec   = tv.tv_sec;
+       pmsg->timestamp_usec  = tv.tv_usec;
        pmsg->mark            = entry->skb->mark;
        pmsg->hook            = entry->info->hook;
        pmsg->hw_protocol     = entry->skb->protocol;
index 9acc018..9718b66 100644 (file)
@@ -187,6 +187,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
        ulog_packet_msg_t *pm;
        size_t size, copy_len;
        struct nlmsghdr *nlh;
+       struct timeval tv;
 
        /* ffs == find first bit set, necessary because userspace
         * is already shifting groupnumber, but we need unshifted.
@@ -232,13 +233,14 @@ static void ipt_ulog_packet(unsigned int hooknum,
        pm = NLMSG_DATA(nlh);
 
        /* We might not have a timestamp, get one */
-       if (skb->tstamp.off_sec == 0)
+       if (skb->tstamp.tv64 == 0)
                __net_timestamp((struct sk_buff *)skb);
 
        /* copy hook, prefix, timestamp, payload, etc. */
        pm->data_len = copy_len;
-       put_unaligned(skb->tstamp.off_sec, &pm->timestamp_sec);
-       put_unaligned(skb->tstamp.off_usec, &pm->timestamp_usec);
+       tv = ktime_to_timeval(skb->tstamp);
+       put_unaligned(tv.tv_sec, &pm->timestamp_sec);
+       put_unaligned(tv.tv_usec, &pm->timestamp_usec);
        put_unaligned(skb->mark, &pm->mark);
        pm->hook = hooknum;
        if (prefix != NULL)
index fb39604..a963a31 100644 (file)
@@ -255,7 +255,7 @@ static int ipv6_dest_hao(struct sk_buff **skbp, int optoff)
        ipv6_addr_copy(&ipv6h->saddr, &hao->addr);
        ipv6_addr_copy(&hao->addr, &tmp_addr);
 
-       if (skb->tstamp.off_sec == 0)
+       if (skb->tstamp.tv64 == 0)
                __net_timestamp(skb);
 
        return 1;
index fdb30a5..66a2c41 100644 (file)
@@ -195,6 +195,7 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
        struct sk_buff *skb;
        struct ipq_packet_msg *pmsg;
        struct nlmsghdr *nlh;
+       struct timeval tv;
 
        read_lock_bh(&queue_lock);
 
@@ -239,8 +240,9 @@ ipq_build_packet_message(struct ipq_queue_entry *entry, int *errp)
 
        pmsg->packet_id       = (unsigned long )entry;
        pmsg->data_len        = data_len;
-       pmsg->timestamp_sec   = entry->skb->tstamp.off_sec;
-       pmsg->timestamp_usec  = entry->skb->tstamp.off_usec;
+       tv = ktime_to_timeval(entry->skb->tstamp);
+       pmsg->timestamp_sec   = tv.tv_sec;
+       pmsg->timestamp_usec  = tv.tv_usec;
        pmsg->mark            = entry->skb->mark;
        pmsg->hook            = entry->info->hook;
        pmsg->hw_protocol     = entry->skb->protocol;
index 15ab1e3..c311b9a 100644 (file)
@@ -82,7 +82,7 @@ struct nf_ct_frag6_queue
        struct sk_buff          *fragments;
        int                     len;
        int                     meat;
-       struct timeval          stamp;
+       ktime_t                 stamp;
        unsigned int            csum;
        __u8                    last_in;        /* has first/last segment arrived? */
 #define COMPLETE               4
@@ -542,7 +542,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
                fq->fragments = skb;
 
        skb->dev = NULL;
-       skb_get_timestamp(skb, &fq->stamp);
+       fq->stamp = skb->tstamp;
        fq->meat += skb->len;
        atomic_add(skb->truesize, &nf_ct_frag6_mem);
 
@@ -648,7 +648,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
 
        head->next = NULL;
        head->dev = dev;
-       skb_set_timestamp(head, &fq->stamp);
+       head->tstamp = fq->stamp;
        head->nh.ipv6h->payload_len = htons(payload_len);
 
        /* Yes, and fold redundant checksum back. 8) */
index 7034c54..1dde449 100644 (file)
@@ -88,7 +88,7 @@ struct frag_queue
        int                     len;
        int                     meat;
        int                     iif;
-       struct timeval          stamp;
+       ktime_t                 stamp;
        unsigned int            csum;
        __u8                    last_in;        /* has first/last segment arrived? */
 #define COMPLETE               4
@@ -562,7 +562,7 @@ static void ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
        if (skb->dev)
                fq->iif = skb->dev->ifindex;
        skb->dev = NULL;
-       skb_get_timestamp(skb, &fq->stamp);
+       fq->stamp = skb->tstamp;
        fq->meat += skb->len;
        atomic_add(skb->truesize, &ip6_frag_mem);
 
@@ -663,7 +663,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
 
        head->next = NULL;
        head->dev = dev;
-       skb_set_timestamp(head, &fq->stamp);
+       head->tstamp = fq->stamp;
        head->nh.ipv6h->payload_len = htons(payload_len);
        IP6CB(head)->nhoff = nhoff;
 
index cac35a7..6c6c0a3 100644 (file)
@@ -1807,8 +1807,8 @@ static int ipx_recvmsg(struct kiocb *iocb, struct socket *sock,
                                     copied);
        if (rc)
                goto out_free;
-       if (skb->tstamp.off_sec)
-               skb_get_timestamp(skb, &sk->sk_stamp);
+       if (skb->tstamp.tv64)
+               sk->sk_stamp = skb->tstamp;
 
        msg->msg_namelen = sizeof(*sipx);
 
index 5cb30eb..5eeebd2 100644 (file)
@@ -509,11 +509,11 @@ __build_packet_message(struct nfulnl_instance *inst,
                NFA_PUT(inst->skb, NFULA_HWADDR, sizeof(phw), &phw);
        }
 
-       if (skb->tstamp.off_sec) {
+       if (skb->tstamp.tv64) {
                struct nfulnl_msg_packet_timestamp ts;
-
-               ts.sec = cpu_to_be64(skb->tstamp.off_sec);
-               ts.usec = cpu_to_be64(skb->tstamp.off_usec);
+               struct timeval tv = ktime_to_timeval(skb->tstamp);
+               ts.sec = cpu_to_be64(tv.tv_sec);
+               ts.usec = cpu_to_be64(tv.tv_usec);
 
                NFA_PUT(inst->skb, NFULA_TIMESTAMP, sizeof(ts), &ts);
        }
index d9ce4a7..cfbee39 100644 (file)
@@ -495,11 +495,11 @@ nfqnl_build_packet_message(struct nfqnl_instance *queue,
                NFA_PUT(skb, NFQA_HWADDR, sizeof(phw), &phw);
        }
 
-       if (entskb->tstamp.off_sec) {
+       if (entskb->tstamp.tv64) {
                struct nfqnl_msg_packet_timestamp ts;
-
-               ts.sec = cpu_to_be64(entskb->tstamp.off_sec);
-               ts.usec = cpu_to_be64(entskb->tstamp.off_usec);
+               struct timeval tv = ktime_to_timeval(entskb->tstamp);
+               ts.sec = cpu_to_be64(tv.tv_sec);
+               ts.usec = cpu_to_be64(tv.tv_usec);
 
                NFA_PUT(skb, NFQA_TIMESTAMP, sizeof(ts), &ts);
        }
index 28d47e8..f9866a8 100644 (file)
@@ -582,6 +582,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
        unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
        unsigned short macoff, netoff;
        struct sk_buff *copy_skb = NULL;
+       struct timeval tv;
 
        if (skb->pkt_type == PACKET_LOOPBACK)
                goto drop;
@@ -656,12 +657,13 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe
        h->tp_snaplen = snaplen;
        h->tp_mac = macoff;
        h->tp_net = netoff;
-       if (skb->tstamp.off_sec == 0) {
+       if (skb->tstamp.tv64 == 0) {
                __net_timestamp(skb);
                sock_enable_timestamp(sk);
        }
-       h->tp_sec = skb->tstamp.off_sec;
-       h->tp_usec = skb->tstamp.off_usec;
+       tv = ktime_to_timeval(skb->tstamp);
+       h->tp_sec = tv.tv_sec;
+       h->tp_usec = tv.tv_usec;
 
        sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
        sll->sll_halen = 0;
index 2772fee..22f61ae 100644 (file)
@@ -798,16 +798,12 @@ svc_udp_recvfrom(struct svc_rqst *rqstp)
                dprintk("svc: recvfrom returned error %d\n", -err);
        }
        rqstp->rq_addrlen = sizeof(rqstp->rq_addr);
-       if (skb->tstamp.off_sec == 0) {
-               struct timeval tv;
-
-               tv.tv_sec = xtime.tv_sec;
-               tv.tv_usec = xtime.tv_nsec / NSEC_PER_USEC;
-               skb_set_timestamp(skb, &tv);
+       if (skb->tstamp.tv64 == 0) {
+               skb->tstamp = ktime_get_real();
                /* Don't enable netstamp, sunrpc doesn't
                   need that much accuracy */
        }
-       skb_get_timestamp(skb, &svsk->sk_sk->sk_stamp);
+       svsk->sk_sk->sk_stamp = skb->tstamp;
        set_bit(SK_DATA, &svsk->sk_flags); /* there may be more data... */
 
        /*