nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/ipv4/ipip.c
net: struct xfrm_tunnel in read_mostly section
[linux-2.6.git] / net / ipv4 / ipip.c
index b3c3d7b0d1168ddc69f7c911d90b3f097e3b1e2a..3c6f8f3968a6fcc2a7b41e1290ca87cf55640198 100644 (file)
@@ -95,6 +95,7 @@
 #include <linux/module.h>
 #include <linux/types.h>
 #include <linux/kernel.h>
+#include <linux/slab.h>
 #include <asm/uaccess.h>
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 #define HASH_SIZE  16
 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
 
-static int ipip_net_id;
+static int ipip_net_id __read_mostly;
 struct ipip_net {
        struct ip_tunnel *tunnels_r_l[HASH_SIZE];
        struct ip_tunnel *tunnels_r[HASH_SIZE];
@@ -130,11 +131,16 @@ struct ipip_net {
        struct net_device *fb_tunnel_dev;
 };
 
-static int ipip_fb_tunnel_init(struct net_device *dev);
-static int ipip_tunnel_init(struct net_device *dev);
+static void ipip_tunnel_init(struct net_device *dev);
 static void ipip_tunnel_setup(struct net_device *dev);
 
-static DEFINE_RWLOCK(ipip_lock);
+/*
+ * Locking : hash tables are protected by RCU and a spinlock
+ */
+static DEFINE_SPINLOCK(ipip_lock);
+
+#define for_each_ip_tunnel_rcu(start) \
+       for (t = rcu_dereference(start); t; t = rcu_dereference(t->next))
 
 static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
                __be32 remote, __be32 local)
@@ -144,20 +150,21 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
        struct ip_tunnel *t;
        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 
-       for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
+       for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
                if (local == t->parms.iph.saddr &&
                    remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
-       }
-       for (t = ipn->tunnels_r[h0]; t; t = t->next) {
+
+       for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
                if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
                        return t;
-       }
-       for (t = ipn->tunnels_l[h1]; t; t = t->next) {
+
+       for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
                if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
                        return t;
-       }
-       if ((t = ipn->tunnels_wc[0]) != NULL && (t->dev->flags&IFF_UP))
+
+       t = rcu_dereference(ipn->tunnels_wc[0]);
+       if (t && (t->dev->flags&IFF_UP))
                return t;
        return NULL;
 }
@@ -193,9 +200,9 @@ static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
 
        for (tp = ipip_bucket(ipn, t); *tp; tp = &(*tp)->next) {
                if (t == *tp) {
-                       write_lock_bh(&ipip_lock);
+                       spin_lock_bh(&ipip_lock);
                        *tp = t->next;
-                       write_unlock_bh(&ipip_lock);
+                       spin_unlock_bh(&ipip_lock);
                        break;
                }
        }
@@ -205,10 +212,10 @@ static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
 {
        struct ip_tunnel **tp = ipip_bucket(ipn, t);
 
+       spin_lock_bh(&ipip_lock);
        t->next = *tp;
-       write_lock_bh(&ipip_lock);
-       *tp = t;
-       write_unlock_bh(&ipip_lock);
+       rcu_assign_pointer(*tp, t);
+       spin_unlock_bh(&ipip_lock);
 }
 
 static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
@@ -245,9 +252,10 @@ static struct ip_tunnel * ipip_tunnel_locate(struct net *net,
        }
 
        nt = netdev_priv(dev);
-       dev->init = ipip_tunnel_init;
        nt->parms = *parms;
 
+       ipip_tunnel_init(dev);
+
        if (register_netdevice(dev) < 0)
                goto failed_free;
 
@@ -266,9 +274,9 @@ static void ipip_tunnel_uninit(struct net_device *dev)
        struct ipip_net *ipn = net_generic(net, ipip_net_id);
 
        if (dev == ipn->fb_tunnel_dev) {
-               write_lock_bh(&ipip_lock);
+               spin_lock_bh(&ipip_lock);
                ipn->tunnels_wc[0] = NULL;
-               write_unlock_bh(&ipip_lock);
+               spin_unlock_bh(&ipip_lock);
        } else
                ipip_tunnel_unlink(ipn, netdev_priv(dev));
        dev_put(dev);
@@ -317,7 +325,7 @@ static int ipip_err(struct sk_buff *skb, u32 info)
 
        err = -ENOENT;
 
-       read_lock(&ipip_lock);
+       rcu_read_lock();
        t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
        if (t == NULL || t->parms.iph.daddr == 0)
                goto out;
@@ -326,13 +334,13 @@ static int ipip_err(struct sk_buff *skb, u32 info)
        if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
                goto out;
 
-       if (jiffies - t->err_time < IPTUNNEL_ERR_TIMEO)
+       if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
                t->err_count++;
        else
                t->err_count = 1;
        t->err_time = jiffies;
 out:
-       read_unlock(&ipip_lock);
+       rcu_read_unlock();
        return err;
 }
 
@@ -350,11 +358,11 @@ static int ipip_rcv(struct sk_buff *skb)
        struct ip_tunnel *tunnel;
        const struct iphdr *iph = ip_hdr(skb);
 
-       read_lock(&ipip_lock);
+       rcu_read_lock();
        if ((tunnel = ipip_tunnel_lookup(dev_net(skb->dev),
                                        iph->saddr, iph->daddr)) != NULL) {
                if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
-                       read_unlock(&ipip_lock);
+                       rcu_read_unlock();
                        kfree_skb(skb);
                        return 0;
                }
@@ -366,18 +374,14 @@ static int ipip_rcv(struct sk_buff *skb)
                skb->protocol = htons(ETH_P_IP);
                skb->pkt_type = PACKET_HOST;
 
-               tunnel->dev->stats.rx_packets++;
-               tunnel->dev->stats.rx_bytes += skb->len;
-               skb->dev = tunnel->dev;
-               dst_release(skb->dst);
-               skb->dst = NULL;
-               nf_reset(skb);
+               skb_tunnel_rx(skb, tunnel->dev);
+
                ipip_ecn_decapsulate(iph, skb);
                netif_rx(skb);
-               read_unlock(&ipip_lock);
+               rcu_read_unlock();
                return 0;
        }
-       read_unlock(&ipip_lock);
+       rcu_read_unlock();
 
        return -1;
 }
@@ -387,10 +391,11 @@ static int ipip_rcv(struct sk_buff *skb)
  *     and that skb is filled properly by that function.
  */
 
-static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
-       struct net_device_stats *stats = &tunnel->dev->stats;
+       struct net_device_stats *stats = &dev->stats;
+       struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
        struct iphdr  *tiph = &tunnel->parms.iph;
        u8     tos = tunnel->parms.iph.tos;
        __be16 df = tiph->frag_off;
@@ -402,11 +407,6 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        __be32 dst = tiph->daddr;
        int    mtu;
 
-       if (tunnel->recursion++) {
-               stats->collisions++;
-               goto tx_error;
-       }
-
        if (skb->protocol != htons(ETH_P_IP))
                goto tx_error;
 
@@ -415,7 +415,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 
        if (!dst) {
                /* NBMA tunnel */
-               if ((rt = skb->rtable) == NULL) {
+               if ((rt = skb_rtable(skb)) == NULL) {
                        stats->tx_fifo_errors++;
                        goto tx_error;
                }
@@ -435,7 +435,7 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                        goto tx_error_icmp;
                }
        }
-       tdev = rt->u.dst.dev;
+       tdev = rt->dst.dev;
 
        if (tdev == dev) {
                ip_rt_put(rt);
@@ -443,29 +443,32 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                goto tx_error;
        }
 
-       if (tiph->frag_off)
-               mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
-       else
-               mtu = skb->dst ? dst_mtu(skb->dst) : dev->mtu;
+       df |= old_iph->frag_off & htons(IP_DF);
 
-       if (mtu < 68) {
-               stats->collisions++;
-               ip_rt_put(rt);
-               goto tx_error;
-       }
-       if (skb->dst)
-               skb->dst->ops->update_pmtu(skb->dst, mtu);
+       if (df) {
+               mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
 
-       df |= (old_iph->frag_off&htons(IP_DF));
+               if (mtu < 68) {
+                       stats->collisions++;
+                       ip_rt_put(rt);
+                       goto tx_error;
+               }
 
-       if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
-               icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-               ip_rt_put(rt);
-               goto tx_error;
+               if (skb_dst(skb))
+                       skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+
+               if ((old_iph->frag_off & htons(IP_DF)) &&
+                   mtu < ntohs(old_iph->tot_len)) {
+                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                 htonl(mtu));
+                       ip_rt_put(rt);
+                       goto tx_error;
+               }
        }
 
        if (tunnel->err_count > 0) {
-               if (jiffies - tunnel->err_time < IPTUNNEL_ERR_TIMEO) {
+               if (time_before(jiffies,
+                               tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
                        tunnel->err_count--;
                        dst_link_failure(skb);
                } else
@@ -482,10 +485,9 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
                struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
                if (!new_skb) {
                        ip_rt_put(rt);
-                       stats->tx_dropped++;
+                       txq->tx_dropped++;
                        dev_kfree_skb(skb);
-                       tunnel->recursion--;
-                       return 0;
+                       return NETDEV_TX_OK;
                }
                if (skb->sk)
                        skb_set_owner_w(new_skb, skb->sk);
@@ -500,8 +502,8 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
        IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
                              IPSKB_REROUTED);
-       dst_release(skb->dst);
-       skb->dst = &rt->u.dst;
+       skb_dst_drop(skb);
+       skb_dst_set(skb, &rt->dst);
 
        /*
         *      Push down and install the IPIP header.
@@ -522,16 +524,14 @@ static int ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
        nf_reset(skb);
 
        IPTUNNEL_XMIT();
-       tunnel->recursion--;
-       return 0;
+       return NETDEV_TX_OK;
 
 tx_error_icmp:
        dst_link_failure(skb);
 tx_error:
        stats->tx_errors++;
        dev_kfree_skb(skb);
-       tunnel->recursion--;
-       return 0;
+       return NETDEV_TX_OK;
 }
 
 static void ipip_tunnel_bind_dev(struct net_device *dev)
@@ -552,7 +552,7 @@ static void ipip_tunnel_bind_dev(struct net_device *dev)
                                    .proto = IPPROTO_IPIP };
                struct rtable *rt;
                if (!ip_route_output_key(dev_net(dev), &rt, &fl)) {
-                       tdev = rt->u.dst.dev;
+                       tdev = rt->dst.dev;
                        ip_rt_put(rt);
                }
                dev->flags |= IFF_POINTOPOINT;
@@ -691,12 +691,17 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
        return 0;
 }
 
+static const struct net_device_ops ipip_netdev_ops = {
+       .ndo_uninit     = ipip_tunnel_uninit,
+       .ndo_start_xmit = ipip_tunnel_xmit,
+       .ndo_do_ioctl   = ipip_tunnel_ioctl,
+       .ndo_change_mtu = ipip_tunnel_change_mtu,
+
+};
+
 static void ipip_tunnel_setup(struct net_device *dev)
 {
-       dev->uninit             = ipip_tunnel_uninit;
-       dev->hard_start_xmit    = ipip_tunnel_xmit;
-       dev->do_ioctl           = ipip_tunnel_ioctl;
-       dev->change_mtu         = ipip_tunnel_change_mtu;
+       dev->netdev_ops         = &ipip_netdev_ops;
        dev->destructor         = free_netdev;
 
        dev->type               = ARPHRD_TUNNEL;
@@ -706,13 +711,12 @@ static void ipip_tunnel_setup(struct net_device *dev)
        dev->iflink             = 0;
        dev->addr_len           = 4;
        dev->features           |= NETIF_F_NETNS_LOCAL;
+       dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
 }
 
-static int ipip_tunnel_init(struct net_device *dev)
+static void ipip_tunnel_init(struct net_device *dev)
 {
-       struct ip_tunnel *tunnel;
-
-       tunnel = netdev_priv(dev);
+       struct ip_tunnel *tunnel = netdev_priv(dev);
 
        tunnel->dev = dev;
        strcpy(tunnel->parms.name, dev->name);
@@ -721,11 +725,9 @@ static int ipip_tunnel_init(struct net_device *dev)
        memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
 
        ipip_tunnel_bind_dev(dev);
-
-       return 0;
 }
 
-static int ipip_fb_tunnel_init(struct net_device *dev)
+static void __net_init ipip_fb_tunnel_init(struct net_device *dev)
 {
        struct ip_tunnel *tunnel = netdev_priv(dev);
        struct iphdr *iph = &tunnel->parms.iph;
@@ -740,45 +742,38 @@ static int ipip_fb_tunnel_init(struct net_device *dev)
 
        dev_hold(dev);
        ipn->tunnels_wc[0]      = tunnel;
-       return 0;
 }
 
-static struct xfrm_tunnel ipip_handler = {
+static struct xfrm_tunnel ipip_handler __read_mostly = {
        .handler        =       ipip_rcv,
        .err_handler    =       ipip_err,
        .priority       =       1,
 };
 
-static char banner[] __initdata =
+static const char banner[] __initconst =
        KERN_INFO "IPv4 over IPv4 tunneling driver\n";
 
-static void ipip_destroy_tunnels(struct ipip_net *ipn)
+static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
 {
        int prio;
 
        for (prio = 1; prio < 4; prio++) {
                int h;
                for (h = 0; h < HASH_SIZE; h++) {
-                       struct ip_tunnel *t;
-                       while ((t = ipn->tunnels[prio][h]) != NULL)
-                               unregister_netdevice(t->dev);
+                       struct ip_tunnel *t = ipn->tunnels[prio][h];
+
+                       while (t != NULL) {
+                               unregister_netdevice_queue(t->dev, head);
+                               t = t->next;
+                       }
                }
        }
 }
 
-static int ipip_init_net(struct net *net)
+static int __net_init ipip_init_net(struct net *net)
 {
+       struct ipip_net *ipn = net_generic(net, ipip_net_id);
        int err;
-       struct ipip_net *ipn;
-
-       err = -ENOMEM;
-       ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
-       if (ipn == NULL)
-               goto err_alloc;
-
-       err = net_assign_generic(net, ipip_net_id, ipn);
-       if (err < 0)
-               goto err_assign;
 
        ipn->tunnels[0] = ipn->tunnels_wc;
        ipn->tunnels[1] = ipn->tunnels_l;
@@ -792,10 +787,10 @@ static int ipip_init_net(struct net *net)
                err = -ENOMEM;
                goto err_alloc_dev;
        }
-
-       ipn->fb_tunnel_dev->init = ipip_fb_tunnel_init;
        dev_net_set(ipn->fb_tunnel_dev, net);
 
+       ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
+
        if ((err = register_netdev(ipn->fb_tunnel_dev)))
                goto err_reg_dev;
 
@@ -805,27 +800,26 @@ err_reg_dev:
        free_netdev(ipn->fb_tunnel_dev);
 err_alloc_dev:
        /* nothing */
-err_assign:
-       kfree(ipn);
-err_alloc:
        return err;
 }
 
-static void ipip_exit_net(struct net *net)
+static void __net_exit ipip_exit_net(struct net *net)
 {
-       struct ipip_net *ipn;
+       struct ipip_net *ipn = net_generic(net, ipip_net_id);
+       LIST_HEAD(list);
 
-       ipn = net_generic(net, ipip_net_id);
        rtnl_lock();
-       ipip_destroy_tunnels(ipn);
-       unregister_netdevice(ipn->fb_tunnel_dev);
+       ipip_destroy_tunnels(ipn, &list);
+       unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
+       unregister_netdevice_many(&list);
        rtnl_unlock();
-       kfree(ipn);
 }
 
 static struct pernet_operations ipip_net_ops = {
        .init = ipip_init_net,
        .exit = ipip_exit_net,
+       .id   = &ipip_net_id,
+       .size = sizeof(struct ipip_net),
 };
 
 static int __init ipip_init(void)
@@ -834,15 +828,14 @@ static int __init ipip_init(void)
 
        printk(banner);
 
-       if (xfrm4_tunnel_register(&ipip_handler, AF_INET)) {
+       err = register_pernet_device(&ipip_net_ops);
+       if (err < 0)
+               return err;
+       err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
+       if (err < 0) {
+               unregister_pernet_device(&ipip_net_ops);
                printk(KERN_INFO "ipip init: can't register tunnel\n");
-               return -EAGAIN;
        }
-
-       err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
-       if (err)
-               xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
-
        return err;
 }
 
@@ -851,7 +844,7 @@ static void __exit ipip_fini(void)
        if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
                printk(KERN_INFO "ipip close: can't deregister tunnel\n");
 
-       unregister_pernet_gen_device(ipip_net_id, &ipip_net_ops);
+       unregister_pernet_device(&ipip_net_ops);
 }
 
 module_init(ipip_init);