2c2c35bace76482838d6301d74f24a9800ec3c51
[linux-3.10.git] / net / ipv4 / ipip.c
1 /*
2  *      Linux NET3:     IP/IP protocol decoder.
3  *
4  *      Authors:
5  *              Sam Lantinga (slouken@cs.ucdavis.edu)  02/01/95
6  *
7  *      Fixes:
8  *              Alan Cox        :       Merged and made usable non modular (it's so tiny it's silly as
9  *                                      a module taking up 2 pages).
10  *              Alan Cox        :       Fixed bug with 1.3.18 and IPIP not working (now needs to set skb->h.iph)
11  *                                      to keep ip_forward happy.
12  *              Alan Cox        :       More fixes for 1.3.21, and firewall fix. Maybe this will work soon 8).
13  *              Kai Schulte     :       Fixed #defines for IP_FIREWALL->FIREWALL
14  *              David Woodhouse :       Perform some basic ICMP handling.
15  *                                      IPIP Routing without decapsulation.
16  *              Carlos Picoto   :       GRE over IP support
17  *              Alexey Kuznetsov:       Reworked. Really, now it is truncated version of ipv4/ip_gre.c.
18  *                                      I do not want to merge them together.
19  *
20  *      This program is free software; you can redistribute it and/or
21  *      modify it under the terms of the GNU General Public License
22  *      as published by the Free Software Foundation; either version
23  *      2 of the License, or (at your option) any later version.
24  *
25  */
26
27 /* tunnel.c: an IP tunnel driver
28
29         The purpose of this driver is to provide an IP tunnel through
30         which you can tunnel network traffic transparently across subnets.
31
32         This was written by looking at Nick Holloway's dummy driver
33         Thanks for the great code!
34
35                 -Sam Lantinga   (slouken@cs.ucdavis.edu)  02/01/95
36
37         Minor tweaks:
38                 Cleaned up the code a little and added some pre-1.3.0 tweaks.
39                 dev->hard_header/hard_header_len changed to use no headers.
40                 Comments/bracketing tweaked.
41                 Made the tunnels use dev->name not tunnel: when error reporting.
42                 Added tx_dropped stat
43
44                 -Alan Cox       (alan@lxorguk.ukuu.org.uk) 21 March 95
45
46         Reworked:
47                 Changed to tunnel to destination gateway in addition to the
48                         tunnel's pointopoint address
49                 Almost completely rewritten
50                 Note:  There is currently no firewall or ICMP handling done.
51
52                 -Sam Lantinga   (slouken@cs.ucdavis.edu) 02/13/96
53
54 */
55
56 /* Things I wish I had known when writing the tunnel driver:
57
58         When the tunnel_xmit() function is called, the skb contains the
59         packet to be sent (plus a great deal of extra info), and dev
60         contains the tunnel device that _we_ are.
61
62         When we are passed a packet, we are expected to fill in the
63         source address with our source IP address.
64
65         What is the proper way to allocate, copy and free a buffer?
66         After you allocate it, it is a "0 length" chunk of memory
67         starting at zero.  If you want to add headers to the buffer
68         later, you'll have to call "skb_reserve(skb, amount)" with
69         the amount of memory you want reserved.  Then, you call
70         "skb_put(skb, amount)" with the amount of space you want in
71         the buffer.  skb_put() returns a pointer to the top (#0) of
72         that buffer.  skb->len is set to the amount of space you have
73         "allocated" with skb_put().  You can then write up to skb->len
74         bytes to that buffer.  If you need more, you can call skb_put()
75         again with the additional amount of space you need.  You can
76         find out how much more space you can allocate by calling
77         "skb_tailroom(skb)".
78         Now, to add header space, call "skb_push(skb, header_len)".
79         This creates space at the beginning of the buffer and returns
80         a pointer to this new space.  If later you need to strip a
81         header from a buffer, call "skb_pull(skb, header_len)".
82         skb_headroom() will return how much space is left at the top
83         of the buffer (before the main data).  Remember, this headroom
84         space must be reserved before the skb_put() function is called.
85         */
86
87 /*
88    This version of net/ipv4/ipip.c is cloned from net/ipv4/ip_gre.c
89
90    For comments look at net/ipv4/ip_gre.c --ANK
91  */
92
93
94 #include <linux/capability.h>
95 #include <linux/module.h>
96 #include <linux/types.h>
97 #include <linux/kernel.h>
98 #include <linux/slab.h>
99 #include <asm/uaccess.h>
100 #include <linux/skbuff.h>
101 #include <linux/netdevice.h>
102 #include <linux/in.h>
103 #include <linux/tcp.h>
104 #include <linux/udp.h>
105 #include <linux/if_arp.h>
106 #include <linux/mroute.h>
107 #include <linux/init.h>
108 #include <linux/netfilter_ipv4.h>
109 #include <linux/if_ether.h>
110
111 #include <net/sock.h>
112 #include <net/ip.h>
113 #include <net/icmp.h>
114 #include <net/ipip.h>
115 #include <net/inet_ecn.h>
116 #include <net/xfrm.h>
117 #include <net/net_namespace.h>
118 #include <net/netns/generic.h>
119
/* Number of buckets in each address-keyed tunnel hash table. */
#define HASH_SIZE  16

/*
 * Fold an IPv4 address into a bucket index in the range 0..15.
 *
 * The argument is parenthesized so that an expression argument cannot be
 * split apart by the cast or the shift.  Note that 'addr' is still
 * evaluated twice, so it must not have side effects.
 */
#define HASH(addr) ((((__force u32)(addr)) ^ (((__force u32)(addr)) >> 4)) & 0xF)
122
123 static int ipip_net_id __read_mostly;
124 struct ipip_net {
125         struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE];
126         struct ip_tunnel __rcu *tunnels_r[HASH_SIZE];
127         struct ip_tunnel __rcu *tunnels_l[HASH_SIZE];
128         struct ip_tunnel __rcu *tunnels_wc[1];
129         struct ip_tunnel __rcu **tunnels[4];
130
131         struct net_device *fb_tunnel_dev;
132 };
133
/* Defined further down in this file; needed earlier by ipip_tunnel_locate(). */
static int ipip_tunnel_init(struct net_device *dev);
static void ipip_tunnel_setup(struct net_device *dev);
static void ipip_dev_free(struct net_device *dev);
137
/*
 * Locking: the tunnel hash tables are protected by RCU and RTNL.
 */

/*
 * Iterate over the tunnel chain that begins at 'start', following the
 * ->next links with rcu_dereference().
 *
 * The cursor is NOT a macro parameter: the caller must have a variable
 * named 't' (struct ip_tunnel *) in scope.
 */
#define for_each_ip_tunnel_rcu(start)		\
	for (t = rcu_dereference(start);	\
	     t;					\
	     t = rcu_dereference(t->next))
144
145 /* often modified stats are per cpu, other are shared (netdev->stats) */
146 struct pcpu_tstats {
147         u64     rx_packets;
148         u64     rx_bytes;
149         u64     tx_packets;
150         u64     tx_bytes;
151         struct u64_stats_sync   syncp;
152 };
153
154 static struct rtnl_link_stats64 *ipip_get_stats64(struct net_device *dev,
155                                                   struct rtnl_link_stats64 *tot)
156 {
157         int i;
158
159         for_each_possible_cpu(i) {
160                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
161                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
162                 unsigned int start;
163
164                 do {
165                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
166                         rx_packets = tstats->rx_packets;
167                         tx_packets = tstats->tx_packets;
168                         rx_bytes = tstats->rx_bytes;
169                         tx_bytes = tstats->tx_bytes;
170                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
171
172                 tot->rx_packets += rx_packets;
173                 tot->tx_packets += tx_packets;
174                 tot->rx_bytes   += rx_bytes;
175                 tot->tx_bytes   += tx_bytes;
176         }
177
178         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
179         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
180         tot->tx_dropped = dev->stats.tx_dropped;
181         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
182         tot->tx_errors = dev->stats.tx_errors;
183         tot->collisions = dev->stats.collisions;
184
185         return tot;
186 }
187
188 static struct ip_tunnel *ipip_tunnel_lookup(struct net *net,
189                 __be32 remote, __be32 local)
190 {
191         unsigned int h0 = HASH(remote);
192         unsigned int h1 = HASH(local);
193         struct ip_tunnel *t;
194         struct ipip_net *ipn = net_generic(net, ipip_net_id);
195
196         for_each_ip_tunnel_rcu(ipn->tunnels_r_l[h0 ^ h1])
197                 if (local == t->parms.iph.saddr &&
198                     remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
199                         return t;
200
201         for_each_ip_tunnel_rcu(ipn->tunnels_r[h0])
202                 if (remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
203                         return t;
204
205         for_each_ip_tunnel_rcu(ipn->tunnels_l[h1])
206                 if (local == t->parms.iph.saddr && (t->dev->flags&IFF_UP))
207                         return t;
208
209         t = rcu_dereference(ipn->tunnels_wc[0]);
210         if (t && (t->dev->flags&IFF_UP))
211                 return t;
212         return NULL;
213 }
214
215 static struct ip_tunnel __rcu **__ipip_bucket(struct ipip_net *ipn,
216                 struct ip_tunnel_parm *parms)
217 {
218         __be32 remote = parms->iph.daddr;
219         __be32 local = parms->iph.saddr;
220         unsigned int h = 0;
221         int prio = 0;
222
223         if (remote) {
224                 prio |= 2;
225                 h ^= HASH(remote);
226         }
227         if (local) {
228                 prio |= 1;
229                 h ^= HASH(local);
230         }
231         return &ipn->tunnels[prio][h];
232 }
233
234 static inline struct ip_tunnel __rcu **ipip_bucket(struct ipip_net *ipn,
235                 struct ip_tunnel *t)
236 {
237         return __ipip_bucket(ipn, &t->parms);
238 }
239
240 static void ipip_tunnel_unlink(struct ipip_net *ipn, struct ip_tunnel *t)
241 {
242         struct ip_tunnel __rcu **tp;
243         struct ip_tunnel *iter;
244
245         for (tp = ipip_bucket(ipn, t);
246              (iter = rtnl_dereference(*tp)) != NULL;
247              tp = &iter->next) {
248                 if (t == iter) {
249                         rcu_assign_pointer(*tp, t->next);
250                         break;
251                 }
252         }
253 }
254
255 static void ipip_tunnel_link(struct ipip_net *ipn, struct ip_tunnel *t)
256 {
257         struct ip_tunnel __rcu **tp = ipip_bucket(ipn, t);
258
259         rcu_assign_pointer(t->next, rtnl_dereference(*tp));
260         rcu_assign_pointer(*tp, t);
261 }
262
263 static struct ip_tunnel *ipip_tunnel_locate(struct net *net,
264                 struct ip_tunnel_parm *parms, int create)
265 {
266         __be32 remote = parms->iph.daddr;
267         __be32 local = parms->iph.saddr;
268         struct ip_tunnel *t, *nt;
269         struct ip_tunnel __rcu **tp;
270         struct net_device *dev;
271         char name[IFNAMSIZ];
272         struct ipip_net *ipn = net_generic(net, ipip_net_id);
273
274         for (tp = __ipip_bucket(ipn, parms);
275                  (t = rtnl_dereference(*tp)) != NULL;
276                  tp = &t->next) {
277                 if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr)
278                         return t;
279         }
280         if (!create)
281                 return NULL;
282
283         if (parms->name[0])
284                 strlcpy(name, parms->name, IFNAMSIZ);
285         else
286                 strcpy(name, "tunl%d");
287
288         dev = alloc_netdev(sizeof(*t), name, ipip_tunnel_setup);
289         if (dev == NULL)
290                 return NULL;
291
292         dev_net_set(dev, net);
293
294         nt = netdev_priv(dev);
295         nt->parms = *parms;
296
297         if (ipip_tunnel_init(dev) < 0)
298                 goto failed_free;
299
300         if (register_netdevice(dev) < 0)
301                 goto failed_free;
302
303         strcpy(nt->parms.name, dev->name);
304
305         dev_hold(dev);
306         ipip_tunnel_link(ipn, nt);
307         return nt;
308
309 failed_free:
310         ipip_dev_free(dev);
311         return NULL;
312 }
313
314 /* called with RTNL */
315 static void ipip_tunnel_uninit(struct net_device *dev)
316 {
317         struct net *net = dev_net(dev);
318         struct ipip_net *ipn = net_generic(net, ipip_net_id);
319
320         if (dev == ipn->fb_tunnel_dev)
321                 RCU_INIT_POINTER(ipn->tunnels_wc[0], NULL);
322         else
323                 ipip_tunnel_unlink(ipn, netdev_priv(dev));
324         dev_put(dev);
325 }
326
327 static int ipip_err(struct sk_buff *skb, u32 info)
328 {
329
330 /* All the routers (except for Linux) return only
331    8 bytes of packet payload. It means, that precise relaying of
332    ICMP in the real Internet is absolutely infeasible.
333  */
334         const struct iphdr *iph = (const struct iphdr *)skb->data;
335         const int type = icmp_hdr(skb)->type;
336         const int code = icmp_hdr(skb)->code;
337         struct ip_tunnel *t;
338         int err;
339
340         switch (type) {
341         default:
342         case ICMP_PARAMETERPROB:
343                 return 0;
344
345         case ICMP_DEST_UNREACH:
346                 switch (code) {
347                 case ICMP_SR_FAILED:
348                 case ICMP_PORT_UNREACH:
349                         /* Impossible event. */
350                         return 0;
351                 default:
352                         /* All others are translated to HOST_UNREACH.
353                            rfc2003 contains "deep thoughts" about NET_UNREACH,
354                            I believe they are just ether pollution. --ANK
355                          */
356                         break;
357                 }
358                 break;
359         case ICMP_TIME_EXCEEDED:
360                 if (code != ICMP_EXC_TTL)
361                         return 0;
362                 break;
363         case ICMP_REDIRECT:
364                 break;
365         }
366
367         err = -ENOENT;
368
369         rcu_read_lock();
370         t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr);
371         if (t == NULL)
372                 goto out;
373
374         if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
375                 ipv4_update_pmtu(skb, dev_net(skb->dev), info,
376                                  t->dev->ifindex, 0, IPPROTO_IPIP, 0);
377                 err = 0;
378                 goto out;
379         }
380
381         if (type == ICMP_REDIRECT) {
382                 ipv4_redirect(skb, dev_net(skb->dev), t->dev->ifindex, 0,
383                               IPPROTO_IPIP, 0);
384                 err = 0;
385                 goto out;
386         }
387
388         if (t->parms.iph.daddr == 0)
389                 goto out;
390
391         err = 0;
392         if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED)
393                 goto out;
394
395         if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO))
396                 t->err_count++;
397         else
398                 t->err_count = 1;
399         t->err_time = jiffies;
400 out:
401         rcu_read_unlock();
402         return err;
403 }
404
405 static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph,
406                                         struct sk_buff *skb)
407 {
408         struct iphdr *inner_iph = ip_hdr(skb);
409
410         if (INET_ECN_is_ce(outer_iph->tos))
411                 IP_ECN_set_ce(inner_iph);
412 }
413
414 static int ipip_rcv(struct sk_buff *skb)
415 {
416         struct ip_tunnel *tunnel;
417         const struct iphdr *iph = ip_hdr(skb);
418
419         rcu_read_lock();
420         tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr);
421         if (tunnel != NULL) {
422                 struct pcpu_tstats *tstats;
423
424                 if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
425                         rcu_read_unlock();
426                         kfree_skb(skb);
427                         return 0;
428                 }
429
430                 secpath_reset(skb);
431
432                 skb->mac_header = skb->network_header;
433                 skb_reset_network_header(skb);
434                 skb->protocol = htons(ETH_P_IP);
435                 skb->pkt_type = PACKET_HOST;
436
437                 tstats = this_cpu_ptr(tunnel->dev->tstats);
438                 u64_stats_update_begin(&tstats->syncp);
439                 tstats->rx_packets++;
440                 tstats->rx_bytes += skb->len;
441                 u64_stats_update_end(&tstats->syncp);
442
443                 __skb_tunnel_rx(skb, tunnel->dev);
444
445                 ipip_ecn_decapsulate(iph, skb);
446
447                 netif_rx(skb);
448
449                 rcu_read_unlock();
450                 return 0;
451         }
452         rcu_read_unlock();
453
454         return -1;
455 }
456
457 /*
458  *      This function assumes it is being called from dev_queue_xmit()
459  *      and that skb is filled properly by that function.
460  */
461
462 static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
463 {
464         struct ip_tunnel *tunnel = netdev_priv(dev);
465         struct pcpu_tstats *tstats;
466         const struct iphdr  *tiph = &tunnel->parms.iph;
467         u8     tos = tunnel->parms.iph.tos;
468         __be16 df = tiph->frag_off;
469         struct rtable *rt;                      /* Route to the other host */
470         struct net_device *tdev;                /* Device to other host */
471         const struct iphdr  *old_iph = ip_hdr(skb);
472         struct iphdr  *iph;                     /* Our new IP header */
473         unsigned int max_headroom;              /* The extra header space needed */
474         __be32 dst = tiph->daddr;
475         struct flowi4 fl4;
476         int    mtu;
477
478         if (skb->protocol != htons(ETH_P_IP))
479                 goto tx_error;
480
481         if (tos & 1)
482                 tos = old_iph->tos;
483
484         if (!dst) {
485                 /* NBMA tunnel */
486                 if ((rt = skb_rtable(skb)) == NULL) {
487                         dev->stats.tx_fifo_errors++;
488                         goto tx_error;
489                 }
490                 dst = rt->rt_gateway;
491         }
492
493         rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
494                                    dst, tiph->saddr,
495                                    0, 0,
496                                    IPPROTO_IPIP, RT_TOS(tos),
497                                    tunnel->parms.link);
498         if (IS_ERR(rt)) {
499                 dev->stats.tx_carrier_errors++;
500                 goto tx_error_icmp;
501         }
502         tdev = rt->dst.dev;
503
504         if (tdev == dev) {
505                 ip_rt_put(rt);
506                 dev->stats.collisions++;
507                 goto tx_error;
508         }
509
510         df |= old_iph->frag_off & htons(IP_DF);
511
512         if (df) {
513                 mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr);
514
515                 if (mtu < 68) {
516                         dev->stats.collisions++;
517                         ip_rt_put(rt);
518                         goto tx_error;
519                 }
520
521                 if (skb_dst(skb))
522                         skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
523
524                 if ((old_iph->frag_off & htons(IP_DF)) &&
525                     mtu < ntohs(old_iph->tot_len)) {
526                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
527                                   htonl(mtu));
528                         ip_rt_put(rt);
529                         goto tx_error;
530                 }
531         }
532
533         if (tunnel->err_count > 0) {
534                 if (time_before(jiffies,
535                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
536                         tunnel->err_count--;
537                         dst_link_failure(skb);
538                 } else
539                         tunnel->err_count = 0;
540         }
541
542         /*
543          * Okay, now see if we can stuff it in the buffer as-is.
544          */
545         max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
546
547         if (skb_headroom(skb) < max_headroom || skb_shared(skb) ||
548             (skb_cloned(skb) && !skb_clone_writable(skb, 0))) {
549                 struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
550                 if (!new_skb) {
551                         ip_rt_put(rt);
552                         dev->stats.tx_dropped++;
553                         dev_kfree_skb(skb);
554                         return NETDEV_TX_OK;
555                 }
556                 if (skb->sk)
557                         skb_set_owner_w(new_skb, skb->sk);
558                 dev_kfree_skb(skb);
559                 skb = new_skb;
560                 old_iph = ip_hdr(skb);
561         }
562
563         skb->transport_header = skb->network_header;
564         skb_push(skb, sizeof(struct iphdr));
565         skb_reset_network_header(skb);
566         memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
567         IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED |
568                               IPSKB_REROUTED);
569         skb_dst_drop(skb);
570         skb_dst_set(skb, &rt->dst);
571
572         /*
573          *      Push down and install the IPIP header.
574          */
575
576         iph                     =       ip_hdr(skb);
577         iph->version            =       4;
578         iph->ihl                =       sizeof(struct iphdr)>>2;
579         iph->frag_off           =       df;
580         iph->protocol           =       IPPROTO_IPIP;
581         iph->tos                =       INET_ECN_encapsulate(tos, old_iph->tos);
582         iph->daddr              =       fl4.daddr;
583         iph->saddr              =       fl4.saddr;
584
585         if ((iph->ttl = tiph->ttl) == 0)
586                 iph->ttl        =       old_iph->ttl;
587
588         nf_reset(skb);
589         tstats = this_cpu_ptr(dev->tstats);
590         __IPTUNNEL_XMIT(tstats, &dev->stats);
591         return NETDEV_TX_OK;
592
593 tx_error_icmp:
594         dst_link_failure(skb);
595 tx_error:
596         dev->stats.tx_errors++;
597         dev_kfree_skb(skb);
598         return NETDEV_TX_OK;
599 }
600
601 static void ipip_tunnel_bind_dev(struct net_device *dev)
602 {
603         struct net_device *tdev = NULL;
604         struct ip_tunnel *tunnel;
605         const struct iphdr *iph;
606
607         tunnel = netdev_priv(dev);
608         iph = &tunnel->parms.iph;
609
610         if (iph->daddr) {
611                 struct rtable *rt;
612                 struct flowi4 fl4;
613
614                 rt = ip_route_output_ports(dev_net(dev), &fl4, NULL,
615                                            iph->daddr, iph->saddr,
616                                            0, 0,
617                                            IPPROTO_IPIP,
618                                            RT_TOS(iph->tos),
619                                            tunnel->parms.link);
620                 if (!IS_ERR(rt)) {
621                         tdev = rt->dst.dev;
622                         ip_rt_put(rt);
623                 }
624                 dev->flags |= IFF_POINTOPOINT;
625         }
626
627         if (!tdev && tunnel->parms.link)
628                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
629
630         if (tdev) {
631                 dev->hard_header_len = tdev->hard_header_len + sizeof(struct iphdr);
632                 dev->mtu = tdev->mtu - sizeof(struct iphdr);
633         }
634         dev->iflink = tunnel->parms.link;
635 }
636
637 static int
638 ipip_tunnel_ioctl (struct net_device *dev, struct ifreq *ifr, int cmd)
639 {
640         int err = 0;
641         struct ip_tunnel_parm p;
642         struct ip_tunnel *t;
643         struct net *net = dev_net(dev);
644         struct ipip_net *ipn = net_generic(net, ipip_net_id);
645
646         switch (cmd) {
647         case SIOCGETTUNNEL:
648                 t = NULL;
649                 if (dev == ipn->fb_tunnel_dev) {
650                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
651                                 err = -EFAULT;
652                                 break;
653                         }
654                         t = ipip_tunnel_locate(net, &p, 0);
655                 }
656                 if (t == NULL)
657                         t = netdev_priv(dev);
658                 memcpy(&p, &t->parms, sizeof(p));
659                 if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
660                         err = -EFAULT;
661                 break;
662
663         case SIOCADDTUNNEL:
664         case SIOCCHGTUNNEL:
665                 err = -EPERM;
666                 if (!capable(CAP_NET_ADMIN))
667                         goto done;
668
669                 err = -EFAULT;
670                 if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
671                         goto done;
672
673                 err = -EINVAL;
674                 if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
675                     p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
676                         goto done;
677                 if (p.iph.ttl)
678                         p.iph.frag_off |= htons(IP_DF);
679
680                 t = ipip_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
681
682                 if (dev != ipn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
683                         if (t != NULL) {
684                                 if (t->dev != dev) {
685                                         err = -EEXIST;
686                                         break;
687                                 }
688                         } else {
689                                 if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
690                                     (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
691                                         err = -EINVAL;
692                                         break;
693                                 }
694                                 t = netdev_priv(dev);
695                                 ipip_tunnel_unlink(ipn, t);
696                                 synchronize_net();
697                                 t->parms.iph.saddr = p.iph.saddr;
698                                 t->parms.iph.daddr = p.iph.daddr;
699                                 memcpy(dev->dev_addr, &p.iph.saddr, 4);
700                                 memcpy(dev->broadcast, &p.iph.daddr, 4);
701                                 ipip_tunnel_link(ipn, t);
702                                 netdev_state_change(dev);
703                         }
704                 }
705
706                 if (t) {
707                         err = 0;
708                         if (cmd == SIOCCHGTUNNEL) {
709                                 t->parms.iph.ttl = p.iph.ttl;
710                                 t->parms.iph.tos = p.iph.tos;
711                                 t->parms.iph.frag_off = p.iph.frag_off;
712                                 if (t->parms.link != p.link) {
713                                         t->parms.link = p.link;
714                                         ipip_tunnel_bind_dev(dev);
715                                         netdev_state_change(dev);
716                                 }
717                         }
718                         if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
719                                 err = -EFAULT;
720                 } else
721                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
722                 break;
723
724         case SIOCDELTUNNEL:
725                 err = -EPERM;
726                 if (!capable(CAP_NET_ADMIN))
727                         goto done;
728
729                 if (dev == ipn->fb_tunnel_dev) {
730                         err = -EFAULT;
731                         if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
732                                 goto done;
733                         err = -ENOENT;
734                         if ((t = ipip_tunnel_locate(net, &p, 0)) == NULL)
735                                 goto done;
736                         err = -EPERM;
737                         if (t->dev == ipn->fb_tunnel_dev)
738                                 goto done;
739                         dev = t->dev;
740                 }
741                 unregister_netdevice(dev);
742                 err = 0;
743                 break;
744
745         default:
746                 err = -EINVAL;
747         }
748
749 done:
750         return err;
751 }
752
753 static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
754 {
755         if (new_mtu < 68 || new_mtu > 0xFFF8 - sizeof(struct iphdr))
756                 return -EINVAL;
757         dev->mtu = new_mtu;
758         return 0;
759 }
760
761 static const struct net_device_ops ipip_netdev_ops = {
762         .ndo_uninit     = ipip_tunnel_uninit,
763         .ndo_start_xmit = ipip_tunnel_xmit,
764         .ndo_do_ioctl   = ipip_tunnel_ioctl,
765         .ndo_change_mtu = ipip_tunnel_change_mtu,
766         .ndo_get_stats64 = ipip_get_stats64,
767 };
768
769 static void ipip_dev_free(struct net_device *dev)
770 {
771         free_percpu(dev->tstats);
772         free_netdev(dev);
773 }
774
775 static void ipip_tunnel_setup(struct net_device *dev)
776 {
777         dev->netdev_ops         = &ipip_netdev_ops;
778         dev->destructor         = ipip_dev_free;
779
780         dev->type               = ARPHRD_TUNNEL;
781         dev->hard_header_len    = LL_MAX_HEADER + sizeof(struct iphdr);
782         dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr);
783         dev->flags              = IFF_NOARP;
784         dev->iflink             = 0;
785         dev->addr_len           = 4;
786         dev->features           |= NETIF_F_NETNS_LOCAL;
787         dev->features           |= NETIF_F_LLTX;
788         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
789 }
790
791 static int ipip_tunnel_init(struct net_device *dev)
792 {
793         struct ip_tunnel *tunnel = netdev_priv(dev);
794
795         tunnel->dev = dev;
796
797         memcpy(dev->dev_addr, &tunnel->parms.iph.saddr, 4);
798         memcpy(dev->broadcast, &tunnel->parms.iph.daddr, 4);
799
800         ipip_tunnel_bind_dev(dev);
801
802         dev->tstats = alloc_percpu(struct pcpu_tstats);
803         if (!dev->tstats)
804                 return -ENOMEM;
805
806         return 0;
807 }
808
809 static int __net_init ipip_fb_tunnel_init(struct net_device *dev)
810 {
811         struct ip_tunnel *tunnel = netdev_priv(dev);
812         struct iphdr *iph = &tunnel->parms.iph;
813         struct ipip_net *ipn = net_generic(dev_net(dev), ipip_net_id);
814
815         tunnel->dev = dev;
816         strcpy(tunnel->parms.name, dev->name);
817
818         iph->version            = 4;
819         iph->protocol           = IPPROTO_IPIP;
820         iph->ihl                = 5;
821
822         dev->tstats = alloc_percpu(struct pcpu_tstats);
823         if (!dev->tstats)
824                 return -ENOMEM;
825
826         dev_hold(dev);
827         rcu_assign_pointer(ipn->tunnels_wc[0], tunnel);
828         return 0;
829 }
830
831 static struct xfrm_tunnel ipip_handler __read_mostly = {
832         .handler        =       ipip_rcv,
833         .err_handler    =       ipip_err,
834         .priority       =       1,
835 };
836
837 static const char banner[] __initconst =
838         KERN_INFO "IPv4 over IPv4 tunneling driver\n";
839
840 static void ipip_destroy_tunnels(struct ipip_net *ipn, struct list_head *head)
841 {
842         int prio;
843
844         for (prio = 1; prio < 4; prio++) {
845                 int h;
846                 for (h = 0; h < HASH_SIZE; h++) {
847                         struct ip_tunnel *t;
848
849                         t = rtnl_dereference(ipn->tunnels[prio][h]);
850                         while (t != NULL) {
851                                 unregister_netdevice_queue(t->dev, head);
852                                 t = rtnl_dereference(t->next);
853                         }
854                 }
855         }
856 }
857
858 static int __net_init ipip_init_net(struct net *net)
859 {
860         struct ipip_net *ipn = net_generic(net, ipip_net_id);
861         struct ip_tunnel *t;
862         int err;
863
864         ipn->tunnels[0] = ipn->tunnels_wc;
865         ipn->tunnels[1] = ipn->tunnels_l;
866         ipn->tunnels[2] = ipn->tunnels_r;
867         ipn->tunnels[3] = ipn->tunnels_r_l;
868
869         ipn->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel),
870                                            "tunl0",
871                                            ipip_tunnel_setup);
872         if (!ipn->fb_tunnel_dev) {
873                 err = -ENOMEM;
874                 goto err_alloc_dev;
875         }
876         dev_net_set(ipn->fb_tunnel_dev, net);
877
878         err = ipip_fb_tunnel_init(ipn->fb_tunnel_dev);
879         if (err)
880                 goto err_reg_dev;
881
882         if ((err = register_netdev(ipn->fb_tunnel_dev)))
883                 goto err_reg_dev;
884
885         t = netdev_priv(ipn->fb_tunnel_dev);
886
887         strcpy(t->parms.name, ipn->fb_tunnel_dev->name);
888         return 0;
889
890 err_reg_dev:
891         ipip_dev_free(ipn->fb_tunnel_dev);
892 err_alloc_dev:
893         /* nothing */
894         return err;
895 }
896
897 static void __net_exit ipip_exit_net(struct net *net)
898 {
899         struct ipip_net *ipn = net_generic(net, ipip_net_id);
900         LIST_HEAD(list);
901
902         rtnl_lock();
903         ipip_destroy_tunnels(ipn, &list);
904         unregister_netdevice_queue(ipn->fb_tunnel_dev, &list);
905         unregister_netdevice_many(&list);
906         rtnl_unlock();
907 }
908
909 static struct pernet_operations ipip_net_ops = {
910         .init = ipip_init_net,
911         .exit = ipip_exit_net,
912         .id   = &ipip_net_id,
913         .size = sizeof(struct ipip_net),
914 };
915
916 static int __init ipip_init(void)
917 {
918         int err;
919
920         printk(banner);
921
922         err = register_pernet_device(&ipip_net_ops);
923         if (err < 0)
924                 return err;
925         err = xfrm4_tunnel_register(&ipip_handler, AF_INET);
926         if (err < 0) {
927                 unregister_pernet_device(&ipip_net_ops);
928                 pr_info("%s: can't register tunnel\n", __func__);
929         }
930         return err;
931 }
932
933 static void __exit ipip_fini(void)
934 {
935         if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
936                 pr_info("%s: can't deregister tunnel\n", __func__);
937
938         unregister_pernet_device(&ipip_net_ops);
939 }
940
941 module_init(ipip_init);
942 module_exit(ipip_fini);
943 MODULE_LICENSE("GPL");
944 MODULE_ALIAS_NETDEV("tunl0");