7fa8f08fa7ae08220bfdb3a9bd4ab7077a1c5a9e
[linux-3.10.git] / net / ipv4 / ip_tunnel.c
1 /*
2  * Copyright (c) 2013 Nicira, Inc.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of version 2 of the GNU General Public
6  * License as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA
17  */
18
19 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20
21 #include <linux/capability.h>
22 #include <linux/module.h>
23 #include <linux/types.h>
24 #include <linux/kernel.h>
25 #include <linux/slab.h>
26 #include <linux/uaccess.h>
27 #include <linux/skbuff.h>
28 #include <linux/netdevice.h>
29 #include <linux/in.h>
30 #include <linux/tcp.h>
31 #include <linux/udp.h>
32 #include <linux/if_arp.h>
33 #include <linux/mroute.h>
34 #include <linux/init.h>
35 #include <linux/in6.h>
36 #include <linux/inetdevice.h>
37 #include <linux/igmp.h>
38 #include <linux/netfilter_ipv4.h>
39 #include <linux/etherdevice.h>
40 #include <linux/if_ether.h>
41 #include <linux/if_vlan.h>
42 #include <linux/rculist.h>
43
44 #include <net/sock.h>
45 #include <net/ip.h>
46 #include <net/icmp.h>
47 #include <net/protocol.h>
48 #include <net/ip_tunnels.h>
49 #include <net/arp.h>
50 #include <net/checksum.h>
51 #include <net/dsfield.h>
52 #include <net/inet_ecn.h>
53 #include <net/xfrm.h>
54 #include <net/net_namespace.h>
55 #include <net/netns/generic.h>
56 #include <net/rtnetlink.h>
57
58 #if IS_ENABLED(CONFIG_IPV6)
59 #include <net/ipv6.h>
60 #include <net/ip6_fib.h>
61 #include <net/ip6_route.h>
62 #endif
63
64 static unsigned int ip_tunnel_hash(struct ip_tunnel_net *itn,
65                                    __be32 key, __be32 remote)
66 {
67         return hash_32((__force u32)key ^ (__force u32)remote,
68                          IP_TNL_HASH_BITS);
69 }
70
71 /* Often modified stats are per cpu, other are shared (netdev->stats) */
72 struct rtnl_link_stats64 *ip_tunnel_get_stats64(struct net_device *dev,
73                                                 struct rtnl_link_stats64 *tot)
74 {
75         int i;
76
77         for_each_possible_cpu(i) {
78                 const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i);
79                 u64 rx_packets, rx_bytes, tx_packets, tx_bytes;
80                 unsigned int start;
81
82                 do {
83                         start = u64_stats_fetch_begin_bh(&tstats->syncp);
84                         rx_packets = tstats->rx_packets;
85                         tx_packets = tstats->tx_packets;
86                         rx_bytes = tstats->rx_bytes;
87                         tx_bytes = tstats->tx_bytes;
88                 } while (u64_stats_fetch_retry_bh(&tstats->syncp, start));
89
90                 tot->rx_packets += rx_packets;
91                 tot->tx_packets += tx_packets;
92                 tot->rx_bytes   += rx_bytes;
93                 tot->tx_bytes   += tx_bytes;
94         }
95
96         tot->multicast = dev->stats.multicast;
97
98         tot->rx_crc_errors = dev->stats.rx_crc_errors;
99         tot->rx_fifo_errors = dev->stats.rx_fifo_errors;
100         tot->rx_length_errors = dev->stats.rx_length_errors;
101         tot->rx_frame_errors = dev->stats.rx_frame_errors;
102         tot->rx_errors = dev->stats.rx_errors;
103
104         tot->tx_fifo_errors = dev->stats.tx_fifo_errors;
105         tot->tx_carrier_errors = dev->stats.tx_carrier_errors;
106         tot->tx_dropped = dev->stats.tx_dropped;
107         tot->tx_aborted_errors = dev->stats.tx_aborted_errors;
108         tot->tx_errors = dev->stats.tx_errors;
109
110         tot->collisions  = dev->stats.collisions;
111
112         return tot;
113 }
114 EXPORT_SYMBOL_GPL(ip_tunnel_get_stats64);
115
116 static bool ip_tunnel_key_match(const struct ip_tunnel_parm *p,
117                                 __be16 flags, __be32 key)
118 {
119         if (p->i_flags & TUNNEL_KEY) {
120                 if (flags & TUNNEL_KEY)
121                         return key == p->i_key;
122                 else
123                         /* key expected, none present */
124                         return false;
125         } else
126                 return !(flags & TUNNEL_KEY);
127 }
128
129 /* Fallback tunnel: no source, no destination, no key, no options
130
131    Tunnel hash table:
132    We require exact key match i.e. if a key is present in packet
133    it will match only tunnel with the same key; if it is not present,
134    it will match only keyless tunnel.
135
136    All keysless packets, if not matched configured keyless tunnels
137    will match fallback tunnel.
138    Given src, dst and key, find appropriate for input tunnel.
139 */
140 struct ip_tunnel *ip_tunnel_lookup(struct ip_tunnel_net *itn,
141                                    int link, __be16 flags,
142                                    __be32 remote, __be32 local,
143                                    __be32 key)
144 {
145         unsigned int hash;
146         struct ip_tunnel *t, *cand = NULL;
147         struct hlist_head *head;
148
149         hash = ip_tunnel_hash(itn, key, remote);
150         head = &itn->tunnels[hash];
151
152         hlist_for_each_entry_rcu(t, head, hash_node) {
153                 if (local != t->parms.iph.saddr ||
154                     remote != t->parms.iph.daddr ||
155                     !(t->dev->flags & IFF_UP))
156                         continue;
157
158                 if (!ip_tunnel_key_match(&t->parms, flags, key))
159                         continue;
160
161                 if (t->parms.link == link)
162                         return t;
163                 else
164                         cand = t;
165         }
166
167         hlist_for_each_entry_rcu(t, head, hash_node) {
168                 if (remote != t->parms.iph.daddr ||
169                     !(t->dev->flags & IFF_UP))
170                         continue;
171
172                 if (!ip_tunnel_key_match(&t->parms, flags, key))
173                         continue;
174
175                 if (t->parms.link == link)
176                         return t;
177                 else if (!cand)
178                         cand = t;
179         }
180
181         hash = ip_tunnel_hash(itn, key, 0);
182         head = &itn->tunnels[hash];
183
184         hlist_for_each_entry_rcu(t, head, hash_node) {
185                 if ((local != t->parms.iph.saddr &&
186                      (local != t->parms.iph.daddr ||
187                       !ipv4_is_multicast(local))) ||
188                     !(t->dev->flags & IFF_UP))
189                         continue;
190
191                 if (!ip_tunnel_key_match(&t->parms, flags, key))
192                         continue;
193
194                 if (t->parms.link == link)
195                         return t;
196                 else if (!cand)
197                         cand = t;
198         }
199
200         if (flags & TUNNEL_NO_KEY)
201                 goto skip_key_lookup;
202
203         hlist_for_each_entry_rcu(t, head, hash_node) {
204                 if (t->parms.i_key != key ||
205                     !(t->dev->flags & IFF_UP))
206                         continue;
207
208                 if (t->parms.link == link)
209                         return t;
210                 else if (!cand)
211                         cand = t;
212         }
213
214 skip_key_lookup:
215         if (cand)
216                 return cand;
217
218         if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP)
219                 return netdev_priv(itn->fb_tunnel_dev);
220
221
222         return NULL;
223 }
224 EXPORT_SYMBOL_GPL(ip_tunnel_lookup);
225
226 static struct hlist_head *ip_bucket(struct ip_tunnel_net *itn,
227                                     struct ip_tunnel_parm *parms)
228 {
229         unsigned int h;
230         __be32 remote;
231
232         if (parms->iph.daddr && !ipv4_is_multicast(parms->iph.daddr))
233                 remote = parms->iph.daddr;
234         else
235                 remote = 0;
236
237         h = ip_tunnel_hash(itn, parms->i_key, remote);
238         return &itn->tunnels[h];
239 }
240
241 static void ip_tunnel_add(struct ip_tunnel_net *itn, struct ip_tunnel *t)
242 {
243         struct hlist_head *head = ip_bucket(itn, &t->parms);
244
245         hlist_add_head_rcu(&t->hash_node, head);
246 }
247
248 static void ip_tunnel_del(struct ip_tunnel *t)
249 {
250         hlist_del_init_rcu(&t->hash_node);
251 }
252
253 static struct ip_tunnel *ip_tunnel_find(struct ip_tunnel_net *itn,
254                                         struct ip_tunnel_parm *parms,
255                                         int type)
256 {
257         __be32 remote = parms->iph.daddr;
258         __be32 local = parms->iph.saddr;
259         __be32 key = parms->i_key;
260         int link = parms->link;
261         struct ip_tunnel *t = NULL;
262         struct hlist_head *head = ip_bucket(itn, parms);
263
264         hlist_for_each_entry_rcu(t, head, hash_node) {
265                 if (local == t->parms.iph.saddr &&
266                     remote == t->parms.iph.daddr &&
267                     key == t->parms.i_key &&
268                     link == t->parms.link &&
269                     type == t->dev->type)
270                         break;
271         }
272         return t;
273 }
274
275 static struct net_device *__ip_tunnel_create(struct net *net,
276                                              const struct rtnl_link_ops *ops,
277                                              struct ip_tunnel_parm *parms)
278 {
279         int err;
280         struct ip_tunnel *tunnel;
281         struct net_device *dev;
282         char name[IFNAMSIZ];
283
284         if (parms->name[0])
285                 strlcpy(name, parms->name, IFNAMSIZ);
286         else {
287                 if (strlen(ops->kind) > (IFNAMSIZ - 3)) {
288                         err = -E2BIG;
289                         goto failed;
290                 }
291                 strlcpy(name, ops->kind, IFNAMSIZ);
292                 strncat(name, "%d", 2);
293         }
294
295         ASSERT_RTNL();
296         dev = alloc_netdev(ops->priv_size, name, ops->setup);
297         if (!dev) {
298                 err = -ENOMEM;
299                 goto failed;
300         }
301         dev_net_set(dev, net);
302
303         dev->rtnl_link_ops = ops;
304
305         tunnel = netdev_priv(dev);
306         tunnel->parms = *parms;
307
308         err = register_netdevice(dev);
309         if (err)
310                 goto failed_free;
311
312         return dev;
313
314 failed_free:
315         free_netdev(dev);
316 failed:
317         return ERR_PTR(err);
318 }
319
320 static inline struct rtable *ip_route_output_tunnel(struct net *net,
321                                                     struct flowi4 *fl4,
322                                                     int proto,
323                                                     __be32 daddr, __be32 saddr,
324                                                     __be32 key, __u8 tos, int oif)
325 {
326         memset(fl4, 0, sizeof(*fl4));
327         fl4->flowi4_oif = oif;
328         fl4->daddr = daddr;
329         fl4->saddr = saddr;
330         fl4->flowi4_tos = tos;
331         fl4->flowi4_proto = proto;
332         fl4->fl4_gre_key = key;
333         return ip_route_output_key(net, fl4);
334 }
335
336 static int ip_tunnel_bind_dev(struct net_device *dev)
337 {
338         struct net_device *tdev = NULL;
339         struct ip_tunnel *tunnel = netdev_priv(dev);
340         const struct iphdr *iph;
341         int hlen = LL_MAX_HEADER;
342         int mtu = ETH_DATA_LEN;
343         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
344
345         iph = &tunnel->parms.iph;
346
347         /* Guess output device to choose reasonable mtu and needed_headroom */
348         if (iph->daddr) {
349                 struct flowi4 fl4;
350                 struct rtable *rt;
351
352                 rt = ip_route_output_tunnel(dev_net(dev), &fl4,
353                                             tunnel->parms.iph.protocol,
354                                             iph->daddr, iph->saddr,
355                                             tunnel->parms.o_key,
356                                             RT_TOS(iph->tos),
357                                             tunnel->parms.link);
358                 if (!IS_ERR(rt)) {
359                         tdev = rt->dst.dev;
360                         ip_rt_put(rt);
361                 }
362                 if (dev->type != ARPHRD_ETHER)
363                         dev->flags |= IFF_POINTOPOINT;
364         }
365
366         if (!tdev && tunnel->parms.link)
367                 tdev = __dev_get_by_index(dev_net(dev), tunnel->parms.link);
368
369         if (tdev) {
370                 hlen = tdev->hard_header_len + tdev->needed_headroom;
371                 mtu = tdev->mtu;
372         }
373         dev->iflink = tunnel->parms.link;
374
375         dev->needed_headroom = t_hlen + hlen;
376         mtu -= (dev->hard_header_len + t_hlen);
377
378         if (mtu < 68)
379                 mtu = 68;
380
381         return mtu;
382 }
383
384 static struct ip_tunnel *ip_tunnel_create(struct net *net,
385                                           struct ip_tunnel_net *itn,
386                                           struct ip_tunnel_parm *parms)
387 {
388         struct ip_tunnel *nt, *fbt;
389         struct net_device *dev;
390
391         BUG_ON(!itn->fb_tunnel_dev);
392         fbt = netdev_priv(itn->fb_tunnel_dev);
393         dev = __ip_tunnel_create(net, itn->fb_tunnel_dev->rtnl_link_ops, parms);
394         if (IS_ERR(dev))
395                 return NULL;
396
397         dev->mtu = ip_tunnel_bind_dev(dev);
398
399         nt = netdev_priv(dev);
400         ip_tunnel_add(itn, nt);
401         return nt;
402 }
403
404 int ip_tunnel_rcv(struct ip_tunnel *tunnel, struct sk_buff *skb,
405                   const struct tnl_ptk_info *tpi, bool log_ecn_error)
406 {
407         struct pcpu_tstats *tstats;
408         const struct iphdr *iph = ip_hdr(skb);
409         int err;
410
411         secpath_reset(skb);
412
413         skb->protocol = tpi->proto;
414
415         skb->mac_header = skb->network_header;
416         __pskb_pull(skb, tunnel->hlen);
417         skb_postpull_rcsum(skb, skb_transport_header(skb), tunnel->hlen);
418 #ifdef CONFIG_NET_IPGRE_BROADCAST
419         if (ipv4_is_multicast(iph->daddr)) {
420                 /* Looped back packet, drop it! */
421                 if (rt_is_output_route(skb_rtable(skb)))
422                         goto drop;
423                 tunnel->dev->stats.multicast++;
424                 skb->pkt_type = PACKET_BROADCAST;
425         }
426 #endif
427
428         if ((!(tpi->flags&TUNNEL_CSUM) &&  (tunnel->parms.i_flags&TUNNEL_CSUM)) ||
429              ((tpi->flags&TUNNEL_CSUM) && !(tunnel->parms.i_flags&TUNNEL_CSUM))) {
430                 tunnel->dev->stats.rx_crc_errors++;
431                 tunnel->dev->stats.rx_errors++;
432                 goto drop;
433         }
434
435         if (tunnel->parms.i_flags&TUNNEL_SEQ) {
436                 if (!(tpi->flags&TUNNEL_SEQ) ||
437                     (tunnel->i_seqno && (s32)(ntohl(tpi->seq) - tunnel->i_seqno) < 0)) {
438                         tunnel->dev->stats.rx_fifo_errors++;
439                         tunnel->dev->stats.rx_errors++;
440                         goto drop;
441                 }
442                 tunnel->i_seqno = ntohl(tpi->seq) + 1;
443         }
444
445         /* Warning: All skb pointers will be invalidated! */
446         if (tunnel->dev->type == ARPHRD_ETHER) {
447                 if (!pskb_may_pull(skb, ETH_HLEN)) {
448                         tunnel->dev->stats.rx_length_errors++;
449                         tunnel->dev->stats.rx_errors++;
450                         goto drop;
451                 }
452
453                 iph = ip_hdr(skb);
454                 skb->protocol = eth_type_trans(skb, tunnel->dev);
455                 skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
456         }
457
458         skb->pkt_type = PACKET_HOST;
459         __skb_tunnel_rx(skb, tunnel->dev);
460
461         skb_reset_network_header(skb);
462         err = IP_ECN_decapsulate(iph, skb);
463         if (unlikely(err)) {
464                 if (log_ecn_error)
465                         net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n",
466                                         &iph->saddr, iph->tos);
467                 if (err > 1) {
468                         ++tunnel->dev->stats.rx_frame_errors;
469                         ++tunnel->dev->stats.rx_errors;
470                         goto drop;
471                 }
472         }
473
474         tstats = this_cpu_ptr(tunnel->dev->tstats);
475         u64_stats_update_begin(&tstats->syncp);
476         tstats->rx_packets++;
477         tstats->rx_bytes += skb->len;
478         u64_stats_update_end(&tstats->syncp);
479
480         gro_cells_receive(&tunnel->gro_cells, skb);
481         return 0;
482
483 drop:
484         kfree_skb(skb);
485         return 0;
486 }
487 EXPORT_SYMBOL_GPL(ip_tunnel_rcv);
488
489 void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
490                     const struct iphdr *tnl_params)
491 {
492         struct ip_tunnel *tunnel = netdev_priv(dev);
493         const struct iphdr *inner_iph;
494         struct iphdr *iph;
495         struct flowi4 fl4;
496         u8     tos, ttl;
497         __be16 df;
498         struct rtable *rt;              /* Route to the other host */
499         struct net_device *tdev;        /* Device to other host */
500         unsigned int max_headroom;      /* The extra header space needed */
501         __be32 dst;
502         int mtu;
503
504         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
505
506         memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
507         dst = tnl_params->daddr;
508         if (dst == 0) {
509                 /* NBMA tunnel */
510
511                 if (skb_dst(skb) == NULL) {
512                         dev->stats.tx_fifo_errors++;
513                         goto tx_error;
514                 }
515
516                 if (skb->protocol == htons(ETH_P_IP)) {
517                         rt = skb_rtable(skb);
518                         dst = rt_nexthop(rt, inner_iph->daddr);
519                 }
520 #if IS_ENABLED(CONFIG_IPV6)
521                 else if (skb->protocol == htons(ETH_P_IPV6)) {
522                         const struct in6_addr *addr6;
523                         struct neighbour *neigh;
524                         bool do_tx_error_icmp;
525                         int addr_type;
526
527                         neigh = dst_neigh_lookup(skb_dst(skb),
528                                                  &ipv6_hdr(skb)->daddr);
529                         if (neigh == NULL)
530                                 goto tx_error;
531
532                         addr6 = (const struct in6_addr *)&neigh->primary_key;
533                         addr_type = ipv6_addr_type(addr6);
534
535                         if (addr_type == IPV6_ADDR_ANY) {
536                                 addr6 = &ipv6_hdr(skb)->daddr;
537                                 addr_type = ipv6_addr_type(addr6);
538                         }
539
540                         if ((addr_type & IPV6_ADDR_COMPATv4) == 0)
541                                 do_tx_error_icmp = true;
542                         else {
543                                 do_tx_error_icmp = false;
544                                 dst = addr6->s6_addr32[3];
545                         }
546                         neigh_release(neigh);
547                         if (do_tx_error_icmp)
548                                 goto tx_error_icmp;
549                 }
550 #endif
551                 else
552                         goto tx_error;
553         }
554
555         tos = tnl_params->tos;
556         if (tos & 0x1) {
557                 tos &= ~0x1;
558                 if (skb->protocol == htons(ETH_P_IP))
559                         tos = inner_iph->tos;
560                 else if (skb->protocol == htons(ETH_P_IPV6))
561                         tos = ipv6_get_dsfield((const struct ipv6hdr *)inner_iph);
562         }
563
564         rt = ip_route_output_tunnel(dev_net(dev), &fl4,
565                                     tunnel->parms.iph.protocol,
566                                     dst, tnl_params->saddr,
567                                     tunnel->parms.o_key,
568                                     RT_TOS(tos),
569                                     tunnel->parms.link);
570         if (IS_ERR(rt)) {
571                 dev->stats.tx_carrier_errors++;
572                 goto tx_error;
573         }
574         tdev = rt->dst.dev;
575
576         if (tdev == dev) {
577                 ip_rt_put(rt);
578                 dev->stats.collisions++;
579                 goto tx_error;
580         }
581
582         df = tnl_params->frag_off;
583
584         if (df)
585                 mtu = dst_mtu(&rt->dst) - dev->hard_header_len
586                                         - sizeof(struct iphdr);
587         else
588                 mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
589
590         if (skb_dst(skb))
591                 skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu);
592
593         if (skb->protocol == htons(ETH_P_IP)) {
594                 df |= (inner_iph->frag_off&htons(IP_DF));
595
596                 if (!skb_is_gso(skb) &&
597                     (inner_iph->frag_off&htons(IP_DF)) &&
598                      mtu < ntohs(inner_iph->tot_len)) {
599                         icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
600                         ip_rt_put(rt);
601                         goto tx_error;
602                 }
603         }
604 #if IS_ENABLED(CONFIG_IPV6)
605         else if (skb->protocol == htons(ETH_P_IPV6)) {
606                 struct rt6_info *rt6 = (struct rt6_info *)skb_dst(skb);
607
608                 if (rt6 && mtu < dst_mtu(skb_dst(skb)) &&
609                     mtu >= IPV6_MIN_MTU) {
610                         if ((tunnel->parms.iph.daddr &&
611                             !ipv4_is_multicast(tunnel->parms.iph.daddr)) ||
612                             rt6->rt6i_dst.plen == 128) {
613                                 rt6->rt6i_flags |= RTF_MODIFIED;
614                                 dst_metric_set(skb_dst(skb), RTAX_MTU, mtu);
615                         }
616                 }
617
618                 if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
619                     mtu < skb->len) {
620                         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
621                         ip_rt_put(rt);
622                         goto tx_error;
623                 }
624         }
625 #endif
626
627         if (tunnel->err_count > 0) {
628                 if (time_before(jiffies,
629                                 tunnel->err_time + IPTUNNEL_ERR_TIMEO)) {
630                         tunnel->err_count--;
631
632                         dst_link_failure(skb);
633                 } else
634                         tunnel->err_count = 0;
635         }
636
637         ttl = tnl_params->ttl;
638         if (ttl == 0) {
639                 if (skb->protocol == htons(ETH_P_IP))
640                         ttl = inner_iph->ttl;
641 #if IS_ENABLED(CONFIG_IPV6)
642                 else if (skb->protocol == htons(ETH_P_IPV6))
643                         ttl = ((const struct ipv6hdr *)inner_iph)->hop_limit;
644 #endif
645                 else
646                         ttl = ip4_dst_hoplimit(&rt->dst);
647         }
648
649         max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(struct iphdr)
650                                                + rt->dst.header_len;
651         if (max_headroom > dev->needed_headroom) {
652                 dev->needed_headroom = max_headroom;
653                 if (skb_cow_head(skb, dev->needed_headroom)) {
654                         dev->stats.tx_dropped++;
655                         dev_kfree_skb(skb);
656                         return;
657                 }
658         }
659
660         skb_dst_drop(skb);
661         skb_dst_set(skb, &rt->dst);
662
663         /* Push down and install the IP header. */
664         skb_push(skb, sizeof(struct iphdr));
665         skb_reset_network_header(skb);
666
667         iph = ip_hdr(skb);
668         inner_iph = (const struct iphdr *)skb_inner_network_header(skb);
669
670         iph->version    =       4;
671         iph->ihl        =       sizeof(struct iphdr) >> 2;
672         iph->frag_off   =       df;
673         iph->protocol   =       tnl_params->protocol;
674         iph->tos        =       ip_tunnel_ecn_encap(tos, inner_iph, skb);
675         iph->daddr      =       fl4.daddr;
676         iph->saddr      =       fl4.saddr;
677         iph->ttl        =       ttl;
678         tunnel_ip_select_ident(skb, inner_iph, &rt->dst);
679
680         iptunnel_xmit(skb, dev);
681         return;
682
683 #if IS_ENABLED(CONFIG_IPV6)
684 tx_error_icmp:
685         dst_link_failure(skb);
686 #endif
687 tx_error:
688         dev->stats.tx_errors++;
689         dev_kfree_skb(skb);
690 }
691 EXPORT_SYMBOL_GPL(ip_tunnel_xmit);
692
693 static void ip_tunnel_update(struct ip_tunnel_net *itn,
694                              struct ip_tunnel *t,
695                              struct net_device *dev,
696                              struct ip_tunnel_parm *p,
697                              bool set_mtu)
698 {
699         ip_tunnel_del(t);
700         t->parms.iph.saddr = p->iph.saddr;
701         t->parms.iph.daddr = p->iph.daddr;
702         t->parms.i_key = p->i_key;
703         t->parms.o_key = p->o_key;
704         if (dev->type != ARPHRD_ETHER) {
705                 memcpy(dev->dev_addr, &p->iph.saddr, 4);
706                 memcpy(dev->broadcast, &p->iph.daddr, 4);
707         }
708         ip_tunnel_add(itn, t);
709
710         t->parms.iph.ttl = p->iph.ttl;
711         t->parms.iph.tos = p->iph.tos;
712         t->parms.iph.frag_off = p->iph.frag_off;
713
714         if (t->parms.link != p->link) {
715                 int mtu;
716
717                 t->parms.link = p->link;
718                 mtu = ip_tunnel_bind_dev(dev);
719                 if (set_mtu)
720                         dev->mtu = mtu;
721         }
722         netdev_state_change(dev);
723 }
724
725 int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
726 {
727         int err = 0;
728         struct ip_tunnel *t;
729         struct net *net = dev_net(dev);
730         struct ip_tunnel *tunnel = netdev_priv(dev);
731         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
732
733         BUG_ON(!itn->fb_tunnel_dev);
734         switch (cmd) {
735         case SIOCGETTUNNEL:
736                 t = NULL;
737                 if (dev == itn->fb_tunnel_dev)
738                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
739                 if (t == NULL)
740                         t = netdev_priv(dev);
741                 memcpy(p, &t->parms, sizeof(*p));
742                 break;
743
744         case SIOCADDTUNNEL:
745         case SIOCCHGTUNNEL:
746                 err = -EPERM;
747                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
748                         goto done;
749                 if (p->iph.ttl)
750                         p->iph.frag_off |= htons(IP_DF);
751                 if (!(p->i_flags&TUNNEL_KEY))
752                         p->i_key = 0;
753                 if (!(p->o_flags&TUNNEL_KEY))
754                         p->o_key = 0;
755
756                 t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
757
758                 if (!t && (cmd == SIOCADDTUNNEL))
759                         t = ip_tunnel_create(net, itn, p);
760
761                 if (dev != itn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
762                         if (t != NULL) {
763                                 if (t->dev != dev) {
764                                         err = -EEXIST;
765                                         break;
766                                 }
767                         } else {
768                                 unsigned int nflags = 0;
769
770                                 if (ipv4_is_multicast(p->iph.daddr))
771                                         nflags = IFF_BROADCAST;
772                                 else if (p->iph.daddr)
773                                         nflags = IFF_POINTOPOINT;
774
775                                 if ((dev->flags^nflags)&(IFF_POINTOPOINT|IFF_BROADCAST)) {
776                                         err = -EINVAL;
777                                         break;
778                                 }
779
780                                 t = netdev_priv(dev);
781                         }
782                 }
783
784                 if (t) {
785                         err = 0;
786                         ip_tunnel_update(itn, t, dev, p, true);
787                 } else
788                         err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
789                 break;
790
791         case SIOCDELTUNNEL:
792                 err = -EPERM;
793                 if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
794                         goto done;
795
796                 if (dev == itn->fb_tunnel_dev) {
797                         err = -ENOENT;
798                         t = ip_tunnel_find(itn, p, itn->fb_tunnel_dev->type);
799                         if (t == NULL)
800                                 goto done;
801                         err = -EPERM;
802                         if (t == netdev_priv(itn->fb_tunnel_dev))
803                                 goto done;
804                         dev = t->dev;
805                 }
806                 unregister_netdevice(dev);
807                 err = 0;
808                 break;
809
810         default:
811                 err = -EINVAL;
812         }
813
814 done:
815         return err;
816 }
817 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
818
819 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
820 {
821         struct ip_tunnel *tunnel = netdev_priv(dev);
822         int t_hlen = tunnel->hlen + sizeof(struct iphdr);
823
824         if (new_mtu < 68 ||
825             new_mtu > 0xFFF8 - dev->hard_header_len - t_hlen)
826                 return -EINVAL;
827         dev->mtu = new_mtu;
828         return 0;
829 }
830 EXPORT_SYMBOL_GPL(ip_tunnel_change_mtu);
831
832 static void ip_tunnel_dev_free(struct net_device *dev)
833 {
834         struct ip_tunnel *tunnel = netdev_priv(dev);
835
836         gro_cells_destroy(&tunnel->gro_cells);
837         free_percpu(dev->tstats);
838         free_netdev(dev);
839 }
840
841 void ip_tunnel_dellink(struct net_device *dev, struct list_head *head)
842 {
843         struct net *net = dev_net(dev);
844         struct ip_tunnel *tunnel = netdev_priv(dev);
845         struct ip_tunnel_net *itn;
846
847         itn = net_generic(net, tunnel->ip_tnl_net_id);
848
849         if (itn->fb_tunnel_dev != dev) {
850                 ip_tunnel_del(netdev_priv(dev));
851                 unregister_netdevice_queue(dev, head);
852         }
853 }
854 EXPORT_SYMBOL_GPL(ip_tunnel_dellink);
855
856 int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id,
857                                   struct rtnl_link_ops *ops, char *devname)
858 {
859         struct ip_tunnel_net *itn = net_generic(net, ip_tnl_net_id);
860         struct ip_tunnel_parm parms;
861
862         itn->tunnels = kzalloc(IP_TNL_HASH_SIZE * sizeof(struct hlist_head), GFP_KERNEL);
863         if (!itn->tunnels)
864                 return -ENOMEM;
865
866         if (!ops) {
867                 itn->fb_tunnel_dev = NULL;
868                 return 0;
869         }
870         memset(&parms, 0, sizeof(parms));
871         if (devname)
872                 strlcpy(parms.name, devname, IFNAMSIZ);
873
874         rtnl_lock();
875         itn->fb_tunnel_dev = __ip_tunnel_create(net, ops, &parms);
876         rtnl_unlock();
877         if (IS_ERR(itn->fb_tunnel_dev)) {
878                 kfree(itn->tunnels);
879                 return PTR_ERR(itn->fb_tunnel_dev);
880         }
881
882         return 0;
883 }
884 EXPORT_SYMBOL_GPL(ip_tunnel_init_net);
885
886 static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head)
887 {
888         int h;
889
890         for (h = 0; h < IP_TNL_HASH_SIZE; h++) {
891                 struct ip_tunnel *t;
892                 struct hlist_node *n;
893                 struct hlist_head *thead = &itn->tunnels[h];
894
895                 hlist_for_each_entry_safe(t, n, thead, hash_node)
896                         unregister_netdevice_queue(t->dev, head);
897         }
898         if (itn->fb_tunnel_dev)
899                 unregister_netdevice_queue(itn->fb_tunnel_dev, head);
900 }
901
902 void ip_tunnel_delete_net(struct ip_tunnel_net *itn)
903 {
904         LIST_HEAD(list);
905
906         rtnl_lock();
907         ip_tunnel_destroy(itn, &list);
908         unregister_netdevice_many(&list);
909         rtnl_unlock();
910         kfree(itn->tunnels);
911 }
912 EXPORT_SYMBOL_GPL(ip_tunnel_delete_net);
913
914 int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[],
915                       struct ip_tunnel_parm *p)
916 {
917         struct ip_tunnel *nt;
918         struct net *net = dev_net(dev);
919         struct ip_tunnel_net *itn;
920         int mtu;
921         int err;
922
923         nt = netdev_priv(dev);
924         itn = net_generic(net, nt->ip_tnl_net_id);
925
926         if (ip_tunnel_find(itn, p, dev->type))
927                 return -EEXIST;
928
929         nt->parms = *p;
930         err = register_netdevice(dev);
931         if (err)
932                 goto out;
933
934         if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS])
935                 eth_hw_addr_random(dev);
936
937         mtu = ip_tunnel_bind_dev(dev);
938         if (!tb[IFLA_MTU])
939                 dev->mtu = mtu;
940
941         ip_tunnel_add(itn, nt);
942
943 out:
944         return err;
945 }
946 EXPORT_SYMBOL_GPL(ip_tunnel_newlink);
947
948 int ip_tunnel_changelink(struct net_device *dev, struct nlattr *tb[],
949                          struct ip_tunnel_parm *p)
950 {
951         struct ip_tunnel *t, *nt;
952         struct net *net = dev_net(dev);
953         struct ip_tunnel *tunnel = netdev_priv(dev);
954         struct ip_tunnel_net *itn = net_generic(net, tunnel->ip_tnl_net_id);
955
956         if (dev == itn->fb_tunnel_dev)
957                 return -EINVAL;
958
959         nt = netdev_priv(dev);
960
961         t = ip_tunnel_find(itn, p, dev->type);
962
963         if (t) {
964                 if (t->dev != dev)
965                         return -EEXIST;
966         } else {
967                 t = nt;
968
969                 if (dev->type != ARPHRD_ETHER) {
970                         unsigned int nflags = 0;
971
972                         if (ipv4_is_multicast(p->iph.daddr))
973                                 nflags = IFF_BROADCAST;
974                         else if (p->iph.daddr)
975                                 nflags = IFF_POINTOPOINT;
976
977                         if ((dev->flags ^ nflags) &
978                             (IFF_POINTOPOINT | IFF_BROADCAST))
979                                 return -EINVAL;
980                 }
981         }
982
983         ip_tunnel_update(itn, t, dev, p, !tb[IFLA_MTU]);
984         return 0;
985 }
986 EXPORT_SYMBOL_GPL(ip_tunnel_changelink);
987
988 int ip_tunnel_init(struct net_device *dev)
989 {
990         struct ip_tunnel *tunnel = netdev_priv(dev);
991         struct iphdr *iph = &tunnel->parms.iph;
992         int err;
993
994         dev->destructor = ip_tunnel_dev_free;
995         dev->tstats = alloc_percpu(struct pcpu_tstats);
996         if (!dev->tstats)
997                 return -ENOMEM;
998
999         err = gro_cells_init(&tunnel->gro_cells, dev);
1000         if (err) {
1001                 free_percpu(dev->tstats);
1002                 return err;
1003         }
1004
1005         tunnel->dev = dev;
1006         strcpy(tunnel->parms.name, dev->name);
1007         iph->version            = 4;
1008         iph->ihl                = 5;
1009
1010         return 0;
1011 }
1012 EXPORT_SYMBOL_GPL(ip_tunnel_init);
1013
1014 void ip_tunnel_uninit(struct net_device *dev)
1015 {
1016         struct net *net = dev_net(dev);
1017         struct ip_tunnel *tunnel = netdev_priv(dev);
1018         struct ip_tunnel_net *itn;
1019
1020         itn = net_generic(net, tunnel->ip_tnl_net_id);
1021         /* fb_tunnel_dev will be unregisted in net-exit call. */
1022         if (itn->fb_tunnel_dev != dev)
1023                 ip_tunnel_del(netdev_priv(dev));
1024 }
1025 EXPORT_SYMBOL_GPL(ip_tunnel_uninit);
1026
1027 /* Do least required initialization, rest of init is done in tunnel_init call */
1028 void ip_tunnel_setup(struct net_device *dev, int net_id)
1029 {
1030         struct ip_tunnel *tunnel = netdev_priv(dev);
1031         tunnel->ip_tnl_net_id = net_id;
1032 }
1033 EXPORT_SYMBOL_GPL(ip_tunnel_setup);
1034
1035 MODULE_LICENSE("GPL");