net: Allow netdevices to specify needed head/tailroom
[linux-2.6.git] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Verbosity of the ND_PRINTKn() helpers: ND_PRINTKn() emits output only
 * when ND_DEBUG >= n, otherwise it expands to an empty statement.
 * Set to 3 to get tracing...
 */
#define ND_DEBUG 1

/* Rate-limited printk() and its do-nothing counterpart. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
#define ND_NOPRINTK(x...) do { ; } while(0)

/* Level 0 is unconditional. */
#define ND_PRINTK0 ND_PRINTK

#if ND_DEBUG >= 1
#define ND_PRINTK1 ND_PRINTK
#else
#define ND_PRINTK1 ND_NOPRINTK
#endif

#if ND_DEBUG >= 2
#define ND_PRINTK2 ND_PRINTK
#else
#define ND_PRINTK2 ND_NOPRINTK
#endif

#if ND_DEBUG >= 3
#define ND_PRINTK3 ND_PRINTK
#else
#define ND_PRINTK3 ND_NOPRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <net/inet_common.h>
88 #include <linux/proc_fs.h>
89
90 #include <linux/netfilter.h>
91 #include <linux/netfilter_ipv6.h>
92
/*
 * Forward declarations of the callbacks that are wired into the
 * neigh_ops structures and the nd_tbl neighbour table defined below.
 */
static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
100
/*
 * Neighbour operations installed by ndisc_constructor() for devices that
 * have header_ops but no header_ops->cache callback.
 */
static struct neigh_ops ndisc_generic_ops = {
        .family =               AF_INET6,
        .solicit =              ndisc_solicit,
        .error_report =         ndisc_error_report,
        .output =               neigh_resolve_output,
        .connected_output =     neigh_connected_output,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
110
/*
 * Neighbour operations installed by ndisc_constructor() for devices whose
 * header_ops provide a ->cache callback.  Differs from ndisc_generic_ops
 * only in using neigh_resolve_output for the connected path as well.
 */
static struct neigh_ops ndisc_hh_ops = {
        .family =               AF_INET6,
        .solicit =              ndisc_solicit,
        .error_report =         ndisc_error_report,
        .output =               neigh_resolve_output,
        .connected_output =     neigh_resolve_output,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
120
121
/*
 * Neighbour operations installed by ndisc_constructor() for devices
 * without header_ops: every output path goes straight to dev_queue_xmit
 * and no solicit/error_report callbacks are set.
 */
static struct neigh_ops ndisc_direct_ops = {
        .family =               AF_INET6,
        .output =               dev_queue_xmit,
        .connected_output =     dev_queue_xmit,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
129
/*
 * The IPv6 neighbour discovery table ("ndisc_cache").
 *
 * Keys are IPv6 addresses (key_len == sizeof(struct in6_addr)) and each
 * entry is sized as a struct neighbour plus one in6_addr.  The hash,
 * constructor and proxy callbacks are the ndisc_ and pndisc_ functions
 * declared above.
 *
 * Time-valued fields are expressed as multiples of HZ
 * (presumably jiffies, i.e. N * HZ == N seconds -- confirm against
 * struct neigh_parms).
 */
struct neigh_table nd_tbl = {
        .family =       AF_INET6,
        .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
        .key_len =      sizeof(struct in6_addr),
        .hash =         ndisc_hash,
        .constructor =  ndisc_constructor,
        .pconstructor = pndisc_constructor,
        .pdestructor =  pndisc_destructor,
        .proxy_redo =   pndisc_redo,
        .id =           "ndisc_cache",
        /* Default per-table neighbour parameters. */
        .parms = {
                .tbl =                  &nd_tbl,
                .base_reachable_time =  30 * HZ,
                .retrans_time =  1 * HZ,
                .gc_staletime = 60 * HZ,
                .reachable_time =               30 * HZ,
                .delay_probe_time =      5 * HZ,
                .queue_len =             3,
                .ucast_probes =  3,
                .mcast_probes =  3,
                .anycast_delay =         1 * HZ,
                .proxy_delay =          (8 * HZ) / 10,
                .proxy_qlen =           64,
        },
        /* Garbage-collection interval and entry-count thresholds. */
        .gc_interval =    30 * HZ,
        .gc_thresh1 =    128,
        .gc_thresh2 =    512,
        .gc_thresh3 =   1024,
};
159
/*
 * ND options
 *
 * Result of ndisc_parse_options(): pointers into the received message.
 *
 *  nd_opt_array     - indexed by option type; holds the first option seen
 *                     of each recognised type (see the nd_opts_* aliases
 *                     defined after this structure).
 *  nd_opts_ri /
 *  nd_opts_ri_end   - first and last Route Information option
 *                     (only with CONFIG_IPV6_ROUTE_INFO).
 *  nd_useropts /
 *  nd_useropts_end  - first and last option for which
 *                     ndisc_is_useropt() is true.
 *
 * The *_end members point AT the last matching option, not past it.
 */
struct ndisc_options {
        struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
#ifdef CONFIG_IPV6_ROUTE_INFO
        struct nd_opt_hdr *nd_opts_ri;
        struct nd_opt_hdr *nd_opts_ri_end;
#endif
        struct nd_opt_hdr *nd_useropts;
        struct nd_opt_hdr *nd_useropts_end;
};
170
/* Named accessors for the slots of ndisc_options.nd_opt_array. */
#define nd_opts_src_lladdr      nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr      nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi              nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end          nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh              nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu             nd_opt_array[ND_OPT_MTU]

/*
 * Size in bytes of an option carrying 'len' bytes of payload: adds 2
 * (the type and length bytes written by ndisc_fill_addr_option()) and
 * rounds up to a multiple of 8.
 */
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
179
/*
 * Number of padding bytes that sit between the option length byte and
 * the link-layer address inside an address option.
 *
 * Only IP-over-InfiniBand uses a pad (2 bytes) today; should RFC 3831
 * IPv6-over-Fibre Channel ever be implemented it may need a pad of 2
 * as well.  Every other hardware type gets 0.
 */
static int ndisc_addr_option_pad(unsigned short type)
{
        if (type == ARPHRD_INFINIBAND)
                return 2;

        return 0;
}
193
194 static inline int ndisc_opt_addr_space(struct net_device *dev)
195 {
196         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
197 }
198
199 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
200                                   unsigned short addr_type)
201 {
202         int space = NDISC_OPT_SPACE(data_len);
203         int pad   = ndisc_addr_option_pad(addr_type);
204
205         opt[0] = type;
206         opt[1] = space>>3;
207
208         memset(opt + 2, 0, pad);
209         opt   += pad;
210         space -= pad;
211
212         memcpy(opt+2, data, data_len);
213         data_len += 2;
214         opt += data_len;
215         if ((space -= data_len) > 0)
216                 memset(opt, 0, space);
217         return opt + space;
218 }
219
220 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
221                                             struct nd_opt_hdr *end)
222 {
223         int type;
224         if (!cur || !end || cur >= end)
225                 return NULL;
226         type = cur->nd_opt_type;
227         do {
228                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
229         } while(cur < end && cur->nd_opt_type != type);
230         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
231 }
232
/*
 * Return non-zero if @opt is an option that is handed to userland.
 * Only ND_OPT_RDNSS qualifies here.
 */
static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
{
        return (opt->nd_opt_type == ND_OPT_RDNSS);
}
237
238 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
239                                              struct nd_opt_hdr *end)
240 {
241         if (!cur || !end || cur >= end)
242                 return NULL;
243         do {
244                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
245         } while(cur < end && !ndisc_is_useropt(cur));
246         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
247 }
248
249 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
250                                                  struct ndisc_options *ndopts)
251 {
252         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
253
254         if (!nd_opt || opt_len < 0 || !ndopts)
255                 return NULL;
256         memset(ndopts, 0, sizeof(*ndopts));
257         while (opt_len) {
258                 int l;
259                 if (opt_len < sizeof(struct nd_opt_hdr))
260                         return NULL;
261                 l = nd_opt->nd_opt_len << 3;
262                 if (opt_len < l || l == 0)
263                         return NULL;
264                 switch (nd_opt->nd_opt_type) {
265                 case ND_OPT_SOURCE_LL_ADDR:
266                 case ND_OPT_TARGET_LL_ADDR:
267                 case ND_OPT_MTU:
268                 case ND_OPT_REDIRECT_HDR:
269                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
270                                 ND_PRINTK2(KERN_WARNING
271                                            "%s(): duplicated ND6 option found: type=%d\n",
272                                            __func__,
273                                            nd_opt->nd_opt_type);
274                         } else {
275                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
276                         }
277                         break;
278                 case ND_OPT_PREFIX_INFO:
279                         ndopts->nd_opts_pi_end = nd_opt;
280                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
281                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
282                         break;
283 #ifdef CONFIG_IPV6_ROUTE_INFO
284                 case ND_OPT_ROUTE_INFO:
285                         ndopts->nd_opts_ri_end = nd_opt;
286                         if (!ndopts->nd_opts_ri)
287                                 ndopts->nd_opts_ri = nd_opt;
288                         break;
289 #endif
290                 default:
291                         if (ndisc_is_useropt(nd_opt)) {
292                                 ndopts->nd_useropts_end = nd_opt;
293                                 if (!ndopts->nd_useropts)
294                                         ndopts->nd_useropts = nd_opt;
295                         } else {
296                                 /*
297                                  * Unknown options must be silently ignored,
298                                  * to accommodate future extension to the
299                                  * protocol.
300                                  */
301                                 ND_PRINTK2(KERN_NOTICE
302                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
303                                            __func__,
304                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
305                         }
306                 }
307                 opt_len -= l;
308                 nd_opt = ((void *)nd_opt) + l;
309         }
310         return ndopts;
311 }
312
313 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
314                                       struct net_device *dev)
315 {
316         u8 *lladdr = (u8 *)(p + 1);
317         int lladdrlen = p->nd_opt_len << 3;
318         int prepad = ndisc_addr_option_pad(dev->type);
319         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
320                 return NULL;
321         return (lladdr + prepad);
322 }
323
324 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
325 {
326         switch (dev->type) {
327         case ARPHRD_ETHER:
328         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
329         case ARPHRD_FDDI:
330                 ipv6_eth_mc_map(addr, buf);
331                 return 0;
332         case ARPHRD_IEEE802_TR:
333                 ipv6_tr_mc_map(addr,buf);
334                 return 0;
335         case ARPHRD_ARCNET:
336                 ipv6_arcnet_mc_map(addr, buf);
337                 return 0;
338         case ARPHRD_INFINIBAND:
339                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
340                 return 0;
341         default:
342                 if (dir) {
343                         memcpy(buf, dev->broadcast, dev->addr_len);
344                         return 0;
345                 }
346         }
347         return -EINVAL;
348 }
349
350 EXPORT_SYMBOL(ndisc_mc_map);
351
352 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
353 {
354         const u32 *p32 = pkey;
355         u32 addr_hash, i;
356
357         addr_hash = 0;
358         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
359                 addr_hash ^= *p32++;
360
361         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
362 }
363
364 static int ndisc_constructor(struct neighbour *neigh)
365 {
366         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
367         struct net_device *dev = neigh->dev;
368         struct inet6_dev *in6_dev;
369         struct neigh_parms *parms;
370         int is_multicast = ipv6_addr_is_multicast(addr);
371
372         rcu_read_lock();
373         in6_dev = in6_dev_get(dev);
374         if (in6_dev == NULL) {
375                 rcu_read_unlock();
376                 return -EINVAL;
377         }
378
379         parms = in6_dev->nd_parms;
380         __neigh_parms_put(neigh->parms);
381         neigh->parms = neigh_parms_clone(parms);
382         rcu_read_unlock();
383
384         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
385         if (!dev->header_ops) {
386                 neigh->nud_state = NUD_NOARP;
387                 neigh->ops = &ndisc_direct_ops;
388                 neigh->output = neigh->ops->queue_xmit;
389         } else {
390                 if (is_multicast) {
391                         neigh->nud_state = NUD_NOARP;
392                         ndisc_mc_map(addr, neigh->ha, dev, 1);
393                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
394                         neigh->nud_state = NUD_NOARP;
395                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
396                         if (dev->flags&IFF_LOOPBACK)
397                                 neigh->type = RTN_LOCAL;
398                 } else if (dev->flags&IFF_POINTOPOINT) {
399                         neigh->nud_state = NUD_NOARP;
400                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
401                 }
402                 if (dev->header_ops->cache)
403                         neigh->ops = &ndisc_hh_ops;
404                 else
405                         neigh->ops = &ndisc_generic_ops;
406                 if (neigh->nud_state&NUD_VALID)
407                         neigh->output = neigh->ops->connected_output;
408                 else
409                         neigh->output = neigh->ops->output;
410         }
411         in6_dev_put(in6_dev);
412         return 0;
413 }
414
415 static int pndisc_constructor(struct pneigh_entry *n)
416 {
417         struct in6_addr *addr = (struct in6_addr*)&n->key;
418         struct in6_addr maddr;
419         struct net_device *dev = n->dev;
420
421         if (dev == NULL || __in6_dev_get(dev) == NULL)
422                 return -EINVAL;
423         addrconf_addr_solict_mult(addr, &maddr);
424         ipv6_dev_mc_inc(dev, &maddr);
425         return 0;
426 }
427
428 static void pndisc_destructor(struct pneigh_entry *n)
429 {
430         struct in6_addr *addr = (struct in6_addr*)&n->key;
431         struct in6_addr maddr;
432         struct net_device *dev = n->dev;
433
434         if (dev == NULL || __in6_dev_get(dev) == NULL)
435                 return;
436         addrconf_addr_solict_mult(addr, &maddr);
437         ipv6_dev_mc_dec(dev, &maddr);
438 }
439
/*
 *      Build and transmit a Neighbour Discovery ICMPv6 message.
 *
 *      Shared sender used by ndisc_send_na(), ndisc_send_ns() and
 *      ndisc_send_rs().
 *
 *      @dev:    outgoing device
 *      @neigh:  neighbour passed to icmp6_dst_alloc() (may be NULL)
 *      @daddr:  IPv6 destination address
 *      @saddr:  IPv6 source address
 *      @icmp6h: ICMPv6 header template, copied verbatim into the packet
 *      @target: optional target address placed right after the ICMPv6
 *               header (NULL to omit)
 *      @llinfo: option type of the link-layer address option to append
 *               using dev->dev_addr, or 0 for none; forced to 0 when the
 *               device has no hardware address
 *
 *      Failures (no dst, xfrm lookup error, skb allocation failure) make
 *      the function return without sending; nothing is reported to the
 *      caller.
 */
static void __ndisc_send(struct net_device *dev,
                         struct neighbour *neigh,
                         const struct in6_addr *daddr,
                         const struct in6_addr *saddr,
                         struct icmp6hdr *icmp6h, const struct in6_addr *target,
                         int llinfo)
{
        struct flowi fl;
        struct dst_entry *dst;
        struct net *net = dev_net(dev);
        struct sock *sk = net->ipv6.ndisc_sk;
        struct sk_buff *skb;
        struct icmp6hdr *hdr;
        struct inet6_dev *idev;
        int len;
        int err;
        u8 *opt, type;

        type = icmp6h->icmp6_type;

        icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);

        dst = icmp6_dst_alloc(dev, neigh, daddr);
        if (!dst)
                return;

        /*
         * NOTE(review): dst is not released on this error path --
         * presumably xfrm_lookup() drops it on failure; confirm.
         */
        err = xfrm_lookup(&dst, &fl, NULL, 0);
        if (err < 0)
                return;

        /* A device without a hardware address cannot carry an LL option. */
        if (!dev->addr_len)
                llinfo = 0;

        /* ICMPv6 payload: header, optional target, optional LL option. */
        len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
        if (llinfo)
                len += ndisc_opt_addr_space(dev);

        skb = sock_alloc_send_skb(sk,
                                  (MAX_HEADER + sizeof(struct ipv6hdr) +
                                   len + LL_ALLOCATED_SPACE(dev)),
                                  1, &err);
        if (!skb) {
                ND_PRINTK0(KERN_ERR
                           "ICMPv6 ND: %s() failed to allocate an skb.\n",
                           __func__);
                dst_release(dst);
                return;
        }

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);

        /* The ICMPv6 message starts at the current tail. */
        skb->transport_header = skb->tail;
        skb_put(skb, len);

        hdr = (struct icmp6hdr *)skb_transport_header(skb);
        memcpy(hdr, icmp6h, sizeof(*hdr));

        opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
        if (target) {
                ipv6_addr_copy((struct in6_addr *)opt, target);
                opt += sizeof(*target);
        }

        if (llinfo)
                ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
                                       dev->addr_len, dev->type);

        /* Checksum covers the whole ICMPv6 message plus pseudo-header. */
        hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
                                           IPPROTO_ICMPV6,
                                           csum_partial((__u8 *) hdr,
                                                        len, 0));

        skb->dst = dst;

        idev = in6_dev_get(dst->dev);
        IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);

        err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
                      dst_output);
        if (!err) {
                ICMP6MSGOUT_INC_STATS(idev, type);
                ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
        }

        if (likely(idev != NULL))
                in6_dev_put(idev);
}
531
532 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
533                           const struct in6_addr *daddr,
534                           const struct in6_addr *solicited_addr,
535                           int router, int solicited, int override, int inc_opt)
536 {
537         struct in6_addr tmpaddr;
538         struct inet6_ifaddr *ifp;
539         const struct in6_addr *src_addr;
540         struct icmp6hdr icmp6h = {
541                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
542         };
543
544         /* for anycast or proxy, solicited_addr != src_addr */
545         ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
546         if (ifp) {
547                 src_addr = solicited_addr;
548                 if (ifp->flags & IFA_F_OPTIMISTIC)
549                         override = 0;
550                 in6_ifa_put(ifp);
551         } else {
552                 if (ipv6_dev_get_saddr(dev, daddr,
553                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
554                                        &tmpaddr))
555                         return;
556                 src_addr = &tmpaddr;
557         }
558
559         icmp6h.icmp6_router = router;
560         icmp6h.icmp6_solicited = solicited;
561         icmp6h.icmp6_override = override;
562
563         __ndisc_send(dev, neigh, daddr, src_addr,
564                      &icmp6h, solicited_addr,
565                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
566 }
567
568 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
569                    const struct in6_addr *solicit,
570                    const struct in6_addr *daddr, const struct in6_addr *saddr)
571 {
572         struct in6_addr addr_buf;
573         struct icmp6hdr icmp6h = {
574                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
575         };
576
577         if (saddr == NULL) {
578                 if (ipv6_get_lladdr(dev, &addr_buf,
579                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
580                         return;
581                 saddr = &addr_buf;
582         }
583
584         __ndisc_send(dev, neigh, daddr, saddr,
585                      &icmp6h, solicit,
586                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
587 }
588
589 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
590                    const struct in6_addr *daddr)
591 {
592         struct icmp6hdr icmp6h = {
593                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
594         };
595         int send_sllao = dev->addr_len;
596
597 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
598         /*
599          * According to section 2.2 of RFC 4429, we must not
600          * send router solicitations with a sllao from
601          * optimistic addresses, but we may send the solicitation
602          * if we don't include the sllao.  So here we check
603          * if our address is optimistic, and if so, we
604          * suppress the inclusion of the sllao.
605          */
606         if (send_sllao) {
607                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
608                                                            dev, 1);
609                 if (ifp) {
610                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
611                                 send_sllao = 0;
612                         }
613                         in6_ifa_put(ifp);
614                 } else {
615                         send_sllao = 0;
616                 }
617         }
618 #endif
619         __ndisc_send(dev, NULL, daddr, saddr,
620                      &icmp6h, NULL,
621                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
622 }
623
624
/*
 * error_report callback of the ndisc neigh_ops: signal a link failure
 * for @skb via dst_link_failure() and then free the skb.  @neigh is
 * unused.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
        /*
         *      "The sender MUST return an ICMP
         *       destination unreachable"
         */
        dst_link_failure(skb);
        kfree_skb(skb);
}
634
635 /* Called with locked neigh: either read or both */
636
637 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
638 {
639         struct in6_addr *saddr = NULL;
640         struct in6_addr mcaddr;
641         struct net_device *dev = neigh->dev;
642         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
643         int probes = atomic_read(&neigh->probes);
644
645         if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
646                 saddr = &ipv6_hdr(skb)->saddr;
647
648         if ((probes -= neigh->parms->ucast_probes) < 0) {
649                 if (!(neigh->nud_state & NUD_VALID)) {
650                         ND_PRINTK1(KERN_DEBUG
651                                    "%s(): trying to ucast probe in NUD_INVALID: "
652                                    NIP6_FMT "\n",
653                                    __func__,
654                                    NIP6(*target));
655                 }
656                 ndisc_send_ns(dev, neigh, target, target, saddr);
657         } else if ((probes -= neigh->parms->app_probes) < 0) {
658 #ifdef CONFIG_ARPD
659                 neigh_app_ns(neigh);
660 #endif
661         } else {
662                 addrconf_addr_solict_mult(target, &mcaddr);
663                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
664         }
665 }
666
667 static int pndisc_is_router(const void *pkey,
668                             struct net_device *dev)
669 {
670         struct pneigh_entry *n;
671         int ret = -1;
672
673         read_lock_bh(&nd_tbl.lock);
674         n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
675         if (n)
676                 ret = !!(n->flags & NTF_ROUTER);
677         read_unlock_bh(&nd_tbl.lock);
678
679         return ret;
680 }
681
682 static void ndisc_recv_ns(struct sk_buff *skb)
683 {
684         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
685         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
686         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
687         u8 *lladdr = NULL;
688         u32 ndoptlen = skb->tail - (skb->transport_header +
689                                     offsetof(struct nd_msg, opt));
690         struct ndisc_options ndopts;
691         struct net_device *dev = skb->dev;
692         struct inet6_ifaddr *ifp;
693         struct inet6_dev *idev = NULL;
694         struct neighbour *neigh;
695         int dad = ipv6_addr_any(saddr);
696         int inc;
697         int is_router = -1;
698
699         if (ipv6_addr_is_multicast(&msg->target)) {
700                 ND_PRINTK2(KERN_WARNING
701                            "ICMPv6 NS: multicast target address");
702                 return;
703         }
704
705         /*
706          * RFC2461 7.1.1:
707          * DAD has to be destined for solicited node multicast address.
708          */
709         if (dad &&
710             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
711               daddr->s6_addr32[1] == htonl(0x00000000) &&
712               daddr->s6_addr32[2] == htonl(0x00000001) &&
713               daddr->s6_addr [12] == 0xff )) {
714                 ND_PRINTK2(KERN_WARNING
715                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
716                 return;
717         }
718
719         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
720                 ND_PRINTK2(KERN_WARNING
721                            "ICMPv6 NS: invalid ND options\n");
722                 return;
723         }
724
725         if (ndopts.nd_opts_src_lladdr) {
726                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
727                 if (!lladdr) {
728                         ND_PRINTK2(KERN_WARNING
729                                    "ICMPv6 NS: invalid link-layer address length\n");
730                         return;
731                 }
732
733                 /* RFC2461 7.1.1:
734                  *      If the IP source address is the unspecified address,
735                  *      there MUST NOT be source link-layer address option
736                  *      in the message.
737                  */
738                 if (dad) {
739                         ND_PRINTK2(KERN_WARNING
740                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
741                         return;
742                 }
743         }
744
745         inc = ipv6_addr_is_multicast(daddr);
746
747         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
748         if (ifp) {
749
750                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
751                         if (dad) {
752                                 if (dev->type == ARPHRD_IEEE802_TR) {
753                                         const unsigned char *sadr;
754                                         sadr = skb_mac_header(skb);
755                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
756                                             sadr[9] == dev->dev_addr[1] &&
757                                             sadr[10] == dev->dev_addr[2] &&
758                                             sadr[11] == dev->dev_addr[3] &&
759                                             sadr[12] == dev->dev_addr[4] &&
760                                             sadr[13] == dev->dev_addr[5]) {
761                                                 /* looped-back to us */
762                                                 goto out;
763                                         }
764                                 }
765
766                                 /*
767                                  * We are colliding with another node
768                                  * who is doing DAD
769                                  * so fail our DAD process
770                                  */
771                                 addrconf_dad_failure(ifp);
772                                 return;
773                         } else {
774                                 /*
775                                  * This is not a dad solicitation.
776                                  * If we are an optimistic node,
777                                  * we should respond.
778                                  * Otherwise, we should ignore it.
779                                  */
780                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
781                                         goto out;
782                         }
783                 }
784
785                 idev = ifp->idev;
786         } else {
787                 idev = in6_dev_get(dev);
788                 if (!idev) {
789                         /* XXX: count this drop? */
790                         return;
791                 }
792
793                 if (ipv6_chk_acast_addr(dev_net(dev), dev, &msg->target) ||
794                     (idev->cnf.forwarding &&
795                      (ipv6_devconf.proxy_ndp || idev->cnf.proxy_ndp) &&
796                      (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
797                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
798                             skb->pkt_type != PACKET_HOST &&
799                             inc != 0 &&
800                             idev->nd_parms->proxy_delay != 0) {
801                                 /*
802                                  * for anycast or proxy,
803                                  * sender should delay its response
804                                  * by a random time between 0 and
805                                  * MAX_ANYCAST_DELAY_TIME seconds.
806                                  * (RFC2461) -- yoshfuji
807                                  */
808                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
809                                 if (n)
810                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
811                                 goto out;
812                         }
813                 } else
814                         goto out;
815         }
816
817         if (is_router < 0)
818                 is_router = !!idev->cnf.forwarding;
819
820         if (dad) {
821                 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
822                               is_router, 0, (ifp != NULL), 1);
823                 goto out;
824         }
825
826         if (inc)
827                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
828         else
829                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
830
831         /*
832          *      update / create cache entry
833          *      for the source address
834          */
835         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
836                                !inc || lladdr || !dev->addr_len);
837         if (neigh)
838                 neigh_update(neigh, lladdr, NUD_STALE,
839                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
840                              NEIGH_UPDATE_F_OVERRIDE);
841         if (neigh || !dev->header_ops) {
842                 ndisc_send_na(dev, neigh, saddr, &msg->target,
843                               is_router,
844                               1, (ifp != NULL && inc), inc);
845                 if (neigh)
846                         neigh_release(neigh);
847         }
848
849 out:
850         if (ifp)
851                 in6_ifa_put(ifp);
852         else
853                 in6_dev_put(idev);
854
855         return;
856 }
857
858 static void ndisc_recv_na(struct sk_buff *skb)
859 {
860         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
861         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
862         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
863         u8 *lladdr = NULL;
864         u32 ndoptlen = skb->tail - (skb->transport_header +
865                                     offsetof(struct nd_msg, opt));
866         struct ndisc_options ndopts;
867         struct net_device *dev = skb->dev;
868         struct inet6_ifaddr *ifp;
869         struct neighbour *neigh;
870
871         if (skb->len < sizeof(struct nd_msg)) {
872                 ND_PRINTK2(KERN_WARNING
873                            "ICMPv6 NA: packet too short\n");
874                 return;
875         }
876
877         if (ipv6_addr_is_multicast(&msg->target)) {
878                 ND_PRINTK2(KERN_WARNING
879                            "ICMPv6 NA: target address is multicast.\n");
880                 return;
881         }
882
883         if (ipv6_addr_is_multicast(daddr) &&
884             msg->icmph.icmp6_solicited) {
885                 ND_PRINTK2(KERN_WARNING
886                            "ICMPv6 NA: solicited NA is multicasted.\n");
887                 return;
888         }
889
890         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
891                 ND_PRINTK2(KERN_WARNING
892                            "ICMPv6 NS: invalid ND option\n");
893                 return;
894         }
895         if (ndopts.nd_opts_tgt_lladdr) {
896                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
897                 if (!lladdr) {
898                         ND_PRINTK2(KERN_WARNING
899                                    "ICMPv6 NA: invalid link-layer address length\n");
900                         return;
901                 }
902         }
903         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
904         if (ifp) {
905                 if (ifp->flags & IFA_F_TENTATIVE) {
906                         addrconf_dad_failure(ifp);
907                         return;
908                 }
909                 /* What should we make now? The advertisement
910                    is invalid, but ndisc specs say nothing
911                    about it. It could be misconfiguration, or
912                    an smart proxy agent tries to help us :-)
913                  */
914                 ND_PRINTK1(KERN_WARNING
915                            "ICMPv6 NA: someone advertises our address on %s!\n",
916                            ifp->idev->dev->name);
917                 in6_ifa_put(ifp);
918                 return;
919         }
920         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
921
922         if (neigh) {
923                 u8 old_flags = neigh->flags;
924
925                 if (neigh->nud_state & NUD_FAILED)
926                         goto out;
927
928                 /*
929                  * Don't update the neighbor cache entry on a proxy NA from
930                  * ourselves because either the proxied node is off link or it
931                  * has already sent a NA to us.
932                  */
933                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
934                     ipv6_devconf.forwarding && ipv6_devconf.proxy_ndp &&
935                     pneigh_lookup(&nd_tbl, dev_net(dev), &msg->target, dev, 0)) {
936                         /* XXX: idev->cnf.prixy_ndp */
937                         goto out;
938                 }
939
940                 neigh_update(neigh, lladdr,
941                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
942                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
943                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
944                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
945                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
946
947                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
948                         /*
949                          * Change: router to host
950                          */
951                         struct rt6_info *rt;
952                         rt = rt6_get_dflt_router(saddr, dev);
953                         if (rt)
954                                 ip6_del_rt(rt);
955                 }
956
957 out:
958                 neigh_release(neigh);
959         }
960 }
961
962 static void ndisc_recv_rs(struct sk_buff *skb)
963 {
964         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
965         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
966         struct neighbour *neigh;
967         struct inet6_dev *idev;
968         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
969         struct ndisc_options ndopts;
970         u8 *lladdr = NULL;
971
972         if (skb->len < sizeof(*rs_msg))
973                 return;
974
975         idev = in6_dev_get(skb->dev);
976         if (!idev) {
977                 if (net_ratelimit())
978                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
979                 return;
980         }
981
982         /* Don't accept RS if we're not in router mode */
983         if (!idev->cnf.forwarding)
984                 goto out;
985
986         /*
987          * Don't update NCE if src = ::;
988          * this implies that the source node has no ip address assigned yet.
989          */
990         if (ipv6_addr_any(saddr))
991                 goto out;
992
993         /* Parse ND options */
994         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
995                 if (net_ratelimit())
996                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
997                 goto out;
998         }
999
1000         if (ndopts.nd_opts_src_lladdr) {
1001                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1002                                              skb->dev);
1003                 if (!lladdr)
1004                         goto out;
1005         }
1006
1007         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1008         if (neigh) {
1009                 neigh_update(neigh, lladdr, NUD_STALE,
1010                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1011                              NEIGH_UPDATE_F_OVERRIDE|
1012                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1013                 neigh_release(neigh);
1014         }
1015 out:
1016         in6_dev_put(idev);
1017 }
1018
1019 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1020 {
1021         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1022         struct sk_buff *skb;
1023         struct nlmsghdr *nlh;
1024         struct nduseroptmsg *ndmsg;
1025         struct net *net = dev_net(ra->dev);
1026         int err;
1027         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1028                                     + (opt->nd_opt_len << 3));
1029         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1030
1031         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1032         if (skb == NULL) {
1033                 err = -ENOBUFS;
1034                 goto errout;
1035         }
1036
1037         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1038         if (nlh == NULL) {
1039                 goto nla_put_failure;
1040         }
1041
1042         ndmsg = nlmsg_data(nlh);
1043         ndmsg->nduseropt_family = AF_INET6;
1044         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1045         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1046         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1047         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1048
1049         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1050
1051         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1052                 &ipv6_hdr(ra)->saddr);
1053         nlmsg_end(skb, nlh);
1054
1055         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1056                           GFP_ATOMIC);
1057         if (err < 0)
1058                 goto errout;
1059
1060         return;
1061
1062 nla_put_failure:
1063         nlmsg_free(skb);
1064         err = -EMSGSIZE;
1065 errout:
1066         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1067 }
1068
1069 static void ndisc_router_discovery(struct sk_buff *skb)
1070 {
1071         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1072         struct neighbour *neigh = NULL;
1073         struct inet6_dev *in6_dev;
1074         struct rt6_info *rt = NULL;
1075         int lifetime;
1076         struct ndisc_options ndopts;
1077         int optlen;
1078         unsigned int pref = 0;
1079
1080         __u8 * opt = (__u8 *)(ra_msg + 1);
1081
1082         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1083
1084         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1085                 ND_PRINTK2(KERN_WARNING
1086                            "ICMPv6 RA: source address is not link-local.\n");
1087                 return;
1088         }
1089         if (optlen < 0) {
1090                 ND_PRINTK2(KERN_WARNING
1091                            "ICMPv6 RA: packet too short\n");
1092                 return;
1093         }
1094
1095 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1096         if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1097                 ND_PRINTK2(KERN_WARNING
1098                            "ICMPv6 RA: from host or unauthorized router\n");
1099                 return;
1100         }
1101 #endif
1102
1103         /*
1104          *      set the RA_RECV flag in the interface
1105          */
1106
1107         in6_dev = in6_dev_get(skb->dev);
1108         if (in6_dev == NULL) {
1109                 ND_PRINTK0(KERN_ERR
1110                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1111                            skb->dev->name);
1112                 return;
1113         }
1114         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1115                 in6_dev_put(in6_dev);
1116                 return;
1117         }
1118
1119         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1120                 in6_dev_put(in6_dev);
1121                 ND_PRINTK2(KERN_WARNING
1122                            "ICMP6 RA: invalid ND options\n");
1123                 return;
1124         }
1125
1126 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1127         /* skip link-specific parameters from interior routers */
1128         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1129                 goto skip_linkparms;
1130 #endif
1131
1132         if (in6_dev->if_flags & IF_RS_SENT) {
1133                 /*
1134                  *      flag that an RA was received after an RS was sent
1135                  *      out on this interface.
1136                  */
1137                 in6_dev->if_flags |= IF_RA_RCVD;
1138         }
1139
1140         /*
1141          * Remember the managed/otherconf flags from most recently
1142          * received RA message (RFC 2462) -- yoshfuji
1143          */
1144         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1145                                 IF_RA_OTHERCONF)) |
1146                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1147                                         IF_RA_MANAGED : 0) |
1148                                 (ra_msg->icmph.icmp6_addrconf_other ?
1149                                         IF_RA_OTHERCONF : 0);
1150
1151         if (!in6_dev->cnf.accept_ra_defrtr)
1152                 goto skip_defrtr;
1153
1154         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1155
1156 #ifdef CONFIG_IPV6_ROUTER_PREF
1157         pref = ra_msg->icmph.icmp6_router_pref;
1158         /* 10b is handled as if it were 00b (medium) */
1159         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1160             !in6_dev->cnf.accept_ra_rtr_pref)
1161                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1162 #endif
1163
1164         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1165
1166         if (rt)
1167                 neigh = rt->rt6i_nexthop;
1168
1169         if (rt && lifetime == 0) {
1170                 neigh_clone(neigh);
1171                 ip6_del_rt(rt);
1172                 rt = NULL;
1173         }
1174
1175         if (rt == NULL && lifetime) {
1176                 ND_PRINTK3(KERN_DEBUG
1177                            "ICMPv6 RA: adding default router.\n");
1178
1179                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1180                 if (rt == NULL) {
1181                         ND_PRINTK0(KERN_ERR
1182                                    "ICMPv6 RA: %s() failed to add default route.\n",
1183                                    __func__);
1184                         in6_dev_put(in6_dev);
1185                         return;
1186                 }
1187
1188                 neigh = rt->rt6i_nexthop;
1189                 if (neigh == NULL) {
1190                         ND_PRINTK0(KERN_ERR
1191                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1192                                    __func__);
1193                         dst_release(&rt->u.dst);
1194                         in6_dev_put(in6_dev);
1195                         return;
1196                 }
1197                 neigh->flags |= NTF_ROUTER;
1198         } else if (rt) {
1199                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1200         }
1201
1202         if (rt)
1203                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1204
1205         if (ra_msg->icmph.icmp6_hop_limit) {
1206                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1207                 if (rt)
1208                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1209         }
1210
1211 skip_defrtr:
1212
1213         /*
1214          *      Update Reachable Time and Retrans Timer
1215          */
1216
1217         if (in6_dev->nd_parms) {
1218                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1219
1220                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1221                         rtime = (rtime*HZ)/1000;
1222                         if (rtime < HZ/10)
1223                                 rtime = HZ/10;
1224                         in6_dev->nd_parms->retrans_time = rtime;
1225                         in6_dev->tstamp = jiffies;
1226                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1227                 }
1228
1229                 rtime = ntohl(ra_msg->reachable_time);
1230                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1231                         rtime = (rtime*HZ)/1000;
1232
1233                         if (rtime < HZ/10)
1234                                 rtime = HZ/10;
1235
1236                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1237                                 in6_dev->nd_parms->base_reachable_time = rtime;
1238                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1239                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1240                                 in6_dev->tstamp = jiffies;
1241                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1242                         }
1243                 }
1244         }
1245
1246 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1247 skip_linkparms:
1248 #endif
1249
1250         /*
1251          *      Process options.
1252          */
1253
1254         if (!neigh)
1255                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1256                                        skb->dev, 1);
1257         if (neigh) {
1258                 u8 *lladdr = NULL;
1259                 if (ndopts.nd_opts_src_lladdr) {
1260                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1261                                                      skb->dev);
1262                         if (!lladdr) {
1263                                 ND_PRINTK2(KERN_WARNING
1264                                            "ICMPv6 RA: invalid link-layer address length\n");
1265                                 goto out;
1266                         }
1267                 }
1268                 neigh_update(neigh, lladdr, NUD_STALE,
1269                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1270                              NEIGH_UPDATE_F_OVERRIDE|
1271                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1272                              NEIGH_UPDATE_F_ISROUTER);
1273         }
1274
1275 #ifdef CONFIG_IPV6_ROUTE_INFO
1276         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1277                 struct nd_opt_hdr *p;
1278                 for (p = ndopts.nd_opts_ri;
1279                      p;
1280                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1281                         struct route_info *ri = (struct route_info *)p;
1282 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1283                         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1284                             ri->prefix_len == 0)
1285                                 continue;
1286 #endif
1287                         if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1288                                 continue;
1289                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1290                                       &ipv6_hdr(skb)->saddr);
1291                 }
1292         }
1293 #endif
1294
1295 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1296         /* skip link-specific ndopts from interior routers */
1297         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1298                 goto out;
1299 #endif
1300
1301         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1302                 struct nd_opt_hdr *p;
1303                 for (p = ndopts.nd_opts_pi;
1304                      p;
1305                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1306                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1307                 }
1308         }
1309
1310         if (ndopts.nd_opts_mtu) {
1311                 __be32 n;
1312                 u32 mtu;
1313
1314                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1315                 mtu = ntohl(n);
1316
1317                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1318                         ND_PRINTK2(KERN_WARNING
1319                                    "ICMPv6 RA: invalid mtu: %d\n",
1320                                    mtu);
1321                 } else if (in6_dev->cnf.mtu6 != mtu) {
1322                         in6_dev->cnf.mtu6 = mtu;
1323
1324                         if (rt)
1325                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1326
1327                         rt6_mtu_change(skb->dev, mtu);
1328                 }
1329         }
1330
1331         if (ndopts.nd_useropts) {
1332                 struct nd_opt_hdr *p;
1333                 for (p = ndopts.nd_useropts;
1334                      p;
1335                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1336                         ndisc_ra_useropt(skb, p);
1337                 }
1338         }
1339
1340         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1341                 ND_PRINTK2(KERN_WARNING
1342                            "ICMPv6 RA: invalid RA options");
1343         }
1344 out:
1345         if (rt)
1346                 dst_release(&rt->u.dst);
1347         else if (neigh)
1348                 neigh_release(neigh);
1349         in6_dev_put(in6_dev);
1350 }
1351
1352 static void ndisc_redirect_rcv(struct sk_buff *skb)
1353 {
1354         struct inet6_dev *in6_dev;
1355         struct icmp6hdr *icmph;
1356         struct in6_addr *dest;
1357         struct in6_addr *target;        /* new first hop to destination */
1358         struct neighbour *neigh;
1359         int on_link = 0;
1360         struct ndisc_options ndopts;
1361         int optlen;
1362         u8 *lladdr = NULL;
1363
1364 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1365         switch (skb->ndisc_nodetype) {
1366         case NDISC_NODETYPE_HOST:
1367         case NDISC_NODETYPE_NODEFAULT:
1368                 ND_PRINTK2(KERN_WARNING
1369                            "ICMPv6 Redirect: from host or unauthorized router\n");
1370                 return;
1371         }
1372 #endif
1373
1374         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1375                 ND_PRINTK2(KERN_WARNING
1376                            "ICMPv6 Redirect: source address is not link-local.\n");
1377                 return;
1378         }
1379
1380         optlen = skb->tail - skb->transport_header;
1381         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1382
1383         if (optlen < 0) {
1384                 ND_PRINTK2(KERN_WARNING
1385                            "ICMPv6 Redirect: packet too short\n");
1386                 return;
1387         }
1388
1389         icmph = icmp6_hdr(skb);
1390         target = (struct in6_addr *) (icmph + 1);
1391         dest = target + 1;
1392
1393         if (ipv6_addr_is_multicast(dest)) {
1394                 ND_PRINTK2(KERN_WARNING
1395                            "ICMPv6 Redirect: destination address is multicast.\n");
1396                 return;
1397         }
1398
1399         if (ipv6_addr_equal(dest, target)) {
1400                 on_link = 1;
1401         } else if (ipv6_addr_type(target) !=
1402                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1403                 ND_PRINTK2(KERN_WARNING
1404                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1405                 return;
1406         }
1407
1408         in6_dev = in6_dev_get(skb->dev);
1409         if (!in6_dev)
1410                 return;
1411         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1412                 in6_dev_put(in6_dev);
1413                 return;
1414         }
1415
1416         /* RFC2461 8.1:
1417          *      The IP source address of the Redirect MUST be the same as the current
1418          *      first-hop router for the specified ICMP Destination Address.
1419          */
1420
1421         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1422                 ND_PRINTK2(KERN_WARNING
1423                            "ICMPv6 Redirect: invalid ND options\n");
1424                 in6_dev_put(in6_dev);
1425                 return;
1426         }
1427         if (ndopts.nd_opts_tgt_lladdr) {
1428                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1429                                              skb->dev);
1430                 if (!lladdr) {
1431                         ND_PRINTK2(KERN_WARNING
1432                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1433                         in6_dev_put(in6_dev);
1434                         return;
1435                 }
1436         }
1437
1438         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1439         if (neigh) {
1440                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1441                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1442                              on_link);
1443                 neigh_release(neigh);
1444         }
1445         in6_dev_put(in6_dev);
1446 }
1447
/*
 *      Build and send an ICMPv6 Redirect to the source of @skb, naming
 *      @target as the first hop for the destination address of @skb.
 *
 *      @skb:    the packet that triggered the redirect; part of it is
 *               copied into the Redirected Header option
 *      @neigh:  neighbour entry whose link-layer address is advertised in
 *               the Target Link-Layer Address option when it is NUD_VALID
 *      @target: address placed in the Redirect's target field
 *
 *      Nothing is sent when the device has no link-local address, when
 *      @target is neither the packet's destination nor a link-local
 *      unicast address, when the route lookup fails, when the route back
 *      to the sender has RTF_GATEWAY set, when xrlim_allow() refuses, or
 *      when the skb allocation fails.
 */
1448 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1449                          const struct in6_addr *target)
1450 {
1451         struct net_device *dev = skb->dev;
1452         struct net *net = dev_net(dev);
1453         struct sock *sk = net->ipv6.ndisc_sk;
             /* ICMPv6 header followed by the target and destination addresses */
1454         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1455         struct sk_buff *buff;
1456         struct icmp6hdr *icmph;
1457         struct in6_addr saddr_buf;
1458         struct in6_addr *addrp;
1459         struct rt6_info *rt;
1460         struct dst_entry *dst;
1461         struct inet6_dev *idev;
1462         struct flowi fl;
1463         u8 *opt;
1464         int rd_len;
1465         int err;
1466         u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1467
             /* The redirect is sent from the device's link-local address. */
1468         if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1469                 ND_PRINTK2(KERN_WARNING
1470                            "ICMPv6 Redirect: no link-local address on %s\n",
1471                            dev->name);
1472                 return;
1473         }
1474
1475         if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1476             ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1477                 ND_PRINTK2(KERN_WARNING
1478                         "ICMPv6 Redirect: target address is not link-local unicast.\n");
1479                 return;
1480         }
1481
             /* Route the redirect back to the original sender. */
1482         icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
1483                          &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1484
1485         dst = ip6_route_output(net, NULL, &fl);
1486         if (dst == NULL)
1487                 return;
1488
             /*
              * NOTE(review): no dst_release() on this error path;
              * presumably xfrm_lookup() drops dst on failure -- confirm.
              */
1489         err = xfrm_lookup(&dst, &fl, NULL, 0);
1490         if (err)
1491                 return;
1492
1493         rt = (struct rt6_info *) dst;
1494
1495         if (rt->rt6i_flags & RTF_GATEWAY) {
1496                 ND_PRINTK2(KERN_WARNING
1497                            "ICMPv6 Redirect: destination is not a neighbour.\n");
1498                 dst_release(dst);
1499                 return;
1500         }
1501         if (!xrlim_allow(dst, 1*HZ)) {
1502                 dst_release(dst);
1503                 return;
1504         }
1505
             /*
              * Snapshot the neighbour's link-layer address under its lock;
              * the option is only added when the entry is NUD_VALID.
              */
1506         if (dev->addr_len) {
1507                 read_lock_bh(&neigh->lock);
1508                 if (neigh->nud_state & NUD_VALID) {
1509                         memcpy(ha_buf, neigh->ha, dev->addr_len);
1510                         read_unlock_bh(&neigh->lock);
1511                         ha = ha_buf;
1512                         len += ndisc_opt_addr_space(dev);
1513                 } else
1514                         read_unlock_bh(&neigh->lock);
1515         }
1516
             /*
              * Size of the Redirected Header option: the 8-byte option
              * header plus as much of the original packet as fits within
              * IPV6_MIN_MTU, rounded down to a multiple of 8.
              */
1517         rd_len = min_t(unsigned int,
1518                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1519         rd_len &= ~0x7;
1520         len += rd_len;
1521
1522         buff = sock_alloc_send_skb(sk,
1523                                    (MAX_HEADER + sizeof(struct ipv6hdr) +
1524                                     len + LL_ALLOCATED_SPACE(dev)),
1525                                    1, &err);
1526         if (buff == NULL) {
1527                 ND_PRINTK0(KERN_ERR
1528                            "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1529                            __func__);
1530                 dst_release(dst);
1531                 return;
1532         }
1533
1534         skb_reserve(buff, LL_RESERVED_SPACE(dev));
1535         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1536                    IPPROTO_ICMPV6, len);
1537
             /* The ICMPv6 message starts at the current tail of buff. */
1538         skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1539         skb_put(buff, len);
1540         icmph = icmp6_hdr(buff);
1541
1542         memset(icmph, 0, sizeof(struct icmp6hdr));
1543         icmph->icmp6_type = NDISC_REDIRECT;
1544
1545         /*
1546          *      copy target and destination addresses
1547          */
1548
1549         addrp = (struct in6_addr *)(icmph + 1);
1550         ipv6_addr_copy(addrp, target);
1551         addrp++;
1552         ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1553
1554         opt = (u8*) (addrp + 1);
1555
1556         /*
1557          *      include target_address option
1558          */
1559
1560         if (ha)
1561                 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1562                                              dev->addr_len, dev->type);
1563
1564         /*
1565          *      build redirect option and copy skb over to the new packet.
1566          */
1567
             /* 8-byte option header: type, length in 8-byte units, 6 zero bytes */
1568         memset(opt, 0, 8);
1569         *(opt++) = ND_OPT_REDIRECT_HDR;
1570         *(opt++) = (rd_len >> 3);
1571         opt += 6;
1572
1573         memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1574
1575         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1576                                              len, IPPROTO_ICMPV6,
1577                                              csum_partial((u8 *) icmph, len, 0));
1578
1579         buff->dst = dst;
1580         idev = in6_dev_get(dst->dev);
1581         IP6_INC_STATS(idev, IPSTATS_MIB_OUTREQUESTS);
1582         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1583                       dst_output);
             /* The ICMPv6 output counters are bumped only when NF_HOOK returns 0. */
1584         if (!err) {
1585                 ICMP6MSGOUT_INC_STATS(idev, NDISC_REDIRECT);
1586                 ICMP6_INC_STATS(idev, ICMP6_MIB_OUTMSGS);
1587         }
1588
1589         if (likely(idev != NULL))
1590                 in6_dev_put(idev);
1591 }
1592
/*
 *      Run Neighbour Solicitation processing on @skb and then free it.
 */
static void pndisc_redo(struct sk_buff *skb)
{
        ndisc_recv_ns(skb);
        kfree_skb(skb);
}
1598
1599 int ndisc_rcv(struct sk_buff *skb)
1600 {
1601         struct nd_msg *msg;
1602
1603         if (!pskb_may_pull(skb, skb->len))
1604                 return 0;
1605
1606         msg = (struct nd_msg *)skb_transport_header(skb);
1607
1608         __skb_push(skb, skb->data - skb_transport_header(skb));
1609
1610         if (ipv6_hdr(skb)->hop_limit != 255) {
1611                 ND_PRINTK2(KERN_WARNING
1612                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1613                            ipv6_hdr(skb)->hop_limit);
1614                 return 0;
1615         }
1616
1617         if (msg->icmph.icmp6_code != 0) {
1618                 ND_PRINTK2(KERN_WARNING
1619                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1620                            msg->icmph.icmp6_code);
1621                 return 0;
1622         }
1623
1624         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1625
1626         switch (msg->icmph.icmp6_type) {
1627         case NDISC_NEIGHBOUR_SOLICITATION:
1628                 ndisc_recv_ns(skb);
1629                 break;
1630
1631         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1632                 ndisc_recv_na(skb);
1633                 break;
1634
1635         case NDISC_ROUTER_SOLICITATION:
1636                 ndisc_recv_rs(skb);
1637                 break;
1638
1639         case NDISC_ROUTER_ADVERTISEMENT:
1640                 ndisc_router_discovery(skb);
1641                 break;
1642
1643         case NDISC_REDIRECT:
1644                 ndisc_redirect_rcv(skb);
1645                 break;
1646         }
1647
1648         return 0;
1649 }
1650
1651 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1652 {
1653         struct net_device *dev = ptr;
1654         struct net *net = dev_net(dev);
1655
1656         switch (event) {
1657         case NETDEV_CHANGEADDR:
1658                 neigh_changeaddr(&nd_tbl, dev);
1659                 fib6_run_gc(~0UL, net);
1660                 break;
1661         case NETDEV_DOWN:
1662                 neigh_ifdown(&nd_tbl, dev);
1663                 fib6_run_gc(~0UL, net);
1664                 break;
1665         default:
1666                 break;
1667         }
1668
1669         return NOTIFY_DONE;
1670 }
1671
1672 static struct notifier_block ndisc_netdev_notifier = {
1673         .notifier_call = ndisc_netdev_event,
1674 };
1675
1676 #ifdef CONFIG_SYSCTL
1677 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1678                                          const char *func, const char *dev_name)
1679 {
1680         static char warncomm[TASK_COMM_LEN];
1681         static int warned;
1682         if (strcmp(warncomm, current->comm) && warned < 5) {
1683                 strcpy(warncomm, current->comm);
1684                 printk(KERN_WARNING
1685                         "process `%s' is using deprecated sysctl (%s) "
1686                         "net.ipv6.neigh.%s.%s; "
1687                         "Use net.ipv6.neigh.%s.%s_ms "
1688                         "instead.\n",
1689                         warncomm, func,
1690                         dev_name, ctl->procname,
1691                         dev_name, ctl->procname);
1692                 warned++;
1693         }
1694 }
1695
1696 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1697 {
1698         struct net_device *dev = ctl->extra1;
1699         struct inet6_dev *idev;
1700         int ret;
1701
1702         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1703             (strcmp(ctl->procname, "base_reachable_time") == 0))
1704                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1705
1706         if (strcmp(ctl->procname, "retrans_time") == 0)
1707                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1708
1709         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1710                 ret = proc_dointvec_jiffies(ctl, write,
1711                                             filp, buffer, lenp, ppos);
1712
1713         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1714                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1715                 ret = proc_dointvec_ms_jiffies(ctl, write,
1716                                                filp, buffer, lenp, ppos);
1717         else
1718                 ret = -1;
1719
1720         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1721                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1722                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1723                 idev->tstamp = jiffies;
1724                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1725                 in6_dev_put(idev);
1726         }
1727         return ret;
1728 }
1729
1730 static int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl, int __user *name,
1731                                         int nlen, void __user *oldval,
1732                                         size_t __user *oldlenp,
1733                                         void __user *newval, size_t newlen)
1734 {
1735         struct net_device *dev = ctl->extra1;
1736         struct inet6_dev *idev;
1737         int ret;
1738
1739         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1740             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1741                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1742
1743         switch (ctl->ctl_name) {
1744         case NET_NEIGH_REACHABLE_TIME:
1745                 ret = sysctl_jiffies(ctl, name, nlen,
1746                                      oldval, oldlenp, newval, newlen);
1747                 break;
1748         case NET_NEIGH_RETRANS_TIME_MS:
1749         case NET_NEIGH_REACHABLE_TIME_MS:
1750                  ret = sysctl_ms_jiffies(ctl, name, nlen,
1751                                          oldval, oldlenp, newval, newlen);
1752                  break;
1753         default:
1754                 ret = 0;
1755         }
1756
1757         if (newval && newlen && ret > 0 &&
1758             dev && (idev = in6_dev_get(dev)) != NULL) {
1759                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1760                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1761                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1762                 idev->tstamp = jiffies;
1763                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1764                 in6_dev_put(idev);
1765         }
1766
1767         return ret;
1768 }
1769
1770 #endif
1771
1772 static int ndisc_net_init(struct net *net)
1773 {
1774         struct ipv6_pinfo *np;
1775         struct sock *sk;
1776         int err;
1777
1778         err = inet_ctl_sock_create(&sk, PF_INET6,
1779                                    SOCK_RAW, IPPROTO_ICMPV6, net);
1780         if (err < 0) {
1781                 ND_PRINTK0(KERN_ERR
1782                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1783                            err);
1784                 return err;
1785         }
1786
1787         net->ipv6.ndisc_sk = sk;
1788
1789         np = inet6_sk(sk);
1790         np->hop_limit = 255;
1791         /* Do not loopback ndisc messages */
1792         np->mc_loop = 0;
1793
1794         return 0;
1795 }
1796
1797 static void ndisc_net_exit(struct net *net)
1798 {
1799         inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1800 }
1801
1802 static struct pernet_operations ndisc_net_ops = {
1803         .init = ndisc_net_init,
1804         .exit = ndisc_net_exit,
1805 };
1806
1807 int __init ndisc_init(void)
1808 {
1809         int err;
1810
1811         err = register_pernet_subsys(&ndisc_net_ops);
1812         if (err)
1813                 return err;
1814         /*
1815          * Initialize the neighbour table
1816          */
1817         neigh_table_init(&nd_tbl);
1818
1819 #ifdef CONFIG_SYSCTL
1820         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1821                                     NET_IPV6_NEIGH, "ipv6",
1822                                     &ndisc_ifinfo_sysctl_change,
1823                                     &ndisc_ifinfo_sysctl_strategy);
1824         if (err)
1825                 goto out_unregister_pernet;
1826 #endif
1827         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1828         if (err)
1829                 goto out_unregister_sysctl;
1830 out:
1831         return err;
1832
1833 out_unregister_sysctl:
1834 #ifdef CONFIG_SYSCTL
1835         neigh_sysctl_unregister(&nd_tbl.parms);
1836 out_unregister_pernet:
1837 #endif
1838         unregister_pernet_subsys(&ndisc_net_ops);
1839         goto out;
1840 }
1841
1842 void ndisc_cleanup(void)
1843 {
1844         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1845 #ifdef CONFIG_SYSCTL
1846         neigh_sysctl_unregister(&nd_tbl.parms);
1847 #endif
1848         neigh_table_clear(&nd_tbl);
1849         unregister_pernet_subsys(&ndisc_net_ops);
1850 }