sysctl: simplify ->strategy
[linux-2.6.git] / net / ipv6 / ndisc.c
1 /*
2  *      Neighbour Discovery for IPv6
3  *      Linux INET6 implementation
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>
7  *      Mike Shaver             <shaver@ingenia.com>
8  *
9  *      This program is free software; you can redistribute it and/or
10  *      modify it under the terms of the GNU General Public License
11  *      as published by the Free Software Foundation; either version
12  *      2 of the License, or (at your option) any later version.
13  */
14
15 /*
16  *      Changes:
17  *
18  *      Pierre Ynard                    :       export userland ND options
19  *                                              through netlink (RDNSS support)
20  *      Lars Fenneberg                  :       fixed MTU setting on receipt
21  *                                              of an RA.
22  *      Janos Farkas                    :       kmalloc failure checks
23  *      Alexey Kuznetsov                :       state machine reworked
24  *                                              and moved to net/core.
25  *      Pekka Savola                    :       RFC2461 validation
26  *      YOSHIFUJI Hideaki @USAGI        :       Verify ND options properly
27  */
28
/*
 * Debug verbosity for this file.  ND_PRINTKn() emits output only when
 * ND_DEBUG >= n (ND_PRINTK0 always does); set to 3 to get tracing.
 */
#define ND_DEBUG 1

/* Rate-limited printk(); the message is dropped when net_ratelimit() returns 0. */
#define ND_PRINTK(fmt, args...) do { if (net_ratelimit()) { printk(fmt, ## args); } } while(0)
/* Swallows its arguments and expands to an empty statement. */
#define ND_NOPRINTK(x...) do { ; } while(0)
/* Level 0 is always enabled; levels 1-3 start disabled and are enabled below. */
#define ND_PRINTK0 ND_PRINTK
#define ND_PRINTK1 ND_NOPRINTK
#define ND_PRINTK2 ND_NOPRINTK
#define ND_PRINTK3 ND_NOPRINTK
#if ND_DEBUG >= 1
#undef ND_PRINTK1
#define ND_PRINTK1 ND_PRINTK
#endif
#if ND_DEBUG >= 2
#undef ND_PRINTK2
#define ND_PRINTK2 ND_PRINTK
#endif
#if ND_DEBUG >= 3
#undef ND_PRINTK3
#define ND_PRINTK3 ND_PRINTK
#endif
50
51 #include <linux/module.h>
52 #include <linux/errno.h>
53 #include <linux/types.h>
54 #include <linux/socket.h>
55 #include <linux/sockios.h>
56 #include <linux/sched.h>
57 #include <linux/net.h>
58 #include <linux/in6.h>
59 #include <linux/route.h>
60 #include <linux/init.h>
61 #include <linux/rcupdate.h>
62 #ifdef CONFIG_SYSCTL
63 #include <linux/sysctl.h>
64 #endif
65
66 #include <linux/if_addr.h>
67 #include <linux/if_arp.h>
68 #include <linux/ipv6.h>
69 #include <linux/icmpv6.h>
70 #include <linux/jhash.h>
71
72 #include <net/sock.h>
73 #include <net/snmp.h>
74
75 #include <net/ipv6.h>
76 #include <net/protocol.h>
77 #include <net/ndisc.h>
78 #include <net/ip6_route.h>
79 #include <net/addrconf.h>
80 #include <net/icmp.h>
81
82 #include <net/netlink.h>
83 #include <linux/rtnetlink.h>
84
85 #include <net/flow.h>
86 #include <net/ip6_checksum.h>
87 #include <net/inet_common.h>
88 #include <linux/proc_fs.h>
89
90 #include <linux/netfilter.h>
91 #include <linux/netfilter_ipv6.h>
92
/*
 * Forward declarations for the callbacks referenced by the neigh_ops
 * instances and by nd_tbl below.
 */
static u32 ndisc_hash(const void *pkey, const struct net_device *dev);
static int ndisc_constructor(struct neighbour *neigh);
static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb);
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb);
static int pndisc_constructor(struct pneigh_entry *n);
static void pndisc_destructor(struct pneigh_entry *n);
static void pndisc_redo(struct sk_buff *skb);
100
/*
 * Neighbour operations selected by ndisc_constructor() for devices that
 * have header_ops but no header_ops->cache callback.
 */
static struct neigh_ops ndisc_generic_ops = {
        .family =               AF_INET6,
        .solicit =              ndisc_solicit,
        .error_report =         ndisc_error_report,
        .output =               neigh_resolve_output,
        .connected_output =     neigh_connected_output,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
110
/*
 * Neighbour operations selected by ndisc_constructor() for devices whose
 * header_ops provide a ->cache callback.  Differs from ndisc_generic_ops
 * only in .connected_output, which is neigh_resolve_output here.
 */
static struct neigh_ops ndisc_hh_ops = {
        .family =               AF_INET6,
        .solicit =              ndisc_solicit,
        .error_report =         ndisc_error_report,
        .output =               neigh_resolve_output,
        .connected_output =     neigh_resolve_output,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
120
121
/*
 * Neighbour operations selected by ndisc_constructor() for devices
 * without header_ops: every output path goes straight to dev_queue_xmit
 * and no solicit/error_report callbacks are installed.
 */
static struct neigh_ops ndisc_direct_ops = {
        .family =               AF_INET6,
        .output =               dev_queue_xmit,
        .connected_output =     dev_queue_xmit,
        .hh_output =            dev_queue_xmit,
        .queue_xmit =           dev_queue_xmit,
};
129
/*
 * The IPv6 Neighbour Discovery neighbour table.  Keys are IPv6 addresses
 * (struct in6_addr); hashing, entry construction and proxy-entry handling
 * are delegated to the ndisc_* and pndisc_* callbacks of this file.
 */
struct neigh_table nd_tbl = {
        .family =       AF_INET6,
        /* struct neighbour plus room for the IPv6 address key */
        .entry_size =   sizeof(struct neighbour) + sizeof(struct in6_addr),
        .key_len =      sizeof(struct in6_addr),
        .hash =         ndisc_hash,
        .constructor =  ndisc_constructor,
        .pconstructor = pndisc_constructor,
        .pdestructor =  pndisc_destructor,
        .proxy_redo =   pndisc_redo,
        .id =           "ndisc_cache",
        /* Table-wide default parameters; time values are given in multiples of HZ. */
        .parms = {
                .tbl =                  &nd_tbl,
                .base_reachable_time =  30 * HZ,
                .retrans_time =  1 * HZ,
                .gc_staletime = 60 * HZ,
                .reachable_time =               30 * HZ,
                .delay_probe_time =      5 * HZ,
                .queue_len =             3,
                .ucast_probes =  3,
                .mcast_probes =  3,
                .anycast_delay =         1 * HZ,
                .proxy_delay =          (8 * HZ) / 10,
                .proxy_qlen =           64,
        },
        .gc_interval =    30 * HZ,
        .gc_thresh1 =    128,
        .gc_thresh2 =    512,
        .gc_thresh3 =   1024,
};
159
/*
 * ND options: result of ndisc_parse_options().
 *
 * All pointers refer into the parsed message buffer; a NULL pointer
 * means the corresponding option was not present.
 */
struct ndisc_options {
        /* Indexed by option type; holds the first occurrence of each known option. */
        struct nd_opt_hdr *nd_opt_array[__ND_OPT_ARRAY_MAX];
#ifdef CONFIG_IPV6_ROUTE_INFO
        /* First and last Route Information option seen. */
        struct nd_opt_hdr *nd_opts_ri;
        struct nd_opt_hdr *nd_opts_ri_end;
#endif
        /* First and last option for which ndisc_is_useropt() is true. */
        struct nd_opt_hdr *nd_useropts;
        struct nd_opt_hdr *nd_useropts_end;
};
170
/* Shorthand accessors for individual slots of ndisc_options.nd_opt_array. */
#define nd_opts_src_lladdr      nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
#define nd_opts_tgt_lladdr      nd_opt_array[ND_OPT_TARGET_LL_ADDR]
#define nd_opts_pi              nd_opt_array[ND_OPT_PREFIX_INFO]
#define nd_opts_pi_end          nd_opt_array[__ND_OPT_PREFIX_INFO_END]
#define nd_opts_rh              nd_opt_array[ND_OPT_REDIRECT_HDR]
#define nd_opts_mtu             nd_opt_array[ND_OPT_MTU]

/*
 * Size in bytes of an option carrying len bytes of payload: two bytes
 * are added for the option header and the sum is rounded up to a
 * multiple of 8.
 */
#define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
179
/*
 * Number of padding bytes that sit between the option length byte and
 * the start of the link-layer address, for a device of ARPHRD_* @type.
 *
 * Only IP-over-InfiniBand needs a pad today.  Should RFC 3831
 * IPv6-over-Fibre Channel ever be implemented, it may need a pad of 2
 * as well.
 */
static int ndisc_addr_option_pad(unsigned short type)
{
        if (type == ARPHRD_INFINIBAND)
                return 2;

        return 0;
}
193
194 static inline int ndisc_opt_addr_space(struct net_device *dev)
195 {
196         return NDISC_OPT_SPACE(dev->addr_len + ndisc_addr_option_pad(dev->type));
197 }
198
199 static u8 *ndisc_fill_addr_option(u8 *opt, int type, void *data, int data_len,
200                                   unsigned short addr_type)
201 {
202         int space = NDISC_OPT_SPACE(data_len);
203         int pad   = ndisc_addr_option_pad(addr_type);
204
205         opt[0] = type;
206         opt[1] = space>>3;
207
208         memset(opt + 2, 0, pad);
209         opt   += pad;
210         space -= pad;
211
212         memcpy(opt+2, data, data_len);
213         data_len += 2;
214         opt += data_len;
215         if ((space -= data_len) > 0)
216                 memset(opt, 0, space);
217         return opt + space;
218 }
219
220 static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
221                                             struct nd_opt_hdr *end)
222 {
223         int type;
224         if (!cur || !end || cur >= end)
225                 return NULL;
226         type = cur->nd_opt_type;
227         do {
228                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
229         } while(cur < end && cur->nd_opt_type != type);
230         return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
231 }
232
233 static inline int ndisc_is_useropt(struct nd_opt_hdr *opt)
234 {
235         return (opt->nd_opt_type == ND_OPT_RDNSS);
236 }
237
238 static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur,
239                                              struct nd_opt_hdr *end)
240 {
241         if (!cur || !end || cur >= end)
242                 return NULL;
243         do {
244                 cur = ((void *)cur) + (cur->nd_opt_len << 3);
245         } while(cur < end && !ndisc_is_useropt(cur));
246         return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL);
247 }
248
249 static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
250                                                  struct ndisc_options *ndopts)
251 {
252         struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
253
254         if (!nd_opt || opt_len < 0 || !ndopts)
255                 return NULL;
256         memset(ndopts, 0, sizeof(*ndopts));
257         while (opt_len) {
258                 int l;
259                 if (opt_len < sizeof(struct nd_opt_hdr))
260                         return NULL;
261                 l = nd_opt->nd_opt_len << 3;
262                 if (opt_len < l || l == 0)
263                         return NULL;
264                 switch (nd_opt->nd_opt_type) {
265                 case ND_OPT_SOURCE_LL_ADDR:
266                 case ND_OPT_TARGET_LL_ADDR:
267                 case ND_OPT_MTU:
268                 case ND_OPT_REDIRECT_HDR:
269                         if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) {
270                                 ND_PRINTK2(KERN_WARNING
271                                            "%s(): duplicated ND6 option found: type=%d\n",
272                                            __func__,
273                                            nd_opt->nd_opt_type);
274                         } else {
275                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
276                         }
277                         break;
278                 case ND_OPT_PREFIX_INFO:
279                         ndopts->nd_opts_pi_end = nd_opt;
280                         if (!ndopts->nd_opt_array[nd_opt->nd_opt_type])
281                                 ndopts->nd_opt_array[nd_opt->nd_opt_type] = nd_opt;
282                         break;
283 #ifdef CONFIG_IPV6_ROUTE_INFO
284                 case ND_OPT_ROUTE_INFO:
285                         ndopts->nd_opts_ri_end = nd_opt;
286                         if (!ndopts->nd_opts_ri)
287                                 ndopts->nd_opts_ri = nd_opt;
288                         break;
289 #endif
290                 default:
291                         if (ndisc_is_useropt(nd_opt)) {
292                                 ndopts->nd_useropts_end = nd_opt;
293                                 if (!ndopts->nd_useropts)
294                                         ndopts->nd_useropts = nd_opt;
295                         } else {
296                                 /*
297                                  * Unknown options must be silently ignored,
298                                  * to accommodate future extension to the
299                                  * protocol.
300                                  */
301                                 ND_PRINTK2(KERN_NOTICE
302                                            "%s(): ignored unsupported option; type=%d, len=%d\n",
303                                            __func__,
304                                            nd_opt->nd_opt_type, nd_opt->nd_opt_len);
305                         }
306                 }
307                 opt_len -= l;
308                 nd_opt = ((void *)nd_opt) + l;
309         }
310         return ndopts;
311 }
312
313 static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p,
314                                       struct net_device *dev)
315 {
316         u8 *lladdr = (u8 *)(p + 1);
317         int lladdrlen = p->nd_opt_len << 3;
318         int prepad = ndisc_addr_option_pad(dev->type);
319         if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad))
320                 return NULL;
321         return (lladdr + prepad);
322 }
323
324 int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir)
325 {
326         switch (dev->type) {
327         case ARPHRD_ETHER:
328         case ARPHRD_IEEE802:    /* Not sure. Check it later. --ANK */
329         case ARPHRD_FDDI:
330                 ipv6_eth_mc_map(addr, buf);
331                 return 0;
332         case ARPHRD_IEEE802_TR:
333                 ipv6_tr_mc_map(addr,buf);
334                 return 0;
335         case ARPHRD_ARCNET:
336                 ipv6_arcnet_mc_map(addr, buf);
337                 return 0;
338         case ARPHRD_INFINIBAND:
339                 ipv6_ib_mc_map(addr, dev->broadcast, buf);
340                 return 0;
341         default:
342                 if (dir) {
343                         memcpy(buf, dev->broadcast, dev->addr_len);
344                         return 0;
345                 }
346         }
347         return -EINVAL;
348 }
349
350 EXPORT_SYMBOL(ndisc_mc_map);
351
352 static u32 ndisc_hash(const void *pkey, const struct net_device *dev)
353 {
354         const u32 *p32 = pkey;
355         u32 addr_hash, i;
356
357         addr_hash = 0;
358         for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++)
359                 addr_hash ^= *p32++;
360
361         return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd);
362 }
363
364 static int ndisc_constructor(struct neighbour *neigh)
365 {
366         struct in6_addr *addr = (struct in6_addr*)&neigh->primary_key;
367         struct net_device *dev = neigh->dev;
368         struct inet6_dev *in6_dev;
369         struct neigh_parms *parms;
370         int is_multicast = ipv6_addr_is_multicast(addr);
371
372         rcu_read_lock();
373         in6_dev = in6_dev_get(dev);
374         if (in6_dev == NULL) {
375                 rcu_read_unlock();
376                 return -EINVAL;
377         }
378
379         parms = in6_dev->nd_parms;
380         __neigh_parms_put(neigh->parms);
381         neigh->parms = neigh_parms_clone(parms);
382         rcu_read_unlock();
383
384         neigh->type = is_multicast ? RTN_MULTICAST : RTN_UNICAST;
385         if (!dev->header_ops) {
386                 neigh->nud_state = NUD_NOARP;
387                 neigh->ops = &ndisc_direct_ops;
388                 neigh->output = neigh->ops->queue_xmit;
389         } else {
390                 if (is_multicast) {
391                         neigh->nud_state = NUD_NOARP;
392                         ndisc_mc_map(addr, neigh->ha, dev, 1);
393                 } else if (dev->flags&(IFF_NOARP|IFF_LOOPBACK)) {
394                         neigh->nud_state = NUD_NOARP;
395                         memcpy(neigh->ha, dev->dev_addr, dev->addr_len);
396                         if (dev->flags&IFF_LOOPBACK)
397                                 neigh->type = RTN_LOCAL;
398                 } else if (dev->flags&IFF_POINTOPOINT) {
399                         neigh->nud_state = NUD_NOARP;
400                         memcpy(neigh->ha, dev->broadcast, dev->addr_len);
401                 }
402                 if (dev->header_ops->cache)
403                         neigh->ops = &ndisc_hh_ops;
404                 else
405                         neigh->ops = &ndisc_generic_ops;
406                 if (neigh->nud_state&NUD_VALID)
407                         neigh->output = neigh->ops->connected_output;
408                 else
409                         neigh->output = neigh->ops->output;
410         }
411         in6_dev_put(in6_dev);
412         return 0;
413 }
414
415 static int pndisc_constructor(struct pneigh_entry *n)
416 {
417         struct in6_addr *addr = (struct in6_addr*)&n->key;
418         struct in6_addr maddr;
419         struct net_device *dev = n->dev;
420
421         if (dev == NULL || __in6_dev_get(dev) == NULL)
422                 return -EINVAL;
423         addrconf_addr_solict_mult(addr, &maddr);
424         ipv6_dev_mc_inc(dev, &maddr);
425         return 0;
426 }
427
428 static void pndisc_destructor(struct pneigh_entry *n)
429 {
430         struct in6_addr *addr = (struct in6_addr*)&n->key;
431         struct in6_addr maddr;
432         struct net_device *dev = n->dev;
433
434         if (dev == NULL || __in6_dev_get(dev) == NULL)
435                 return;
436         addrconf_addr_solict_mult(addr, &maddr);
437         ipv6_dev_mc_dec(dev, &maddr);
438 }
439
/*
 *      Build and transmit a Neighbour Discovery ICMPv6 message.
 *
 *      Common back end of ndisc_send_na(), ndisc_send_ns() and
 *      ndisc_send_rs().
 *
 *      @dev:    outgoing device
 *      @neigh:  neighbour handed to icmp6_dst_alloc(); may be NULL
 *      @daddr:  IPv6 destination address
 *      @saddr:  IPv6 source address
 *      @icmp6h: ICMPv6 header copied verbatim into the packet; the
 *               checksum is filled in here
 *      @target: optional address placed directly after the ICMPv6 header
 *      @llinfo: when non-zero, option type of a link-layer address option
 *               carrying dev->dev_addr; forced to 0 for devices with a
 *               zero addr_len
 *
 *      Failures are not reported to the caller.
 */
static void __ndisc_send(struct net_device *dev,
                         struct neighbour *neigh,
                         const struct in6_addr *daddr,
                         const struct in6_addr *saddr,
                         struct icmp6hdr *icmp6h, const struct in6_addr *target,
                         int llinfo)
{
        struct flowi fl;
        struct dst_entry *dst;
        struct net *net = dev_net(dev);
        struct sock *sk = net->ipv6.ndisc_sk;
        struct sk_buff *skb;
        struct icmp6hdr *hdr;
        struct inet6_dev *idev;
        int len;
        int err;
        u8 *opt, type;

        type = icmp6h->icmp6_type;

        icmpv6_flow_init(sk, &fl, type, saddr, daddr, dev->ifindex);

        dst = icmp6_dst_alloc(dev, neigh, daddr);
        if (!dst)
                return;

        /*
         * NOTE(review): dst is not released on this error path; presumably
         * xfrm_lookup() drops it itself on failure -- confirm.
         */
        err = xfrm_lookup(&dst, &fl, NULL, 0);
        if (err < 0)
                return;

        /* a device without link-layer address cannot carry the option */
        if (!dev->addr_len)
                llinfo = 0;

        /* ICMPv6 payload: header, optional target, optional lladdr option */
        len = sizeof(struct icmp6hdr) + (target ? sizeof(*target) : 0);
        if (llinfo)
                len += ndisc_opt_addr_space(dev);

        skb = sock_alloc_send_skb(sk,
                                  (MAX_HEADER + sizeof(struct ipv6hdr) +
                                   len + LL_ALLOCATED_SPACE(dev)),
                                  1, &err);
        if (!skb) {
                ND_PRINTK0(KERN_ERR
                           "ICMPv6 ND: %s() failed to allocate an skb.\n",
                           __func__);
                dst_release(dst);
                return;
        }

        skb_reserve(skb, LL_RESERVED_SPACE(dev));
        ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);

        /* the ICMPv6 message starts at the current tail */
        skb->transport_header = skb->tail;
        skb_put(skb, len);

        hdr = (struct icmp6hdr *)skb_transport_header(skb);
        memcpy(hdr, icmp6h, sizeof(*hdr));

        opt = skb_transport_header(skb) + sizeof(struct icmp6hdr);
        if (target) {
                ipv6_addr_copy((struct in6_addr *)opt, target);
                opt += sizeof(*target);
        }

        if (llinfo)
                ndisc_fill_addr_option(opt, llinfo, dev->dev_addr,
                                       dev->addr_len, dev->type);

        /* checksum covers the complete ICMPv6 message built above */
        hdr->icmp6_cksum = csum_ipv6_magic(saddr, daddr, len,
                                           IPPROTO_ICMPV6,
                                           csum_partial((__u8 *) hdr,
                                                        len, 0));

        skb->dst = dst;

        idev = in6_dev_get(dst->dev);
        IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);

        err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
                      dst_output);
        /* ICMPv6 output counters are bumped only when the hook returned 0 */
        if (!err) {
                ICMP6MSGOUT_INC_STATS(net, idev, type);
                ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
        }

        if (likely(idev != NULL))
                in6_dev_put(idev);
}
531
532 static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
533                           const struct in6_addr *daddr,
534                           const struct in6_addr *solicited_addr,
535                           int router, int solicited, int override, int inc_opt)
536 {
537         struct in6_addr tmpaddr;
538         struct inet6_ifaddr *ifp;
539         const struct in6_addr *src_addr;
540         struct icmp6hdr icmp6h = {
541                 .icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT,
542         };
543
544         /* for anycast or proxy, solicited_addr != src_addr */
545         ifp = ipv6_get_ifaddr(dev_net(dev), solicited_addr, dev, 1);
546         if (ifp) {
547                 src_addr = solicited_addr;
548                 if (ifp->flags & IFA_F_OPTIMISTIC)
549                         override = 0;
550                 in6_ifa_put(ifp);
551         } else {
552                 if (ipv6_dev_get_saddr(dev_net(dev), dev, daddr,
553                                        inet6_sk(dev_net(dev)->ipv6.ndisc_sk)->srcprefs,
554                                        &tmpaddr))
555                         return;
556                 src_addr = &tmpaddr;
557         }
558
559         icmp6h.icmp6_router = router;
560         icmp6h.icmp6_solicited = solicited;
561         icmp6h.icmp6_override = override;
562
563         __ndisc_send(dev, neigh, daddr, src_addr,
564                      &icmp6h, solicited_addr,
565                      inc_opt ? ND_OPT_TARGET_LL_ADDR : 0);
566 }
567
568 void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh,
569                    const struct in6_addr *solicit,
570                    const struct in6_addr *daddr, const struct in6_addr *saddr)
571 {
572         struct in6_addr addr_buf;
573         struct icmp6hdr icmp6h = {
574                 .icmp6_type = NDISC_NEIGHBOUR_SOLICITATION,
575         };
576
577         if (saddr == NULL) {
578                 if (ipv6_get_lladdr(dev, &addr_buf,
579                                    (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)))
580                         return;
581                 saddr = &addr_buf;
582         }
583
584         __ndisc_send(dev, neigh, daddr, saddr,
585                      &icmp6h, solicit,
586                      !ipv6_addr_any(saddr) ? ND_OPT_SOURCE_LL_ADDR : 0);
587 }
588
589 void ndisc_send_rs(struct net_device *dev, const struct in6_addr *saddr,
590                    const struct in6_addr *daddr)
591 {
592         struct icmp6hdr icmp6h = {
593                 .icmp6_type = NDISC_ROUTER_SOLICITATION,
594         };
595         int send_sllao = dev->addr_len;
596
597 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
598         /*
599          * According to section 2.2 of RFC 4429, we must not
600          * send router solicitations with a sllao from
601          * optimistic addresses, but we may send the solicitation
602          * if we don't include the sllao.  So here we check
603          * if our address is optimistic, and if so, we
604          * suppress the inclusion of the sllao.
605          */
606         if (send_sllao) {
607                 struct inet6_ifaddr *ifp = ipv6_get_ifaddr(dev_net(dev), saddr,
608                                                            dev, 1);
609                 if (ifp) {
610                         if (ifp->flags & IFA_F_OPTIMISTIC)  {
611                                 send_sllao = 0;
612                         }
613                         in6_ifa_put(ifp);
614                 } else {
615                         send_sllao = 0;
616                 }
617         }
618 #endif
619         __ndisc_send(dev, NULL, daddr, saddr,
620                      &icmp6h, NULL,
621                      send_sllao ? ND_OPT_SOURCE_LL_ADDR : 0);
622 }
623
624
/*
 * error_report callback of the ndisc neigh_ops: signal link failure
 * for @skb and free it.
 */
static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb)
{
        /*
         * Per the spec: "The sender MUST return an ICMP destination
         * unreachable".
         */
        dst_link_failure(skb);

        kfree_skb(skb);
}
634
635 /* Called with locked neigh: either read or both */
636
637 static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb)
638 {
639         struct in6_addr *saddr = NULL;
640         struct in6_addr mcaddr;
641         struct net_device *dev = neigh->dev;
642         struct in6_addr *target = (struct in6_addr *)&neigh->primary_key;
643         int probes = atomic_read(&neigh->probes);
644
645         if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1))
646                 saddr = &ipv6_hdr(skb)->saddr;
647
648         if ((probes -= neigh->parms->ucast_probes) < 0) {
649                 if (!(neigh->nud_state & NUD_VALID)) {
650                         ND_PRINTK1(KERN_DEBUG
651                                    "%s(): trying to ucast probe in NUD_INVALID: "
652                                    NIP6_FMT "\n",
653                                    __func__,
654                                    NIP6(*target));
655                 }
656                 ndisc_send_ns(dev, neigh, target, target, saddr);
657         } else if ((probes -= neigh->parms->app_probes) < 0) {
658 #ifdef CONFIG_ARPD
659                 neigh_app_ns(neigh);
660 #endif
661         } else {
662                 addrconf_addr_solict_mult(target, &mcaddr);
663                 ndisc_send_ns(dev, NULL, target, &mcaddr, saddr);
664         }
665 }
666
667 static int pndisc_is_router(const void *pkey,
668                             struct net_device *dev)
669 {
670         struct pneigh_entry *n;
671         int ret = -1;
672
673         read_lock_bh(&nd_tbl.lock);
674         n = __pneigh_lookup(&nd_tbl, dev_net(dev), pkey, dev);
675         if (n)
676                 ret = !!(n->flags & NTF_ROUTER);
677         read_unlock_bh(&nd_tbl.lock);
678
679         return ret;
680 }
681
682 static void ndisc_recv_ns(struct sk_buff *skb)
683 {
684         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
685         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
686         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
687         u8 *lladdr = NULL;
688         u32 ndoptlen = skb->tail - (skb->transport_header +
689                                     offsetof(struct nd_msg, opt));
690         struct ndisc_options ndopts;
691         struct net_device *dev = skb->dev;
692         struct inet6_ifaddr *ifp;
693         struct inet6_dev *idev = NULL;
694         struct neighbour *neigh;
695         int dad = ipv6_addr_any(saddr);
696         int inc;
697         int is_router = -1;
698
699         if (ipv6_addr_is_multicast(&msg->target)) {
700                 ND_PRINTK2(KERN_WARNING
701                            "ICMPv6 NS: multicast target address");
702                 return;
703         }
704
705         /*
706          * RFC2461 7.1.1:
707          * DAD has to be destined for solicited node multicast address.
708          */
709         if (dad &&
710             !(daddr->s6_addr32[0] == htonl(0xff020000) &&
711               daddr->s6_addr32[1] == htonl(0x00000000) &&
712               daddr->s6_addr32[2] == htonl(0x00000001) &&
713               daddr->s6_addr [12] == 0xff )) {
714                 ND_PRINTK2(KERN_WARNING
715                            "ICMPv6 NS: bad DAD packet (wrong destination)\n");
716                 return;
717         }
718
719         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
720                 ND_PRINTK2(KERN_WARNING
721                            "ICMPv6 NS: invalid ND options\n");
722                 return;
723         }
724
725         if (ndopts.nd_opts_src_lladdr) {
726                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
727                 if (!lladdr) {
728                         ND_PRINTK2(KERN_WARNING
729                                    "ICMPv6 NS: invalid link-layer address length\n");
730                         return;
731                 }
732
733                 /* RFC2461 7.1.1:
734                  *      If the IP source address is the unspecified address,
735                  *      there MUST NOT be source link-layer address option
736                  *      in the message.
737                  */
738                 if (dad) {
739                         ND_PRINTK2(KERN_WARNING
740                                    "ICMPv6 NS: bad DAD packet (link-layer address option)\n");
741                         return;
742                 }
743         }
744
745         inc = ipv6_addr_is_multicast(daddr);
746
747         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
748         if (ifp) {
749
750                 if (ifp->flags & (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) {
751                         if (dad) {
752                                 if (dev->type == ARPHRD_IEEE802_TR) {
753                                         const unsigned char *sadr;
754                                         sadr = skb_mac_header(skb);
755                                         if (((sadr[8] ^ dev->dev_addr[0]) & 0x7f) == 0 &&
756                                             sadr[9] == dev->dev_addr[1] &&
757                                             sadr[10] == dev->dev_addr[2] &&
758                                             sadr[11] == dev->dev_addr[3] &&
759                                             sadr[12] == dev->dev_addr[4] &&
760                                             sadr[13] == dev->dev_addr[5]) {
761                                                 /* looped-back to us */
762                                                 goto out;
763                                         }
764                                 }
765
766                                 /*
767                                  * We are colliding with another node
768                                  * who is doing DAD
769                                  * so fail our DAD process
770                                  */
771                                 addrconf_dad_failure(ifp);
772                                 return;
773                         } else {
774                                 /*
775                                  * This is not a dad solicitation.
776                                  * If we are an optimistic node,
777                                  * we should respond.
778                                  * Otherwise, we should ignore it.
779                                  */
780                                 if (!(ifp->flags & IFA_F_OPTIMISTIC))
781                                         goto out;
782                         }
783                 }
784
785                 idev = ifp->idev;
786         } else {
787                 struct net *net = dev_net(dev);
788
789                 idev = in6_dev_get(dev);
790                 if (!idev) {
791                         /* XXX: count this drop? */
792                         return;
793                 }
794
795                 if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
796                     (idev->cnf.forwarding &&
797                      (net->ipv6.devconf_all->proxy_ndp || idev->cnf.proxy_ndp) &&
798                      (is_router = pndisc_is_router(&msg->target, dev)) >= 0)) {
799                         if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) &&
800                             skb->pkt_type != PACKET_HOST &&
801                             inc != 0 &&
802                             idev->nd_parms->proxy_delay != 0) {
803                                 /*
804                                  * for anycast or proxy,
805                                  * sender should delay its response
806                                  * by a random time between 0 and
807                                  * MAX_ANYCAST_DELAY_TIME seconds.
808                                  * (RFC2461) -- yoshfuji
809                                  */
810                                 struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
811                                 if (n)
812                                         pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
813                                 goto out;
814                         }
815                 } else
816                         goto out;
817         }
818
819         if (is_router < 0)
820                 is_router = !!idev->cnf.forwarding;
821
822         if (dad) {
823                 ndisc_send_na(dev, NULL, &in6addr_linklocal_allnodes, &msg->target,
824                               is_router, 0, (ifp != NULL), 1);
825                 goto out;
826         }
827
828         if (inc)
829                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_mcast);
830         else
831                 NEIGH_CACHE_STAT_INC(&nd_tbl, rcv_probes_ucast);
832
833         /*
834          *      update / create cache entry
835          *      for the source address
836          */
837         neigh = __neigh_lookup(&nd_tbl, saddr, dev,
838                                !inc || lladdr || !dev->addr_len);
839         if (neigh)
840                 neigh_update(neigh, lladdr, NUD_STALE,
841                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
842                              NEIGH_UPDATE_F_OVERRIDE);
843         if (neigh || !dev->header_ops) {
844                 ndisc_send_na(dev, neigh, saddr, &msg->target,
845                               is_router,
846                               1, (ifp != NULL && inc), inc);
847                 if (neigh)
848                         neigh_release(neigh);
849         }
850
851 out:
852         if (ifp)
853                 in6_ifa_put(ifp);
854         else
855                 in6_dev_put(idev);
856
857         return;
858 }
859
860 static void ndisc_recv_na(struct sk_buff *skb)
861 {
862         struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
863         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
864         struct in6_addr *daddr = &ipv6_hdr(skb)->daddr;
865         u8 *lladdr = NULL;
866         u32 ndoptlen = skb->tail - (skb->transport_header +
867                                     offsetof(struct nd_msg, opt));
868         struct ndisc_options ndopts;
869         struct net_device *dev = skb->dev;
870         struct inet6_ifaddr *ifp;
871         struct neighbour *neigh;
872
873         if (skb->len < sizeof(struct nd_msg)) {
874                 ND_PRINTK2(KERN_WARNING
875                            "ICMPv6 NA: packet too short\n");
876                 return;
877         }
878
879         if (ipv6_addr_is_multicast(&msg->target)) {
880                 ND_PRINTK2(KERN_WARNING
881                            "ICMPv6 NA: target address is multicast.\n");
882                 return;
883         }
884
885         if (ipv6_addr_is_multicast(daddr) &&
886             msg->icmph.icmp6_solicited) {
887                 ND_PRINTK2(KERN_WARNING
888                            "ICMPv6 NA: solicited NA is multicasted.\n");
889                 return;
890         }
891
892         if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
893                 ND_PRINTK2(KERN_WARNING
894                            "ICMPv6 NS: invalid ND option\n");
895                 return;
896         }
897         if (ndopts.nd_opts_tgt_lladdr) {
898                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
899                 if (!lladdr) {
900                         ND_PRINTK2(KERN_WARNING
901                                    "ICMPv6 NA: invalid link-layer address length\n");
902                         return;
903                 }
904         }
905         ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
906         if (ifp) {
907                 if (ifp->flags & IFA_F_TENTATIVE) {
908                         addrconf_dad_failure(ifp);
909                         return;
910                 }
911                 /* What should we make now? The advertisement
912                    is invalid, but ndisc specs say nothing
913                    about it. It could be misconfiguration, or
914                    an smart proxy agent tries to help us :-)
915                  */
916                 ND_PRINTK1(KERN_WARNING
917                            "ICMPv6 NA: someone advertises our address on %s!\n",
918                            ifp->idev->dev->name);
919                 in6_ifa_put(ifp);
920                 return;
921         }
922         neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
923
924         if (neigh) {
925                 u8 old_flags = neigh->flags;
926                 struct net *net = dev_net(dev);
927
928                 if (neigh->nud_state & NUD_FAILED)
929                         goto out;
930
931                 /*
932                  * Don't update the neighbor cache entry on a proxy NA from
933                  * ourselves because either the proxied node is off link or it
934                  * has already sent a NA to us.
935                  */
936                 if (lladdr && !memcmp(lladdr, dev->dev_addr, dev->addr_len) &&
937                     net->ipv6.devconf_all->forwarding && net->ipv6.devconf_all->proxy_ndp &&
938                     pneigh_lookup(&nd_tbl, net, &msg->target, dev, 0)) {
939                         /* XXX: idev->cnf.prixy_ndp */
940                         goto out;
941                 }
942
943                 neigh_update(neigh, lladdr,
944                              msg->icmph.icmp6_solicited ? NUD_REACHABLE : NUD_STALE,
945                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
946                              (msg->icmph.icmp6_override ? NEIGH_UPDATE_F_OVERRIDE : 0)|
947                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
948                              (msg->icmph.icmp6_router ? NEIGH_UPDATE_F_ISROUTER : 0));
949
950                 if ((old_flags & ~neigh->flags) & NTF_ROUTER) {
951                         /*
952                          * Change: router to host
953                          */
954                         struct rt6_info *rt;
955                         rt = rt6_get_dflt_router(saddr, dev);
956                         if (rt)
957                                 ip6_del_rt(rt);
958                 }
959
960 out:
961                 neigh_release(neigh);
962         }
963 }
964
965 static void ndisc_recv_rs(struct sk_buff *skb)
966 {
967         struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
968         unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
969         struct neighbour *neigh;
970         struct inet6_dev *idev;
971         struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
972         struct ndisc_options ndopts;
973         u8 *lladdr = NULL;
974
975         if (skb->len < sizeof(*rs_msg))
976                 return;
977
978         idev = in6_dev_get(skb->dev);
979         if (!idev) {
980                 if (net_ratelimit())
981                         ND_PRINTK1("ICMP6 RS: can't find in6 device\n");
982                 return;
983         }
984
985         /* Don't accept RS if we're not in router mode */
986         if (!idev->cnf.forwarding)
987                 goto out;
988
989         /*
990          * Don't update NCE if src = ::;
991          * this implies that the source node has no ip address assigned yet.
992          */
993         if (ipv6_addr_any(saddr))
994                 goto out;
995
996         /* Parse ND options */
997         if (!ndisc_parse_options(rs_msg->opt, ndoptlen, &ndopts)) {
998                 if (net_ratelimit())
999                         ND_PRINTK2("ICMP6 NS: invalid ND option, ignored\n");
1000                 goto out;
1001         }
1002
1003         if (ndopts.nd_opts_src_lladdr) {
1004                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1005                                              skb->dev);
1006                 if (!lladdr)
1007                         goto out;
1008         }
1009
1010         neigh = __neigh_lookup(&nd_tbl, saddr, skb->dev, 1);
1011         if (neigh) {
1012                 neigh_update(neigh, lladdr, NUD_STALE,
1013                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1014                              NEIGH_UPDATE_F_OVERRIDE|
1015                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER);
1016                 neigh_release(neigh);
1017         }
1018 out:
1019         in6_dev_put(idev);
1020 }
1021
1022 static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
1023 {
1024         struct icmp6hdr *icmp6h = (struct icmp6hdr *)skb_transport_header(ra);
1025         struct sk_buff *skb;
1026         struct nlmsghdr *nlh;
1027         struct nduseroptmsg *ndmsg;
1028         struct net *net = dev_net(ra->dev);
1029         int err;
1030         int base_size = NLMSG_ALIGN(sizeof(struct nduseroptmsg)
1031                                     + (opt->nd_opt_len << 3));
1032         size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr));
1033
1034         skb = nlmsg_new(msg_size, GFP_ATOMIC);
1035         if (skb == NULL) {
1036                 err = -ENOBUFS;
1037                 goto errout;
1038         }
1039
1040         nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0);
1041         if (nlh == NULL) {
1042                 goto nla_put_failure;
1043         }
1044
1045         ndmsg = nlmsg_data(nlh);
1046         ndmsg->nduseropt_family = AF_INET6;
1047         ndmsg->nduseropt_ifindex = ra->dev->ifindex;
1048         ndmsg->nduseropt_icmp_type = icmp6h->icmp6_type;
1049         ndmsg->nduseropt_icmp_code = icmp6h->icmp6_code;
1050         ndmsg->nduseropt_opts_len = opt->nd_opt_len << 3;
1051
1052         memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3);
1053
1054         NLA_PUT(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr),
1055                 &ipv6_hdr(ra)->saddr);
1056         nlmsg_end(skb, nlh);
1057
1058         err = rtnl_notify(skb, net, 0, RTNLGRP_ND_USEROPT, NULL,
1059                           GFP_ATOMIC);
1060         if (err < 0)
1061                 goto errout;
1062
1063         return;
1064
1065 nla_put_failure:
1066         nlmsg_free(skb);
1067         err = -EMSGSIZE;
1068 errout:
1069         rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
1070 }
1071
1072 static void ndisc_router_discovery(struct sk_buff *skb)
1073 {
1074         struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
1075         struct neighbour *neigh = NULL;
1076         struct inet6_dev *in6_dev;
1077         struct rt6_info *rt = NULL;
1078         int lifetime;
1079         struct ndisc_options ndopts;
1080         int optlen;
1081         unsigned int pref = 0;
1082
1083         __u8 * opt = (__u8 *)(ra_msg + 1);
1084
1085         optlen = (skb->tail - skb->transport_header) - sizeof(struct ra_msg);
1086
1087         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1088                 ND_PRINTK2(KERN_WARNING
1089                            "ICMPv6 RA: source address is not link-local.\n");
1090                 return;
1091         }
1092         if (optlen < 0) {
1093                 ND_PRINTK2(KERN_WARNING
1094                            "ICMPv6 RA: packet too short\n");
1095                 return;
1096         }
1097
1098 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1099         if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
1100                 ND_PRINTK2(KERN_WARNING
1101                            "ICMPv6 RA: from host or unauthorized router\n");
1102                 return;
1103         }
1104 #endif
1105
1106         /*
1107          *      set the RA_RECV flag in the interface
1108          */
1109
1110         in6_dev = in6_dev_get(skb->dev);
1111         if (in6_dev == NULL) {
1112                 ND_PRINTK0(KERN_ERR
1113                            "ICMPv6 RA: can't find inet6 device for %s.\n",
1114                            skb->dev->name);
1115                 return;
1116         }
1117         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_ra) {
1118                 in6_dev_put(in6_dev);
1119                 return;
1120         }
1121
1122         if (!ndisc_parse_options(opt, optlen, &ndopts)) {
1123                 in6_dev_put(in6_dev);
1124                 ND_PRINTK2(KERN_WARNING
1125                            "ICMP6 RA: invalid ND options\n");
1126                 return;
1127         }
1128
1129 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1130         /* skip link-specific parameters from interior routers */
1131         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1132                 goto skip_linkparms;
1133 #endif
1134
1135         if (in6_dev->if_flags & IF_RS_SENT) {
1136                 /*
1137                  *      flag that an RA was received after an RS was sent
1138                  *      out on this interface.
1139                  */
1140                 in6_dev->if_flags |= IF_RA_RCVD;
1141         }
1142
1143         /*
1144          * Remember the managed/otherconf flags from most recently
1145          * received RA message (RFC 2462) -- yoshfuji
1146          */
1147         in6_dev->if_flags = (in6_dev->if_flags & ~(IF_RA_MANAGED |
1148                                 IF_RA_OTHERCONF)) |
1149                                 (ra_msg->icmph.icmp6_addrconf_managed ?
1150                                         IF_RA_MANAGED : 0) |
1151                                 (ra_msg->icmph.icmp6_addrconf_other ?
1152                                         IF_RA_OTHERCONF : 0);
1153
1154         if (!in6_dev->cnf.accept_ra_defrtr)
1155                 goto skip_defrtr;
1156
1157         lifetime = ntohs(ra_msg->icmph.icmp6_rt_lifetime);
1158
1159 #ifdef CONFIG_IPV6_ROUTER_PREF
1160         pref = ra_msg->icmph.icmp6_router_pref;
1161         /* 10b is handled as if it were 00b (medium) */
1162         if (pref == ICMPV6_ROUTER_PREF_INVALID ||
1163             !in6_dev->cnf.accept_ra_rtr_pref)
1164                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
1165 #endif
1166
1167         rt = rt6_get_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev);
1168
1169         if (rt)
1170                 neigh = rt->rt6i_nexthop;
1171
1172         if (rt && lifetime == 0) {
1173                 neigh_clone(neigh);
1174                 ip6_del_rt(rt);
1175                 rt = NULL;
1176         }
1177
1178         if (rt == NULL && lifetime) {
1179                 ND_PRINTK3(KERN_DEBUG
1180                            "ICMPv6 RA: adding default router.\n");
1181
1182                 rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref);
1183                 if (rt == NULL) {
1184                         ND_PRINTK0(KERN_ERR
1185                                    "ICMPv6 RA: %s() failed to add default route.\n",
1186                                    __func__);
1187                         in6_dev_put(in6_dev);
1188                         return;
1189                 }
1190
1191                 neigh = rt->rt6i_nexthop;
1192                 if (neigh == NULL) {
1193                         ND_PRINTK0(KERN_ERR
1194                                    "ICMPv6 RA: %s() got default router without neighbour.\n",
1195                                    __func__);
1196                         dst_release(&rt->u.dst);
1197                         in6_dev_put(in6_dev);
1198                         return;
1199                 }
1200                 neigh->flags |= NTF_ROUTER;
1201         } else if (rt) {
1202                 rt->rt6i_flags |= (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
1203         }
1204
1205         if (rt)
1206                 rt->rt6i_expires = jiffies + (HZ * lifetime);
1207
1208         if (ra_msg->icmph.icmp6_hop_limit) {
1209                 in6_dev->cnf.hop_limit = ra_msg->icmph.icmp6_hop_limit;
1210                 if (rt)
1211                         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ra_msg->icmph.icmp6_hop_limit;
1212         }
1213
1214 skip_defrtr:
1215
1216         /*
1217          *      Update Reachable Time and Retrans Timer
1218          */
1219
1220         if (in6_dev->nd_parms) {
1221                 unsigned long rtime = ntohl(ra_msg->retrans_timer);
1222
1223                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/HZ) {
1224                         rtime = (rtime*HZ)/1000;
1225                         if (rtime < HZ/10)
1226                                 rtime = HZ/10;
1227                         in6_dev->nd_parms->retrans_time = rtime;
1228                         in6_dev->tstamp = jiffies;
1229                         inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1230                 }
1231
1232                 rtime = ntohl(ra_msg->reachable_time);
1233                 if (rtime && rtime/1000 < MAX_SCHEDULE_TIMEOUT/(3*HZ)) {
1234                         rtime = (rtime*HZ)/1000;
1235
1236                         if (rtime < HZ/10)
1237                                 rtime = HZ/10;
1238
1239                         if (rtime != in6_dev->nd_parms->base_reachable_time) {
1240                                 in6_dev->nd_parms->base_reachable_time = rtime;
1241                                 in6_dev->nd_parms->gc_staletime = 3 * rtime;
1242                                 in6_dev->nd_parms->reachable_time = neigh_rand_reach_time(rtime);
1243                                 in6_dev->tstamp = jiffies;
1244                                 inet6_ifinfo_notify(RTM_NEWLINK, in6_dev);
1245                         }
1246                 }
1247         }
1248
1249 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1250 skip_linkparms:
1251 #endif
1252
1253         /*
1254          *      Process options.
1255          */
1256
1257         if (!neigh)
1258                 neigh = __neigh_lookup(&nd_tbl, &ipv6_hdr(skb)->saddr,
1259                                        skb->dev, 1);
1260         if (neigh) {
1261                 u8 *lladdr = NULL;
1262                 if (ndopts.nd_opts_src_lladdr) {
1263                         lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
1264                                                      skb->dev);
1265                         if (!lladdr) {
1266                                 ND_PRINTK2(KERN_WARNING
1267                                            "ICMPv6 RA: invalid link-layer address length\n");
1268                                 goto out;
1269                         }
1270                 }
1271                 neigh_update(neigh, lladdr, NUD_STALE,
1272                              NEIGH_UPDATE_F_WEAK_OVERRIDE|
1273                              NEIGH_UPDATE_F_OVERRIDE|
1274                              NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1275                              NEIGH_UPDATE_F_ISROUTER);
1276         }
1277
1278 #ifdef CONFIG_IPV6_ROUTE_INFO
1279         if (in6_dev->cnf.accept_ra_rtr_pref && ndopts.nd_opts_ri) {
1280                 struct nd_opt_hdr *p;
1281                 for (p = ndopts.nd_opts_ri;
1282                      p;
1283                      p = ndisc_next_option(p, ndopts.nd_opts_ri_end)) {
1284                         struct route_info *ri = (struct route_info *)p;
1285 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1286                         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT &&
1287                             ri->prefix_len == 0)
1288                                 continue;
1289 #endif
1290                         if (ri->prefix_len > in6_dev->cnf.accept_ra_rt_info_max_plen)
1291                                 continue;
1292                         rt6_route_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3,
1293                                       &ipv6_hdr(skb)->saddr);
1294                 }
1295         }
1296 #endif
1297
1298 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1299         /* skip link-specific ndopts from interior routers */
1300         if (skb->ndisc_nodetype == NDISC_NODETYPE_NODEFAULT)
1301                 goto out;
1302 #endif
1303
1304         if (in6_dev->cnf.accept_ra_pinfo && ndopts.nd_opts_pi) {
1305                 struct nd_opt_hdr *p;
1306                 for (p = ndopts.nd_opts_pi;
1307                      p;
1308                      p = ndisc_next_option(p, ndopts.nd_opts_pi_end)) {
1309                         addrconf_prefix_rcv(skb->dev, (u8*)p, (p->nd_opt_len) << 3);
1310                 }
1311         }
1312
1313         if (ndopts.nd_opts_mtu) {
1314                 __be32 n;
1315                 u32 mtu;
1316
1317                 memcpy(&n, ((u8*)(ndopts.nd_opts_mtu+1))+2, sizeof(mtu));
1318                 mtu = ntohl(n);
1319
1320                 if (mtu < IPV6_MIN_MTU || mtu > skb->dev->mtu) {
1321                         ND_PRINTK2(KERN_WARNING
1322                                    "ICMPv6 RA: invalid mtu: %d\n",
1323                                    mtu);
1324                 } else if (in6_dev->cnf.mtu6 != mtu) {
1325                         in6_dev->cnf.mtu6 = mtu;
1326
1327                         if (rt)
1328                                 rt->u.dst.metrics[RTAX_MTU-1] = mtu;
1329
1330                         rt6_mtu_change(skb->dev, mtu);
1331                 }
1332         }
1333
1334         if (ndopts.nd_useropts) {
1335                 struct nd_opt_hdr *p;
1336                 for (p = ndopts.nd_useropts;
1337                      p;
1338                      p = ndisc_next_useropt(p, ndopts.nd_useropts_end)) {
1339                         ndisc_ra_useropt(skb, p);
1340                 }
1341         }
1342
1343         if (ndopts.nd_opts_tgt_lladdr || ndopts.nd_opts_rh) {
1344                 ND_PRINTK2(KERN_WARNING
1345                            "ICMPv6 RA: invalid RA options");
1346         }
1347 out:
1348         if (rt)
1349                 dst_release(&rt->u.dst);
1350         else if (neigh)
1351                 neigh_release(neigh);
1352         in6_dev_put(in6_dev);
1353 }
1354
1355 static void ndisc_redirect_rcv(struct sk_buff *skb)
1356 {
1357         struct inet6_dev *in6_dev;
1358         struct icmp6hdr *icmph;
1359         struct in6_addr *dest;
1360         struct in6_addr *target;        /* new first hop to destination */
1361         struct neighbour *neigh;
1362         int on_link = 0;
1363         struct ndisc_options ndopts;
1364         int optlen;
1365         u8 *lladdr = NULL;
1366
1367 #ifdef CONFIG_IPV6_NDISC_NODETYPE
1368         switch (skb->ndisc_nodetype) {
1369         case NDISC_NODETYPE_HOST:
1370         case NDISC_NODETYPE_NODEFAULT:
1371                 ND_PRINTK2(KERN_WARNING
1372                            "ICMPv6 Redirect: from host or unauthorized router\n");
1373                 return;
1374         }
1375 #endif
1376
1377         if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
1378                 ND_PRINTK2(KERN_WARNING
1379                            "ICMPv6 Redirect: source address is not link-local.\n");
1380                 return;
1381         }
1382
1383         optlen = skb->tail - skb->transport_header;
1384         optlen -= sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1385
1386         if (optlen < 0) {
1387                 ND_PRINTK2(KERN_WARNING
1388                            "ICMPv6 Redirect: packet too short\n");
1389                 return;
1390         }
1391
1392         icmph = icmp6_hdr(skb);
1393         target = (struct in6_addr *) (icmph + 1);
1394         dest = target + 1;
1395
1396         if (ipv6_addr_is_multicast(dest)) {
1397                 ND_PRINTK2(KERN_WARNING
1398                            "ICMPv6 Redirect: destination address is multicast.\n");
1399                 return;
1400         }
1401
1402         if (ipv6_addr_equal(dest, target)) {
1403                 on_link = 1;
1404         } else if (ipv6_addr_type(target) !=
1405                    (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1406                 ND_PRINTK2(KERN_WARNING
1407                            "ICMPv6 Redirect: target address is not link-local unicast.\n");
1408                 return;
1409         }
1410
1411         in6_dev = in6_dev_get(skb->dev);
1412         if (!in6_dev)
1413                 return;
1414         if (in6_dev->cnf.forwarding || !in6_dev->cnf.accept_redirects) {
1415                 in6_dev_put(in6_dev);
1416                 return;
1417         }
1418
1419         /* RFC2461 8.1:
1420          *      The IP source address of the Redirect MUST be the same as the current
1421          *      first-hop router for the specified ICMP Destination Address.
1422          */
1423
1424         if (!ndisc_parse_options((u8*)(dest + 1), optlen, &ndopts)) {
1425                 ND_PRINTK2(KERN_WARNING
1426                            "ICMPv6 Redirect: invalid ND options\n");
1427                 in6_dev_put(in6_dev);
1428                 return;
1429         }
1430         if (ndopts.nd_opts_tgt_lladdr) {
1431                 lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr,
1432                                              skb->dev);
1433                 if (!lladdr) {
1434                         ND_PRINTK2(KERN_WARNING
1435                                    "ICMPv6 Redirect: invalid link-layer address length\n");
1436                         in6_dev_put(in6_dev);
1437                         return;
1438                 }
1439         }
1440
1441         neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1);
1442         if (neigh) {
1443                 rt6_redirect(dest, &ipv6_hdr(skb)->daddr,
1444                              &ipv6_hdr(skb)->saddr, neigh, lladdr,
1445                              on_link);
1446                 neigh_release(neigh);
1447         }
1448         in6_dev_put(in6_dev);
1449 }
1450
1451 void ndisc_send_redirect(struct sk_buff *skb, struct neighbour *neigh,
1452                          const struct in6_addr *target)
1453 {
1454         struct net_device *dev = skb->dev;
1455         struct net *net = dev_net(dev);
1456         struct sock *sk = net->ipv6.ndisc_sk;
1457         int len = sizeof(struct icmp6hdr) + 2 * sizeof(struct in6_addr);
1458         struct sk_buff *buff;
1459         struct icmp6hdr *icmph;
1460         struct in6_addr saddr_buf;
1461         struct in6_addr *addrp;
1462         struct rt6_info *rt;
1463         struct dst_entry *dst;
1464         struct inet6_dev *idev;
1465         struct flowi fl;
1466         u8 *opt;
1467         int rd_len;
1468         int err;
1469         u8 ha_buf[MAX_ADDR_LEN], *ha = NULL;
1470
1471         if (ipv6_get_lladdr(dev, &saddr_buf, IFA_F_TENTATIVE)) {
1472                 ND_PRINTK2(KERN_WARNING
1473                            "ICMPv6 Redirect: no link-local address on %s\n",
1474                            dev->name);
1475                 return;
1476         }
1477
1478         if (!ipv6_addr_equal(&ipv6_hdr(skb)->daddr, target) &&
1479             ipv6_addr_type(target) != (IPV6_ADDR_UNICAST|IPV6_ADDR_LINKLOCAL)) {
1480                 ND_PRINTK2(KERN_WARNING
1481                         "ICMPv6 Redirect: target address is not link-local unicast.\n");
1482                 return;
1483         }
1484
1485         icmpv6_flow_init(sk, &fl, NDISC_REDIRECT,
1486                          &saddr_buf, &ipv6_hdr(skb)->saddr, dev->ifindex);
1487
1488         dst = ip6_route_output(net, NULL, &fl);
1489         if (dst == NULL)
1490                 return;
1491
1492         err = xfrm_lookup(&dst, &fl, NULL, 0);
1493         if (err)
1494                 return;
1495
1496         rt = (struct rt6_info *) dst;
1497
1498         if (rt->rt6i_flags & RTF_GATEWAY) {
1499                 ND_PRINTK2(KERN_WARNING
1500                            "ICMPv6 Redirect: destination is not a neighbour.\n");
1501                 dst_release(dst);
1502                 return;
1503         }
1504         if (!xrlim_allow(dst, 1*HZ)) {
1505                 dst_release(dst);
1506                 return;
1507         }
1508
1509         if (dev->addr_len) {
1510                 read_lock_bh(&neigh->lock);
1511                 if (neigh->nud_state & NUD_VALID) {
1512                         memcpy(ha_buf, neigh->ha, dev->addr_len);
1513                         read_unlock_bh(&neigh->lock);
1514                         ha = ha_buf;
1515                         len += ndisc_opt_addr_space(dev);
1516                 } else
1517                         read_unlock_bh(&neigh->lock);
1518         }
1519
1520         rd_len = min_t(unsigned int,
1521                      IPV6_MIN_MTU-sizeof(struct ipv6hdr)-len, skb->len + 8);
1522         rd_len &= ~0x7;
1523         len += rd_len;
1524
1525         buff = sock_alloc_send_skb(sk,
1526                                    (MAX_HEADER + sizeof(struct ipv6hdr) +
1527                                     len + LL_ALLOCATED_SPACE(dev)),
1528                                    1, &err);
1529         if (buff == NULL) {
1530                 ND_PRINTK0(KERN_ERR
1531                            "ICMPv6 Redirect: %s() failed to allocate an skb.\n",
1532                            __func__);
1533                 dst_release(dst);
1534                 return;
1535         }
1536
1537         skb_reserve(buff, LL_RESERVED_SPACE(dev));
1538         ip6_nd_hdr(sk, buff, dev, &saddr_buf, &ipv6_hdr(skb)->saddr,
1539                    IPPROTO_ICMPV6, len);
1540
1541         skb_set_transport_header(buff, skb_tail_pointer(buff) - buff->data);
1542         skb_put(buff, len);
1543         icmph = icmp6_hdr(buff);
1544
1545         memset(icmph, 0, sizeof(struct icmp6hdr));
1546         icmph->icmp6_type = NDISC_REDIRECT;
1547
1548         /*
1549          *      copy target and destination addresses
1550          */
1551
1552         addrp = (struct in6_addr *)(icmph + 1);
1553         ipv6_addr_copy(addrp, target);
1554         addrp++;
1555         ipv6_addr_copy(addrp, &ipv6_hdr(skb)->daddr);
1556
1557         opt = (u8*) (addrp + 1);
1558
1559         /*
1560          *      include target_address option
1561          */
1562
1563         if (ha)
1564                 opt = ndisc_fill_addr_option(opt, ND_OPT_TARGET_LL_ADDR, ha,
1565                                              dev->addr_len, dev->type);
1566
1567         /*
1568          *      build redirect option and copy skb over to the new packet.
1569          */
1570
1571         memset(opt, 0, 8);
1572         *(opt++) = ND_OPT_REDIRECT_HDR;
1573         *(opt++) = (rd_len >> 3);
1574         opt += 6;
1575
1576         memcpy(opt, ipv6_hdr(skb), rd_len - 8);
1577
1578         icmph->icmp6_cksum = csum_ipv6_magic(&saddr_buf, &ipv6_hdr(skb)->saddr,
1579                                              len, IPPROTO_ICMPV6,
1580                                              csum_partial((u8 *) icmph, len, 0));
1581
1582         buff->dst = dst;
1583         idev = in6_dev_get(dst->dev);
1584         IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTREQUESTS);
1585         err = NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, buff, NULL, dst->dev,
1586                       dst_output);
1587         if (!err) {
1588                 ICMP6MSGOUT_INC_STATS(net, idev, NDISC_REDIRECT);
1589                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1590         }
1591
1592         if (likely(idev != NULL))
1593                 in6_dev_put(idev);
1594 }
1595
/*
 *      Run Neighbour Solicitation processing on @skb and free it
 *      afterwards.
 *      NOTE(review): presumably the callback for solicitations that were
 *      deferred through pneigh_enqueue(); confirm where it is installed.
 */
static void pndisc_redo(struct sk_buff *skb)
{
        ndisc_recv_ns(skb);

        /* This function owns the skb, so it is released here. */
        kfree_skb(skb);
}
1601
1602 int ndisc_rcv(struct sk_buff *skb)
1603 {
1604         struct nd_msg *msg;
1605
1606         if (!pskb_may_pull(skb, skb->len))
1607                 return 0;
1608
1609         msg = (struct nd_msg *)skb_transport_header(skb);
1610
1611         __skb_push(skb, skb->data - skb_transport_header(skb));
1612
1613         if (ipv6_hdr(skb)->hop_limit != 255) {
1614                 ND_PRINTK2(KERN_WARNING
1615                            "ICMPv6 NDISC: invalid hop-limit: %d\n",
1616                            ipv6_hdr(skb)->hop_limit);
1617                 return 0;
1618         }
1619
1620         if (msg->icmph.icmp6_code != 0) {
1621                 ND_PRINTK2(KERN_WARNING
1622                            "ICMPv6 NDISC: invalid ICMPv6 code: %d\n",
1623                            msg->icmph.icmp6_code);
1624                 return 0;
1625         }
1626
1627         memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
1628
1629         switch (msg->icmph.icmp6_type) {
1630         case NDISC_NEIGHBOUR_SOLICITATION:
1631                 ndisc_recv_ns(skb);
1632                 break;
1633
1634         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1635                 ndisc_recv_na(skb);
1636                 break;
1637
1638         case NDISC_ROUTER_SOLICITATION:
1639                 ndisc_recv_rs(skb);
1640                 break;
1641
1642         case NDISC_ROUTER_ADVERTISEMENT:
1643                 ndisc_router_discovery(skb);
1644                 break;
1645
1646         case NDISC_REDIRECT:
1647                 ndisc_redirect_rcv(skb);
1648                 break;
1649         }
1650
1651         return 0;
1652 }
1653
1654 static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
1655 {
1656         struct net_device *dev = ptr;
1657         struct net *net = dev_net(dev);
1658
1659         switch (event) {
1660         case NETDEV_CHANGEADDR:
1661                 neigh_changeaddr(&nd_tbl, dev);
1662                 fib6_run_gc(~0UL, net);
1663                 break;
1664         case NETDEV_DOWN:
1665                 neigh_ifdown(&nd_tbl, dev);
1666                 fib6_run_gc(~0UL, net);
1667                 break;
1668         default:
1669                 break;
1670         }
1671
1672         return NOTIFY_DONE;
1673 }
1674
1675 static struct notifier_block ndisc_netdev_notifier = {
1676         .notifier_call = ndisc_netdev_event,
1677 };
1678
1679 #ifdef CONFIG_SYSCTL
1680 static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
1681                                          const char *func, const char *dev_name)
1682 {
1683         static char warncomm[TASK_COMM_LEN];
1684         static int warned;
1685         if (strcmp(warncomm, current->comm) && warned < 5) {
1686                 strcpy(warncomm, current->comm);
1687                 printk(KERN_WARNING
1688                         "process `%s' is using deprecated sysctl (%s) "
1689                         "net.ipv6.neigh.%s.%s; "
1690                         "Use net.ipv6.neigh.%s.%s_ms "
1691                         "instead.\n",
1692                         warncomm, func,
1693                         dev_name, ctl->procname,
1694                         dev_name, ctl->procname);
1695                 warned++;
1696         }
1697 }
1698
1699 int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, struct file * filp, void __user *buffer, size_t *lenp, loff_t *ppos)
1700 {
1701         struct net_device *dev = ctl->extra1;
1702         struct inet6_dev *idev;
1703         int ret;
1704
1705         if ((strcmp(ctl->procname, "retrans_time") == 0) ||
1706             (strcmp(ctl->procname, "base_reachable_time") == 0))
1707                 ndisc_warn_deprecated_sysctl(ctl, "syscall", dev ? dev->name : "default");
1708
1709         if (strcmp(ctl->procname, "retrans_time") == 0)
1710                 ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1711
1712         else if (strcmp(ctl->procname, "base_reachable_time") == 0)
1713                 ret = proc_dointvec_jiffies(ctl, write,
1714                                             filp, buffer, lenp, ppos);
1715
1716         else if ((strcmp(ctl->procname, "retrans_time_ms") == 0) ||
1717                  (strcmp(ctl->procname, "base_reachable_time_ms") == 0))
1718                 ret = proc_dointvec_ms_jiffies(ctl, write,
1719                                                filp, buffer, lenp, ppos);
1720         else
1721                 ret = -1;
1722
1723         if (write && ret == 0 && dev && (idev = in6_dev_get(dev)) != NULL) {
1724                 if (ctl->data == &idev->nd_parms->base_reachable_time)
1725                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1726                 idev->tstamp = jiffies;
1727                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1728                 in6_dev_put(idev);
1729         }
1730         return ret;
1731 }
1732
1733 int ndisc_ifinfo_sysctl_strategy(ctl_table *ctl,
1734                                  void __user *oldval, size_t __user *oldlenp,
1735                                  void __user *newval, size_t newlen)
1736 {
1737         struct net_device *dev = ctl->extra1;
1738         struct inet6_dev *idev;
1739         int ret;
1740
1741         if (ctl->ctl_name == NET_NEIGH_RETRANS_TIME ||
1742             ctl->ctl_name == NET_NEIGH_REACHABLE_TIME)
1743                 ndisc_warn_deprecated_sysctl(ctl, "procfs", dev ? dev->name : "default");
1744
1745         switch (ctl->ctl_name) {
1746         case NET_NEIGH_REACHABLE_TIME:
1747                 ret = sysctl_jiffies(ctl, oldval, oldlenp, newval, newlen);
1748                 break;
1749         case NET_NEIGH_RETRANS_TIME_MS:
1750         case NET_NEIGH_REACHABLE_TIME_MS:
1751                  ret = sysctl_ms_jiffies(ctl, oldval, oldlenp, newval, newlen);
1752                  break;
1753         default:
1754                 ret = 0;
1755         }
1756
1757         if (newval && newlen && ret > 0 &&
1758             dev && (idev = in6_dev_get(dev)) != NULL) {
1759                 if (ctl->ctl_name == NET_NEIGH_REACHABLE_TIME ||
1760                     ctl->ctl_name == NET_NEIGH_REACHABLE_TIME_MS)
1761                         idev->nd_parms->reachable_time = neigh_rand_reach_time(idev->nd_parms->base_reachable_time);
1762                 idev->tstamp = jiffies;
1763                 inet6_ifinfo_notify(RTM_NEWLINK, idev);
1764                 in6_dev_put(idev);
1765         }
1766
1767         return ret;
1768 }
1769
1770 #endif
1771
1772 static int ndisc_net_init(struct net *net)
1773 {
1774         struct ipv6_pinfo *np;
1775         struct sock *sk;
1776         int err;
1777
1778         err = inet_ctl_sock_create(&sk, PF_INET6,
1779                                    SOCK_RAW, IPPROTO_ICMPV6, net);
1780         if (err < 0) {
1781                 ND_PRINTK0(KERN_ERR
1782                            "ICMPv6 NDISC: Failed to initialize the control socket (err %d).\n",
1783                            err);
1784                 return err;
1785         }
1786
1787         net->ipv6.ndisc_sk = sk;
1788
1789         np = inet6_sk(sk);
1790         np->hop_limit = 255;
1791         /* Do not loopback ndisc messages */
1792         np->mc_loop = 0;
1793
1794         return 0;
1795 }
1796
1797 static void ndisc_net_exit(struct net *net)
1798 {
1799         inet_ctl_sock_destroy(net->ipv6.ndisc_sk);
1800 }
1801
1802 static struct pernet_operations ndisc_net_ops = {
1803         .init = ndisc_net_init,
1804         .exit = ndisc_net_exit,
1805 };
1806
1807 int __init ndisc_init(void)
1808 {
1809         int err;
1810
1811         err = register_pernet_subsys(&ndisc_net_ops);
1812         if (err)
1813                 return err;
1814         /*
1815          * Initialize the neighbour table
1816          */
1817         neigh_table_init(&nd_tbl);
1818
1819 #ifdef CONFIG_SYSCTL
1820         err = neigh_sysctl_register(NULL, &nd_tbl.parms, NET_IPV6,
1821                                     NET_IPV6_NEIGH, "ipv6",
1822                                     &ndisc_ifinfo_sysctl_change,
1823                                     &ndisc_ifinfo_sysctl_strategy);
1824         if (err)
1825                 goto out_unregister_pernet;
1826 #endif
1827         err = register_netdevice_notifier(&ndisc_netdev_notifier);
1828         if (err)
1829                 goto out_unregister_sysctl;
1830 out:
1831         return err;
1832
1833 out_unregister_sysctl:
1834 #ifdef CONFIG_SYSCTL
1835         neigh_sysctl_unregister(&nd_tbl.parms);
1836 out_unregister_pernet:
1837 #endif
1838         unregister_pernet_subsys(&ndisc_net_ops);
1839         goto out;
1840 }
1841
1842 void ndisc_cleanup(void)
1843 {
1844         unregister_netdevice_notifier(&ndisc_netdev_notifier);
1845 #ifdef CONFIG_SYSCTL
1846         neigh_sysctl_unregister(&nd_tbl.parms);
1847 #endif
1848         neigh_table_clear(&nd_tbl);
1849         unregister_pernet_subsys(&ndisc_net_ops);
1850 }