a7030fed1a18bdc2decf6dd75724c24a1dac3572
[linux-2.6.git] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
41
42 #ifdef  CONFIG_PROC_FS
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
45 #endif
46
47 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
53 #include <net/tcp.h>
54 #include <linux/rtnetlink.h>
55 #include <net/dst.h>
56 #include <net/xfrm.h>
57
58 #include <asm/uaccess.h>
59
60 #ifdef CONFIG_SYSCTL
61 #include <linux/sysctl.h>
62 #endif
63
/*
 * Compile-time debug level for this file.  RDBG() and RT6_TRACE() expand
 * to printk() calls only when RT6_DEBUG is 3 or higher; at the level set
 * here they expand to nothing / an empty statement.
 */
64 /* Set to 3 to get tracing. */
65 #define RT6_DEBUG 2
66
67 #if RT6_DEBUG >= 3
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
70 #else
71 #define RDBG(x)
72 #define RT6_TRACE(x...) do { ; } while (0)
73 #endif
74
/*
 * Guards the rt6_alloc_clone() branch in ip6_route_input() and
 * ip6_route_output(); with 0 those paths skip cloning and use the
 * selected route directly.
 */
75 #define CLONE_OFFLINK_ROUTE 0
76
/*
 * Bits OR-ed into the "strict" argument of rt6_select()/rt6_score_route():
 *   _IFACE     - reject routes whose device check (rt6_check_dev) fails
 *   _REACHABLE - reject routes whose neighbour check (rt6_check_neigh)
 *                fails; also enables the round-robin step in rt6_select()
 */
77 #define RT6_SELECT_F_IFACE      0x1
78 #define RT6_SELECT_F_REACHABLE  0x2
79
/*
 * Sizing and garbage-collection tunables.  Values written as multiples of
 * HZ are in jiffies.  ip6_rt_max_size, ip6_rt_gc_min_interval,
 * ip6_rt_gc_timeout and ip6_rt_gc_elasticity are consumed by ip6_dst_gc();
 * ip6_rt_min_advmss is the lower bound applied by ipv6_advmss().
 * ip6_rt_gc_interval is the only one with external linkage.
 */
80 static int ip6_rt_max_size = 4096;
81 static int ip6_rt_gc_min_interval = HZ / 2;
82 static int ip6_rt_gc_timeout = 60*HZ;
83 int ip6_rt_gc_interval = 30*HZ;
84 static int ip6_rt_gc_elasticity = 9;
85 static int ip6_rt_mtu_expires = 10*60*HZ;
86 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87
/*
 * Forward declarations.  ip6_dst_check, ip6_negative_advice,
 * ip6_dst_destroy, ip6_dst_ifdown, ip6_dst_gc, ip6_link_failure and
 * ip6_rt_update_pmtu are installed as callbacks in ip6_dst_ops below;
 * the two ip6_pkt_discard* handlers are installed in ip6_null_entry.
 */
88 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
89 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
90 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
91 static void             ip6_dst_destroy(struct dst_entry *);
92 static void             ip6_dst_ifdown(struct dst_entry *,
93                                        struct net_device *dev, int how);
94 static int               ip6_dst_gc(void);
95
96 static int              ip6_pkt_discard(struct sk_buff *skb);
97 static int              ip6_pkt_discard_out(struct sk_buff *skb);
98 static void             ip6_link_failure(struct sk_buff *skb);
99 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100
/*
 * Operations table passed to dst_alloc() for every route allocated by
 * ip6_dst_alloc().  Its entry counter and gc_thresh are also read by
 * ip6_dst_gc() to decide how aggressively to collect.
 */
101 static struct dst_ops ip6_dst_ops = {
102         .family                 =       AF_INET6,
103         .protocol               =       __constant_htons(ETH_P_IPV6),
104         .gc                     =       ip6_dst_gc,
105         .gc_thresh              =       1024,
106         .check                  =       ip6_dst_check,
107         .destroy                =       ip6_dst_destroy,
108         .ifdown                 =       ip6_dst_ifdown,
109         .negative_advice        =       ip6_negative_advice,
110         .link_failure           =       ip6_link_failure,
111         .update_pmtu            =       ip6_rt_update_pmtu,
112         .entry_size             =       sizeof(struct rt6_info),
113 };
114
/*
 * Statically allocated "no route" sentinel.  rt6_select() and
 * rt6_device_match() return its address when nothing usable is found, and
 * callers compare against &ip6_null_entry to detect that case.  It is a
 * reject route bound to the loopback device: dst.error is -ENETUNREACH and
 * both packet handlers are the discard functions.  Reference counts start
 * at 1 and the metric is the largest u32 value.
 */
115 struct rt6_info ip6_null_entry = {
116         .u = {
117                 .dst = {
118                         .__refcnt       = ATOMIC_INIT(1),
119                         .__use          = 1,
120                         .dev            = &loopback_dev,
121                         .obsolete       = -1,
122                         .error          = -ENETUNREACH,
123                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
124                         .input          = ip6_pkt_discard,
125                         .output         = ip6_pkt_discard_out,
126                         .ops            = &ip6_dst_ops,
127                         .path           = (struct dst_entry*)&ip6_null_entry,
128                 }
129         },
130         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
131         .rt6i_metric    = ~(u32) 0,
132         .rt6i_ref       = ATOMIC_INIT(1),
133 };
134
/*
 * Root node of the routing tree searched by fib6_lookup() and modified by
 * fib6_add() in this file.  Its leaf initially points at ip6_null_entry.
 */
135 struct fib6_node ip6_routing_table = {
136         .leaf           = &ip6_null_entry,
137         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
138 };
139
/*
 * rt6_lock: lookups in this file take it with read_lock_bh(); insertion,
 * deletion and ndisc_dst_gc_list updates take it with write_lock_bh().
 */
140 /* Protects all the ip6 fib */
141
142 DEFINE_RWLOCK(rt6_lock);
143
144
145 /* allocate dst with ip6_dst_ops */
146 static __inline__ struct rt6_info *ip6_dst_alloc(void)
147 {
148         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
149 }
150
151 static void ip6_dst_destroy(struct dst_entry *dst)
152 {
153         struct rt6_info *rt = (struct rt6_info *)dst;
154         struct inet6_dev *idev = rt->rt6i_idev;
155
156         if (idev != NULL) {
157                 rt->rt6i_idev = NULL;
158                 in6_dev_put(idev);
159         }       
160 }
161
162 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
163                            int how)
164 {
165         struct rt6_info *rt = (struct rt6_info *)dst;
166         struct inet6_dev *idev = rt->rt6i_idev;
167
168         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
169                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
170                 if (loopback_idev != NULL) {
171                         rt->rt6i_idev = loopback_idev;
172                         in6_dev_put(idev);
173                 }
174         }
175 }
176
177 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
178 {
179         return (rt->rt6i_flags & RTF_EXPIRES &&
180                 time_after(jiffies, rt->rt6i_expires));
181 }
182
183 /*
184  *      Route lookup. Any rt6_lock is implied.
185  */
186
187 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
188                                                     int oif,
189                                                     int strict)
190 {
191         struct rt6_info *local = NULL;
192         struct rt6_info *sprt;
193
194         if (oif) {
195                 for (sprt = rt; sprt; sprt = sprt->u.next) {
196                         struct net_device *dev = sprt->rt6i_dev;
197                         if (dev->ifindex == oif)
198                                 return sprt;
199                         if (dev->flags & IFF_LOOPBACK) {
200                                 if (sprt->rt6i_idev == NULL ||
201                                     sprt->rt6i_idev->dev->ifindex != oif) {
202                                         if (strict && oif)
203                                                 continue;
204                                         if (local && (!oif || 
205                                                       local->rt6i_idev->dev->ifindex == oif))
206                                                 continue;
207                                 }
208                                 local = sprt;
209                         }
210                 }
211
212                 if (local)
213                         return local;
214
215                 if (strict)
216                         return &ip6_null_entry;
217         }
218         return rt;
219 }
220
221 /*
222  * Default Router Selection (RFC 2461 6.3.6)
223  */
224 static int inline rt6_check_dev(struct rt6_info *rt, int oif)
225 {
226         struct net_device *dev = rt->rt6i_dev;
227         if (!oif || dev->ifindex == oif)
228                 return 2;
229         if ((dev->flags & IFF_LOOPBACK) &&
230             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
231                 return 1;
232         return 0;
233 }
234
235 static int inline rt6_check_neigh(struct rt6_info *rt)
236 {
237         struct neighbour *neigh = rt->rt6i_nexthop;
238         int m = 0;
239         if (neigh) {
240                 read_lock_bh(&neigh->lock);
241                 if (neigh->nud_state & NUD_VALID)
242                         m = 1;
243                 read_unlock_bh(&neigh->lock);
244         }
245         return m;
246 }
247
248 static int rt6_score_route(struct rt6_info *rt, int oif,
249                            int strict)
250 {
251         int m = rt6_check_dev(rt, oif);
252         if (!m && (strict & RT6_SELECT_F_IFACE))
253                 return -1;
254 #ifdef CONFIG_IPV6_ROUTER_PREF
255         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
256 #endif
257         if (rt6_check_neigh(rt))
258                 m |= 16;
259         else if (strict & RT6_SELECT_F_REACHABLE)
260                 return -1;
261         return m;
262 }
263
264 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
265                                    int strict)
266 {
267         struct rt6_info *match = NULL, *last = NULL;
268         struct rt6_info *rt, *rt0 = *head;
269         u32 metric;
270         int mpri = -1;
271
272         RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
273                   __FUNCTION__, head, head ? *head : NULL, oif);
274
275         for (rt = rt0, metric = rt0->rt6i_metric;
276              rt && rt->rt6i_metric == metric;
277              rt = rt->u.next) {
278                 int m;
279
280                 if (rt6_check_expired(rt))
281                         continue;
282
283                 last = rt;
284
285                 m = rt6_score_route(rt, oif, strict);
286                 if (m < 0)
287                         continue;
288
289                 if (m > mpri) {
290                         match = rt;
291                         mpri = m;
292                 }
293         }
294
295         if (!match &&
296             (strict & RT6_SELECT_F_REACHABLE) &&
297             last && last != rt0) {
298                 /* no entries matched; do round-robin */
299                 *head = rt0->u.next;
300                 rt0->u.next = last->u.next;
301                 last->u.next = rt0;
302         }
303
304         RT6_TRACE("%s() => %p, score=%d\n",
305                   __FUNCTION__, match, mpri);
306
307         return (match ? match : &ip6_null_entry);
308 }
309
310 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
311                             int oif, int strict)
312 {
313         struct fib6_node *fn;
314         struct rt6_info *rt;
315
316         read_lock_bh(&rt6_lock);
317         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
318         rt = rt6_device_match(fn->leaf, oif, strict);
319         dst_hold(&rt->u.dst);
320         rt->u.dst.__use++;
321         read_unlock_bh(&rt6_lock);
322
323         rt->u.dst.lastuse = jiffies;
324         if (rt->u.dst.error == 0)
325                 return rt;
326         dst_release(&rt->u.dst);
327         return NULL;
328 }
329
330 /* ip6_ins_rt is called with FREE rt6_lock.
331    It takes new route entry, the addition fails by any reason the
332    route is freed. In any case, if caller does not hold it, it may
333    be destroyed.
334  */
335
336 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
337                 void *_rtattr, struct netlink_skb_parms *req)
338 {
339         int err;
340
341         write_lock_bh(&rt6_lock);
342         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
343         write_unlock_bh(&rt6_lock);
344
345         return err;
346 }
347
348 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
349                                       struct in6_addr *saddr)
350 {
351         struct rt6_info *rt;
352
353         /*
354          *      Clone the route.
355          */
356
357         rt = ip6_rt_copy(ort);
358
359         if (rt) {
360                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
361                         if (rt->rt6i_dst.plen != 128 &&
362                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
363                                 rt->rt6i_flags |= RTF_ANYCAST;
364                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
365                 }
366
367                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
368                 rt->rt6i_dst.plen = 128;
369                 rt->rt6i_flags |= RTF_CACHE;
370                 rt->u.dst.flags |= DST_HOST;
371
372 #ifdef CONFIG_IPV6_SUBTREES
373                 if (rt->rt6i_src.plen && saddr) {
374                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
375                         rt->rt6i_src.plen = 128;
376                 }
377 #endif
378
379                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
380
381         }
382
383         return rt;
384 }
385
386 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
387 {
388         struct rt6_info *rt = ip6_rt_copy(ort);
389         if (rt) {
390                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
391                 rt->rt6i_dst.plen = 128;
392                 rt->rt6i_flags |= RTF_CACHE;
393                 if (rt->rt6i_flags & RTF_REJECT)
394                         rt->u.dst.error = ort->u.dst.error;
395                 rt->u.dst.flags |= DST_HOST;
396                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
397         }
398         return rt;
399 }
400
/*
 * BACKTRACK() - climb the fib tree when selection at the current node
 * produced only ip6_null_entry.
 *
 * Relies on the enclosing function providing the locals "rt" and "fn" and
 * the labels "restart" and "out".  Walking fn->parent upward:
 *   - reaching a node flagged RTN_ROOT jumps to "out";
 *   - reaching a node flagged RTN_RTINFO jumps to "restart", so selection
 *     is retried there;
 *   - running out of parents falls through with rt still the null entry.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * must be written as "BACKTRACK();".
 */
#define BACKTRACK() \
do { \
        if (rt == &ip6_null_entry) { \
                while ((fn = fn->parent) != NULL) { \
                        if (fn->fn_flags & RTN_ROOT) \
                                goto out; \
                        if (fn->fn_flags & RTN_RTINFO) \
                                goto restart; \
                } \
        } \
} while (0)
411
412
/*
 * Resolve the route for a received packet and store it in skb->dst.
 *
 * The lookup key is the packet's IPv6 destination/source address plus the
 * receiving device's ifindex.  The first pass asks rt6_select() with
 * RT6_SELECT_F_REACHABLE set; whenever control reaches "out" while that
 * flag is still set, the flag is cleared and the lookup is repeated once.
 *
 * If the selected route is neither ip6_null_entry nor an RTF_CACHE entry
 * and has no nexthop (and is not RTF_NONEXTHOP), a host route is created
 * with rt6_alloc_cow() and inserted with ip6_ins_rt().  A failed insert
 * triggers a fresh lookup, bounded by "attempts".
 *
 * Every path that reaches "out2" holds one dst reference on rt, which is
 * the reference handed over in skb->dst.
 */
413 void ip6_route_input(struct sk_buff *skb)
414 {
415         struct fib6_node *fn;
416         struct rt6_info *rt, *nrt;
417         int strict;
418         int attempts = 3;
419         int err;
420         int reachable = RT6_SELECT_F_REACHABLE;
421
        /* multicast and link-local destinations require an interface match */
422         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
423
424 relookup:
425         read_lock_bh(&rt6_lock);
426
427 restart_2:
428         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
429                          &skb->nh.ipv6h->saddr);
430
431 restart:
432         rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
        /* nothing usable here: climb the tree (may jump to restart or out) */
433         BACKTRACK();
434         if (rt == &ip6_null_entry ||
435             rt->rt6i_flags & RTF_CACHE)
436                 goto out;
437
        /* pin the route, then drop the table lock before allocating */
438         dst_hold(&rt->u.dst);
439         read_unlock_bh(&rt6_lock);
440
441         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
442                 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
443         else {
444 #if CLONE_OFFLINK_ROUTE
445                 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
446 #else
                /* use the route as is; the reference taken above is kept */
447                 goto out2;
448 #endif
449         }
450
        /* swap our reference from the original route to the copy (or null entry) */
451         dst_release(&rt->u.dst);
452         rt = nrt ? : &ip6_null_entry;
453
454         dst_hold(&rt->u.dst);
455         if (nrt) {
456                 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
457                 if (!err)
458                         goto out2;
459         }
460
        /* out of retries: hand back whatever rt is now, reference still held */
461         if (--attempts <= 0)
462                 goto out2;
463
464         /*
465          * Race condition! In the gap, when rt6_lock was
466          * released someone could insert this route.  Relookup.
467          */
468         dst_release(&rt->u.dst);
469         goto relookup;
470
471 out:
        /* reached with rt6_lock read-held; retry once without the reachability filter */
472         if (reachable) {
473                 reachable = 0;
474                 goto restart_2;
475         }
476         dst_hold(&rt->u.dst);
477         read_unlock_bh(&rt6_lock);
478 out2:
        /* rt6_lock is not held here */
479         rt->u.dst.lastuse = jiffies;
480         rt->u.dst.__use++;
481         skb->dst = (struct dst_entry *) rt;
482         return;
483 }
484
/*
 * Resolve the route for an outgoing flow described by @fl and return its
 * dst entry with one reference held.  @sk is not used in this function.
 *
 * The lookup key is fl->fl6_dst / fl->fl6_src plus fl->oif.  The first
 * pass asks rt6_select() with RT6_SELECT_F_REACHABLE set; whenever
 * control reaches "out" while that flag is still set, the flag is cleared
 * and the lookup is repeated once.
 *
 * If the selected route is neither ip6_null_entry nor an RTF_CACHE entry
 * and has no nexthop (and is not RTF_NONEXTHOP), a host route is created
 * with rt6_alloc_cow() and inserted with ip6_ins_rt().  A failed insert
 * triggers a fresh lookup, bounded by "attempts".
 *
 * The result is never NULL: &ip6_null_entry.u.dst is returned when no
 * route matched or the copy could not be allocated.
 */
485 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
486 {
487         struct fib6_node *fn;
488         struct rt6_info *rt, *nrt;
489         int strict;
490         int attempts = 3;
491         int err;
492         int reachable = RT6_SELECT_F_REACHABLE;
493
        /* multicast and link-local destinations require an interface match */
494         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
495
496 relookup:
497         read_lock_bh(&rt6_lock);
498
499 restart_2:
500         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
501
502 restart:
503         rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
        /* nothing usable here: climb the tree (may jump to restart or out) */
504         BACKTRACK();
505         if (rt == &ip6_null_entry ||
506             rt->rt6i_flags & RTF_CACHE)
507                 goto out;
508
        /* pin the route, then drop the table lock before allocating */
509         dst_hold(&rt->u.dst);
510         read_unlock_bh(&rt6_lock);
511
512         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
513                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
514         else {
515 #if CLONE_OFFLINK_ROUTE
516                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
517 #else
                /* use the route as is; the reference taken above is kept */
518                 goto out2;
519 #endif
520         }
521
        /* swap our reference from the original route to the copy (or null entry) */
522         dst_release(&rt->u.dst);
523         rt = nrt ? : &ip6_null_entry;
524
525         dst_hold(&rt->u.dst);
526         if (nrt) {
527                 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
528                 if (!err)
529                         goto out2;
530         }
531
        /* out of retries: hand back whatever rt is now, reference still held */
532         if (--attempts <= 0)
533                 goto out2;
534
535         /*
536          * Race condition! In the gap, when rt6_lock was
537          * released someone could insert this route.  Relookup.
538          */
539         dst_release(&rt->u.dst);
540         goto relookup;
541
542 out:
        /* reached with rt6_lock read-held; retry once without the reachability filter */
543         if (reachable) {
544                 reachable = 0;
545                 goto restart_2;
546         }
547         dst_hold(&rt->u.dst);
548         read_unlock_bh(&rt6_lock);
549 out2:
        /* rt6_lock is not held here */
550         rt->u.dst.lastuse = jiffies;
551         rt->u.dst.__use++;
552         return &rt->u.dst;
553 }
554
555
556 /*
557  *      Destination cache support functions
558  */
559
560 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
561 {
562         struct rt6_info *rt;
563
564         rt = (struct rt6_info *) dst;
565
566         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
567                 return dst;
568
569         return NULL;
570 }
571
572 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
573 {
574         struct rt6_info *rt = (struct rt6_info *) dst;
575
576         if (rt) {
577                 if (rt->rt6i_flags & RTF_CACHE)
578                         ip6_del_rt(rt, NULL, NULL, NULL);
579                 else
580                         dst_release(dst);
581         }
582         return NULL;
583 }
584
585 static void ip6_link_failure(struct sk_buff *skb)
586 {
587         struct rt6_info *rt;
588
589         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
590
591         rt = (struct rt6_info *) skb->dst;
592         if (rt) {
593                 if (rt->rt6i_flags&RTF_CACHE) {
594                         dst_set_expires(&rt->u.dst, 0);
595                         rt->rt6i_flags |= RTF_EXPIRES;
596                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
597                         rt->rt6i_node->fn_sernum = -1;
598         }
599 }
600
601 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
602 {
603         struct rt6_info *rt6 = (struct rt6_info*)dst;
604
605         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
606                 rt6->rt6i_flags |= RTF_MODIFIED;
607                 if (mtu < IPV6_MIN_MTU) {
608                         mtu = IPV6_MIN_MTU;
609                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
610                 }
611                 dst->metrics[RTAX_MTU-1] = mtu;
612         }
613 }
614
/*
 * Singly linked list (through dst->next) of the entries created by
 * ndisc_dst_alloc(); ndisc_dst_gc() unlinks and frees those whose
 * reference count has dropped to zero.
 */
615 /* Protected by rt6_lock.  */
616 static struct dst_entry *ndisc_dst_gc_list;
/* Forward declaration: used by ndisc_dst_alloc() before its definition. */
617 static int ipv6_get_mtu(struct net_device *dev);
618
619 static inline unsigned int ipv6_advmss(unsigned int mtu)
620 {
621         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
622
623         if (mtu < ip6_rt_min_advmss)
624                 mtu = ip6_rt_min_advmss;
625
626         /*
627          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
628          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
629          * IPV6_MAXPLEN is also valid and means: "any MSS, 
630          * rely only on pmtu discovery"
631          */
632         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
633                 mtu = IPV6_MAXPLEN;
634         return mtu;
635 }
636
637 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
638                                   struct neighbour *neigh,
639                                   struct in6_addr *addr,
640                                   int (*output)(struct sk_buff *))
641 {
642         struct rt6_info *rt;
643         struct inet6_dev *idev = in6_dev_get(dev);
644
645         if (unlikely(idev == NULL))
646                 return NULL;
647
648         rt = ip6_dst_alloc();
649         if (unlikely(rt == NULL)) {
650                 in6_dev_put(idev);
651                 goto out;
652         }
653
654         dev_hold(dev);
655         if (neigh)
656                 neigh_hold(neigh);
657         else
658                 neigh = ndisc_get_neigh(dev, addr);
659
660         rt->rt6i_dev      = dev;
661         rt->rt6i_idev     = idev;
662         rt->rt6i_nexthop  = neigh;
663         atomic_set(&rt->u.dst.__refcnt, 1);
664         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
665         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
666         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
667         rt->u.dst.output  = output;
668
669 #if 0   /* there's no chance to use these for ndisc */
670         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
671                                 ? DST_HOST 
672                                 : 0;
673         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
674         rt->rt6i_dst.plen = 128;
675 #endif
676
677         write_lock_bh(&rt6_lock);
678         rt->u.dst.next = ndisc_dst_gc_list;
679         ndisc_dst_gc_list = &rt->u.dst;
680         write_unlock_bh(&rt6_lock);
681
682         fib6_force_start_gc();
683
684 out:
685         return (struct dst_entry *)rt;
686 }
687
688 int ndisc_dst_gc(int *more)
689 {
690         struct dst_entry *dst, *next, **pprev;
691         int freed;
692
693         next = NULL;
694         pprev = &ndisc_dst_gc_list;
695         freed = 0;
696         while ((dst = *pprev) != NULL) {
697                 if (!atomic_read(&dst->__refcnt)) {
698                         *pprev = dst->next;
699                         dst_free(dst);
700                         freed++;
701                 } else {
702                         pprev = &dst->next;
703                         (*more)++;
704                 }
705         }
706
707         return freed;
708 }
709
710 static int ip6_dst_gc(void)
711 {
712         static unsigned expire = 30*HZ;
713         static unsigned long last_gc;
714         unsigned long now = jiffies;
715
716         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
717             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
718                 goto out;
719
720         expire++;
721         fib6_run_gc(expire);
722         last_gc = now;
723         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
724                 expire = ip6_rt_gc_timeout>>1;
725
726 out:
727         expire -= expire>>ip6_rt_gc_elasticity;
728         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
729 }
730
731 /* Clean host part of a prefix. Not necessary in radix tree,
732    but results in cleaner routing tables.
733
734    Remove it only when all the things will work!
735  */
736
737 static int ipv6_get_mtu(struct net_device *dev)
738 {
739         int mtu = IPV6_MIN_MTU;
740         struct inet6_dev *idev;
741
742         idev = in6_dev_get(dev);
743         if (idev) {
744                 mtu = idev->cnf.mtu6;
745                 in6_dev_put(idev);
746         }
747         return mtu;
748 }
749
750 int ipv6_get_hoplimit(struct net_device *dev)
751 {
752         int hoplimit = ipv6_devconf.hop_limit;
753         struct inet6_dev *idev;
754
755         idev = in6_dev_get(dev);
756         if (idev) {
757                 hoplimit = idev->cnf.hop_limit;
758                 in6_dev_put(idev);
759         }
760         return hoplimit;
761 }
762
763 /*
764  *
765  */
766
767 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
768                 void *_rtattr, struct netlink_skb_parms *req)
769 {
770         int err;
771         struct rtmsg *r;
772         struct rtattr **rta;
773         struct rt6_info *rt = NULL;
774         struct net_device *dev = NULL;
775         struct inet6_dev *idev = NULL;
776         int addr_type;
777
778         rta = (struct rtattr **) _rtattr;
779
780         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
781                 return -EINVAL;
782 #ifndef CONFIG_IPV6_SUBTREES
783         if (rtmsg->rtmsg_src_len)
784                 return -EINVAL;
785 #endif
786         if (rtmsg->rtmsg_ifindex) {
787                 err = -ENODEV;
788                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
789                 if (!dev)
790                         goto out;
791                 idev = in6_dev_get(dev);
792                 if (!idev)
793                         goto out;
794         }
795
796         if (rtmsg->rtmsg_metric == 0)
797                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
798
799         rt = ip6_dst_alloc();
800
801         if (rt == NULL) {
802                 err = -ENOMEM;
803                 goto out;
804         }
805
806         rt->u.dst.obsolete = -1;
807         rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
808         if (nlh && (r = NLMSG_DATA(nlh))) {
809                 rt->rt6i_protocol = r->rtm_protocol;
810         } else {
811                 rt->rt6i_protocol = RTPROT_BOOT;
812         }
813
814         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
815
816         if (addr_type & IPV6_ADDR_MULTICAST)
817                 rt->u.dst.input = ip6_mc_input;
818         else
819                 rt->u.dst.input = ip6_forward;
820
821         rt->u.dst.output = ip6_output;
822
823         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
824                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
825         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
826         if (rt->rt6i_dst.plen == 128)
827                rt->u.dst.flags = DST_HOST;
828
829 #ifdef CONFIG_IPV6_SUBTREES
830         ipv6_addr_prefix(&rt->rt6i_src.addr, 
831                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
832         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
833 #endif
834
835         rt->rt6i_metric = rtmsg->rtmsg_metric;
836
837         /* We cannot add true routes via loopback here,
838            they would result in kernel looping; promote them to reject routes
839          */
840         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
841             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
842                 /* hold loopback dev/idev if we haven't done so. */
843                 if (dev != &loopback_dev) {
844                         if (dev) {
845                                 dev_put(dev);
846                                 in6_dev_put(idev);
847                         }
848                         dev = &loopback_dev;
849                         dev_hold(dev);
850                         idev = in6_dev_get(dev);
851                         if (!idev) {
852                                 err = -ENODEV;
853                                 goto out;
854                         }
855                 }
856                 rt->u.dst.output = ip6_pkt_discard_out;
857                 rt->u.dst.input = ip6_pkt_discard;
858                 rt->u.dst.error = -ENETUNREACH;
859                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
860                 goto install_route;
861         }
862
863         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
864                 struct in6_addr *gw_addr;
865                 int gwa_type;
866
867                 gw_addr = &rtmsg->rtmsg_gateway;
868                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
869                 gwa_type = ipv6_addr_type(gw_addr);
870
871                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
872                         struct rt6_info *grt;
873
874                         /* IPv6 strictly inhibits using not link-local
875                            addresses as nexthop address.
876                            Otherwise, router will not able to send redirects.
877                            It is very good, but in some (rare!) circumstances
878                            (SIT, PtP, NBMA NOARP links) it is handy to allow
879                            some exceptions. --ANK
880                          */
881                         err = -EINVAL;
882                         if (!(gwa_type&IPV6_ADDR_UNICAST))
883                                 goto out;
884
885                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
886
887                         err = -EHOSTUNREACH;
888                         if (grt == NULL)
889                                 goto out;
890                         if (dev) {
891                                 if (dev != grt->rt6i_dev) {
892                                         dst_release(&grt->u.dst);
893                                         goto out;
894                                 }
895                         } else {
896                                 dev = grt->rt6i_dev;
897                                 idev = grt->rt6i_idev;
898                                 dev_hold(dev);
899                                 in6_dev_hold(grt->rt6i_idev);
900                         }
901                         if (!(grt->rt6i_flags&RTF_GATEWAY))
902                                 err = 0;
903                         dst_release(&grt->u.dst);
904
905                         if (err)
906                                 goto out;
907                 }
908                 err = -EINVAL;
909                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
910                         goto out;
911         }
912
913         err = -ENODEV;
914         if (dev == NULL)
915                 goto out;
916
917         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
918                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
919                 if (IS_ERR(rt->rt6i_nexthop)) {
920                         err = PTR_ERR(rt->rt6i_nexthop);
921                         rt->rt6i_nexthop = NULL;
922                         goto out;
923                 }
924         }
925
926         rt->rt6i_flags = rtmsg->rtmsg_flags;
927
928 install_route:
929         if (rta && rta[RTA_METRICS-1]) {
930                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
931                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
932
933                 while (RTA_OK(attr, attrlen)) {
934                         unsigned flavor = attr->rta_type;
935                         if (flavor) {
936                                 if (flavor > RTAX_MAX) {
937                                         err = -EINVAL;
938                                         goto out;
939                                 }
940                                 rt->u.dst.metrics[flavor-1] =
941                                         *(u32 *)RTA_DATA(attr);
942                         }
943                         attr = RTA_NEXT(attr, attrlen);
944                 }
945         }
946
947         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
948                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
949         if (!rt->u.dst.metrics[RTAX_MTU-1])
950                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
951         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
952                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
953         rt->u.dst.dev = dev;
954         rt->rt6i_idev = idev;
955         return ip6_ins_rt(rt, nlh, _rtattr, req);
956
957 out:
958         if (dev)
959                 dev_put(dev);
960         if (idev)
961                 in6_dev_put(idev);
962         if (rt)
963                 dst_free((struct dst_entry *) rt);
964         return err;
965 }
966
967 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
968 {
969         int err;
970
971         write_lock_bh(&rt6_lock);
972
973         err = fib6_del(rt, nlh, _rtattr, req);
974         dst_release(&rt->u.dst);
975
976         write_unlock_bh(&rt6_lock);
977
978         return err;
979 }
980
981 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
982 {
983         struct fib6_node *fn;
984         struct rt6_info *rt;
985         int err = -ESRCH;
986
987         read_lock_bh(&rt6_lock);
988
989         fn = fib6_locate(&ip6_routing_table,
990                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
991                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
992         
993         if (fn) {
994                 for (rt = fn->leaf; rt; rt = rt->u.next) {
995                         if (rtmsg->rtmsg_ifindex &&
996                             (rt->rt6i_dev == NULL ||
997                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
998                                 continue;
999                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1000                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1001                                 continue;
1002                         if (rtmsg->rtmsg_metric &&
1003                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1004                                 continue;
1005                         dst_hold(&rt->u.dst);
1006                         read_unlock_bh(&rt6_lock);
1007
1008                         return ip6_del_rt(rt, nlh, _rtattr, req);
1009                 }
1010         }
1011         read_unlock_bh(&rt6_lock);
1012
1013         return err;
1014 }
1015
/*
 *      Handle redirects
 *
 *      @dest:    destination the redirect applies to; becomes the /128
 *                destination of the new cache route
 *      @saddr:   source address of the redirect, checked against the
 *                gateway of the current route
 *      @neigh:   neighbour entry of the new next hop; its primary key
 *                becomes the gateway of the new route
 *      @lladdr:  link-layer address handed to neigh_update()
 *      @on_link: non-zero when the destination is to be treated as
 *                on-link: the new route gets no RTF_GATEWAY and the
 *                neighbour is not flagged as a router
 *
 *      The reference on the looked-up route is dropped on every exit
 *      path, either at "out" or inside ip6_del_rt().
 */
void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
                  struct neighbour *neigh, u8 *lladdr, int on_link)
{
        struct rt6_info *rt, *nrt;

        /* Locate old route to this destination. */
        rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);

        if (rt == NULL)
                return;

        /* The new next hop must be on the device the old route uses. */
        if (neigh->dev != rt->rt6i_dev)
                goto out;

        /*
         * Current route is on-link; redirect is always invalid.
         *
         * Seems, previous statement is not true. It could
         * be node, which looks for us as on-link (f.e. proxy ndisc)
         * But then router serving it might decide, that we should
         * know truth 8)8) --ANK (980726).
         */
        if (!(rt->rt6i_flags&RTF_GATEWAY))
                goto out;

        /*
         *      RFC 2461 specifies that redirects should only be
         *      accepted if they come from the nexthop to the target.
         *      Due to the way default routers are chosen, this notion
         *      is a bit fuzzy and one might need to check all default
         *      routers.
         */
        if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
                if (rt->rt6i_flags & RTF_DEFAULT) {
                        struct rt6_info *rt1;

                        /*
                         * Scan the routes hanging off the root node for one
                         * whose gateway is the sender.  On a hit, swap our
                         * reference from rt to rt1 before leaving the lock.
                         */
                        read_lock(&rt6_lock);
                        for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
                                if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
                                        dst_hold(&rt1->u.dst);
                                        dst_release(&rt->u.dst);
                                        read_unlock(&rt6_lock);
                                        rt = rt1;
                                        goto source_ok;
                                }
                        }
                        read_unlock(&rt6_lock);
                }
                if (net_ratelimit())
                        printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
                               "for redirect target\n");
                goto out;
        }

source_ok:

        /*
         *      We have finally decided to accept it.
         */

        neigh_update(neigh, lladdr, NUD_STALE, 
                     NEIGH_UPDATE_F_WEAK_OVERRIDE|
                     NEIGH_UPDATE_F_OVERRIDE|
                     (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
                                     NEIGH_UPDATE_F_ISROUTER))
                     );

        /*
         * Redirect received -> path was valid.
         * Look, redirects are sent only in response to data packets,
         * so that this nexthop apparently is reachable. --ANK
         */
        dst_confirm(&rt->u.dst);

        /* Duplicate redirect: silently ignore. */
        if (neigh == rt->u.dst.neighbour)
                goto out;

        nrt = ip6_rt_copy(rt);
        if (nrt == NULL)
                goto out;

        /* The copy's flags are replaced wholesale, not inherited. */
        nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
        if (on_link)
                nrt->rt6i_flags &= ~RTF_GATEWAY;

        /* The new entry is a host route for exactly @dest. */
        ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
        nrt->rt6i_dst.plen = 128;
        nrt->u.dst.flags |= DST_HOST;

        ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
        nrt->rt6i_nexthop = neigh_clone(neigh);
        /* Reset pmtu, it may be better */
        nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
        nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));

        if (ip6_ins_rt(nrt, NULL, NULL, NULL))
                goto out;

        /*
         * An old cache entry is superseded by the new one.  ip6_del_rt()
         * releases our reference on rt, hence the plain return.
         */
        if (rt->rt6i_flags&RTF_CACHE) {
                ip6_del_rt(rt, NULL, NULL, NULL);
                return;
        }

out:
        dst_release(&rt->u.dst);
        return;
}
1127
1128 /*
1129  *      Handle ICMP "packet too big" messages
1130  *      i.e. Path MTU discovery
1131  */
1132
1133 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1134                         struct net_device *dev, u32 pmtu)
1135 {
1136         struct rt6_info *rt, *nrt;
1137         int allfrag = 0;
1138
1139         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1140         if (rt == NULL)
1141                 return;
1142
1143         if (pmtu >= dst_mtu(&rt->u.dst))
1144                 goto out;
1145
1146         if (pmtu < IPV6_MIN_MTU) {
1147                 /*
1148                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1149                  * MTU (1280) and a fragment header should always be included
1150                  * after a node receiving Too Big message reporting PMTU is
1151                  * less than the IPv6 Minimum Link MTU.
1152                  */
1153                 pmtu = IPV6_MIN_MTU;
1154                 allfrag = 1;
1155         }
1156
1157         /* New mtu received -> path was valid.
1158            They are sent only in response to data packets,
1159            so that this nexthop apparently is reachable. --ANK
1160          */
1161         dst_confirm(&rt->u.dst);
1162
1163         /* Host route. If it is static, it would be better
1164            not to override it, but add new one, so that
1165            when cache entry will expire old pmtu
1166            would return automatically.
1167          */
1168         if (rt->rt6i_flags & RTF_CACHE) {
1169                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1170                 if (allfrag)
1171                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1172                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1173                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1174                 goto out;
1175         }
1176
1177         /* Network route.
1178            Two cases are possible:
1179            1. It is connected route. Action: COW
1180            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1181          */
1182         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1183                 nrt = rt6_alloc_cow(rt, daddr, saddr);
1184         else
1185                 nrt = rt6_alloc_clone(rt, daddr);
1186
1187         if (nrt) {
1188                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1189                 if (allfrag)
1190                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1191
1192                 /* According to RFC 1981, detecting PMTU increase shouldn't be
1193                  * happened within 5 mins, the recommended timer is 10 mins.
1194                  * Here this route expiration time is set to ip6_rt_mtu_expires
1195                  * which is 10 mins. After 10 mins the decreased pmtu is expired
1196                  * and detecting PMTU increase will be automatically happened.
1197                  */
1198                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1199                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1200
1201                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1202         }
1203 out:
1204         dst_release(&rt->u.dst);
1205 }
1206
1207 /*
1208  *      Misc support functions
1209  */
1210
1211 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1212 {
1213         struct rt6_info *rt = ip6_dst_alloc();
1214
1215         if (rt) {
1216                 rt->u.dst.input = ort->u.dst.input;
1217                 rt->u.dst.output = ort->u.dst.output;
1218
1219                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1220                 rt->u.dst.dev = ort->u.dst.dev;
1221                 if (rt->u.dst.dev)
1222                         dev_hold(rt->u.dst.dev);
1223                 rt->rt6i_idev = ort->rt6i_idev;
1224                 if (rt->rt6i_idev)
1225                         in6_dev_hold(rt->rt6i_idev);
1226                 rt->u.dst.lastuse = jiffies;
1227                 rt->rt6i_expires = 0;
1228
1229                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1230                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1231                 rt->rt6i_metric = 0;
1232
1233                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1234 #ifdef CONFIG_IPV6_SUBTREES
1235                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1236 #endif
1237         }
1238         return rt;
1239 }
1240
1241 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1242 {       
1243         struct rt6_info *rt;
1244         struct fib6_node *fn;
1245
1246         fn = &ip6_routing_table;
1247
1248         write_lock_bh(&rt6_lock);
1249         for (rt = fn->leaf; rt; rt=rt->u.next) {
1250                 if (dev == rt->rt6i_dev &&
1251                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1252                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1253                         break;
1254         }
1255         if (rt)
1256                 dst_hold(&rt->u.dst);
1257         write_unlock_bh(&rt6_lock);
1258         return rt;
1259 }
1260
1261 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1262                                      struct net_device *dev,
1263                                      unsigned int pref)
1264 {
1265         struct in6_rtmsg rtmsg;
1266
1267         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1268         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1269         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1270         rtmsg.rtmsg_metric = 1024;
1271         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1272                             RTF_PREF(pref);
1273
1274         rtmsg.rtmsg_ifindex = dev->ifindex;
1275
1276         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1277         return rt6_get_dflt_router(gwaddr, dev);
1278 }
1279
1280 void rt6_purge_dflt_routers(void)
1281 {
1282         struct rt6_info *rt;
1283
1284 restart:
1285         read_lock_bh(&rt6_lock);
1286         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1287                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1288                         dst_hold(&rt->u.dst);
1289
1290                         read_unlock_bh(&rt6_lock);
1291
1292                         ip6_del_rt(rt, NULL, NULL, NULL);
1293
1294                         goto restart;
1295                 }
1296         }
1297         read_unlock_bh(&rt6_lock);
1298 }
1299
1300 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1301 {
1302         struct in6_rtmsg rtmsg;
1303         int err;
1304
1305         switch(cmd) {
1306         case SIOCADDRT:         /* Add a route */
1307         case SIOCDELRT:         /* Delete a route */
1308                 if (!capable(CAP_NET_ADMIN))
1309                         return -EPERM;
1310                 err = copy_from_user(&rtmsg, arg,
1311                                      sizeof(struct in6_rtmsg));
1312                 if (err)
1313                         return -EFAULT;
1314                         
1315                 rtnl_lock();
1316                 switch (cmd) {
1317                 case SIOCADDRT:
1318                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1319                         break;
1320                 case SIOCDELRT:
1321                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1322                         break;
1323                 default:
1324                         err = -EINVAL;
1325                 }
1326                 rtnl_unlock();
1327
1328                 return err;
1329         };
1330
1331         return -EINVAL;
1332 }
1333
/*
 *      Drop the packet on the floor
 *
 *      Bumps the IPSTATS_MIB_OUTNOROUTES counter, sends an ICMPv6
 *      destination-unreachable / no-route error for @skb using
 *      skb->dev, and frees @skb.  Always returns 0.
 */

static int ip6_pkt_discard(struct sk_buff *skb)
{
        IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
        icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
        kfree_skb(skb);
        return 0;
}
1345
1346 static int ip6_pkt_discard_out(struct sk_buff *skb)
1347 {
1348         skb->dev = skb->dst->dev;
1349         return ip6_pkt_discard(skb);
1350 }
1351
1352 /*
1353  *      Allocate a dst for local (unicast / anycast) address.
1354  */
1355
1356 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1357                                     const struct in6_addr *addr,
1358                                     int anycast)
1359 {
1360         struct rt6_info *rt = ip6_dst_alloc();
1361
1362         if (rt == NULL)
1363                 return ERR_PTR(-ENOMEM);
1364
1365         dev_hold(&loopback_dev);
1366         in6_dev_hold(idev);
1367
1368         rt->u.dst.flags = DST_HOST;
1369         rt->u.dst.input = ip6_input;
1370         rt->u.dst.output = ip6_output;
1371         rt->rt6i_dev = &loopback_dev;
1372         rt->rt6i_idev = idev;
1373         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1374         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1375         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1376         rt->u.dst.obsolete = -1;
1377
1378         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1379         if (anycast)
1380                 rt->rt6i_flags |= RTF_ANYCAST;
1381         else
1382                 rt->rt6i_flags |= RTF_LOCAL;
1383         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1384         if (rt->rt6i_nexthop == NULL) {
1385                 dst_free((struct dst_entry *) rt);
1386                 return ERR_PTR(-ENOMEM);
1387         }
1388
1389         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1390         rt->rt6i_dst.plen = 128;
1391
1392         atomic_set(&rt->u.dst.__refcnt, 1);
1393
1394         return rt;
1395 }
1396
1397 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1398 {
1399         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1400             rt != &ip6_null_entry) {
1401                 RT6_TRACE("deleted by ifdown %p\n", rt);
1402                 return -1;
1403         }
1404         return 0;
1405 }
1406
/*
 *      Run fib6_ifdown() over the whole routing table for @dev, with
 *      rt6_lock held for writing.  A NULL @dev makes fib6_ifdown()
 *      match routes on every device.
 */
void rt6_ifdown(struct net_device *dev)
{
        write_lock_bh(&rt6_lock);
        fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
        write_unlock_bh(&rt6_lock);
}
1413
/* Argument bundle handed from rt6_mtu_change() to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg
{
        struct net_device *dev;         /* device whose MTU changed */
        unsigned mtu;                   /* the new MTU of that device */
};
1419
1420 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1421 {
1422         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1423         struct inet6_dev *idev;
1424
1425         /* In IPv6 pmtu discovery is not optional,
1426            so that RTAX_MTU lock cannot disable it.
1427            We still use this lock to block changes
1428            caused by addrconf/ndisc.
1429         */
1430
1431         idev = __in6_dev_get(arg->dev);
1432         if (idev == NULL)
1433                 return 0;
1434
1435         /* For administrative MTU increase, there is no way to discover
1436            IPv6 PMTU increase, so PMTU increase should be updated here.
1437            Since RFC 1981 doesn't include administrative MTU increase
1438            update PMTU increase is a MUST. (i.e. jumbo frame)
1439          */
1440         /*
1441            If new MTU is less than route PMTU, this new MTU will be the
1442            lowest MTU in the path, update the route PMTU to reflect PMTU
1443            decreases; if new MTU is greater than route PMTU, and the
1444            old MTU is the lowest MTU in the path, update the route PMTU
1445            to reflect the increase. In this case if the other nodes' MTU
1446            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1447            PMTU discouvery.
1448          */
1449         if (rt->rt6i_dev == arg->dev &&
1450             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1451             (dst_mtu(&rt->u.dst) > arg->mtu ||
1452              (dst_mtu(&rt->u.dst) < arg->mtu &&
1453               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1454                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1455         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1456         return 0;
1457 }
1458
1459 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1460 {
1461         struct rt6_mtu_change_arg arg;
1462
1463         arg.dev = dev;
1464         arg.mtu = mtu;
1465         read_lock_bh(&rt6_lock);
1466         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1467         read_unlock_bh(&rt6_lock);
1468 }
1469
1470 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1471                               struct in6_rtmsg *rtmsg)
1472 {
1473         memset(rtmsg, 0, sizeof(*rtmsg));
1474
1475         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1476         rtmsg->rtmsg_src_len = r->rtm_src_len;
1477         rtmsg->rtmsg_flags = RTF_UP;
1478         if (r->rtm_type == RTN_UNREACHABLE)
1479                 rtmsg->rtmsg_flags |= RTF_REJECT;
1480
1481         if (rta[RTA_GATEWAY-1]) {
1482                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1483                         return -EINVAL;
1484                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1485                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1486         }
1487         if (rta[RTA_DST-1]) {
1488                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1489                         return -EINVAL;
1490                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1491         }
1492         if (rta[RTA_SRC-1]) {
1493                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1494                         return -EINVAL;
1495                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1496         }
1497         if (rta[RTA_OIF-1]) {
1498                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1499                         return -EINVAL;
1500                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1501         }
1502         if (rta[RTA_PRIORITY-1]) {
1503                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1504                         return -EINVAL;
1505                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1506         }
1507         return 0;
1508 }
1509
1510 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1511 {
1512         struct rtmsg *r = NLMSG_DATA(nlh);
1513         struct in6_rtmsg rtmsg;
1514
1515         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1516                 return -EINVAL;
1517         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1518 }
1519
1520 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1521 {
1522         struct rtmsg *r = NLMSG_DATA(nlh);
1523         struct in6_rtmsg rtmsg;
1524
1525         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1526                 return -EINVAL;
1527         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1528 }
1529
/* Per-call state handed to rt6_dump_route() through the walker's args. */
struct rt6_rtnl_dump_arg
{
        struct sk_buff *skb;            /* buffer the route messages are written to */
        struct netlink_callback *cb;    /* dump callback; gives the request header and pid */
};
1535
/*
 *      Append one route message describing @rt to @skb.
 *
 *      @skb:    buffer to append to
 *      @rt:     route to describe
 *      @dst:    if non-NULL, reported as a /128 destination instead of
 *               the route's own destination prefix
 *      @src:    same for the source (only with CONFIG_IPV6_SUBTREES)
 *      @iif:    if non-zero, emitted as RTA_IIF; otherwise, when @dst is
 *               given, a source address is looked up and emitted as
 *               RTA_PREFSRC
 *      @type:   netlink message type
 *      @pid:    netlink pid for the message header
 *      @seq:    netlink sequence number for the message header
 *      @prefix: non-zero restricts output to RTF_PREFIX_RT routes
 *      @flags:  netlink message flags
 *
 *      Returns 1 when @rt is skipped by the prefix filter (nothing is
 *      written), skb->len on success, and -1 on failure after @skb has
 *      been trimmed back to its length on entry.
 */
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
                         struct in6_addr *dst, struct in6_addr *src,
                         int iif, int type, u32 pid, u32 seq,
                         int prefix, unsigned int flags)
{
        struct rtmsg *rtm;
        struct nlmsghdr  *nlh;
        unsigned char    *b = skb->tail;        /* start of our message, for length and rollback */
        struct rta_cacheinfo ci;

        if (prefix) {   /* user wants prefix routes only */
                if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
                        /* success since this is not a prefix route */
                        return 1;
                }
        }

        /* NOTE(review): NLMSG_NEW presumably jumps to nlmsg_failure when skb lacks room - confirm */
        nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
        rtm = NLMSG_DATA(nlh);
        rtm->rtm_family = AF_INET6;
        rtm->rtm_dst_len = rt->rt6i_dst.plen;
        rtm->rtm_src_len = rt->rt6i_src.plen;
        rtm->rtm_tos = 0;
        rtm->rtm_table = RT_TABLE_MAIN;
        /* Route type: reject routes first, then loopback-device routes, else unicast. */
        if (rt->rt6i_flags&RTF_REJECT)
                rtm->rtm_type = RTN_UNREACHABLE;
        else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
                rtm->rtm_type = RTN_LOCAL;
        else
                rtm->rtm_type = RTN_UNICAST;
        rtm->rtm_flags = 0;
        rtm->rtm_scope = RT_SCOPE_UNIVERSE;
        /* The stored protocol is overridden by the first matching flag below. */
        rtm->rtm_protocol = rt->rt6i_protocol;
        if (rt->rt6i_flags&RTF_DYNAMIC)
                rtm->rtm_protocol = RTPROT_REDIRECT;
        else if (rt->rt6i_flags & RTF_ADDRCONF)
                rtm->rtm_protocol = RTPROT_KERNEL;
        else if (rt->rt6i_flags&RTF_DEFAULT)
                rtm->rtm_protocol = RTPROT_RA;

        if (rt->rt6i_flags&RTF_CACHE)
                rtm->rtm_flags |= RTM_F_CLONED;

        /* NOTE(review): each RTA_PUT presumably jumps to rtattr_failure when skb lacks room - confirm */
        if (dst) {
                RTA_PUT(skb, RTA_DST, 16, dst);
                rtm->rtm_dst_len = 128;
        } else if (rtm->rtm_dst_len)
                RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
#ifdef CONFIG_IPV6_SUBTREES
        if (src) {
                RTA_PUT(skb, RTA_SRC, 16, src);
                rtm->rtm_src_len = 128;
        } else if (rtm->rtm_src_len)
                RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
#endif
        if (iif)
                RTA_PUT(skb, RTA_IIF, 4, &iif);
        else if (dst) {
                struct in6_addr saddr_buf;
                /* Report a preferred source only when one could be selected. */
                if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
                        RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
        }
        if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
                goto rtattr_failure;
        /* The gateway is taken from the neighbour entry, not from rt6i_gateway. */
        if (rt->u.dst.neighbour)
                RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
        if (rt->u.dst.dev)
                RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
        RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
        /* Cache info: times are converted from jiffies to clock_t. */
        ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
        if (rt->rt6i_expires)
                ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
        else
                ci.rta_expires = 0;
        ci.rta_used = rt->u.dst.__use;
        ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
        ci.rta_error = rt->u.dst.error;
        ci.rta_id = 0;
        ci.rta_ts = 0;
        ci.rta_tsage = 0;
        RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
        /* Final message length: everything appended since entry. */
        nlh->nlmsg_len = skb->tail - b;
        return skb->len;

nlmsg_failure:
rtattr_failure:
        /* Roll the buffer back to where this message started. */
        skb_trim(skb, b - skb->data);
        return -1;
}
1625
1626 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1627 {
1628         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1629         int prefix;
1630
1631         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1632                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1633                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1634         } else
1635                 prefix = 0;
1636
1637         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1638                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1639                      prefix, NLM_F_MULTI);
1640 }
1641
1642 static int fib6_dump_node(struct fib6_walker_t *w)
1643 {
1644         int res;
1645         struct rt6_info *rt;
1646
1647         for (rt = w->leaf; rt; rt = rt->u.next) {
1648                 res = rt6_dump_route(rt, w->args);
1649                 if (res < 0) {
1650                         /* Frame is full, suspend walking */
1651                         w->leaf = rt;
1652                         return 1;
1653                 }
1654                 BUG_TRAP(res!=0);
1655         }
1656         w->leaf = NULL;
1657         return 0;
1658 }
1659
1660 static void fib6_dump_end(struct netlink_callback *cb)
1661 {
1662         struct fib6_walker_t *w = (void*)cb->args[0];
1663
1664         if (w) {
1665                 cb->args[0] = 0;
1666                 fib6_walker_unlink(w);
1667                 kfree(w);
1668         }
1669         cb->done = (void*)cb->args[1];
1670         cb->args[1] = 0;
1671 }
1672
1673 static int fib6_dump_done(struct netlink_callback *cb)
1674 {
1675         fib6_dump_end(cb);
1676         return cb->done ? cb->done(cb) : 0;
1677 }
1678
/*
 *      Netlink dump callback for the IPv6 routing table.
 *
 *      The first call allocates a tree walker, stores it in cb->args[0]
 *      and saves the previous cb->done in cb->args[1]; later calls
 *      resume that walker.  Each call writes as many routes as fit
 *      into @skb.
 *
 *      Returns -ENOMEM when the walker cannot be allocated, a negative
 *      walk result on error, otherwise skb->len (0 once nothing more
 *      was written, at which point the walker is destroyed).
 */
int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct rt6_rtnl_dump_arg arg;
        struct fib6_walker_t *w;
        int res;

        arg.skb = skb;
        arg.cb = cb;

        w = (void*)cb->args[0];
        if (w == NULL) {
                /* New dump:
                 * 
                 * 1. hook callback destructor.
                 */
                cb->args[1] = (long)cb->done;
                cb->done = fib6_dump_done;

                /*
                 * 2. allocate and initialize walker.
                 */
                w = kmalloc(sizeof(*w), GFP_ATOMIC);
                if (w == NULL)
                        return -ENOMEM;
                RT6_TRACE("dump<%p", w);
                memset(w, 0, sizeof(*w));
                w->root = &ip6_routing_table;
                w->func = fib6_dump_node;
                /* arg lives on this stack frame; it is re-pointed on every call. */
                w->args = &arg;
                cb->args[0] = (long)w;
                read_lock_bh(&rt6_lock);
                res = fib6_walk(w);
                read_unlock_bh(&rt6_lock);
        } else {
                /* Continuing dump: refresh the per-call argument, then resume. */
                w->args = &arg;
                read_lock_bh(&rt6_lock);
                res = fib6_walk_continue(w);
                read_unlock_bh(&rt6_lock);
        }
#if RT6_DEBUG >= 3
        if (res <= 0 && skb->len == 0)
                RT6_TRACE("%p>dump end\n", w);
#endif
        /* A non-negative walk result is replaced by the amount of data written. */
        res = res < 0 ? res : skb->len;
        /* res < 0 is an error. (really, impossible)
           res == 0 means that dump is complete, but skb still can contain data.
           res > 0 dump is not complete, but frame is full.
         */
        /* Destroy walker, if dump of this table is complete. */
        if (res <= 0)
                fib6_dump_end(cb);
        return res;
}
1732
1733 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1734 {
1735         struct rtattr **rta = arg;
1736         int iif = 0;
1737         int err = -ENOBUFS;
1738         struct sk_buff *skb;
1739         struct flowi fl;
1740         struct rt6_info *rt;
1741
1742         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1743         if (skb == NULL)
1744                 goto out;
1745
1746         /* Reserve room for dummy headers, this skb can pass
1747            through good chunk of routing engine.
1748          */
1749         skb->mac.raw = skb->data;
1750         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1751
1752         memset(&fl, 0, sizeof(fl));
1753         if (rta[RTA_SRC-1])
1754                 ipv6_addr_copy(&fl.fl6_src,
1755                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1756         if (rta[RTA_DST-1])
1757                 ipv6_addr_copy(&fl.fl6_dst,
1758                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1759
1760         if (rta[RTA_IIF-1])
1761                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1762
1763         if (iif) {
1764                 struct net_device *dev;
1765                 dev = __dev_get_by_index(iif);
1766                 if (!dev) {
1767                         err = -ENODEV;
1768                         goto out_free;
1769                 }
1770         }
1771
1772         fl.oif = 0;
1773         if (rta[RTA_OIF-1])
1774                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1775
1776         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1777
1778         skb->dst = &rt->u.dst;
1779
1780         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1781         err = rt6_fill_node(skb, rt, 
1782                             &fl.fl6_dst, &fl.fl6_src,
1783                             iif,
1784                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1785                             nlh->nlmsg_seq, 0, 0);
1786         if (err < 0) {
1787                 err = -EMSGSIZE;
1788                 goto out_free;
1789         }
1790
1791         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1792         if (err > 0)
1793                 err = 0;
1794 out:
1795         return err;
1796 out_free:
1797         kfree_skb(skb);
1798         goto out;       
1799 }
1800
1801 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1802                         struct netlink_skb_parms *req)
1803 {
1804         struct sk_buff *skb;
1805         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1806         u32 pid = current->pid;
1807         u32 seq = 0;
1808
1809         if (req)
1810                 pid = req->pid;
1811         if (nlh)
1812                 seq = nlh->nlmsg_seq;
1813         
1814         skb = alloc_skb(size, gfp_any());
1815         if (!skb) {
1816                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1817                 return;
1818         }
1819         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1820                 kfree_skb(skb);
1821                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1822                 return;
1823         }
1824         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1825         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1826 }
1827
1828 /*
1829  *      /proc
1830  */
1831
1832 #ifdef CONFIG_PROC_FS
1833
/*
 * Size in bytes of one line emitted by rt6_info_route():
 * 32 hex digits + 4 (" xx ") for the destination, the same again for the
 * source, 32 hex digits for the next hop, and 46 for the trailing four
 * " %08x" words, the " %8s" device name and the newline.
 * NOTE(review): this only holds while the device name is at most 8
 * characters, since "%8s" pads but does not truncate.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and its per-route callback. */
struct rt6_proc_arg {
        char *buffer;   /* output buffer supplied by the reader */
        int offset;     /* byte offset requested by the reader */
        int length;     /* number of bytes requested by the reader */
        int skip;       /* routes passed over so far to reach offset */
        int len;        /* bytes written into buffer so far */
};
1844
1845 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1846 {
1847         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1848         int i;
1849
1850         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1851                 arg->skip++;
1852                 return 0;
1853         }
1854
1855         if (arg->len >= arg->length)
1856                 return 0;
1857
1858         for (i=0; i<16; i++) {
1859                 sprintf(arg->buffer + arg->len, "%02x",
1860                         rt->rt6i_dst.addr.s6_addr[i]);
1861                 arg->len += 2;
1862         }
1863         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1864                             rt->rt6i_dst.plen);
1865
1866 #ifdef CONFIG_IPV6_SUBTREES
1867         for (i=0; i<16; i++) {
1868                 sprintf(arg->buffer + arg->len, "%02x",
1869                         rt->rt6i_src.addr.s6_addr[i]);
1870                 arg->len += 2;
1871         }
1872         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1873                             rt->rt6i_src.plen);
1874 #else
1875         sprintf(arg->buffer + arg->len,
1876                 "00000000000000000000000000000000 00 ");
1877         arg->len += 36;
1878 #endif
1879
1880         if (rt->rt6i_nexthop) {
1881                 for (i=0; i<16; i++) {
1882                         sprintf(arg->buffer + arg->len, "%02x",
1883                                 rt->rt6i_nexthop->primary_key[i]);
1884                         arg->len += 2;
1885                 }
1886         } else {
1887                 sprintf(arg->buffer + arg->len,
1888                         "00000000000000000000000000000000");
1889                 arg->len += 32;
1890         }
1891         arg->len += sprintf(arg->buffer + arg->len,
1892                             " %08x %08x %08x %08x %8s\n",
1893                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1894                             rt->u.dst.__use, rt->rt6i_flags, 
1895                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1896         return 0;
1897 }
1898
1899 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1900 {
1901         struct rt6_proc_arg arg;
1902         arg.buffer = buffer;
1903         arg.offset = offset;
1904         arg.length = length;
1905         arg.skip = 0;
1906         arg.len = 0;
1907
1908         read_lock_bh(&rt6_lock);
1909         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1910         read_unlock_bh(&rt6_lock);
1911
1912         *start = buffer;
1913         if (offset)
1914                 *start += offset % RT6_INFO_LEN;
1915
1916         arg.len -= offset % RT6_INFO_LEN;
1917
1918         if (arg.len > length)
1919                 arg.len = length;
1920         if (arg.len < 0)
1921                 arg.len = 0;
1922
1923         return arg.len;
1924 }
1925
1926 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1927 {
1928         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1929                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1930                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1931                       rt6_stats.fib_rt_cache,
1932                       atomic_read(&ip6_dst_ops.entries),
1933                       rt6_stats.fib_discarded_routes);
1934
1935         return 0;
1936 }
1937
1938 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1939 {
1940         return single_open(file, rt6_stats_seq_show, NULL);
1941 }
1942
1943 static struct file_operations rt6_stats_seq_fops = {
1944         .owner   = THIS_MODULE,
1945         .open    = rt6_stats_seq_open,
1946         .read    = seq_read,
1947         .llseek  = seq_lseek,
1948         .release = single_release,
1949 };
1950 #endif  /* CONFIG_PROC_FS */
1951
1952 #ifdef CONFIG_SYSCTL
1953
1954 static int flush_delay;
1955
1956 static
1957 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1958                               void __user *buffer, size_t *lenp, loff_t *ppos)
1959 {
1960         if (write) {
1961                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1962                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
1963                 return 0;
1964         } else
1965                 return -EINVAL;
1966 }
1967
1968 ctl_table ipv6_route_table[] = {
1969         {
1970                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
1971                 .procname       =       "flush",
1972                 .data           =       &flush_delay,
1973                 .maxlen         =       sizeof(int),
1974                 .mode           =       0200,
1975                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
1976         },
1977         {
1978                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
1979                 .procname       =       "gc_thresh",
1980                 .data           =       &ip6_dst_ops.gc_thresh,
1981                 .maxlen         =       sizeof(int),
1982                 .mode           =       0644,
1983                 .proc_handler   =       &proc_dointvec,
1984         },
1985         {
1986                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
1987                 .procname       =       "max_size",
1988                 .data           =       &ip6_rt_max_size,
1989                 .maxlen         =       sizeof(int),
1990                 .mode           =       0644,
1991                 .proc_handler   =       &proc_dointvec,
1992         },
1993         {
1994                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
1995                 .procname       =       "gc_min_interval",
1996                 .data           =       &ip6_rt_gc_min_interval,
1997                 .maxlen         =       sizeof(int),
1998                 .mode           =       0644,
1999                 .proc_handler   =       &proc_dointvec_jiffies,
2000                 .strategy       =       &sysctl_jiffies,
2001         },
2002         {
2003                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2004                 .procname       =       "gc_timeout",
2005                 .data           =       &ip6_rt_gc_timeout,
2006                 .maxlen         =       sizeof(int),
2007                 .mode           =       0644,
2008                 .proc_handler   =       &proc_dointvec_jiffies,
2009                 .strategy       =       &sysctl_jiffies,
2010         },
2011         {
2012                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2013                 .procname       =       "gc_interval",
2014                 .data           =       &ip6_rt_gc_interval,
2015                 .maxlen         =       sizeof(int),
2016                 .mode           =       0644,
2017                 .proc_handler   =       &proc_dointvec_jiffies,
2018                 .strategy       =       &sysctl_jiffies,
2019         },
2020         {
2021                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2022                 .procname       =       "gc_elasticity",
2023                 .data           =       &ip6_rt_gc_elasticity,
2024                 .maxlen         =       sizeof(int),
2025                 .mode           =       0644,
2026                 .proc_handler   =       &proc_dointvec_jiffies,
2027                 .strategy       =       &sysctl_jiffies,
2028         },
2029         {
2030                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2031                 .procname       =       "mtu_expires",
2032                 .data           =       &ip6_rt_mtu_expires,
2033                 .maxlen         =       sizeof(int),
2034                 .mode           =       0644,
2035                 .proc_handler   =       &proc_dointvec_jiffies,
2036                 .strategy       =       &sysctl_jiffies,
2037         },
2038         {
2039                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2040                 .procname       =       "min_adv_mss",
2041                 .data           =       &ip6_rt_min_advmss,
2042                 .maxlen         =       sizeof(int),
2043                 .mode           =       0644,
2044                 .proc_handler   =       &proc_dointvec_jiffies,
2045                 .strategy       =       &sysctl_jiffies,
2046         },
2047         {
2048                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2049                 .procname       =       "gc_min_interval_ms",
2050                 .data           =       &ip6_rt_gc_min_interval,
2051                 .maxlen         =       sizeof(int),
2052                 .mode           =       0644,
2053                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2054                 .strategy       =       &sysctl_ms_jiffies,
2055         },
2056         { .ctl_name = 0 }
2057 };
2058
2059 #endif
2060
2061 void __init ip6_route_init(void)
2062 {
2063         struct proc_dir_entry *p;
2064
2065         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2066                                                      sizeof(struct rt6_info),
2067                                                      0, SLAB_HWCACHE_ALIGN,
2068                                                      NULL, NULL);
2069         if (!ip6_dst_ops.kmem_cachep)
2070                 panic("cannot create ip6_dst_cache");
2071
2072         fib6_init();
2073 #ifdef  CONFIG_PROC_FS
2074         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2075         if (p)
2076                 p->owner = THIS_MODULE;
2077
2078         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2079 #endif
2080 #ifdef CONFIG_XFRM
2081         xfrm6_init();
2082 #endif
2083 }
2084
2085 void ip6_route_cleanup(void)
2086 {
2087 #ifdef CONFIG_PROC_FS
2088         proc_net_remove("ipv6_route");
2089         proc_net_remove("rt6_stats");
2090 #endif
2091 #ifdef CONFIG_XFRM
2092         xfrm6_fini();
2093 #endif
2094         rt6_ifdown(NULL);
2095         fib6_gc_cleanup();
2096         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2097 }