[NETLINK]: Convert netlink users to use group numbers instead of bitmasks
[linux-2.6.git] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <linux/config.h>
19 #include <asm/uaccess.h>
20 #include <asm/system.h>
21 #include <linux/bitops.h>
22 #include <linux/types.h>
23 #include <linux/kernel.h>
24 #include <linux/jiffies.h>
25 #include <linux/mm.h>
26 #include <linux/string.h>
27 #include <linux/socket.h>
28 #include <linux/sockios.h>
29 #include <linux/errno.h>
30 #include <linux/in.h>
31 #include <linux/inet.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/netlink.h>
37 #include <linux/init.h>
38
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46
47 #include "fib_lookup.h"
48
49 #define FSprintk(a...)
50
51 static DEFINE_RWLOCK(fib_info_lock);
52 static struct hlist_head *fib_info_hash;
53 static struct hlist_head *fib_info_laddrhash;
54 static unsigned int fib_hash_size;
55 static unsigned int fib_info_cnt;
56
57 #define DEVINDEX_HASHBITS 8
58 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
59 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
60
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a brace scope that declares
 * `nhsel` (the nexthop index) and `nh` (pointer to the current nexthop,
 * const for for_nexthops, writable for change_nexthops) and start a loop
 * over the nexthops of `fi`.  The scope opened here must be closed with
 * endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {						\
	int nhsel;							\
	const struct fib_nh *nh;					\
	for (nhsel = 0, nh = (fi)->fib_nh;				\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#define change_nexthops(fi) {						\
	int nhsel;							\
	struct fib_nh *nh;						\
	for (nhsel = 0, nh = (struct fib_nh *)((fi)->fib_nh);		\
	     nhsel < (fi)->fib_nhs;					\
	     nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath the loop runs exactly once over the first nexthop;
 * the hope is that gcc optimizes the dummy loop away.
 */

#define for_nexthops(fi) {						\
	int nhsel = 0;							\
	const struct fib_nh *nh = (fi)->fib_nh;				\
	for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {						\
	int nhsel = 0;							\
	struct fib_nh *nh = (struct fib_nh *)((fi)->fib_nh);		\
	for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the brace scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
84
85
86 static struct 
87 {
88         int     error;
89         u8      scope;
90 } fib_props[RTA_MAX + 1] = {
91         {
92                 .error  = 0,
93                 .scope  = RT_SCOPE_NOWHERE,
94         },      /* RTN_UNSPEC */
95         {
96                 .error  = 0,
97                 .scope  = RT_SCOPE_UNIVERSE,
98         },      /* RTN_UNICAST */
99         {
100                 .error  = 0,
101                 .scope  = RT_SCOPE_HOST,
102         },      /* RTN_LOCAL */
103         {
104                 .error  = 0,
105                 .scope  = RT_SCOPE_LINK,
106         },      /* RTN_BROADCAST */
107         {
108                 .error  = 0,
109                 .scope  = RT_SCOPE_LINK,
110         },      /* RTN_ANYCAST */
111         {
112                 .error  = 0,
113                 .scope  = RT_SCOPE_UNIVERSE,
114         },      /* RTN_MULTICAST */
115         {
116                 .error  = -EINVAL,
117                 .scope  = RT_SCOPE_UNIVERSE,
118         },      /* RTN_BLACKHOLE */
119         {
120                 .error  = -EHOSTUNREACH,
121                 .scope  = RT_SCOPE_UNIVERSE,
122         },      /* RTN_UNREACHABLE */
123         {
124                 .error  = -EACCES,
125                 .scope  = RT_SCOPE_UNIVERSE,
126         },      /* RTN_PROHIBIT */
127         {
128                 .error  = -EAGAIN,
129                 .scope  = RT_SCOPE_UNIVERSE,
130         },      /* RTN_THROW */
131         {
132                 .error  = -EINVAL,
133                 .scope  = RT_SCOPE_NOWHERE,
134         },      /* RTN_NAT */
135         {
136                 .error  = -EINVAL,
137                 .scope  = RT_SCOPE_NOWHERE,
138         },      /* RTN_XRESOLVE */
139 };
140
141
142 /* Release a nexthop info record */
143
144 void free_fib_info(struct fib_info *fi)
145 {
146         if (fi->fib_dead == 0) {
147                 printk("Freeing alive fib_info %p\n", fi);
148                 return;
149         }
150         change_nexthops(fi) {
151                 if (nh->nh_dev)
152                         dev_put(nh->nh_dev);
153                 nh->nh_dev = NULL;
154         } endfor_nexthops(fi);
155         fib_info_cnt--;
156         kfree(fi);
157 }
158
159 void fib_release_info(struct fib_info *fi)
160 {
161         write_lock(&fib_info_lock);
162         if (fi && --fi->fib_treeref == 0) {
163                 hlist_del(&fi->fib_hash);
164                 if (fi->fib_prefsrc)
165                         hlist_del(&fi->fib_lhash);
166                 change_nexthops(fi) {
167                         if (!nh->nh_dev)
168                                 continue;
169                         hlist_del(&nh->nh_hash);
170                 } endfor_nexthops(fi)
171                 fi->fib_dead = 1;
172                 fib_info_put(fi);
173         }
174         write_unlock(&fib_info_lock);
175 }
176
177 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
178 {
179         const struct fib_nh *onh = ofi->fib_nh;
180
181         for_nexthops(fi) {
182                 if (nh->nh_oif != onh->nh_oif ||
183                     nh->nh_gw  != onh->nh_gw ||
184                     nh->nh_scope != onh->nh_scope ||
185 #ifdef CONFIG_IP_ROUTE_MULTIPATH
186                     nh->nh_weight != onh->nh_weight ||
187 #endif
188 #ifdef CONFIG_NET_CLS_ROUTE
189                     nh->nh_tclassid != onh->nh_tclassid ||
190 #endif
191                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
192                         return -1;
193                 onh++;
194         } endfor_nexthops(fi);
195         return 0;
196 }
197
198 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
199 {
200         unsigned int mask = (fib_hash_size - 1);
201         unsigned int val = fi->fib_nhs;
202
203         val ^= fi->fib_protocol;
204         val ^= fi->fib_prefsrc;
205         val ^= fi->fib_priority;
206
207         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
208 }
209
210 static struct fib_info *fib_find_info(const struct fib_info *nfi)
211 {
212         struct hlist_head *head;
213         struct hlist_node *node;
214         struct fib_info *fi;
215         unsigned int hash;
216
217         hash = fib_info_hashfn(nfi);
218         head = &fib_info_hash[hash];
219
220         hlist_for_each_entry(fi, node, head, fib_hash) {
221                 if (fi->fib_nhs != nfi->fib_nhs)
222                         continue;
223                 if (nfi->fib_protocol == fi->fib_protocol &&
224                     nfi->fib_prefsrc == fi->fib_prefsrc &&
225                     nfi->fib_priority == fi->fib_priority &&
226                     memcmp(nfi->fib_metrics, fi->fib_metrics,
227                            sizeof(fi->fib_metrics)) == 0 &&
228                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
229                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
230                         return fi;
231         }
232
233         return NULL;
234 }
235
236 static inline unsigned int fib_devindex_hashfn(unsigned int val)
237 {
238         unsigned int mask = DEVINDEX_HASHSIZE - 1;
239
240         return (val ^
241                 (val >> DEVINDEX_HASHBITS) ^
242                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
243 }
244
245 /* Check, that the gateway is already configured.
246    Used only by redirect accept routine.
247  */
248
249 int ip_fib_check_default(u32 gw, struct net_device *dev)
250 {
251         struct hlist_head *head;
252         struct hlist_node *node;
253         struct fib_nh *nh;
254         unsigned int hash;
255
256         read_lock(&fib_info_lock);
257
258         hash = fib_devindex_hashfn(dev->ifindex);
259         head = &fib_info_devhash[hash];
260         hlist_for_each_entry(nh, node, head, nh_hash) {
261                 if (nh->nh_dev == dev &&
262                     nh->nh_gw == gw &&
263                     !(nh->nh_flags&RTNH_F_DEAD)) {
264                         read_unlock(&fib_info_lock);
265                         return 0;
266                 }
267         }
268
269         read_unlock(&fib_info_lock);
270
271         return -1;
272 }
273
274 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
275                int z, int tb_id,
276                struct nlmsghdr *n, struct netlink_skb_parms *req)
277 {
278         struct sk_buff *skb;
279         u32 pid = req ? req->pid : n->nlmsg_pid;
280         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
281
282         skb = alloc_skb(size, GFP_KERNEL);
283         if (!skb)
284                 return;
285
286         if (fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id,
287                           fa->fa_type, fa->fa_scope, &key, z,
288                           fa->fa_tos,
289                           fa->fa_info, 0) < 0) {
290                 kfree_skb(skb);
291                 return;
292         }
293         NETLINK_CB(skb).dst_group = RTNLGRP_IPV4_ROUTE;
294         if (n->nlmsg_flags&NLM_F_ECHO)
295                 atomic_inc(&skb->users);
296         netlink_broadcast(rtnl, skb, pid, RTNLGRP_IPV4_ROUTE, GFP_KERNEL);
297         if (n->nlmsg_flags&NLM_F_ECHO)
298                 netlink_unicast(rtnl, skb, pid, MSG_DONTWAIT);
299 }
300
301 /* Return the first fib alias matching TOS with
302  * priority less than or equal to PRIO.
303  */
304 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
305 {
306         if (fah) {
307                 struct fib_alias *fa;
308                 list_for_each_entry(fa, fah, fa_list) {
309                         if (fa->fa_tos > tos)
310                                 continue;
311                         if (fa->fa_info->fib_priority >= prio ||
312                             fa->fa_tos < tos)
313                                 return fa;
314                 }
315         }
316         return NULL;
317 }
318
319 int fib_detect_death(struct fib_info *fi, int order,
320                      struct fib_info **last_resort, int *last_idx, int *dflt)
321 {
322         struct neighbour *n;
323         int state = NUD_NONE;
324
325         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
326         if (n) {
327                 state = n->nud_state;
328                 neigh_release(n);
329         }
330         if (state==NUD_REACHABLE)
331                 return 0;
332         if ((state&NUD_VALID) && order != *dflt)
333                 return 0;
334         if ((state&NUD_VALID) ||
335             (*last_idx<0 && order > *dflt)) {
336                 *last_resort = fi;
337                 *last_idx = order;
338         }
339         return 1;
340 }
341
342 #ifdef CONFIG_IP_ROUTE_MULTIPATH
343
344 static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type)
345 {
346         while (RTA_OK(attr,attrlen)) {
347                 if (attr->rta_type == type)
348                         return *(u32*)RTA_DATA(attr);
349                 attr = RTA_NEXT(attr, attrlen);
350         }
351         return 0;
352 }
353
354 static int
355 fib_count_nexthops(struct rtattr *rta)
356 {
357         int nhs = 0;
358         struct rtnexthop *nhp = RTA_DATA(rta);
359         int nhlen = RTA_PAYLOAD(rta);
360
361         while (nhlen >= (int)sizeof(struct rtnexthop)) {
362                 if ((nhlen -= nhp->rtnh_len) < 0)
363                         return 0;
364                 nhs++;
365                 nhp = RTNH_NEXT(nhp);
366         };
367         return nhs;
368 }
369
370 static int
371 fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r)
372 {
373         struct rtnexthop *nhp = RTA_DATA(rta);
374         int nhlen = RTA_PAYLOAD(rta);
375
376         change_nexthops(fi) {
377                 int attrlen = nhlen - sizeof(struct rtnexthop);
378                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
379                         return -EINVAL;
380                 nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags;
381                 nh->nh_oif = nhp->rtnh_ifindex;
382                 nh->nh_weight = nhp->rtnh_hops + 1;
383                 if (attrlen) {
384                         nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
385 #ifdef CONFIG_NET_CLS_ROUTE
386                         nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
387 #endif
388                 }
389                 nhp = RTNH_NEXT(nhp);
390         } endfor_nexthops(fi);
391         return 0;
392 }
393
394 #endif
395
396 int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta,
397                  struct fib_info *fi)
398 {
399 #ifdef CONFIG_IP_ROUTE_MULTIPATH
400         struct rtnexthop *nhp;
401         int nhlen;
402 #endif
403
404         if (rta->rta_priority &&
405             *rta->rta_priority != fi->fib_priority)
406                 return 1;
407
408         if (rta->rta_oif || rta->rta_gw) {
409                 if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) &&
410                     (!rta->rta_gw  || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0))
411                         return 0;
412                 return 1;
413         }
414
415 #ifdef CONFIG_IP_ROUTE_MULTIPATH
416         if (rta->rta_mp == NULL)
417                 return 0;
418         nhp = RTA_DATA(rta->rta_mp);
419         nhlen = RTA_PAYLOAD(rta->rta_mp);
420         
421         for_nexthops(fi) {
422                 int attrlen = nhlen - sizeof(struct rtnexthop);
423                 u32 gw;
424
425                 if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0)
426                         return -EINVAL;
427                 if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif)
428                         return 1;
429                 if (attrlen) {
430                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY);
431                         if (gw && gw != nh->nh_gw)
432                                 return 1;
433 #ifdef CONFIG_NET_CLS_ROUTE
434                         gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW);
435                         if (gw && gw != nh->nh_tclassid)
436                                 return 1;
437 #endif
438                 }
439                 nhp = RTNH_NEXT(nhp);
440         } endfor_nexthops(fi);
441 #endif
442         return 0;
443 }
444
445
446 /*
447    Picture
448    -------
449
450    Semantics of nexthop are very messy for historical reasons.
451    We have to take into account, that:
452    a) gateway can be actually local interface address,
453       so that gatewayed route is direct.
454    b) gateway must be on-link address, possibly
455       described not by an ifaddr, but also by a direct route.
456    c) If both gateway and interface are specified, they should not
457       contradict.
458    d) If we use tunnel routes, gateway could be not on-link.
459
460    Attempt to reconcile all of these (alas, self-contradictory) conditions
461    results in pretty ugly and hairy code with obscure logic.
462
463    I chose to generalize it instead, so that the size
464    of code does not increase practically, but it becomes
465    much more general.
466    Every prefix is assigned a "scope" value: "host" is local address,
467    "link" is direct route,
468    [ ... "site" ... "interior" ... ]
469    and "universe" is true gateway route with global meaning.
470
471    Every prefix refers to a set of "nexthop"s (gw, oif),
472    where gw must have narrower scope. This recursion stops
473    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474    which means that gw is forced to be on link.
475
476    Code is still hairy, but now it is apparently logically
477    consistent and very flexible. F.e. as by-product it allows
478    to co-exist in peace independent exterior and interior
479    routing processes.
480
481    Normally it looks as following.
482
483    {universe prefix}  -> (gw, oif) [scope link]
484                           |
485                           |-> {link prefix} -> (gw, oif) [scope local]
486                                                 |
487                                                 |-> {local prefix} (terminal node)
488  */
489
490 static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh)
491 {
492         int err;
493
494         if (nh->nh_gw) {
495                 struct fib_result res;
496
497 #ifdef CONFIG_IP_ROUTE_PERVASIVE
498                 if (nh->nh_flags&RTNH_F_PERVASIVE)
499                         return 0;
500 #endif
501                 if (nh->nh_flags&RTNH_F_ONLINK) {
502                         struct net_device *dev;
503
504                         if (r->rtm_scope >= RT_SCOPE_LINK)
505                                 return -EINVAL;
506                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
507                                 return -EINVAL;
508                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
509                                 return -ENODEV;
510                         if (!(dev->flags&IFF_UP))
511                                 return -ENETDOWN;
512                         nh->nh_dev = dev;
513                         dev_hold(dev);
514                         nh->nh_scope = RT_SCOPE_LINK;
515                         return 0;
516                 }
517                 {
518                         struct flowi fl = { .nl_u = { .ip4_u =
519                                                       { .daddr = nh->nh_gw,
520                                                         .scope = r->rtm_scope + 1 } },
521                                             .oif = nh->nh_oif };
522
523                         /* It is not necessary, but requires a bit of thinking */
524                         if (fl.fl4_scope < RT_SCOPE_LINK)
525                                 fl.fl4_scope = RT_SCOPE_LINK;
526                         if ((err = fib_lookup(&fl, &res)) != 0)
527                                 return err;
528                 }
529                 err = -EINVAL;
530                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
531                         goto out;
532                 nh->nh_scope = res.scope;
533                 nh->nh_oif = FIB_RES_OIF(res);
534                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
535                         goto out;
536                 dev_hold(nh->nh_dev);
537                 err = -ENETDOWN;
538                 if (!(nh->nh_dev->flags & IFF_UP))
539                         goto out;
540                 err = 0;
541 out:
542                 fib_res_put(&res);
543                 return err;
544         } else {
545                 struct in_device *in_dev;
546
547                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
548                         return -EINVAL;
549
550                 in_dev = inetdev_by_index(nh->nh_oif);
551                 if (in_dev == NULL)
552                         return -ENODEV;
553                 if (!(in_dev->dev->flags&IFF_UP)) {
554                         in_dev_put(in_dev);
555                         return -ENETDOWN;
556                 }
557                 nh->nh_dev = in_dev->dev;
558                 dev_hold(nh->nh_dev);
559                 nh->nh_scope = RT_SCOPE_HOST;
560                 in_dev_put(in_dev);
561         }
562         return 0;
563 }
564
565 static inline unsigned int fib_laddr_hashfn(u32 val)
566 {
567         unsigned int mask = (fib_hash_size - 1);
568
569         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
570 }
571
572 static struct hlist_head *fib_hash_alloc(int bytes)
573 {
574         if (bytes <= PAGE_SIZE)
575                 return kmalloc(bytes, GFP_KERNEL);
576         else
577                 return (struct hlist_head *)
578                         __get_free_pages(GFP_KERNEL, get_order(bytes));
579 }
580
581 static void fib_hash_free(struct hlist_head *hash, int bytes)
582 {
583         if (!hash)
584                 return;
585
586         if (bytes <= PAGE_SIZE)
587                 kfree(hash);
588         else
589                 free_pages((unsigned long) hash, get_order(bytes));
590 }
591
592 static void fib_hash_move(struct hlist_head *new_info_hash,
593                           struct hlist_head *new_laddrhash,
594                           unsigned int new_size)
595 {
596         struct hlist_head *old_info_hash, *old_laddrhash;
597         unsigned int old_size = fib_hash_size;
598         unsigned int i, bytes;
599
600         write_lock(&fib_info_lock);
601         old_info_hash = fib_info_hash;
602         old_laddrhash = fib_info_laddrhash;
603         fib_hash_size = new_size;
604
605         for (i = 0; i < old_size; i++) {
606                 struct hlist_head *head = &fib_info_hash[i];
607                 struct hlist_node *node, *n;
608                 struct fib_info *fi;
609
610                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
611                         struct hlist_head *dest;
612                         unsigned int new_hash;
613
614                         hlist_del(&fi->fib_hash);
615
616                         new_hash = fib_info_hashfn(fi);
617                         dest = &new_info_hash[new_hash];
618                         hlist_add_head(&fi->fib_hash, dest);
619                 }
620         }
621         fib_info_hash = new_info_hash;
622
623         for (i = 0; i < old_size; i++) {
624                 struct hlist_head *lhead = &fib_info_laddrhash[i];
625                 struct hlist_node *node, *n;
626                 struct fib_info *fi;
627
628                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
629                         struct hlist_head *ldest;
630                         unsigned int new_hash;
631
632                         hlist_del(&fi->fib_lhash);
633
634                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
635                         ldest = &new_laddrhash[new_hash];
636                         hlist_add_head(&fi->fib_lhash, ldest);
637                 }
638         }
639         fib_info_laddrhash = new_laddrhash;
640
641         write_unlock(&fib_info_lock);
642
643         bytes = old_size * sizeof(struct hlist_head *);
644         fib_hash_free(old_info_hash, bytes);
645         fib_hash_free(old_laddrhash, bytes);
646 }
647
648 struct fib_info *
649 fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
650                 const struct nlmsghdr *nlh, int *errp)
651 {
652         int err;
653         struct fib_info *fi = NULL;
654         struct fib_info *ofi;
655 #ifdef CONFIG_IP_ROUTE_MULTIPATH
656         int nhs = 1;
657 #else
658         const int nhs = 1;
659 #endif
660 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
661         u32 mp_alg = IP_MP_ALG_NONE;
662 #endif
663
664         /* Fast check to catch the most weird cases */
665         if (fib_props[r->rtm_type].scope > r->rtm_scope)
666                 goto err_inval;
667
668 #ifdef CONFIG_IP_ROUTE_MULTIPATH
669         if (rta->rta_mp) {
670                 nhs = fib_count_nexthops(rta->rta_mp);
671                 if (nhs == 0)
672                         goto err_inval;
673         }
674 #endif
675 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
676         if (rta->rta_mp_alg) {
677                 mp_alg = *rta->rta_mp_alg;
678
679                 if (mp_alg < IP_MP_ALG_NONE ||
680                     mp_alg > IP_MP_ALG_MAX)
681                         goto err_inval;
682         }
683 #endif
684
685         err = -ENOBUFS;
686         if (fib_info_cnt >= fib_hash_size) {
687                 unsigned int new_size = fib_hash_size << 1;
688                 struct hlist_head *new_info_hash;
689                 struct hlist_head *new_laddrhash;
690                 unsigned int bytes;
691
692                 if (!new_size)
693                         new_size = 1;
694                 bytes = new_size * sizeof(struct hlist_head *);
695                 new_info_hash = fib_hash_alloc(bytes);
696                 new_laddrhash = fib_hash_alloc(bytes);
697                 if (!new_info_hash || !new_laddrhash) {
698                         fib_hash_free(new_info_hash, bytes);
699                         fib_hash_free(new_laddrhash, bytes);
700                 } else {
701                         memset(new_info_hash, 0, bytes);
702                         memset(new_laddrhash, 0, bytes);
703
704                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
705                 }
706
707                 if (!fib_hash_size)
708                         goto failure;
709         }
710
711         fi = kmalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
712         if (fi == NULL)
713                 goto failure;
714         fib_info_cnt++;
715         memset(fi, 0, sizeof(*fi)+nhs*sizeof(struct fib_nh));
716
717         fi->fib_protocol = r->rtm_protocol;
718
719         fi->fib_nhs = nhs;
720         change_nexthops(fi) {
721                 nh->nh_parent = fi;
722         } endfor_nexthops(fi)
723
724         fi->fib_flags = r->rtm_flags;
725         if (rta->rta_priority)
726                 fi->fib_priority = *rta->rta_priority;
727         if (rta->rta_mx) {
728                 int attrlen = RTA_PAYLOAD(rta->rta_mx);
729                 struct rtattr *attr = RTA_DATA(rta->rta_mx);
730
731                 while (RTA_OK(attr, attrlen)) {
732                         unsigned flavor = attr->rta_type;
733                         if (flavor) {
734                                 if (flavor > RTAX_MAX)
735                                         goto err_inval;
736                                 fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr);
737                         }
738                         attr = RTA_NEXT(attr, attrlen);
739                 }
740         }
741         if (rta->rta_prefsrc)
742                 memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4);
743
744         if (rta->rta_mp) {
745 #ifdef CONFIG_IP_ROUTE_MULTIPATH
746                 if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0)
747                         goto failure;
748                 if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif)
749                         goto err_inval;
750                 if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4))
751                         goto err_inval;
752 #ifdef CONFIG_NET_CLS_ROUTE
753                 if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4))
754                         goto err_inval;
755 #endif
756 #else
757                 goto err_inval;
758 #endif
759         } else {
760                 struct fib_nh *nh = fi->fib_nh;
761                 if (rta->rta_oif)
762                         nh->nh_oif = *rta->rta_oif;
763                 if (rta->rta_gw)
764                         memcpy(&nh->nh_gw, rta->rta_gw, 4);
765 #ifdef CONFIG_NET_CLS_ROUTE
766                 if (rta->rta_flow)
767                         memcpy(&nh->nh_tclassid, rta->rta_flow, 4);
768 #endif
769                 nh->nh_flags = r->rtm_flags;
770 #ifdef CONFIG_IP_ROUTE_MULTIPATH
771                 nh->nh_weight = 1;
772 #endif
773         }
774
775 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
776         fi->fib_mp_alg = mp_alg;
777 #endif
778
779         if (fib_props[r->rtm_type].error) {
780                 if (rta->rta_gw || rta->rta_oif || rta->rta_mp)
781                         goto err_inval;
782                 goto link_it;
783         }
784
785         if (r->rtm_scope > RT_SCOPE_HOST)
786                 goto err_inval;
787
788         if (r->rtm_scope == RT_SCOPE_HOST) {
789                 struct fib_nh *nh = fi->fib_nh;
790
791                 /* Local address is added. */
792                 if (nhs != 1 || nh->nh_gw)
793                         goto err_inval;
794                 nh->nh_scope = RT_SCOPE_NOWHERE;
795                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
796                 err = -ENODEV;
797                 if (nh->nh_dev == NULL)
798                         goto failure;
799         } else {
800                 change_nexthops(fi) {
801                         if ((err = fib_check_nh(r, fi, nh)) != 0)
802                                 goto failure;
803                 } endfor_nexthops(fi)
804         }
805
806         if (fi->fib_prefsrc) {
807                 if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL ||
808                     memcmp(&fi->fib_prefsrc, rta->rta_dst, 4))
809                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
810                                 goto err_inval;
811         }
812
813 link_it:
814         if ((ofi = fib_find_info(fi)) != NULL) {
815                 fi->fib_dead = 1;
816                 free_fib_info(fi);
817                 ofi->fib_treeref++;
818                 return ofi;
819         }
820
821         fi->fib_treeref++;
822         atomic_inc(&fi->fib_clntref);
823         write_lock(&fib_info_lock);
824         hlist_add_head(&fi->fib_hash,
825                        &fib_info_hash[fib_info_hashfn(fi)]);
826         if (fi->fib_prefsrc) {
827                 struct hlist_head *head;
828
829                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
830                 hlist_add_head(&fi->fib_lhash, head);
831         }
832         change_nexthops(fi) {
833                 struct hlist_head *head;
834                 unsigned int hash;
835
836                 if (!nh->nh_dev)
837                         continue;
838                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
839                 head = &fib_info_devhash[hash];
840                 hlist_add_head(&nh->nh_hash, head);
841         } endfor_nexthops(fi)
842         write_unlock(&fib_info_lock);
843         return fi;
844
845 err_inval:
846         err = -EINVAL;
847
848 failure:
849         *errp = err;
850         if (fi) {
851                 fi->fib_dead = 1;
852                 free_fib_info(fi);
853         }
854         return NULL;
855 }
856
857 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
858                        struct fib_result *res, __u32 zone, __u32 mask, 
859                         int prefixlen)
860 {
861         struct fib_alias *fa;
862         int nh_sel = 0;
863
864         list_for_each_entry(fa, head, fa_list) {
865                 int err;
866
867                 if (fa->fa_tos &&
868                     fa->fa_tos != flp->fl4_tos)
869                         continue;
870
871                 if (fa->fa_scope < flp->fl4_scope)
872                         continue;
873
874                 fa->fa_state |= FA_S_ACCESSED;
875
876                 err = fib_props[fa->fa_type].error;
877                 if (err == 0) {
878                         struct fib_info *fi = fa->fa_info;
879
880                         if (fi->fib_flags & RTNH_F_DEAD)
881                                 continue;
882
883                         switch (fa->fa_type) {
884                         case RTN_UNICAST:
885                         case RTN_LOCAL:
886                         case RTN_BROADCAST:
887                         case RTN_ANYCAST:
888                         case RTN_MULTICAST:
889                                 for_nexthops(fi) {
890                                         if (nh->nh_flags&RTNH_F_DEAD)
891                                                 continue;
892                                         if (!flp->oif || flp->oif == nh->nh_oif)
893                                                 break;
894                                 }
895 #ifdef CONFIG_IP_ROUTE_MULTIPATH
896                                 if (nhsel < fi->fib_nhs) {
897                                         nh_sel = nhsel;
898                                         goto out_fill_res;
899                                 }
900 #else
901                                 if (nhsel < 1) {
902                                         goto out_fill_res;
903                                 }
904 #endif
905                                 endfor_nexthops(fi);
906                                 continue;
907
908                         default:
909                                 printk(KERN_DEBUG "impossible 102\n");
910                                 return -EINVAL;
911                         };
912                 }
913                 return err;
914         }
915         return 1;
916
917 out_fill_res:
918         res->prefixlen = prefixlen;
919         res->nh_sel = nh_sel;
920         res->type = fa->fa_type;
921         res->scope = fa->fa_scope;
922         res->fi = fa->fa_info;
923 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
924         res->netmask = mask;
925         res->network = zone &
926                 (0xFFFFFFFF >> (32 - prefixlen));
927 #endif
928         atomic_inc(&res->fi->fib_clntref);
929         return 0;
930 }
931
932 /* Find appropriate source address to this destination */
933
934 u32 __fib_res_prefsrc(struct fib_result *res)
935 {
936         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
937 }
938
939 int
940 fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
941               u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos,
942               struct fib_info *fi, unsigned int flags)
943 {
944         struct rtmsg *rtm;
945         struct nlmsghdr  *nlh;
946         unsigned char    *b = skb->tail;
947
948         nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags);
949         rtm = NLMSG_DATA(nlh);
950         rtm->rtm_family = AF_INET;
951         rtm->rtm_dst_len = dst_len;
952         rtm->rtm_src_len = 0;
953         rtm->rtm_tos = tos;
954         rtm->rtm_table = tb_id;
955         rtm->rtm_type = type;
956         rtm->rtm_flags = fi->fib_flags;
957         rtm->rtm_scope = scope;
958         if (rtm->rtm_dst_len)
959                 RTA_PUT(skb, RTA_DST, 4, dst);
960         rtm->rtm_protocol = fi->fib_protocol;
961         if (fi->fib_priority)
962                 RTA_PUT(skb, RTA_PRIORITY, 4, &fi->fib_priority);
963 #ifdef CONFIG_NET_CLS_ROUTE
964         if (fi->fib_nh[0].nh_tclassid)
965                 RTA_PUT(skb, RTA_FLOW, 4, &fi->fib_nh[0].nh_tclassid);
966 #endif
967         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
968                 goto rtattr_failure;
969         if (fi->fib_prefsrc)
970                 RTA_PUT(skb, RTA_PREFSRC, 4, &fi->fib_prefsrc);
971         if (fi->fib_nhs == 1) {
972                 if (fi->fib_nh->nh_gw)
973                         RTA_PUT(skb, RTA_GATEWAY, 4, &fi->fib_nh->nh_gw);
974                 if (fi->fib_nh->nh_oif)
975                         RTA_PUT(skb, RTA_OIF, sizeof(int), &fi->fib_nh->nh_oif);
976         }
977 #ifdef CONFIG_IP_ROUTE_MULTIPATH
978         if (fi->fib_nhs > 1) {
979                 struct rtnexthop *nhp;
980                 struct rtattr *mp_head;
981                 if (skb_tailroom(skb) <= RTA_SPACE(0))
982                         goto rtattr_failure;
983                 mp_head = (struct rtattr*)skb_put(skb, RTA_SPACE(0));
984
985                 for_nexthops(fi) {
986                         if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
987                                 goto rtattr_failure;
988                         nhp = (struct rtnexthop*)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
989                         nhp->rtnh_flags = nh->nh_flags & 0xFF;
990                         nhp->rtnh_hops = nh->nh_weight-1;
991                         nhp->rtnh_ifindex = nh->nh_oif;
992                         if (nh->nh_gw)
993                                 RTA_PUT(skb, RTA_GATEWAY, 4, &nh->nh_gw);
994                         nhp->rtnh_len = skb->tail - (unsigned char*)nhp;
995                 } endfor_nexthops(fi);
996                 mp_head->rta_type = RTA_MULTIPATH;
997                 mp_head->rta_len = skb->tail - (u8*)mp_head;
998         }
999 #endif
1000         nlh->nlmsg_len = skb->tail - b;
1001         return skb->len;
1002
1003 nlmsg_failure:
1004 rtattr_failure:
1005         skb_trim(skb, b - skb->data);
1006         return -1;
1007 }
1008
1009 #ifndef CONFIG_IP_NOSIOCRT
1010
1011 int
1012 fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm,
1013                     struct kern_rta *rta, struct rtentry *r)
1014 {
1015         int    plen;
1016         u32    *ptr;
1017
1018         memset(rtm, 0, sizeof(*rtm));
1019         memset(rta, 0, sizeof(*rta));
1020
1021         if (r->rt_dst.sa_family != AF_INET)
1022                 return -EAFNOSUPPORT;
1023
1024         /* Check mask for validity:
1025            a) it must be contiguous.
1026            b) destination must have all host bits clear.
1027            c) if application forgot to set correct family (AF_INET),
1028               reject request unless it is absolutely clear i.e.
1029               both family and mask are zero.
1030          */
1031         plen = 32;
1032         ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr;
1033         if (!(r->rt_flags&RTF_HOST)) {
1034                 u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr;
1035                 if (r->rt_genmask.sa_family != AF_INET) {
1036                         if (mask || r->rt_genmask.sa_family)
1037                                 return -EAFNOSUPPORT;
1038                 }
1039                 if (bad_mask(mask, *ptr))
1040                         return -EINVAL;
1041                 plen = inet_mask_len(mask);
1042         }
1043
1044         nl->nlmsg_flags = NLM_F_REQUEST;
1045         nl->nlmsg_pid = current->pid;
1046         nl->nlmsg_seq = 0;
1047         nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm));
1048         if (cmd == SIOCDELRT) {
1049                 nl->nlmsg_type = RTM_DELROUTE;
1050                 nl->nlmsg_flags = 0;
1051         } else {
1052                 nl->nlmsg_type = RTM_NEWROUTE;
1053                 nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE;
1054                 rtm->rtm_protocol = RTPROT_BOOT;
1055         }
1056
1057         rtm->rtm_dst_len = plen;
1058         rta->rta_dst = ptr;
1059
1060         if (r->rt_metric) {
1061                 *(u32*)&r->rt_pad3 = r->rt_metric - 1;
1062                 rta->rta_priority = (u32*)&r->rt_pad3;
1063         }
1064         if (r->rt_flags&RTF_REJECT) {
1065                 rtm->rtm_scope = RT_SCOPE_HOST;
1066                 rtm->rtm_type = RTN_UNREACHABLE;
1067                 return 0;
1068         }
1069         rtm->rtm_scope = RT_SCOPE_NOWHERE;
1070         rtm->rtm_type = RTN_UNICAST;
1071
1072         if (r->rt_dev) {
1073                 char *colon;
1074                 struct net_device *dev;
1075                 char   devname[IFNAMSIZ];
1076
1077                 if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1))
1078                         return -EFAULT;
1079                 devname[IFNAMSIZ-1] = 0;
1080                 colon = strchr(devname, ':');
1081                 if (colon)
1082                         *colon = 0;
1083                 dev = __dev_get_by_name(devname);
1084                 if (!dev)
1085                         return -ENODEV;
1086                 rta->rta_oif = &dev->ifindex;
1087                 if (colon) {
1088                         struct in_ifaddr *ifa;
1089                         struct in_device *in_dev = __in_dev_get(dev);
1090                         if (!in_dev)
1091                                 return -ENODEV;
1092                         *colon = ':';
1093                         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next)
1094                                 if (strcmp(ifa->ifa_label, devname) == 0)
1095                                         break;
1096                         if (ifa == NULL)
1097                                 return -ENODEV;
1098                         rta->rta_prefsrc = &ifa->ifa_local;
1099                 }
1100         }
1101
1102         ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr;
1103         if (r->rt_gateway.sa_family == AF_INET && *ptr) {
1104                 rta->rta_gw = ptr;
1105                 if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST)
1106                         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1107         }
1108
1109         if (cmd == SIOCDELRT)
1110                 return 0;
1111
1112         if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL)
1113                 return -EINVAL;
1114
1115         if (rtm->rtm_scope == RT_SCOPE_NOWHERE)
1116                 rtm->rtm_scope = RT_SCOPE_LINK;
1117
1118         if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) {
1119                 struct rtattr *rec;
1120                 struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL);
1121                 if (mx == NULL)
1122                         return -ENOMEM;
1123                 rta->rta_mx = mx;
1124                 mx->rta_type = RTA_METRICS;
1125                 mx->rta_len  = RTA_LENGTH(0);
1126                 if (r->rt_flags&RTF_MTU) {
1127                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1128                         rec->rta_type = RTAX_ADVMSS;
1129                         rec->rta_len = RTA_LENGTH(4);
1130                         mx->rta_len += RTA_LENGTH(4);
1131                         *(u32*)RTA_DATA(rec) = r->rt_mtu - 40;
1132                 }
1133                 if (r->rt_flags&RTF_WINDOW) {
1134                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1135                         rec->rta_type = RTAX_WINDOW;
1136                         rec->rta_len = RTA_LENGTH(4);
1137                         mx->rta_len += RTA_LENGTH(4);
1138                         *(u32*)RTA_DATA(rec) = r->rt_window;
1139                 }
1140                 if (r->rt_flags&RTF_IRTT) {
1141                         rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len));
1142                         rec->rta_type = RTAX_RTT;
1143                         rec->rta_len = RTA_LENGTH(4);
1144                         mx->rta_len += RTA_LENGTH(4);
1145                         *(u32*)RTA_DATA(rec) = r->rt_irtt<<3;
1146                 }
1147         }
1148         return 0;
1149 }
1150
1151 #endif
1152
1153 /*
1154    Update FIB if:
1155    - local address disappeared -> we must delete all the entries
1156      referring to it.
1157    - device went down -> we must shutdown all nexthops going via it.
1158  */
1159
1160 int fib_sync_down(u32 local, struct net_device *dev, int force)
1161 {
1162         int ret = 0;
1163         int scope = RT_SCOPE_NOWHERE;
1164         
1165         if (force)
1166                 scope = -1;
1167
1168         if (local && fib_info_laddrhash) {
1169                 unsigned int hash = fib_laddr_hashfn(local);
1170                 struct hlist_head *head = &fib_info_laddrhash[hash];
1171                 struct hlist_node *node;
1172                 struct fib_info *fi;
1173
1174                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1175                         if (fi->fib_prefsrc == local) {
1176                                 fi->fib_flags |= RTNH_F_DEAD;
1177                                 ret++;
1178                         }
1179                 }
1180         }
1181
1182         if (dev) {
1183                 struct fib_info *prev_fi = NULL;
1184                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1185                 struct hlist_head *head = &fib_info_devhash[hash];
1186                 struct hlist_node *node;
1187                 struct fib_nh *nh;
1188
1189                 hlist_for_each_entry(nh, node, head, nh_hash) {
1190                         struct fib_info *fi = nh->nh_parent;
1191                         int dead;
1192
1193                         BUG_ON(!fi->fib_nhs);
1194                         if (nh->nh_dev != dev || fi == prev_fi)
1195                                 continue;
1196                         prev_fi = fi;
1197                         dead = 0;
1198                         change_nexthops(fi) {
1199                                 if (nh->nh_flags&RTNH_F_DEAD)
1200                                         dead++;
1201                                 else if (nh->nh_dev == dev &&
1202                                          nh->nh_scope != scope) {
1203                                         nh->nh_flags |= RTNH_F_DEAD;
1204 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1205                                         spin_lock_bh(&fib_multipath_lock);
1206                                         fi->fib_power -= nh->nh_power;
1207                                         nh->nh_power = 0;
1208                                         spin_unlock_bh(&fib_multipath_lock);
1209 #endif
1210                                         dead++;
1211                                 }
1212 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1213                                 if (force > 1 && nh->nh_dev == dev) {
1214                                         dead = fi->fib_nhs;
1215                                         break;
1216                                 }
1217 #endif
1218                         } endfor_nexthops(fi)
1219                         if (dead == fi->fib_nhs) {
1220                                 fi->fib_flags |= RTNH_F_DEAD;
1221                                 ret++;
1222                         }
1223                 }
1224         }
1225
1226         return ret;
1227 }
1228
1229 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1230
1231 /*
1232    Dead device goes up. We wake up dead nexthops.
1233    It takes sense only on multipath routes.
1234  */
1235
1236 int fib_sync_up(struct net_device *dev)
1237 {
1238         struct fib_info *prev_fi;
1239         unsigned int hash;
1240         struct hlist_head *head;
1241         struct hlist_node *node;
1242         struct fib_nh *nh;
1243         int ret;
1244
1245         if (!(dev->flags&IFF_UP))
1246                 return 0;
1247
1248         prev_fi = NULL;
1249         hash = fib_devindex_hashfn(dev->ifindex);
1250         head = &fib_info_devhash[hash];
1251         ret = 0;
1252
1253         hlist_for_each_entry(nh, node, head, nh_hash) {
1254                 struct fib_info *fi = nh->nh_parent;
1255                 int alive;
1256
1257                 BUG_ON(!fi->fib_nhs);
1258                 if (nh->nh_dev != dev || fi == prev_fi)
1259                         continue;
1260
1261                 prev_fi = fi;
1262                 alive = 0;
1263                 change_nexthops(fi) {
1264                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1265                                 alive++;
1266                                 continue;
1267                         }
1268                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1269                                 continue;
1270                         if (nh->nh_dev != dev || __in_dev_get(dev) == NULL)
1271                                 continue;
1272                         alive++;
1273                         spin_lock_bh(&fib_multipath_lock);
1274                         nh->nh_power = 0;
1275                         nh->nh_flags &= ~RTNH_F_DEAD;
1276                         spin_unlock_bh(&fib_multipath_lock);
1277                 } endfor_nexthops(fi)
1278
1279                 if (alive > 0) {
1280                         fi->fib_flags &= ~RTNH_F_DEAD;
1281                         ret++;
1282                 }
1283         }
1284
1285         return ret;
1286 }
1287
1288 /*
1289    The algorithm is suboptimal, but it provides really
1290    fair weighted route distribution.
1291  */
1292
1293 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1294 {
1295         struct fib_info *fi = res->fi;
1296         int w;
1297
1298         spin_lock_bh(&fib_multipath_lock);
1299         if (fi->fib_power <= 0) {
1300                 int power = 0;
1301                 change_nexthops(fi) {
1302                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1303                                 power += nh->nh_weight;
1304                                 nh->nh_power = nh->nh_weight;
1305                         }
1306                 } endfor_nexthops(fi);
1307                 fi->fib_power = power;
1308                 if (power <= 0) {
1309                         spin_unlock_bh(&fib_multipath_lock);
1310                         /* Race condition: route has just become dead. */
1311                         res->nh_sel = 0;
1312                         return;
1313                 }
1314         }
1315
1316
1317         /* w should be random number [0..fi->fib_power-1],
1318            it is pretty bad approximation.
1319          */
1320
1321         w = jiffies % fi->fib_power;
1322
1323         change_nexthops(fi) {
1324                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1325                         if ((w -= nh->nh_power) <= 0) {
1326                                 nh->nh_power--;
1327                                 fi->fib_power--;
1328                                 res->nh_sel = nhsel;
1329                                 spin_unlock_bh(&fib_multipath_lock);
1330                                 return;
1331                         }
1332                 }
1333         } endfor_nexthops(fi);
1334
1335         /* Race condition: route has just become dead. */
1336         res->nh_sel = 0;
1337         spin_unlock_bh(&fib_multipath_lock);
1338 }
1339 #endif