bonding, ipv4, ipv6, vlan: Handle NETDEV_BONDING_FAILOVER like NETDEV_NOTIFY_PEERS
[linux-3.10.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 #include "fib_lookup.h"
68
69 static struct ipv4_devconf ipv4_devconf = {
70         .data = {
71                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
72                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
75         },
76 };
77
78 static struct ipv4_devconf ipv4_devconf_dflt = {
79         .data = {
80                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
81                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
84                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
85         },
86 };
87
/*
 * Read devconf attribute @attr from the per-namespace default
 * configuration (net->ipv4.devconf_dflt).
 *
 * @net is parenthesized so that an argument which is itself an expression
 * (for example a cast or a conditional) binds correctly before the "->".
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
90
91 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
92         [IFA_LOCAL]             = { .type = NLA_U32 },
93         [IFA_ADDRESS]           = { .type = NLA_U32 },
94         [IFA_BROADCAST]         = { .type = NLA_U32 },
95         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
96 };
97
98 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
99  * value.  So if you change this define, make appropriate changes to
100  * inet_addr_hash as well.
101  */
102 #define IN4_ADDR_HSIZE  256
103 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
104 static DEFINE_SPINLOCK(inet_addr_hash_lock);
105
106 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
107 {
108         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
109
110         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
111                 (IN4_ADDR_HSIZE - 1));
112 }
113
114 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
115 {
116         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
117
118         spin_lock(&inet_addr_hash_lock);
119         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
120         spin_unlock(&inet_addr_hash_lock);
121 }
122
123 static void inet_hash_remove(struct in_ifaddr *ifa)
124 {
125         spin_lock(&inet_addr_hash_lock);
126         hlist_del_init_rcu(&ifa->hash);
127         spin_unlock(&inet_addr_hash_lock);
128 }
129
130 /**
131  * __ip_dev_find - find the first device with a given source address.
132  * @net: the net namespace
133  * @addr: the source address
134  * @devref: if true, take a reference on the found device
135  *
136  * If a caller uses devref=false, it should be protected by RCU, or RTNL
137  */
138 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
139 {
140         unsigned int hash = inet_addr_hash(net, addr);
141         struct net_device *result = NULL;
142         struct in_ifaddr *ifa;
143         struct hlist_node *node;
144
145         rcu_read_lock();
146         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
147                 struct net_device *dev = ifa->ifa_dev->dev;
148
149                 if (!net_eq(dev_net(dev), net))
150                         continue;
151                 if (ifa->ifa_local == addr) {
152                         result = dev;
153                         break;
154                 }
155         }
156         if (!result) {
157                 struct flowi4 fl4 = { .daddr = addr };
158                 struct fib_result res = { 0 };
159                 struct fib_table *local;
160
161                 /* Fallback to FIB local table so that communication
162                  * over loopback subnets work.
163                  */
164                 local = fib_get_table(net, RT_TABLE_LOCAL);
165                 if (local &&
166                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
167                     res.type == RTN_LOCAL)
168                         result = FIB_RES_DEV(res);
169         }
170         if (result && devref)
171                 dev_hold(result);
172         rcu_read_unlock();
173         return result;
174 }
175 EXPORT_SYMBOL(__ip_dev_find);
176
177 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
178
179 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
180 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
181                          int destroy);
182 #ifdef CONFIG_SYSCTL
183 static void devinet_sysctl_register(struct in_device *idev);
184 static void devinet_sysctl_unregister(struct in_device *idev);
185 #else
186 static inline void devinet_sysctl_register(struct in_device *idev)
187 {
188 }
189 static inline void devinet_sysctl_unregister(struct in_device *idev)
190 {
191 }
192 #endif
193
194 /* Locks all the inet devices. */
195
196 static struct in_ifaddr *inet_alloc_ifa(void)
197 {
198         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
199 }
200
201 static void inet_rcu_free_ifa(struct rcu_head *head)
202 {
203         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
204         if (ifa->ifa_dev)
205                 in_dev_put(ifa->ifa_dev);
206         kfree(ifa);
207 }
208
209 static inline void inet_free_ifa(struct in_ifaddr *ifa)
210 {
211         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
212 }
213
214 void in_dev_finish_destroy(struct in_device *idev)
215 {
216         struct net_device *dev = idev->dev;
217
218         WARN_ON(idev->ifa_list);
219         WARN_ON(idev->mc_list);
220 #ifdef NET_REFCNT_DEBUG
221         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
222                idev, dev ? dev->name : "NIL");
223 #endif
224         dev_put(dev);
225         if (!idev->dead)
226                 pr_err("Freeing alive in_device %p\n", idev);
227         else
228                 kfree(idev);
229 }
230 EXPORT_SYMBOL(in_dev_finish_destroy);
231
232 static struct in_device *inetdev_init(struct net_device *dev)
233 {
234         struct in_device *in_dev;
235
236         ASSERT_RTNL();
237
238         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
239         if (!in_dev)
240                 goto out;
241         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
242                         sizeof(in_dev->cnf));
243         in_dev->cnf.sysctl = NULL;
244         in_dev->dev = dev;
245         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
246         if (!in_dev->arp_parms)
247                 goto out_kfree;
248         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
249                 dev_disable_lro(dev);
250         /* Reference in_dev->dev */
251         dev_hold(dev);
252         /* Account for reference dev->ip_ptr (below) */
253         in_dev_hold(in_dev);
254
255         devinet_sysctl_register(in_dev);
256         ip_mc_init_dev(in_dev);
257         if (dev->flags & IFF_UP)
258                 ip_mc_up(in_dev);
259
260         /* we can receive as soon as ip_ptr is set -- do this last */
261         rcu_assign_pointer(dev->ip_ptr, in_dev);
262 out:
263         return in_dev;
264 out_kfree:
265         kfree(in_dev);
266         in_dev = NULL;
267         goto out;
268 }
269
270 static void in_dev_rcu_put(struct rcu_head *head)
271 {
272         struct in_device *idev = container_of(head, struct in_device, rcu_head);
273         in_dev_put(idev);
274 }
275
276 static void inetdev_destroy(struct in_device *in_dev)
277 {
278         struct in_ifaddr *ifa;
279         struct net_device *dev;
280
281         ASSERT_RTNL();
282
283         dev = in_dev->dev;
284
285         in_dev->dead = 1;
286
287         ip_mc_destroy_dev(in_dev);
288
289         while ((ifa = in_dev->ifa_list) != NULL) {
290                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
291                 inet_free_ifa(ifa);
292         }
293
294         rcu_assign_pointer(dev->ip_ptr, NULL);
295
296         devinet_sysctl_unregister(in_dev);
297         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
298         arp_ifdown(dev);
299
300         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
301 }
302
303 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
304 {
305         rcu_read_lock();
306         for_primary_ifa(in_dev) {
307                 if (inet_ifa_match(a, ifa)) {
308                         if (!b || inet_ifa_match(b, ifa)) {
309                                 rcu_read_unlock();
310                                 return 1;
311                         }
312                 }
313         } endfor_ifa(in_dev);
314         rcu_read_unlock();
315         return 0;
316 }
317
/*
 * Remove the address that *ifap points at from in_dev's address list and
 * announce the removal.  Must be called under RTNL.
 *
 * in_dev:  device state owning the list
 * ifap:    the list link (head pointer or some ifa_next) that currently
 *          points at the address to remove
 * destroy: when non-zero the removed address is freed via inet_free_ifa()
 *          at the end; otherwise the caller keeps it
 * nlh/pid: passed through unchanged to rtmsg_ifa()
 *
 * If the removed address is a primary, the entries after it that are
 * secondaries with the same mask and matching subnet are either removed
 * too, or - when IN_DEV_PROMOTE_SECONDARIES() is set - the first of them
 * is promoted to primary.
 */
318 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
319                          int destroy, struct nlmsghdr *nlh, u32 pid)
320 {
321         struct in_ifaddr *promote = NULL;
322         struct in_ifaddr *ifa, *ifa1 = *ifap;
        /* last_prim: last primary seen after ifa1 whose scope value is
         * not below ifa1's; starts at the list head.
         * prev_prom: last entry skipped before the promotion candidate;
         * stays NULL if the candidate directly follows ifa1.
         */
323         struct in_ifaddr *last_prim = in_dev->ifa_list;
324         struct in_ifaddr *prev_prom = NULL;
325         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
326
327         ASSERT_RTNL();
328
329         /* 1. Deleting primary ifaddr forces deletion all secondaries
330          * unless alias promotion is set
331          **/
332
333         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
334                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
335
336                 while ((ifa = *ifap1) != NULL) {
337                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
338                             ifa1->ifa_scope <= ifa->ifa_scope)
339                                 last_prim = ifa;
340
                        /* Skip entries that are not secondaries of ifa1's subnet. */
341                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
342                             ifa1->ifa_mask != ifa->ifa_mask ||
343                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
344                                 ifap1 = &ifa->ifa_next;
345                                 prev_prom = ifa;
346                                 continue;
347                         }
348
349                         if (!do_promote) {
350                                 inet_hash_remove(ifa);
351                                 *ifap1 = ifa->ifa_next;
352
353                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
354                                 blocking_notifier_call_chain(&inetaddr_chain,
355                                                 NETDEV_DOWN, ifa);
356                                 inet_free_ifa(ifa);
357                         } else {
358                                 promote = ifa;
359                                 break;
360                         }
361                 }
362         }
363
364         /* On promotion all secondaries from subnet are changing
365          * the primary IP, we must remove all their routes silently
366          * and later to add them back with new prefsrc. Do this
367          * while all addresses are on the device list.
368          */
369         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
370                 if (ifa1->ifa_mask == ifa->ifa_mask &&
371                     inet_ifa_match(ifa1->ifa_address, ifa))
372                         fib_del_ifaddr(ifa, ifa1);
373         }
374
375         /* 2. Unlink it */
376
377         *ifap = ifa1->ifa_next;
378         inet_hash_remove(ifa1);
379
380         /* 3. Announce address deletion */
381
382         /* Send message first, then call notifier.
383            At first sight, FIB update triggered by notifier
384            will refer to already deleted ifaddr, that could confuse
385            netlink listeners. It is not true: look, gated sees
386            that route deleted and if it still thinks that ifaddr
387            is valid, it will try to restore deleted routes... Grr.
388            So that, this order is correct.
389          */
390         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
391         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
392
393         if (promote) {
                /* Remember the entry after promote before it is relinked. */
394                 struct in_ifaddr *next_sec = promote->ifa_next;
395
                /* Move promote so that it directly follows last_prim. */
396                 if (prev_prom) {
397                         prev_prom->ifa_next = promote->ifa_next;
398                         promote->ifa_next = last_prim->ifa_next;
399                         last_prim->ifa_next = promote;
400                 }
401
402                 promote->ifa_flags &= ~IFA_F_SECONDARY;
403                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
404                 blocking_notifier_call_chain(&inetaddr_chain,
405                                 NETDEV_UP, promote);
                /* Re-add FIB entries for the remaining secondaries of the subnet. */
406                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
407                         if (ifa1->ifa_mask != ifa->ifa_mask ||
408                             !inet_ifa_match(ifa1->ifa_address, ifa))
409                                         continue;
410                         fib_add_ifaddr(ifa);
411                 }
412
413         }
414         if (destroy)
415                 inet_free_ifa(ifa1);
416 }
417
418 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
419                          int destroy)
420 {
421         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
422 }
423
424 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
425                              u32 pid)
426 {
427         struct in_device *in_dev = ifa->ifa_dev;
428         struct in_ifaddr *ifa1, **ifap, **last_primary;
429
430         ASSERT_RTNL();
431
432         if (!ifa->ifa_local) {
433                 inet_free_ifa(ifa);
434                 return 0;
435         }
436
437         ifa->ifa_flags &= ~IFA_F_SECONDARY;
438         last_primary = &in_dev->ifa_list;
439
440         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
441              ifap = &ifa1->ifa_next) {
442                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
443                     ifa->ifa_scope <= ifa1->ifa_scope)
444                         last_primary = &ifa1->ifa_next;
445                 if (ifa1->ifa_mask == ifa->ifa_mask &&
446                     inet_ifa_match(ifa1->ifa_address, ifa)) {
447                         if (ifa1->ifa_local == ifa->ifa_local) {
448                                 inet_free_ifa(ifa);
449                                 return -EEXIST;
450                         }
451                         if (ifa1->ifa_scope != ifa->ifa_scope) {
452                                 inet_free_ifa(ifa);
453                                 return -EINVAL;
454                         }
455                         ifa->ifa_flags |= IFA_F_SECONDARY;
456                 }
457         }
458
459         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
460                 net_srandom(ifa->ifa_local);
461                 ifap = last_primary;
462         }
463
464         ifa->ifa_next = *ifap;
465         *ifap = ifa;
466
467         inet_hash_insert(dev_net(in_dev->dev), ifa);
468
469         /* Send message first, then call notifier.
470            Notifier will trigger FIB update, so that
471            listeners of netlink will know about new ifaddr */
472         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
473         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
474
475         return 0;
476 }
477
478 static int inet_insert_ifa(struct in_ifaddr *ifa)
479 {
480         return __inet_insert_ifa(ifa, NULL, 0);
481 }
482
483 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
484 {
485         struct in_device *in_dev = __in_dev_get_rtnl(dev);
486
487         ASSERT_RTNL();
488
489         if (!in_dev) {
490                 inet_free_ifa(ifa);
491                 return -ENOBUFS;
492         }
493         ipv4_devconf_setall(in_dev);
494         if (ifa->ifa_dev != in_dev) {
495                 WARN_ON(ifa->ifa_dev);
496                 in_dev_hold(in_dev);
497                 ifa->ifa_dev = in_dev;
498         }
499         if (ipv4_is_loopback(ifa->ifa_local))
500                 ifa->ifa_scope = RT_SCOPE_HOST;
501         return inet_insert_ifa(ifa);
502 }
503
504 /* Caller must hold RCU or RTNL :
505  * We dont take a reference on found in_device
506  */
507 struct in_device *inetdev_by_index(struct net *net, int ifindex)
508 {
509         struct net_device *dev;
510         struct in_device *in_dev = NULL;
511
512         rcu_read_lock();
513         dev = dev_get_by_index_rcu(net, ifindex);
514         if (dev)
515                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
516         rcu_read_unlock();
517         return in_dev;
518 }
519 EXPORT_SYMBOL(inetdev_by_index);
520
521 /* Called only from RTNL semaphored context. No locks. */
522
523 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
524                                     __be32 mask)
525 {
526         ASSERT_RTNL();
527
528         for_primary_ifa(in_dev) {
529                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
530                         return ifa;
531         } endfor_ifa(in_dev);
532         return NULL;
533 }
534
535 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
536 {
537         struct net *net = sock_net(skb->sk);
538         struct nlattr *tb[IFA_MAX+1];
539         struct in_device *in_dev;
540         struct ifaddrmsg *ifm;
541         struct in_ifaddr *ifa, **ifap;
542         int err = -EINVAL;
543
544         ASSERT_RTNL();
545
546         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
547         if (err < 0)
548                 goto errout;
549
550         ifm = nlmsg_data(nlh);
551         in_dev = inetdev_by_index(net, ifm->ifa_index);
552         if (in_dev == NULL) {
553                 err = -ENODEV;
554                 goto errout;
555         }
556
557         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
558              ifap = &ifa->ifa_next) {
559                 if (tb[IFA_LOCAL] &&
560                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
561                         continue;
562
563                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
564                         continue;
565
566                 if (tb[IFA_ADDRESS] &&
567                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
568                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
569                         continue;
570
571                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
572                 return 0;
573         }
574
575         err = -EADDRNOTAVAIL;
576 errout:
577         return err;
578 }
579
580 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
581 {
582         struct nlattr *tb[IFA_MAX+1];
583         struct in_ifaddr *ifa;
584         struct ifaddrmsg *ifm;
585         struct net_device *dev;
586         struct in_device *in_dev;
587         int err;
588
589         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
590         if (err < 0)
591                 goto errout;
592
593         ifm = nlmsg_data(nlh);
594         err = -EINVAL;
595         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
596                 goto errout;
597
598         dev = __dev_get_by_index(net, ifm->ifa_index);
599         err = -ENODEV;
600         if (dev == NULL)
601                 goto errout;
602
603         in_dev = __in_dev_get_rtnl(dev);
604         err = -ENOBUFS;
605         if (in_dev == NULL)
606                 goto errout;
607
608         ifa = inet_alloc_ifa();
609         if (ifa == NULL)
610                 /*
611                  * A potential indev allocation can be left alive, it stays
612                  * assigned to its device and is destroy with it.
613                  */
614                 goto errout;
615
616         ipv4_devconf_setall(in_dev);
617         in_dev_hold(in_dev);
618
619         if (tb[IFA_ADDRESS] == NULL)
620                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
621
622         INIT_HLIST_NODE(&ifa->hash);
623         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
624         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
625         ifa->ifa_flags = ifm->ifa_flags;
626         ifa->ifa_scope = ifm->ifa_scope;
627         ifa->ifa_dev = in_dev;
628
629         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
630         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
631
632         if (tb[IFA_BROADCAST])
633                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
634
635         if (tb[IFA_LABEL])
636                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
637         else
638                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
639
640         return ifa;
641
642 errout:
643         return ERR_PTR(err);
644 }
645
646 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
647 {
648         struct net *net = sock_net(skb->sk);
649         struct in_ifaddr *ifa;
650
651         ASSERT_RTNL();
652
653         ifa = rtm_to_ifaddr(net, nlh);
654         if (IS_ERR(ifa))
655                 return PTR_ERR(ifa);
656
657         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
658 }
659
660 /*
661  *      Determine a default network mask, based on the IP address.
662  */
663
664 static inline int inet_abc_len(__be32 addr)
665 {
666         int rc = -1;    /* Something else, probably a multicast. */
667
668         if (ipv4_is_zeronet(addr))
669                 rc = 0;
670         else {
671                 __u32 haddr = ntohl(addr);
672
673                 if (IN_CLASSA(haddr))
674                         rc = 8;
675                 else if (IN_CLASSB(haddr))
676                         rc = 16;
677                 else if (IN_CLASSC(haddr))
678                         rc = 24;
679         }
680
681         return rc;
682 }
683
684
685 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
686 {
687         struct ifreq ifr;
688         struct sockaddr_in sin_orig;
689         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
690         struct in_device *in_dev;
691         struct in_ifaddr **ifap = NULL;
692         struct in_ifaddr *ifa = NULL;
693         struct net_device *dev;
694         char *colon;
695         int ret = -EFAULT;
696         int tryaddrmatch = 0;
697
698         /*
699          *      Fetch the caller's info block into kernel space
700          */
701
702         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
703                 goto out;
704         ifr.ifr_name[IFNAMSIZ - 1] = 0;
705
706         /* save original address for comparison */
707         memcpy(&sin_orig, sin, sizeof(*sin));
708
709         colon = strchr(ifr.ifr_name, ':');
710         if (colon)
711                 *colon = 0;
712
713         dev_load(net, ifr.ifr_name);
714
715         switch (cmd) {
716         case SIOCGIFADDR:       /* Get interface address */
717         case SIOCGIFBRDADDR:    /* Get the broadcast address */
718         case SIOCGIFDSTADDR:    /* Get the destination address */
719         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
720                 /* Note that these ioctls will not sleep,
721                    so that we do not impose a lock.
722                    One day we will be forced to put shlock here (I mean SMP)
723                  */
724                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
725                 memset(sin, 0, sizeof(*sin));
726                 sin->sin_family = AF_INET;
727                 break;
728
729         case SIOCSIFFLAGS:
730                 ret = -EACCES;
731                 if (!capable(CAP_NET_ADMIN))
732                         goto out;
733                 break;
734         case SIOCSIFADDR:       /* Set interface address (and family) */
735         case SIOCSIFBRDADDR:    /* Set the broadcast address */
736         case SIOCSIFDSTADDR:    /* Set the destination address */
737         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
738                 ret = -EACCES;
739                 if (!capable(CAP_NET_ADMIN))
740                         goto out;
741                 ret = -EINVAL;
742                 if (sin->sin_family != AF_INET)
743                         goto out;
744                 break;
745         default:
746                 ret = -EINVAL;
747                 goto out;
748         }
749
750         rtnl_lock();
751
752         ret = -ENODEV;
753         dev = __dev_get_by_name(net, ifr.ifr_name);
754         if (!dev)
755                 goto done;
756
757         if (colon)
758                 *colon = ':';
759
760         in_dev = __in_dev_get_rtnl(dev);
761         if (in_dev) {
762                 if (tryaddrmatch) {
763                         /* Matthias Andree */
764                         /* compare label and address (4.4BSD style) */
765                         /* note: we only do this for a limited set of ioctls
766                            and only if the original address family was AF_INET.
767                            This is checked above. */
768                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
769                              ifap = &ifa->ifa_next) {
770                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
771                                     sin_orig.sin_addr.s_addr ==
772                                                         ifa->ifa_local) {
773                                         break; /* found */
774                                 }
775                         }
776                 }
777                 /* we didn't get a match, maybe the application is
778                    4.3BSD-style and passed in junk so we fall back to
779                    comparing just the label */
780                 if (!ifa) {
781                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
782                              ifap = &ifa->ifa_next)
783                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
784                                         break;
785                 }
786         }
787
788         ret = -EADDRNOTAVAIL;
789         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
790                 goto done;
791
792         switch (cmd) {
793         case SIOCGIFADDR:       /* Get interface address */
794                 sin->sin_addr.s_addr = ifa->ifa_local;
795                 goto rarok;
796
797         case SIOCGIFBRDADDR:    /* Get the broadcast address */
798                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
799                 goto rarok;
800
801         case SIOCGIFDSTADDR:    /* Get the destination address */
802                 sin->sin_addr.s_addr = ifa->ifa_address;
803                 goto rarok;
804
805         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
806                 sin->sin_addr.s_addr = ifa->ifa_mask;
807                 goto rarok;
808
809         case SIOCSIFFLAGS:
810                 if (colon) {
811                         ret = -EADDRNOTAVAIL;
812                         if (!ifa)
813                                 break;
814                         ret = 0;
815                         if (!(ifr.ifr_flags & IFF_UP))
816                                 inet_del_ifa(in_dev, ifap, 1);
817                         break;
818                 }
819                 ret = dev_change_flags(dev, ifr.ifr_flags);
820                 break;
821
822         case SIOCSIFADDR:       /* Set interface address (and family) */
823                 ret = -EINVAL;
824                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
825                         break;
826
827                 if (!ifa) {
828                         ret = -ENOBUFS;
829                         ifa = inet_alloc_ifa();
830                         INIT_HLIST_NODE(&ifa->hash);
831                         if (!ifa)
832                                 break;
833                         if (colon)
834                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
835                         else
836                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
837                 } else {
838                         ret = 0;
839                         if (ifa->ifa_local == sin->sin_addr.s_addr)
840                                 break;
841                         inet_del_ifa(in_dev, ifap, 0);
842                         ifa->ifa_broadcast = 0;
843                         ifa->ifa_scope = 0;
844                 }
845
846                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
847
848                 if (!(dev->flags & IFF_POINTOPOINT)) {
849                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
850                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
851                         if ((dev->flags & IFF_BROADCAST) &&
852                             ifa->ifa_prefixlen < 31)
853                                 ifa->ifa_broadcast = ifa->ifa_address |
854                                                      ~ifa->ifa_mask;
855                 } else {
856                         ifa->ifa_prefixlen = 32;
857                         ifa->ifa_mask = inet_make_mask(32);
858                 }
859                 ret = inet_set_ifa(dev, ifa);
860                 break;
861
862         case SIOCSIFBRDADDR:    /* Set the broadcast address */
863                 ret = 0;
864                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
865                         inet_del_ifa(in_dev, ifap, 0);
866                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
867                         inet_insert_ifa(ifa);
868                 }
869                 break;
870
871         case SIOCSIFDSTADDR:    /* Set the destination address */
872                 ret = 0;
873                 if (ifa->ifa_address == sin->sin_addr.s_addr)
874                         break;
875                 ret = -EINVAL;
876                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
877                         break;
878                 ret = 0;
879                 inet_del_ifa(in_dev, ifap, 0);
880                 ifa->ifa_address = sin->sin_addr.s_addr;
881                 inet_insert_ifa(ifa);
882                 break;
883
884         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
885
886                 /*
887                  *      The mask we set must be legal.
888                  */
889                 ret = -EINVAL;
890                 if (bad_mask(sin->sin_addr.s_addr, 0))
891                         break;
892                 ret = 0;
893                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
894                         __be32 old_mask = ifa->ifa_mask;
895                         inet_del_ifa(in_dev, ifap, 0);
896                         ifa->ifa_mask = sin->sin_addr.s_addr;
897                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
898
899                         /* See if current broadcast address matches
900                          * with current netmask, then recalculate
901                          * the broadcast address. Otherwise it's a
902                          * funny address, so don't touch it since
903                          * the user seems to know what (s)he's doing...
904                          */
905                         if ((dev->flags & IFF_BROADCAST) &&
906                             (ifa->ifa_prefixlen < 31) &&
907                             (ifa->ifa_broadcast ==
908                              (ifa->ifa_local|~old_mask))) {
909                                 ifa->ifa_broadcast = (ifa->ifa_local |
910                                                       ~sin->sin_addr.s_addr);
911                         }
912                         inet_insert_ifa(ifa);
913                 }
914                 break;
915         }
916 done:
917         rtnl_unlock();
918 out:
919         return ret;
920 rarok:
921         rtnl_unlock();
922         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
923         goto out;
924 }
925
926 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
927 {
928         struct in_device *in_dev = __in_dev_get_rtnl(dev);
929         struct in_ifaddr *ifa;
930         struct ifreq ifr;
931         int done = 0;
932
933         if (!in_dev)
934                 goto out;
935
936         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
937                 if (!buf) {
938                         done += sizeof(ifr);
939                         continue;
940                 }
941                 if (len < (int) sizeof(ifr))
942                         break;
943                 memset(&ifr, 0, sizeof(struct ifreq));
944                 if (ifa->ifa_label)
945                         strcpy(ifr.ifr_name, ifa->ifa_label);
946                 else
947                         strcpy(ifr.ifr_name, dev->name);
948
949                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
950                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
951                                                                 ifa->ifa_local;
952
953                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
954                         done = -EFAULT;
955                         break;
956                 }
957                 buf  += sizeof(struct ifreq);
958                 len  -= sizeof(struct ifreq);
959                 done += sizeof(struct ifreq);
960         }
961 out:
962         return done;
963 }
964
965 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
966 {
967         __be32 addr = 0;
968         struct in_device *in_dev;
969         struct net *net = dev_net(dev);
970
971         rcu_read_lock();
972         in_dev = __in_dev_get_rcu(dev);
973         if (!in_dev)
974                 goto no_in_dev;
975
976         for_primary_ifa(in_dev) {
977                 if (ifa->ifa_scope > scope)
978                         continue;
979                 if (!dst || inet_ifa_match(dst, ifa)) {
980                         addr = ifa->ifa_local;
981                         break;
982                 }
983                 if (!addr)
984                         addr = ifa->ifa_local;
985         } endfor_ifa(in_dev);
986
987         if (addr)
988                 goto out_unlock;
989 no_in_dev:
990
991         /* Not loopback addresses on loopback should be preferred
992            in this case. It is importnat that lo is the first interface
993            in dev_base list.
994          */
995         for_each_netdev_rcu(net, dev) {
996                 in_dev = __in_dev_get_rcu(dev);
997                 if (!in_dev)
998                         continue;
999
1000                 for_primary_ifa(in_dev) {
1001                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1002                             ifa->ifa_scope <= scope) {
1003                                 addr = ifa->ifa_local;
1004                                 goto out_unlock;
1005                         }
1006                 } endfor_ifa(in_dev);
1007         }
1008 out_unlock:
1009         rcu_read_unlock();
1010         return addr;
1011 }
1012 EXPORT_SYMBOL(inet_select_addr);
1013
1014 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1015                               __be32 local, int scope)
1016 {
1017         int same = 0;
1018         __be32 addr = 0;
1019
1020         for_ifa(in_dev) {
1021                 if (!addr &&
1022                     (local == ifa->ifa_local || !local) &&
1023                     ifa->ifa_scope <= scope) {
1024                         addr = ifa->ifa_local;
1025                         if (same)
1026                                 break;
1027                 }
1028                 if (!same) {
1029                         same = (!local || inet_ifa_match(local, ifa)) &&
1030                                 (!dst || inet_ifa_match(dst, ifa));
1031                         if (same && addr) {
1032                                 if (local || !dst)
1033                                         break;
1034                                 /* Is the selected addr into dst subnet? */
1035                                 if (inet_ifa_match(addr, ifa))
1036                                         break;
1037                                 /* No, then can we use new local src? */
1038                                 if (ifa->ifa_scope <= scope) {
1039                                         addr = ifa->ifa_local;
1040                                         break;
1041                                 }
1042                                 /* search for large dst subnet for addr */
1043                                 same = 0;
1044                         }
1045                 }
1046         } endfor_ifa(in_dev);
1047
1048         return same ? addr : 0;
1049 }
1050
1051 /*
1052  * Confirm that local IP address exists using wildcards:
1053  * - in_dev: only on this interface, 0=any interface
1054  * - dst: only in the same subnet as dst, 0=any dst
1055  * - local: address, 0=autoselect the local address
1056  * - scope: maximum allowed scope value for the local address
1057  */
1058 __be32 inet_confirm_addr(struct in_device *in_dev,
1059                          __be32 dst, __be32 local, int scope)
1060 {
1061         __be32 addr = 0;
1062         struct net_device *dev;
1063         struct net *net;
1064
1065         if (scope != RT_SCOPE_LINK)
1066                 return confirm_addr_indev(in_dev, dst, local, scope);
1067
1068         net = dev_net(in_dev->dev);
1069         rcu_read_lock();
1070         for_each_netdev_rcu(net, dev) {
1071                 in_dev = __in_dev_get_rcu(dev);
1072                 if (in_dev) {
1073                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1074                         if (addr)
1075                                 break;
1076                 }
1077         }
1078         rcu_read_unlock();
1079
1080         return addr;
1081 }
1082
1083 /*
1084  *      Device notifier
1085  */
1086
1087 int register_inetaddr_notifier(struct notifier_block *nb)
1088 {
1089         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1090 }
1091 EXPORT_SYMBOL(register_inetaddr_notifier);
1092
1093 int unregister_inetaddr_notifier(struct notifier_block *nb)
1094 {
1095         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1096 }
1097 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1098
1099 /* Rename ifa_labels for a device name change. Make some effort to preserve
1100  * existing alias numbering and to create unique labels if possible.
1101 */
1102 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1103 {
1104         struct in_ifaddr *ifa;
1105         int named = 0;
1106
1107         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1108                 char old[IFNAMSIZ], *dot;
1109
1110                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1111                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1112                 if (named++ == 0)
1113                         goto skip;
1114                 dot = strchr(old, ':');
1115                 if (dot == NULL) {
1116                         sprintf(old, ":%d", named);
1117                         dot = old;
1118                 }
1119                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1120                         strcat(ifa->ifa_label, dot);
1121                 else
1122                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1123 skip:
1124                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1125         }
1126 }
1127
/* True when @mtu is at least 68, the smallest MTU IPv4 requires (RFC 791). */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	const unsigned min_ipv4_mtu = 68;

	return !(mtu < min_ipv4_mtu);
}
1132
1133 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1134                                         struct in_device *in_dev)
1135
1136 {
1137         struct in_ifaddr *ifa = in_dev->ifa_list;
1138
1139         if (!ifa)
1140                 return;
1141
1142         arp_send(ARPOP_REQUEST, ETH_P_ARP,
1143                  ifa->ifa_local, dev,
1144                  ifa->ifa_local, NULL,
1145                  dev->dev_addr, NULL);
1146 }
1147
1148 /* Called only under RTNL semaphore */
1149
1150 static int inetdev_event(struct notifier_block *this, unsigned long event,
1151                          void *ptr)
1152 {
1153         struct net_device *dev = ptr;
1154         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1155
1156         ASSERT_RTNL();
1157
1158         if (!in_dev) {
1159                 if (event == NETDEV_REGISTER) {
1160                         in_dev = inetdev_init(dev);
1161                         if (!in_dev)
1162                                 return notifier_from_errno(-ENOMEM);
1163                         if (dev->flags & IFF_LOOPBACK) {
1164                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1165                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1166                         }
1167                 } else if (event == NETDEV_CHANGEMTU) {
1168                         /* Re-enabling IP */
1169                         if (inetdev_valid_mtu(dev->mtu))
1170                                 in_dev = inetdev_init(dev);
1171                 }
1172                 goto out;
1173         }
1174
1175         switch (event) {
1176         case NETDEV_REGISTER:
1177                 printk(KERN_DEBUG "inetdev_event: bug\n");
1178                 rcu_assign_pointer(dev->ip_ptr, NULL);
1179                 break;
1180         case NETDEV_UP:
1181                 if (!inetdev_valid_mtu(dev->mtu))
1182                         break;
1183                 if (dev->flags & IFF_LOOPBACK) {
1184                         struct in_ifaddr *ifa = inet_alloc_ifa();
1185
1186                         if (ifa) {
1187                                 INIT_HLIST_NODE(&ifa->hash);
1188                                 ifa->ifa_local =
1189                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1190                                 ifa->ifa_prefixlen = 8;
1191                                 ifa->ifa_mask = inet_make_mask(8);
1192                                 in_dev_hold(in_dev);
1193                                 ifa->ifa_dev = in_dev;
1194                                 ifa->ifa_scope = RT_SCOPE_HOST;
1195                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1196                                 inet_insert_ifa(ifa);
1197                         }
1198                 }
1199                 ip_mc_up(in_dev);
1200                 /* fall through */
1201         case NETDEV_CHANGEADDR:
1202                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1203                         break;
1204                 /* fall through */
1205         case NETDEV_NOTIFY_PEERS:
1206         case NETDEV_BONDING_FAILOVER:
1207                 /* Send gratuitous ARP to notify of link change */
1208                 inetdev_send_gratuitous_arp(dev, in_dev);
1209                 break;
1210         case NETDEV_DOWN:
1211                 ip_mc_down(in_dev);
1212                 break;
1213         case NETDEV_PRE_TYPE_CHANGE:
1214                 ip_mc_unmap(in_dev);
1215                 break;
1216         case NETDEV_POST_TYPE_CHANGE:
1217                 ip_mc_remap(in_dev);
1218                 break;
1219         case NETDEV_CHANGEMTU:
1220                 if (inetdev_valid_mtu(dev->mtu))
1221                         break;
1222                 /* disable IP when MTU is not enough */
1223         case NETDEV_UNREGISTER:
1224                 inetdev_destroy(in_dev);
1225                 break;
1226         case NETDEV_CHANGENAME:
1227                 /* Do not notify about label change, this event is
1228                  * not interesting to applications using netlink.
1229                  */
1230                 inetdev_changename(dev, in_dev);
1231
1232                 devinet_sysctl_unregister(in_dev);
1233                 devinet_sysctl_register(in_dev);
1234                 break;
1235         }
1236 out:
1237         return NOTIFY_DONE;
1238 }
1239
1240 static struct notifier_block ip_netdev_notifier = {
1241         .notifier_call = inetdev_event,
1242 };
1243
1244 static inline size_t inet_nlmsg_size(void)
1245 {
1246         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1247                + nla_total_size(4) /* IFA_ADDRESS */
1248                + nla_total_size(4) /* IFA_LOCAL */
1249                + nla_total_size(4) /* IFA_BROADCAST */
1250                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1251 }
1252
1253 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1254                             u32 pid, u32 seq, int event, unsigned int flags)
1255 {
1256         struct ifaddrmsg *ifm;
1257         struct nlmsghdr  *nlh;
1258
1259         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1260         if (nlh == NULL)
1261                 return -EMSGSIZE;
1262
1263         ifm = nlmsg_data(nlh);
1264         ifm->ifa_family = AF_INET;
1265         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1266         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1267         ifm->ifa_scope = ifa->ifa_scope;
1268         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1269
1270         if (ifa->ifa_address)
1271                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1272
1273         if (ifa->ifa_local)
1274                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1275
1276         if (ifa->ifa_broadcast)
1277                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1278
1279         if (ifa->ifa_label[0])
1280                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1281
1282         return nlmsg_end(skb, nlh);
1283
1284 nla_put_failure:
1285         nlmsg_cancel(skb, nlh);
1286         return -EMSGSIZE;
1287 }
1288
1289 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1290 {
1291         struct net *net = sock_net(skb->sk);
1292         int h, s_h;
1293         int idx, s_idx;
1294         int ip_idx, s_ip_idx;
1295         struct net_device *dev;
1296         struct in_device *in_dev;
1297         struct in_ifaddr *ifa;
1298         struct hlist_head *head;
1299         struct hlist_node *node;
1300
1301         s_h = cb->args[0];
1302         s_idx = idx = cb->args[1];
1303         s_ip_idx = ip_idx = cb->args[2];
1304
1305         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1306                 idx = 0;
1307                 head = &net->dev_index_head[h];
1308                 rcu_read_lock();
1309                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1310                         if (idx < s_idx)
1311                                 goto cont;
1312                         if (h > s_h || idx > s_idx)
1313                                 s_ip_idx = 0;
1314                         in_dev = __in_dev_get_rcu(dev);
1315                         if (!in_dev)
1316                                 goto cont;
1317
1318                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1319                              ifa = ifa->ifa_next, ip_idx++) {
1320                                 if (ip_idx < s_ip_idx)
1321                                         continue;
1322                                 if (inet_fill_ifaddr(skb, ifa,
1323                                              NETLINK_CB(cb->skb).pid,
1324                                              cb->nlh->nlmsg_seq,
1325                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1326                                         rcu_read_unlock();
1327                                         goto done;
1328                                 }
1329                         }
1330 cont:
1331                         idx++;
1332                 }
1333                 rcu_read_unlock();
1334         }
1335
1336 done:
1337         cb->args[0] = h;
1338         cb->args[1] = idx;
1339         cb->args[2] = ip_idx;
1340
1341         return skb->len;
1342 }
1343
1344 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1345                       u32 pid)
1346 {
1347         struct sk_buff *skb;
1348         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1349         int err = -ENOBUFS;
1350         struct net *net;
1351
1352         net = dev_net(ifa->ifa_dev->dev);
1353         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1354         if (skb == NULL)
1355                 goto errout;
1356
1357         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1358         if (err < 0) {
1359                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1360                 WARN_ON(err == -EMSGSIZE);
1361                 kfree_skb(skb);
1362                 goto errout;
1363         }
1364         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1365         return;
1366 errout:
1367         if (err < 0)
1368                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1369 }
1370
1371 static size_t inet_get_link_af_size(const struct net_device *dev)
1372 {
1373         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1374
1375         if (!in_dev)
1376                 return 0;
1377
1378         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1379 }
1380
1381 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1382 {
1383         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1384         struct nlattr *nla;
1385         int i;
1386
1387         if (!in_dev)
1388                 return -ENODATA;
1389
1390         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1391         if (nla == NULL)
1392                 return -EMSGSIZE;
1393
1394         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1395                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1396
1397         return 0;
1398 }
1399
1400 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1401         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1402 };
1403
1404 static int inet_validate_link_af(const struct net_device *dev,
1405                                  const struct nlattr *nla)
1406 {
1407         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1408         int err, rem;
1409
1410         if (dev && !__in_dev_get_rtnl(dev))
1411                 return -EAFNOSUPPORT;
1412
1413         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1414         if (err < 0)
1415                 return err;
1416
1417         if (tb[IFLA_INET_CONF]) {
1418                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1419                         int cfgid = nla_type(a);
1420
1421                         if (nla_len(a) < 4)
1422                                 return -EINVAL;
1423
1424                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1425                                 return -EINVAL;
1426                 }
1427         }
1428
1429         return 0;
1430 }
1431
1432 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1433 {
1434         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1435         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1436         int rem;
1437
1438         if (!in_dev)
1439                 return -EAFNOSUPPORT;
1440
1441         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1442                 BUG();
1443
1444         if (tb[IFLA_INET_CONF]) {
1445                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1446                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1447         }
1448
1449         return 0;
1450 }
1451
1452 #ifdef CONFIG_SYSCTL
1453
1454 static void devinet_copy_dflt_conf(struct net *net, int i)
1455 {
1456         struct net_device *dev;
1457
1458         rcu_read_lock();
1459         for_each_netdev_rcu(net, dev) {
1460                 struct in_device *in_dev;
1461
1462                 in_dev = __in_dev_get_rcu(dev);
1463                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1464                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1465         }
1466         rcu_read_unlock();
1467 }
1468
1469 /* called with RTNL locked */
1470 static void inet_forward_change(struct net *net)
1471 {
1472         struct net_device *dev;
1473         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1474
1475         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1476         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1477
1478         for_each_netdev(net, dev) {
1479                 struct in_device *in_dev;
1480                 if (on)
1481                         dev_disable_lro(dev);
1482                 rcu_read_lock();
1483                 in_dev = __in_dev_get_rcu(dev);
1484                 if (in_dev)
1485                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1486                 rcu_read_unlock();
1487         }
1488 }
1489
1490 static int devinet_conf_proc(ctl_table *ctl, int write,
1491                              void __user *buffer,
1492                              size_t *lenp, loff_t *ppos)
1493 {
1494         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1495
1496         if (write) {
1497                 struct ipv4_devconf *cnf = ctl->extra1;
1498                 struct net *net = ctl->extra2;
1499                 int i = (int *)ctl->data - cnf->data;
1500
1501                 set_bit(i, cnf->state);
1502
1503                 if (cnf == net->ipv4.devconf_dflt)
1504                         devinet_copy_dflt_conf(net, i);
1505         }
1506
1507         return ret;
1508 }
1509
1510 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1511                                   void __user *buffer,
1512                                   size_t *lenp, loff_t *ppos)
1513 {
1514         int *valp = ctl->data;
1515         int val = *valp;
1516         loff_t pos = *ppos;
1517         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1518
1519         if (write && *valp != val) {
1520                 struct net *net = ctl->extra2;
1521
1522                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1523                         if (!rtnl_trylock()) {
1524                                 /* Restore the original values before restarting */
1525                                 *valp = val;
1526                                 *ppos = pos;
1527                                 return restart_syscall();
1528                         }
1529                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1530                                 inet_forward_change(net);
1531                         } else if (*valp) {
1532                                 struct ipv4_devconf *cnf = ctl->extra1;
1533                                 struct in_device *idev =
1534                                         container_of(cnf, struct in_device, cnf);
1535                                 dev_disable_lro(idev->dev);
1536                         }
1537                         rtnl_unlock();
1538                         rt_cache_flush(net, 0);
1539                 }
1540         }
1541
1542         return ret;
1543 }
1544
1545 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1546                                 void __user *buffer,
1547                                 size_t *lenp, loff_t *ppos)
1548 {
1549         int *valp = ctl->data;
1550         int val = *valp;
1551         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1552         struct net *net = ctl->extra2;
1553
1554         if (write && *valp != val)
1555                 rt_cache_flush(net, 0);
1556
1557         return ret;
1558 }
1559
1560 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1561         { \
1562                 .procname       = name, \
1563                 .data           = ipv4_devconf.data + \
1564                                   IPV4_DEVCONF_ ## attr - 1, \
1565                 .maxlen         = sizeof(int), \
1566                 .mode           = mval, \
1567                 .proc_handler   = proc, \
1568                 .extra1         = &ipv4_devconf, \
1569         }
1570
1571 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1572         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1573
1574 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1575         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1576
1577 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1578         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1579
1580 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1581         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1582
1583 static struct devinet_sysctl_table {
1584         struct ctl_table_header *sysctl_header;
1585         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1586         char *dev_name;
1587 } devinet_sysctl = {
1588         .devinet_vars = {
1589                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1590                                              devinet_sysctl_forward),
1591                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1592
1593                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1594                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1595                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1596                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1597                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1598                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1599                                         "accept_source_route"),
1600                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1601                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1602                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1603                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1604                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1605                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1606                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1607                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1608                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1609                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1610                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1611                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1612                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1613
1614                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1615                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1616                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1617                                               "force_igmp_version"),
1618                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1619                                               "promote_secondaries"),
1620         },
1621 };
1622
1623 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1624                                         struct ipv4_devconf *p)
1625 {
1626         int i;
1627         struct devinet_sysctl_table *t;
1628
1629 #define DEVINET_CTL_PATH_DEV    3
1630
1631         struct ctl_path devinet_ctl_path[] = {
1632                 { .procname = "net",  },
1633                 { .procname = "ipv4", },
1634                 { .procname = "conf", },
1635                 { /* to be set */ },
1636                 { },
1637         };
1638
1639         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1640         if (!t)
1641                 goto out;
1642
1643         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1644                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1645                 t->devinet_vars[i].extra1 = p;
1646                 t->devinet_vars[i].extra2 = net;
1647         }
1648
1649         /*
1650          * Make a copy of dev_name, because '.procname' is regarded as const
1651          * by sysctl and we wouldn't want anyone to change it under our feet
1652          * (see SIOCSIFNAME).
1653          */
1654         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1655         if (!t->dev_name)
1656                 goto free;
1657
1658         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1659
1660         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1661                         t->devinet_vars);
1662         if (!t->sysctl_header)
1663                 goto free_procname;
1664
1665         p->sysctl = t;
1666         return 0;
1667
1668 free_procname:
1669         kfree(t->dev_name);
1670 free:
1671         kfree(t);
1672 out:
1673         return -ENOBUFS;
1674 }
1675
1676 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1677 {
1678         struct devinet_sysctl_table *t = cnf->sysctl;
1679
1680         if (t == NULL)
1681                 return;
1682
1683         cnf->sysctl = NULL;
1684         unregister_sysctl_table(t->sysctl_header);
1685         kfree(t->dev_name);
1686         kfree(t);
1687 }
1688
1689 static void devinet_sysctl_register(struct in_device *idev)
1690 {
1691         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1692         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1693                                         &idev->cnf);
1694 }
1695
1696 static void devinet_sysctl_unregister(struct in_device *idev)
1697 {
1698         __devinet_sysctl_unregister(&idev->cnf);
1699         neigh_sysctl_unregister(idev->arp_parms);
1700 }
1701
1702 static struct ctl_table ctl_forward_entry[] = {
1703         {
1704                 .procname       = "ip_forward",
1705                 .data           = &ipv4_devconf.data[
1706                                         IPV4_DEVCONF_FORWARDING - 1],
1707                 .maxlen         = sizeof(int),
1708                 .mode           = 0644,
1709                 .proc_handler   = devinet_sysctl_forward,
1710                 .extra1         = &ipv4_devconf,
1711                 .extra2         = &init_net,
1712         },
1713         { },
1714 };
1715
1716 static __net_initdata struct ctl_path net_ipv4_path[] = {
1717         { .procname = "net", },
1718         { .procname = "ipv4", },
1719         { },
1720 };
1721 #endif
1722
1723 static __net_init int devinet_init_net(struct net *net)
1724 {
1725         int err;
1726         struct ipv4_devconf *all, *dflt;
1727 #ifdef CONFIG_SYSCTL
1728         struct ctl_table *tbl = ctl_forward_entry;
1729         struct ctl_table_header *forw_hdr;
1730 #endif
1731
1732         err = -ENOMEM;
1733         all = &ipv4_devconf;
1734         dflt = &ipv4_devconf_dflt;
1735
1736         if (!net_eq(net, &init_net)) {
1737                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1738                 if (all == NULL)
1739                         goto err_alloc_all;
1740
1741                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1742                 if (dflt == NULL)
1743                         goto err_alloc_dflt;
1744
1745 #ifdef CONFIG_SYSCTL
1746                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1747                 if (tbl == NULL)
1748                         goto err_alloc_ctl;
1749
1750                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1751                 tbl[0].extra1 = all;
1752                 tbl[0].extra2 = net;
1753 #endif
1754         }
1755
1756 #ifdef CONFIG_SYSCTL
1757         err = __devinet_sysctl_register(net, "all", all);
1758         if (err < 0)
1759                 goto err_reg_all;
1760
1761         err = __devinet_sysctl_register(net, "default", dflt);
1762         if (err < 0)
1763                 goto err_reg_dflt;
1764
1765         err = -ENOMEM;
1766         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1767         if (forw_hdr == NULL)
1768                 goto err_reg_ctl;
1769         net->ipv4.forw_hdr = forw_hdr;
1770 #endif
1771
1772         net->ipv4.devconf_all = all;
1773         net->ipv4.devconf_dflt = dflt;
1774         return 0;
1775
1776 #ifdef CONFIG_SYSCTL
1777 err_reg_ctl:
1778         __devinet_sysctl_unregister(dflt);
1779 err_reg_dflt:
1780         __devinet_sysctl_unregister(all);
1781 err_reg_all:
1782         if (tbl != ctl_forward_entry)
1783                 kfree(tbl);
1784 err_alloc_ctl:
1785 #endif
1786         if (dflt != &ipv4_devconf_dflt)
1787                 kfree(dflt);
1788 err_alloc_dflt:
1789         if (all != &ipv4_devconf)
1790                 kfree(all);
1791 err_alloc_all:
1792         return err;
1793 }
1794
1795 static __net_exit void devinet_exit_net(struct net *net)
1796 {
1797 #ifdef CONFIG_SYSCTL
1798         struct ctl_table *tbl;
1799
1800         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1801         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1802         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1803         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1804         kfree(tbl);
1805 #endif
1806         kfree(net->ipv4.devconf_dflt);
1807         kfree(net->ipv4.devconf_all);
1808 }
1809
1810 static __net_initdata struct pernet_operations devinet_ops = {
1811         .init = devinet_init_net,
1812         .exit = devinet_exit_net,
1813 };
1814
1815 static struct rtnl_af_ops inet_af_ops = {
1816         .family           = AF_INET,
1817         .fill_link_af     = inet_fill_link_af,
1818         .get_link_af_size = inet_get_link_af_size,
1819         .validate_link_af = inet_validate_link_af,
1820         .set_link_af      = inet_set_link_af,
1821 };
1822
1823 void __init devinet_init(void)
1824 {
1825         int i;
1826
1827         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1828                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1829
1830         register_pernet_subsys(&devinet_ops);
1831
1832         register_gifconf(PF_INET, inet_gifconf);
1833         register_netdevice_notifier(&ip_netdev_notifier);
1834
1835         rtnl_af_register(&inet_af_ops);
1836
1837         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1838         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1839         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1840 }
1841