net: wireless: bcmdhd: Put p2p_dev_addr under DHD_P2P_DEV_ADDR_FROM_SYSFS
[linux-2.6.git] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  *      Derived from the IP parts of dev.c 1.0.19
10  *              Authors:        Ross Biro
11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
13  *
14  *      Additional Authors:
15  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
16  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
17  *
18  *      Changes:
19  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
20  *                                      lists.
21  *              Cyrus Durgin:           updated for kmod
22  *              Matthias Andree:        in devinet_ioctl, compare label and
23  *                                      address (4.4BSD alias style support),
24  *                                      fall back to comparing just the label
25  *                                      if no match found.
26  */
27
28
29 #include <asm/uaccess.h>
30 #include <asm/system.h>
31 #include <linux/bitops.h>
32 #include <linux/capability.h>
33 #include <linux/module.h>
34 #include <linux/types.h>
35 #include <linux/kernel.h>
36 #include <linux/string.h>
37 #include <linux/mm.h>
38 #include <linux/socket.h>
39 #include <linux/sockios.h>
40 #include <linux/in.h>
41 #include <linux/errno.h>
42 #include <linux/interrupt.h>
43 #include <linux/if_addr.h>
44 #include <linux/if_ether.h>
45 #include <linux/inet.h>
46 #include <linux/netdevice.h>
47 #include <linux/etherdevice.h>
48 #include <linux/skbuff.h>
49 #include <linux/init.h>
50 #include <linux/notifier.h>
51 #include <linux/inetdevice.h>
52 #include <linux/igmp.h>
53 #include <linux/slab.h>
54 #include <linux/hash.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/tcp.h>
63 #include <net/route.h>
64 #include <net/ip_fib.h>
65 #include <net/rtnetlink.h>
66 #include <net/net_namespace.h>
67
68 #include "fib_lookup.h"
69
70 static struct ipv4_devconf ipv4_devconf = {
71         .data = {
72                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
73                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
74                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
75                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
76         },
77 };
78
79 static struct ipv4_devconf ipv4_devconf_dflt = {
80         .data = {
81                 [IPV4_DEVCONF_ACCEPT_REDIRECTS - 1] = 1,
82                 [IPV4_DEVCONF_SEND_REDIRECTS - 1] = 1,
83                 [IPV4_DEVCONF_SECURE_REDIRECTS - 1] = 1,
84                 [IPV4_DEVCONF_SHARED_MEDIA - 1] = 1,
85                 [IPV4_DEVCONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
86         },
87 };
88
/*
 * Read configuration attribute @attr from the devconf_dflt block of the
 * namespace @net points to.
 *
 * @net is parenthesised so that any pointer expression (e.g. "&ns" or
 * "ns + 1") binds correctly before the "->" is applied.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
91
92 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
93         [IFA_LOCAL]             = { .type = NLA_U32 },
94         [IFA_ADDRESS]           = { .type = NLA_U32 },
95         [IFA_BROADCAST]         = { .type = NLA_U32 },
96         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
97 };
98
99 /* inet_addr_hash's shifting is dependent upon this IN4_ADDR_HSIZE
100  * value.  So if you change this define, make appropriate changes to
101  * inet_addr_hash as well.
102  */
103 #define IN4_ADDR_HSIZE  256
104 static struct hlist_head inet_addr_lst[IN4_ADDR_HSIZE];
105 static DEFINE_SPINLOCK(inet_addr_hash_lock);
106
107 static inline unsigned int inet_addr_hash(struct net *net, __be32 addr)
108 {
109         u32 val = (__force u32) addr ^ hash_ptr(net, 8);
110
111         return ((val ^ (val >> 8) ^ (val >> 16) ^ (val >> 24)) &
112                 (IN4_ADDR_HSIZE - 1));
113 }
114
115 static void inet_hash_insert(struct net *net, struct in_ifaddr *ifa)
116 {
117         unsigned int hash = inet_addr_hash(net, ifa->ifa_local);
118
119         spin_lock(&inet_addr_hash_lock);
120         hlist_add_head_rcu(&ifa->hash, &inet_addr_lst[hash]);
121         spin_unlock(&inet_addr_hash_lock);
122 }
123
124 static void inet_hash_remove(struct in_ifaddr *ifa)
125 {
126         spin_lock(&inet_addr_hash_lock);
127         hlist_del_init_rcu(&ifa->hash);
128         spin_unlock(&inet_addr_hash_lock);
129 }
130
131 /**
132  * __ip_dev_find - find the first device with a given source address.
133  * @net: the net namespace
134  * @addr: the source address
135  * @devref: if true, take a reference on the found device
136  *
137  * If a caller uses devref=false, it should be protected by RCU, or RTNL
138  */
139 struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref)
140 {
141         unsigned int hash = inet_addr_hash(net, addr);
142         struct net_device *result = NULL;
143         struct in_ifaddr *ifa;
144         struct hlist_node *node;
145
146         rcu_read_lock();
147         hlist_for_each_entry_rcu(ifa, node, &inet_addr_lst[hash], hash) {
148                 struct net_device *dev = ifa->ifa_dev->dev;
149
150                 if (!net_eq(dev_net(dev), net))
151                         continue;
152                 if (ifa->ifa_local == addr) {
153                         result = dev;
154                         break;
155                 }
156         }
157         if (!result) {
158                 struct flowi4 fl4 = { .daddr = addr };
159                 struct fib_result res = { 0 };
160                 struct fib_table *local;
161
162                 /* Fallback to FIB local table so that communication
163                  * over loopback subnets work.
164                  */
165                 local = fib_get_table(net, RT_TABLE_LOCAL);
166                 if (local &&
167                     !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) &&
168                     res.type == RTN_LOCAL)
169                         result = FIB_RES_DEV(res);
170         }
171         if (result && devref)
172                 dev_hold(result);
173         rcu_read_unlock();
174         return result;
175 }
176 EXPORT_SYMBOL(__ip_dev_find);
177
178 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
179
180 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
181 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
182                          int destroy);
183 #ifdef CONFIG_SYSCTL
184 static void devinet_sysctl_register(struct in_device *idev);
185 static void devinet_sysctl_unregister(struct in_device *idev);
186 #else
187 static inline void devinet_sysctl_register(struct in_device *idev)
188 {
189 }
190 static inline void devinet_sysctl_unregister(struct in_device *idev)
191 {
192 }
193 #endif
194
195 /* Locks all the inet devices. */
196
197 static struct in_ifaddr *inet_alloc_ifa(void)
198 {
199         return kzalloc(sizeof(struct in_ifaddr), GFP_KERNEL);
200 }
201
202 static void inet_rcu_free_ifa(struct rcu_head *head)
203 {
204         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
205         if (ifa->ifa_dev)
206                 in_dev_put(ifa->ifa_dev);
207         kfree(ifa);
208 }
209
210 static inline void inet_free_ifa(struct in_ifaddr *ifa)
211 {
212         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
213 }
214
215 void in_dev_finish_destroy(struct in_device *idev)
216 {
217         struct net_device *dev = idev->dev;
218
219         WARN_ON(idev->ifa_list);
220         WARN_ON(idev->mc_list);
221 #ifdef NET_REFCNT_DEBUG
222         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
223                idev, dev ? dev->name : "NIL");
224 #endif
225         dev_put(dev);
226         if (!idev->dead)
227                 pr_err("Freeing alive in_device %p\n", idev);
228         else
229                 kfree(idev);
230 }
231 EXPORT_SYMBOL(in_dev_finish_destroy);
232
233 static struct in_device *inetdev_init(struct net_device *dev)
234 {
235         struct in_device *in_dev;
236
237         ASSERT_RTNL();
238
239         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
240         if (!in_dev)
241                 goto out;
242         memcpy(&in_dev->cnf, dev_net(dev)->ipv4.devconf_dflt,
243                         sizeof(in_dev->cnf));
244         in_dev->cnf.sysctl = NULL;
245         in_dev->dev = dev;
246         in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl);
247         if (!in_dev->arp_parms)
248                 goto out_kfree;
249         if (IPV4_DEVCONF(in_dev->cnf, FORWARDING))
250                 dev_disable_lro(dev);
251         /* Reference in_dev->dev */
252         dev_hold(dev);
253         /* Account for reference dev->ip_ptr (below) */
254         in_dev_hold(in_dev);
255
256         devinet_sysctl_register(in_dev);
257         ip_mc_init_dev(in_dev);
258         if (dev->flags & IFF_UP)
259                 ip_mc_up(in_dev);
260
261         /* we can receive as soon as ip_ptr is set -- do this last */
262         rcu_assign_pointer(dev->ip_ptr, in_dev);
263 out:
264         return in_dev;
265 out_kfree:
266         kfree(in_dev);
267         in_dev = NULL;
268         goto out;
269 }
270
271 static void in_dev_rcu_put(struct rcu_head *head)
272 {
273         struct in_device *idev = container_of(head, struct in_device, rcu_head);
274         in_dev_put(idev);
275 }
276
277 static void inetdev_destroy(struct in_device *in_dev)
278 {
279         struct in_ifaddr *ifa;
280         struct net_device *dev;
281
282         ASSERT_RTNL();
283
284         dev = in_dev->dev;
285
286         in_dev->dead = 1;
287
288         ip_mc_destroy_dev(in_dev);
289
290         while ((ifa = in_dev->ifa_list) != NULL) {
291                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
292                 inet_free_ifa(ifa);
293         }
294
295         rcu_assign_pointer(dev->ip_ptr, NULL);
296
297         devinet_sysctl_unregister(in_dev);
298         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
299         arp_ifdown(dev);
300
301         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
302 }
303
304 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
305 {
306         rcu_read_lock();
307         for_primary_ifa(in_dev) {
308                 if (inet_ifa_match(a, ifa)) {
309                         if (!b || inet_ifa_match(b, ifa)) {
310                                 rcu_read_unlock();
311                                 return 1;
312                         }
313                 }
314         } endfor_ifa(in_dev);
315         rcu_read_unlock();
316         return 0;
317 }
318
319 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
320                          int destroy, struct nlmsghdr *nlh, u32 pid)
321 {
322         struct in_ifaddr *promote = NULL;
323         struct in_ifaddr *ifa, *ifa1 = *ifap;
324         struct in_ifaddr *last_prim = in_dev->ifa_list;
325         struct in_ifaddr *prev_prom = NULL;
326         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
327
328         ASSERT_RTNL();
329
330         /* 1. Deleting primary ifaddr forces deletion all secondaries
331          * unless alias promotion is set
332          **/
333
334         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
335                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
336
337                 while ((ifa = *ifap1) != NULL) {
338                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
339                             ifa1->ifa_scope <= ifa->ifa_scope)
340                                 last_prim = ifa;
341
342                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
343                             ifa1->ifa_mask != ifa->ifa_mask ||
344                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
345                                 ifap1 = &ifa->ifa_next;
346                                 prev_prom = ifa;
347                                 continue;
348                         }
349
350                         if (!do_promote) {
351                                 inet_hash_remove(ifa);
352                                 *ifap1 = ifa->ifa_next;
353
354                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
355                                 blocking_notifier_call_chain(&inetaddr_chain,
356                                                 NETDEV_DOWN, ifa);
357                                 inet_free_ifa(ifa);
358                         } else {
359                                 promote = ifa;
360                                 break;
361                         }
362                 }
363         }
364
365         /* On promotion all secondaries from subnet are changing
366          * the primary IP, we must remove all their routes silently
367          * and later to add them back with new prefsrc. Do this
368          * while all addresses are on the device list.
369          */
370         for (ifa = promote; ifa; ifa = ifa->ifa_next) {
371                 if (ifa1->ifa_mask == ifa->ifa_mask &&
372                     inet_ifa_match(ifa1->ifa_address, ifa))
373                         fib_del_ifaddr(ifa, ifa1);
374         }
375
376         /* 2. Unlink it */
377
378         *ifap = ifa1->ifa_next;
379         inet_hash_remove(ifa1);
380
381         /* 3. Announce address deletion */
382
383         /* Send message first, then call notifier.
384            At first sight, FIB update triggered by notifier
385            will refer to already deleted ifaddr, that could confuse
386            netlink listeners. It is not true: look, gated sees
387            that route deleted and if it still thinks that ifaddr
388            is valid, it will try to restore deleted routes... Grr.
389            So that, this order is correct.
390          */
391         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
392         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
393
394         if (promote) {
395                 struct in_ifaddr *next_sec = promote->ifa_next;
396
397                 if (prev_prom) {
398                         prev_prom->ifa_next = promote->ifa_next;
399                         promote->ifa_next = last_prim->ifa_next;
400                         last_prim->ifa_next = promote;
401                 }
402
403                 promote->ifa_flags &= ~IFA_F_SECONDARY;
404                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
405                 blocking_notifier_call_chain(&inetaddr_chain,
406                                 NETDEV_UP, promote);
407                 for (ifa = next_sec; ifa; ifa = ifa->ifa_next) {
408                         if (ifa1->ifa_mask != ifa->ifa_mask ||
409                             !inet_ifa_match(ifa1->ifa_address, ifa))
410                                         continue;
411                         fib_add_ifaddr(ifa);
412                 }
413
414         }
415         if (destroy)
416                 inet_free_ifa(ifa1);
417 }
418
419 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
420                          int destroy)
421 {
422         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
423 }
424
425 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
426                              u32 pid)
427 {
428         struct in_device *in_dev = ifa->ifa_dev;
429         struct in_ifaddr *ifa1, **ifap, **last_primary;
430
431         ASSERT_RTNL();
432
433         if (!ifa->ifa_local) {
434                 inet_free_ifa(ifa);
435                 return 0;
436         }
437
438         ifa->ifa_flags &= ~IFA_F_SECONDARY;
439         last_primary = &in_dev->ifa_list;
440
441         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
442              ifap = &ifa1->ifa_next) {
443                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
444                     ifa->ifa_scope <= ifa1->ifa_scope)
445                         last_primary = &ifa1->ifa_next;
446                 if (ifa1->ifa_mask == ifa->ifa_mask &&
447                     inet_ifa_match(ifa1->ifa_address, ifa)) {
448                         if (ifa1->ifa_local == ifa->ifa_local) {
449                                 inet_free_ifa(ifa);
450                                 return -EEXIST;
451                         }
452                         if (ifa1->ifa_scope != ifa->ifa_scope) {
453                                 inet_free_ifa(ifa);
454                                 return -EINVAL;
455                         }
456                         ifa->ifa_flags |= IFA_F_SECONDARY;
457                 }
458         }
459
460         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
461                 net_srandom(ifa->ifa_local);
462                 ifap = last_primary;
463         }
464
465         ifa->ifa_next = *ifap;
466         *ifap = ifa;
467
468         inet_hash_insert(dev_net(in_dev->dev), ifa);
469
470         /* Send message first, then call notifier.
471            Notifier will trigger FIB update, so that
472            listeners of netlink will know about new ifaddr */
473         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
474         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
475
476         return 0;
477 }
478
479 static int inet_insert_ifa(struct in_ifaddr *ifa)
480 {
481         return __inet_insert_ifa(ifa, NULL, 0);
482 }
483
484 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
485 {
486         struct in_device *in_dev = __in_dev_get_rtnl(dev);
487
488         ASSERT_RTNL();
489
490         if (!in_dev) {
491                 inet_free_ifa(ifa);
492                 return -ENOBUFS;
493         }
494         ipv4_devconf_setall(in_dev);
495         if (ifa->ifa_dev != in_dev) {
496                 WARN_ON(ifa->ifa_dev);
497                 in_dev_hold(in_dev);
498                 ifa->ifa_dev = in_dev;
499         }
500         if (ipv4_is_loopback(ifa->ifa_local))
501                 ifa->ifa_scope = RT_SCOPE_HOST;
502         return inet_insert_ifa(ifa);
503 }
504
505 /* Caller must hold RCU or RTNL :
506  * We dont take a reference on found in_device
507  */
508 struct in_device *inetdev_by_index(struct net *net, int ifindex)
509 {
510         struct net_device *dev;
511         struct in_device *in_dev = NULL;
512
513         rcu_read_lock();
514         dev = dev_get_by_index_rcu(net, ifindex);
515         if (dev)
516                 in_dev = rcu_dereference_rtnl(dev->ip_ptr);
517         rcu_read_unlock();
518         return in_dev;
519 }
520 EXPORT_SYMBOL(inetdev_by_index);
521
522 /* Called only from RTNL semaphored context. No locks. */
523
524 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
525                                     __be32 mask)
526 {
527         ASSERT_RTNL();
528
529         for_primary_ifa(in_dev) {
530                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
531                         return ifa;
532         } endfor_ifa(in_dev);
533         return NULL;
534 }
535
536 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
537 {
538         struct net *net = sock_net(skb->sk);
539         struct nlattr *tb[IFA_MAX+1];
540         struct in_device *in_dev;
541         struct ifaddrmsg *ifm;
542         struct in_ifaddr *ifa, **ifap;
543         int err = -EINVAL;
544
545         ASSERT_RTNL();
546
547         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
548         if (err < 0)
549                 goto errout;
550
551         ifm = nlmsg_data(nlh);
552         in_dev = inetdev_by_index(net, ifm->ifa_index);
553         if (in_dev == NULL) {
554                 err = -ENODEV;
555                 goto errout;
556         }
557
558         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
559              ifap = &ifa->ifa_next) {
560                 if (tb[IFA_LOCAL] &&
561                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
562                         continue;
563
564                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
565                         continue;
566
567                 if (tb[IFA_ADDRESS] &&
568                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
569                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
570                         continue;
571
572                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
573                 return 0;
574         }
575
576         err = -EADDRNOTAVAIL;
577 errout:
578         return err;
579 }
580
581 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
582 {
583         struct nlattr *tb[IFA_MAX+1];
584         struct in_ifaddr *ifa;
585         struct ifaddrmsg *ifm;
586         struct net_device *dev;
587         struct in_device *in_dev;
588         int err;
589
590         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
591         if (err < 0)
592                 goto errout;
593
594         ifm = nlmsg_data(nlh);
595         err = -EINVAL;
596         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
597                 goto errout;
598
599         dev = __dev_get_by_index(net, ifm->ifa_index);
600         err = -ENODEV;
601         if (dev == NULL)
602                 goto errout;
603
604         in_dev = __in_dev_get_rtnl(dev);
605         err = -ENOBUFS;
606         if (in_dev == NULL)
607                 goto errout;
608
609         ifa = inet_alloc_ifa();
610         if (ifa == NULL)
611                 /*
612                  * A potential indev allocation can be left alive, it stays
613                  * assigned to its device and is destroy with it.
614                  */
615                 goto errout;
616
617         ipv4_devconf_setall(in_dev);
618         in_dev_hold(in_dev);
619
620         if (tb[IFA_ADDRESS] == NULL)
621                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
622
623         INIT_HLIST_NODE(&ifa->hash);
624         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
625         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
626         ifa->ifa_flags = ifm->ifa_flags;
627         ifa->ifa_scope = ifm->ifa_scope;
628         ifa->ifa_dev = in_dev;
629
630         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
631         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
632
633         if (tb[IFA_BROADCAST])
634                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
635
636         if (tb[IFA_LABEL])
637                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
638         else
639                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
640
641         return ifa;
642
643 errout:
644         return ERR_PTR(err);
645 }
646
647 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
648 {
649         struct net *net = sock_net(skb->sk);
650         struct in_ifaddr *ifa;
651
652         ASSERT_RTNL();
653
654         ifa = rtm_to_ifaddr(net, nlh);
655         if (IS_ERR(ifa))
656                 return PTR_ERR(ifa);
657
658         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
659 }
660
661 /*
662  *      Determine a default network mask, based on the IP address.
663  */
664
665 static inline int inet_abc_len(__be32 addr)
666 {
667         int rc = -1;    /* Something else, probably a multicast. */
668
669         if (ipv4_is_zeronet(addr))
670                 rc = 0;
671         else {
672                 __u32 haddr = ntohl(addr);
673
674                 if (IN_CLASSA(haddr))
675                         rc = 8;
676                 else if (IN_CLASSB(haddr))
677                         rc = 16;
678                 else if (IN_CLASSC(haddr))
679                         rc = 24;
680         }
681
682         return rc;
683 }
684
685
686 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
687 {
688         struct ifreq ifr;
689         struct sockaddr_in sin_orig;
690         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
691         struct in_device *in_dev;
692         struct in_ifaddr **ifap = NULL;
693         struct in_ifaddr *ifa = NULL;
694         struct net_device *dev;
695         char *colon;
696         int ret = -EFAULT;
697         int tryaddrmatch = 0;
698
699         /*
700          *      Fetch the caller's info block into kernel space
701          */
702
703         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
704                 goto out;
705         ifr.ifr_name[IFNAMSIZ - 1] = 0;
706
707         /* save original address for comparison */
708         memcpy(&sin_orig, sin, sizeof(*sin));
709
710         colon = strchr(ifr.ifr_name, ':');
711         if (colon)
712                 *colon = 0;
713
714         dev_load(net, ifr.ifr_name);
715
716         switch (cmd) {
717         case SIOCGIFADDR:       /* Get interface address */
718         case SIOCGIFBRDADDR:    /* Get the broadcast address */
719         case SIOCGIFDSTADDR:    /* Get the destination address */
720         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
721                 /* Note that these ioctls will not sleep,
722                    so that we do not impose a lock.
723                    One day we will be forced to put shlock here (I mean SMP)
724                  */
725                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
726                 memset(sin, 0, sizeof(*sin));
727                 sin->sin_family = AF_INET;
728                 break;
729
730         case SIOCSIFFLAGS:
731                 ret = -EACCES;
732                 if (!capable(CAP_NET_ADMIN))
733                         goto out;
734                 break;
735         case SIOCSIFADDR:       /* Set interface address (and family) */
736         case SIOCSIFBRDADDR:    /* Set the broadcast address */
737         case SIOCSIFDSTADDR:    /* Set the destination address */
738         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
739         case SIOCKILLADDR:      /* Nuke all sockets on this address */
740                 ret = -EACCES;
741                 if (!capable(CAP_NET_ADMIN))
742                         goto out;
743                 ret = -EINVAL;
744                 if (sin->sin_family != AF_INET)
745                         goto out;
746                 break;
747         default:
748                 ret = -EINVAL;
749                 goto out;
750         }
751
752         rtnl_lock();
753
754         ret = -ENODEV;
755         dev = __dev_get_by_name(net, ifr.ifr_name);
756         if (!dev)
757                 goto done;
758
759         if (colon)
760                 *colon = ':';
761
762         in_dev = __in_dev_get_rtnl(dev);
763         if (in_dev) {
764                 if (tryaddrmatch) {
765                         /* Matthias Andree */
766                         /* compare label and address (4.4BSD style) */
767                         /* note: we only do this for a limited set of ioctls
768                            and only if the original address family was AF_INET.
769                            This is checked above. */
770                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
771                              ifap = &ifa->ifa_next) {
772                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
773                                     sin_orig.sin_addr.s_addr ==
774                                                         ifa->ifa_local) {
775                                         break; /* found */
776                                 }
777                         }
778                 }
779                 /* we didn't get a match, maybe the application is
780                    4.3BSD-style and passed in junk so we fall back to
781                    comparing just the label */
782                 if (!ifa) {
783                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
784                              ifap = &ifa->ifa_next)
785                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
786                                         break;
787                 }
788         }
789
790         ret = -EADDRNOTAVAIL;
791         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS
792             && cmd != SIOCKILLADDR)
793                 goto done;
794
795         switch (cmd) {
796         case SIOCGIFADDR:       /* Get interface address */
797                 sin->sin_addr.s_addr = ifa->ifa_local;
798                 goto rarok;
799
800         case SIOCGIFBRDADDR:    /* Get the broadcast address */
801                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
802                 goto rarok;
803
804         case SIOCGIFDSTADDR:    /* Get the destination address */
805                 sin->sin_addr.s_addr = ifa->ifa_address;
806                 goto rarok;
807
808         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
809                 sin->sin_addr.s_addr = ifa->ifa_mask;
810                 goto rarok;
811
812         case SIOCSIFFLAGS:
813                 if (colon) {
814                         ret = -EADDRNOTAVAIL;
815                         if (!ifa)
816                                 break;
817                         ret = 0;
818                         if (!(ifr.ifr_flags & IFF_UP))
819                                 inet_del_ifa(in_dev, ifap, 1);
820                         break;
821                 }
822                 ret = dev_change_flags(dev, ifr.ifr_flags);
823                 break;
824
825         case SIOCSIFADDR:       /* Set interface address (and family) */
826                 ret = -EINVAL;
827                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
828                         break;
829
830                 if (!ifa) {
831                         ret = -ENOBUFS;
832                         ifa = inet_alloc_ifa();
833                         INIT_HLIST_NODE(&ifa->hash);
834                         if (!ifa)
835                                 break;
836                         if (colon)
837                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
838                         else
839                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
840                 } else {
841                         ret = 0;
842                         if (ifa->ifa_local == sin->sin_addr.s_addr)
843                                 break;
844                         inet_del_ifa(in_dev, ifap, 0);
845                         ifa->ifa_broadcast = 0;
846                         ifa->ifa_scope = 0;
847                 }
848
849                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
850
851                 if (!(dev->flags & IFF_POINTOPOINT)) {
852                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
853                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
854                         if ((dev->flags & IFF_BROADCAST) &&
855                             ifa->ifa_prefixlen < 31)
856                                 ifa->ifa_broadcast = ifa->ifa_address |
857                                                      ~ifa->ifa_mask;
858                 } else {
859                         ifa->ifa_prefixlen = 32;
860                         ifa->ifa_mask = inet_make_mask(32);
861                 }
862                 ret = inet_set_ifa(dev, ifa);
863                 break;
864
865         case SIOCSIFBRDADDR:    /* Set the broadcast address */
866                 ret = 0;
867                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
868                         inet_del_ifa(in_dev, ifap, 0);
869                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
870                         inet_insert_ifa(ifa);
871                 }
872                 break;
873
874         case SIOCSIFDSTADDR:    /* Set the destination address */
875                 ret = 0;
876                 if (ifa->ifa_address == sin->sin_addr.s_addr)
877                         break;
878                 ret = -EINVAL;
879                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
880                         break;
881                 ret = 0;
882                 inet_del_ifa(in_dev, ifap, 0);
883                 ifa->ifa_address = sin->sin_addr.s_addr;
884                 inet_insert_ifa(ifa);
885                 break;
886
887         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
888
889                 /*
890                  *      The mask we set must be legal.
891                  */
892                 ret = -EINVAL;
893                 if (bad_mask(sin->sin_addr.s_addr, 0))
894                         break;
895                 ret = 0;
896                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
897                         __be32 old_mask = ifa->ifa_mask;
898                         inet_del_ifa(in_dev, ifap, 0);
899                         ifa->ifa_mask = sin->sin_addr.s_addr;
900                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
901
902                         /* See if current broadcast address matches
903                          * with current netmask, then recalculate
904                          * the broadcast address. Otherwise it's a
905                          * funny address, so don't touch it since
906                          * the user seems to know what (s)he's doing...
907                          */
908                         if ((dev->flags & IFF_BROADCAST) &&
909                             (ifa->ifa_prefixlen < 31) &&
910                             (ifa->ifa_broadcast ==
911                              (ifa->ifa_local|~old_mask))) {
912                                 ifa->ifa_broadcast = (ifa->ifa_local |
913                                                       ~sin->sin_addr.s_addr);
914                         }
915                         inet_insert_ifa(ifa);
916                 }
917                 break;
918         case SIOCKILLADDR:      /* Nuke all connections on this address */
919                 ret = tcp_nuke_addr(net, (struct sockaddr *) sin);
920                 break;
921         }
922 done:
923         rtnl_unlock();
924 out:
925         return ret;
926 rarok:
927         rtnl_unlock();
928         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
929         goto out;
930 }
931
932 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
933 {
934         struct in_device *in_dev = __in_dev_get_rtnl(dev);
935         struct in_ifaddr *ifa;
936         struct ifreq ifr;
937         int done = 0;
938
939         if (!in_dev)
940                 goto out;
941
942         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
943                 if (!buf) {
944                         done += sizeof(ifr);
945                         continue;
946                 }
947                 if (len < (int) sizeof(ifr))
948                         break;
949                 memset(&ifr, 0, sizeof(struct ifreq));
950                 if (ifa->ifa_label)
951                         strcpy(ifr.ifr_name, ifa->ifa_label);
952                 else
953                         strcpy(ifr.ifr_name, dev->name);
954
955                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
956                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
957                                                                 ifa->ifa_local;
958
959                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
960                         done = -EFAULT;
961                         break;
962                 }
963                 buf  += sizeof(struct ifreq);
964                 len  -= sizeof(struct ifreq);
965                 done += sizeof(struct ifreq);
966         }
967 out:
968         return done;
969 }
970
/*
 * inet_select_addr - pick a local IPv4 address to use on/for @dev.
 *
 * @dev:   device examined first.
 * @dst:   destination used for matching, or 0 for "no preference".
 * @scope: largest ifa_scope value an address may have to be eligible.
 *
 * First pass: walk @dev's addresses via for_primary_ifa(). Entries whose
 * scope exceeds @scope are skipped. An entry is returned immediately when
 * @dst is 0 or inet_ifa_match(dst, ifa) succeeds; otherwise the first
 * eligible entry is remembered as a fallback.
 *
 * Second pass (no in_device on @dev, or nothing found): walk every device
 * in @dev's namespace and return the first address whose scope is not
 * RT_SCOPE_LINK and does not exceed @scope.
 *
 * Returns the chosen address, or 0 if none qualified. The whole search
 * runs inside rcu_read_lock()/rcu_read_unlock().
 */
971 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
972 {
973         __be32 addr = 0;
974         struct in_device *in_dev;
975         struct net *net = dev_net(dev);
976
977         rcu_read_lock();
978         in_dev = __in_dev_get_rcu(dev);
979         if (!in_dev)
980                 goto no_in_dev;
981
982         for_primary_ifa(in_dev) {
983                 if (ifa->ifa_scope > scope)
984                         continue;
985                 if (!dst || inet_ifa_match(dst, ifa)) {
986                         addr = ifa->ifa_local;
987                         break;
988                 }
                    /* Remember the first in-scope address as a fallback. */
989                 if (!addr)
990                         addr = ifa->ifa_local;
991         } endfor_ifa(in_dev);
992
993         if (addr)
994                 goto out_unlock;
995 no_in_dev:
996
997         /* Not loopback addresses on loopback should be preferred
998            in this case. It is important that lo is the first interface
999            in dev_base list.
1000          */
             /* Note: from here on 'dev' is reused as the loop cursor. */
1001         for_each_netdev_rcu(net, dev) {
1002                 in_dev = __in_dev_get_rcu(dev);
1003                 if (!in_dev)
1004                         continue;
1005
1006                 for_primary_ifa(in_dev) {
1007                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
1008                             ifa->ifa_scope <= scope) {
1009                                 addr = ifa->ifa_local;
1010                                 goto out_unlock;
1011                         }
1012                 } endfor_ifa(in_dev);
1013         }
1014 out_unlock:
1015         rcu_read_unlock();
1016         return addr;
1017 }
1018 EXPORT_SYMBOL(inet_select_addr);
1019
/*
 * confirm_addr_indev - search one in_device for an acceptable local address.
 *
 * @in_dev: device state whose addresses are walked with for_ifa().
 * @dst:    destination that must match an entry, 0 = any.
 * @local:  required local address, 0 = pick one.
 * @scope:  largest ifa_scope value allowed for the returned address.
 *
 * Two things are tracked while walking the list:
 *   addr - the first ifa_local that equals @local (or any entry when
 *          @local is 0) and whose scope does not exceed @scope;
 *   same - whether some entry satisfied inet_ifa_match() for both @local
 *          and @dst (each treated as a wildcard when 0).
 *
 * Returns addr when 'same' ended up set, otherwise 0.
 */
1020 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
1021                               __be32 local, int scope)
1022 {
1023         int same = 0;
1024         __be32 addr = 0;
1025
1026         for_ifa(in_dev) {
                     /* Candidate address: taken only once (while addr is 0). */
1027                 if (!addr &&
1028                     (local == ifa->ifa_local || !local) &&
1029                     ifa->ifa_scope <= scope) {
1030                         addr = ifa->ifa_local;
1031                         if (same)
1032                                 break;
1033                 }
1034                 if (!same) {
1035                         same = (!local || inet_ifa_match(local, ifa)) &&
1036                                 (!dst || inet_ifa_match(dst, ifa));
1037                         if (same && addr) {
                                     /* Only the "local == 0, dst != 0" case
                                      * needs the extra checks below. */
1038                                 if (local || !dst)
1039                                         break;
1040                                 /* Is the selected addr into dst subnet? */
1041                                 if (inet_ifa_match(addr, ifa))
1042                                         break;
1043                                 /* No, then can we use new local src? */
1044                                 if (ifa->ifa_scope <= scope) {
1045                                         addr = ifa->ifa_local;
1046                                         break;
1047                                 }
1048                                 /* search for large dst subnet for addr */
1049                                 same = 0;
1050                         }
1051                 }
1052         } endfor_ifa(in_dev);
1053
1054         return same ? addr : 0;
1055 }
1056
1057 /*
1058  * Confirm that local IP address exists using wildcards:
1059  * - in_dev: only on this interface, 0=any interface
1060  * - dst: only in the same subnet as dst, 0=any dst
1061  * - local: address, 0=autoselect the local address
1062  * - scope: maximum allowed scope value for the local address
1063  */
1064 __be32 inet_confirm_addr(struct in_device *in_dev,
1065                          __be32 dst, __be32 local, int scope)
1066 {
1067         __be32 addr = 0;
1068         struct net_device *dev;
1069         struct net *net;
1070
1071         if (scope != RT_SCOPE_LINK)
1072                 return confirm_addr_indev(in_dev, dst, local, scope);
1073
1074         net = dev_net(in_dev->dev);
1075         rcu_read_lock();
1076         for_each_netdev_rcu(net, dev) {
1077                 in_dev = __in_dev_get_rcu(dev);
1078                 if (in_dev) {
1079                         addr = confirm_addr_indev(in_dev, dst, local, scope);
1080                         if (addr)
1081                                 break;
1082                 }
1083         }
1084         rcu_read_unlock();
1085
1086         return addr;
1087 }
1088
1089 /*
1090  *      Device notifier
1091  */
1092
1093 int register_inetaddr_notifier(struct notifier_block *nb)
1094 {
1095         return blocking_notifier_chain_register(&inetaddr_chain, nb);
1096 }
1097 EXPORT_SYMBOL(register_inetaddr_notifier);
1098
1099 int unregister_inetaddr_notifier(struct notifier_block *nb)
1100 {
1101         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1102 }
1103 EXPORT_SYMBOL(unregister_inetaddr_notifier);
1104
1105 /* Rename ifa_labels for a device name change. Make some effort to preserve
1106  * existing alias numbering and to create unique labels if possible.
1107 */
1108 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1109 {
1110         struct in_ifaddr *ifa;
1111         int named = 0;
1112
1113         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1114                 char old[IFNAMSIZ], *dot;
1115
1116                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1117                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1118                 if (named++ == 0)
1119                         goto skip;
1120                 dot = strchr(old, ':');
1121                 if (dot == NULL) {
1122                         sprintf(old, ":%d", named);
1123                         dot = old;
1124                 }
1125                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ)
1126                         strcat(ifa->ifa_label, dot);
1127                 else
1128                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1129 skip:
1130                 rtmsg_ifa(RTM_NEWADDR, ifa, NULL, 0);
1131         }
1132 }
1133
/*
 * Tell whether @mtu is large enough for IPv4 to stay enabled on a device.
 * Returns true for any value of 68 or more, false below that.
 */
static inline bool inetdev_valid_mtu(unsigned mtu)
{
	if (mtu < 68)
		return false;

	return true;
}
1138
1139 static void inetdev_send_gratuitous_arp(struct net_device *dev,
1140                                         struct in_device *in_dev)
1141
1142 {
1143         struct in_ifaddr *ifa;
1144
1145         for (ifa = in_dev->ifa_list; ifa;
1146              ifa = ifa->ifa_next) {
1147                 arp_send(ARPOP_REQUEST, ETH_P_ARP,
1148                          ifa->ifa_local, dev,
1149                          ifa->ifa_local, NULL,
1150                          dev->dev_addr, NULL);
1151         }
1152 }
1153
1154 /* Called only under RTNL semaphore */
1155
/*
 * inetdev_event - netdevice notifier callback for the IPv4 layer.
 *
 * @this:  the notifier block (unused here).
 * @event: NETDEV_* event code.
 * @ptr:   the struct net_device the event refers to.
 *
 * When the device has no in_device yet, one is created on NETDEV_REGISTER
 * (loopback devices additionally get NOXFRM and NOPOLICY set) or on
 * NETDEV_CHANGEMTU once the MTU is valid again; every other event is
 * ignored. With an in_device present the switch below dispatches on the
 * event.
 *
 * Returns NOTIFY_DONE, except when inetdev_init() fails during
 * NETDEV_REGISTER, in which case notifier_from_errno(-ENOMEM) is returned.
 */
1156 static int inetdev_event(struct notifier_block *this, unsigned long event,
1157                          void *ptr)
1158 {
1159         struct net_device *dev = ptr;
1160         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1161
1162         ASSERT_RTNL();
1163
1164         if (!in_dev) {
1165                 if (event == NETDEV_REGISTER) {
1166                         in_dev = inetdev_init(dev);
1167                         if (!in_dev)
1168                                 return notifier_from_errno(-ENOMEM);
1169                         if (dev->flags & IFF_LOOPBACK) {
1170                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1171                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1172                         }
1173                 } else if (event == NETDEV_CHANGEMTU) {
1174                         /* Re-enabling IP */
1175                         if (inetdev_valid_mtu(dev->mtu))
1176                                 in_dev = inetdev_init(dev);
1177                 }
1178                 goto out;
1179         }
1180
1181         switch (event) {
1182         case NETDEV_REGISTER:
                     /* REGISTER for a device that already has an in_device
                      * is unexpected: log it and clear dev->ip_ptr. */
1183                 printk(KERN_DEBUG "inetdev_event: bug\n");
1184                 rcu_assign_pointer(dev->ip_ptr, NULL);
1185                 break;
1186         case NETDEV_UP:
1187                 if (!inetdev_valid_mtu(dev->mtu))
1188                         break;
                     /* A loopback device coming up gets INADDR_LOOPBACK/8
                      * with host scope; allocation failure is silently
                      * tolerated. */
1189                 if (dev->flags & IFF_LOOPBACK) {
1190                         struct in_ifaddr *ifa = inet_alloc_ifa();
1191
1192                         if (ifa) {
1193                                 INIT_HLIST_NODE(&ifa->hash);
1194                                 ifa->ifa_local =
1195                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1196                                 ifa->ifa_prefixlen = 8;
1197                                 ifa->ifa_mask = inet_make_mask(8);
1198                                 in_dev_hold(in_dev);
1199                                 ifa->ifa_dev = in_dev;
1200                                 ifa->ifa_scope = RT_SCOPE_HOST;
1201                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1202                                 inet_insert_ifa(ifa);
1203                         }
1204                 }
1205                 ip_mc_up(in_dev);
1206                 /* fall through */
1207         case NETDEV_CHANGEADDR:
                     /* UP and CHANGEADDR announce themselves via ARP only
                      * when IN_DEV_ARP_NOTIFY() is set for the device. */
1208                 if (!IN_DEV_ARP_NOTIFY(in_dev))
1209                         break;
1210                 /* fall through */
1211         case NETDEV_NOTIFY_PEERS:
1212                 /* Send gratuitous ARP to notify of link change */
1213                 inetdev_send_gratuitous_arp(dev, in_dev);
1214                 break;
1215         case NETDEV_DOWN:
1216                 ip_mc_down(in_dev);
1217                 break;
1218         case NETDEV_PRE_TYPE_CHANGE:
1219                 ip_mc_unmap(in_dev);
1220                 break;
1221         case NETDEV_POST_TYPE_CHANGE:
1222                 ip_mc_remap(in_dev);
1223                 break;
1224         case NETDEV_CHANGEMTU:
1225                 if (inetdev_valid_mtu(dev->mtu))
1226                         break;
1227                 /* disable IP when MTU is not enough */
                     /* fall through: an invalid MTU is handled like
                      * NETDEV_UNREGISTER */
1228         case NETDEV_UNREGISTER:
1229                 inetdev_destroy(in_dev);
1230                 break;
1231         case NETDEV_CHANGENAME:
1232                 /* Do not notify about label change, this event is
1233                  * not interesting to applications using netlink.
1234                  */
                     /* NOTE(review): inetdev_changename() in this file does
                      * call rtmsg_ifa(RTM_NEWADDR, ...) per address, which
                      * seems at odds with the comment above - confirm. */
1235                 inetdev_changename(dev, in_dev);
1236
                     /* Re-register the sysctl entries for this device
                      * after the rename. */
1237                 devinet_sysctl_unregister(in_dev);
1238                 devinet_sysctl_register(in_dev);
1239                 break;
1240         }
1241 out:
1242         return NOTIFY_DONE;
1243 }
1244
/*
 * Notifier block whose callback is inetdev_event(). Where it gets
 * registered is outside this part of the file.
 */
1245 static struct notifier_block ip_netdev_notifier = {
1246         .notifier_call = inetdev_event,
1247 };
1248
1249 static inline size_t inet_nlmsg_size(void)
1250 {
1251         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1252                + nla_total_size(4) /* IFA_ADDRESS */
1253                + nla_total_size(4) /* IFA_LOCAL */
1254                + nla_total_size(4) /* IFA_BROADCAST */
1255                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1256 }
1257
/*
 * inet_fill_ifaddr - append one address message describing @ifa to @skb.
 *
 * @skb:   message buffer being built.
 * @ifa:   address to describe.
 * @pid:   passed through to nlmsg_put().
 * @seq:   passed through to nlmsg_put().
 * @event: message type passed to nlmsg_put() (callers in this file use
 *         RTM_NEWADDR).
 * @flags: netlink message flags passed to nlmsg_put().
 *
 * The header always reports AF_INET and ORs IFA_F_PERMANENT into the
 * flags. IFA_ADDRESS, IFA_LOCAL and IFA_BROADCAST are only emitted when
 * the corresponding field is non-zero, IFA_LABEL only when the label is
 * non-empty.
 *
 * Returns the value of nlmsg_end() on success, or -EMSGSIZE when the
 * header or an attribute did not fit (the partial message is cancelled).
 */
1258 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1259                             u32 pid, u32 seq, int event, unsigned int flags)
1260 {
1261         struct ifaddrmsg *ifm;
1262         struct nlmsghdr  *nlh;
1263
1264         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1265         if (nlh == NULL)
1266                 return -EMSGSIZE;
1267
1268         ifm = nlmsg_data(nlh);
1269         ifm->ifa_family = AF_INET;
1270         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1271         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1272         ifm->ifa_scope = ifa->ifa_scope;
1273         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1274
             /* No explicit goto targets nla_put_failure below, so the
              * NLA_PUT_* macros must be what jumps there on failure. */
1275         if (ifa->ifa_address)
1276                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1277
1278         if (ifa->ifa_local)
1279                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1280
1281         if (ifa->ifa_broadcast)
1282                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1283
1284         if (ifa->ifa_label[0])
1285                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1286
1287         return nlmsg_end(skb, nlh);
1288
1289 nla_put_failure:
1290         nlmsg_cancel(skb, nlh);
1291         return -EMSGSIZE;
1292 }
1293
/*
 * inet_dump_ifaddr - netlink dump callback listing IPv4 addresses.
 *
 * @skb: buffer to fill with RTM_NEWADDR messages (NLM_F_MULTI).
 * @cb:  dump state; cb->args[] holds the resume cursor between calls:
 *         args[0] - bucket index in net->dev_index_head[]
 *         args[1] - position of the device within that bucket
 *         args[2] - position of the address within that device's list
 *
 * Devices are visited bucket by bucket under rcu_read_lock(). Filling
 * stops as soon as inet_fill_ifaddr() returns <= 0; the cursor is then
 * stored so that the next invocation continues from the same address.
 *
 * Returns skb->len.
 */
1294 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1295 {
1296         struct net *net = sock_net(skb->sk);
1297         int h, s_h;
1298         int idx, s_idx;
1299         int ip_idx, s_ip_idx;
1300         struct net_device *dev;
1301         struct in_device *in_dev;
1302         struct in_ifaddr *ifa;
1303         struct hlist_head *head;
1304         struct hlist_node *node;
1305
             /* Reload the cursor saved by the previous invocation. */
1306         s_h = cb->args[0];
1307         s_idx = idx = cb->args[1];
1308         s_ip_idx = ip_idx = cb->args[2];
1309
             /* s_idx only applies to the first bucket visited; the loop
              * increment resets it for every later bucket. */
1310         for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
1311                 idx = 0;
1312                 head = &net->dev_index_head[h];
1313                 rcu_read_lock();
1314                 hlist_for_each_entry_rcu(dev, node, head, index_hlist) {
1315                         if (idx < s_idx)
1316                                 goto cont;
                             /* Past the resume device: start each
                              * following device at its first address. */
1317                         if (h > s_h || idx > s_idx)
1318                                 s_ip_idx = 0;
1319                         in_dev = __in_dev_get_rcu(dev);
1320                         if (!in_dev)
1321                                 goto cont;
1322
1323                         for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1324                              ifa = ifa->ifa_next, ip_idx++) {
1325                                 if (ip_idx < s_ip_idx)
1326                                         continue;
1327                                 if (inet_fill_ifaddr(skb, ifa,
1328                                              NETLINK_CB(cb->skb).pid,
1329                                              cb->nlh->nlmsg_seq,
1330                                              RTM_NEWADDR, NLM_F_MULTI) <= 0) {
1331                                         rcu_read_unlock();
1332                                         goto done;
1333                                 }
1334                         }
1335 cont:
1336                         idx++;
1337                 }
1338                 rcu_read_unlock();
1339         }
1340
1341 done:
             /* Save the cursor for the next invocation. */
1342         cb->args[0] = h;
1343         cb->args[1] = idx;
1344         cb->args[2] = ip_idx;
1345
1346         return skb->len;
1347 }
1348
1349 static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
1350                       u32 pid)
1351 {
1352         struct sk_buff *skb;
1353         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1354         int err = -ENOBUFS;
1355         struct net *net;
1356
1357         net = dev_net(ifa->ifa_dev->dev);
1358         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1359         if (skb == NULL)
1360                 goto errout;
1361
1362         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1363         if (err < 0) {
1364                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1365                 WARN_ON(err == -EMSGSIZE);
1366                 kfree_skb(skb);
1367                 goto errout;
1368         }
1369         rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1370         return;
1371 errout:
1372         if (err < 0)
1373                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1374 }
1375
1376 static size_t inet_get_link_af_size(const struct net_device *dev)
1377 {
1378         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1379
1380         if (!in_dev)
1381                 return 0;
1382
1383         return nla_total_size(IPV4_DEVCONF_MAX * 4); /* IFLA_INET_CONF */
1384 }
1385
1386 static int inet_fill_link_af(struct sk_buff *skb, const struct net_device *dev)
1387 {
1388         struct in_device *in_dev = rcu_dereference_rtnl(dev->ip_ptr);
1389         struct nlattr *nla;
1390         int i;
1391
1392         if (!in_dev)
1393                 return -ENODATA;
1394
1395         nla = nla_reserve(skb, IFLA_INET_CONF, IPV4_DEVCONF_MAX * 4);
1396         if (nla == NULL)
1397                 return -EMSGSIZE;
1398
1399         for (i = 0; i < IPV4_DEVCONF_MAX; i++)
1400                 ((u32 *) nla_data(nla))[i] = in_dev->cnf.data[i];
1401
1402         return 0;
1403 }
1404
/*
 * Attribute policy used by inet_validate_link_af(): IFLA_INET_CONF must be
 * a nested attribute.
 */
1405 static const struct nla_policy inet_af_policy[IFLA_INET_MAX+1] = {
1406         [IFLA_INET_CONF]        = { .type = NLA_NESTED },
1407 };
1408
1409 static int inet_validate_link_af(const struct net_device *dev,
1410                                  const struct nlattr *nla)
1411 {
1412         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1413         int err, rem;
1414
1415         if (dev && !__in_dev_get_rtnl(dev))
1416                 return -EAFNOSUPPORT;
1417
1418         err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy);
1419         if (err < 0)
1420                 return err;
1421
1422         if (tb[IFLA_INET_CONF]) {
1423                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem) {
1424                         int cfgid = nla_type(a);
1425
1426                         if (nla_len(a) < 4)
1427                                 return -EINVAL;
1428
1429                         if (cfgid <= 0 || cfgid > IPV4_DEVCONF_MAX)
1430                                 return -EINVAL;
1431                 }
1432         }
1433
1434         return 0;
1435 }
1436
1437 static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla)
1438 {
1439         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1440         struct nlattr *a, *tb[IFLA_INET_MAX+1];
1441         int rem;
1442
1443         if (!in_dev)
1444                 return -EAFNOSUPPORT;
1445
1446         if (nla_parse_nested(tb, IFLA_INET_MAX, nla, NULL) < 0)
1447                 BUG();
1448
1449         if (tb[IFLA_INET_CONF]) {
1450                 nla_for_each_nested(a, tb[IFLA_INET_CONF], rem)
1451                         ipv4_devconf_set(in_dev, nla_type(a), nla_get_u32(a));
1452         }
1453
1454         return 0;
1455 }
1456
1457 #ifdef CONFIG_SYSCTL
1458
1459 static void devinet_copy_dflt_conf(struct net *net, int i)
1460 {
1461         struct net_device *dev;
1462
1463         rcu_read_lock();
1464         for_each_netdev_rcu(net, dev) {
1465                 struct in_device *in_dev;
1466
1467                 in_dev = __in_dev_get_rcu(dev);
1468                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1469                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1470         }
1471         rcu_read_unlock();
1472 }
1473
1474 /* called with RTNL locked */
1475 static void inet_forward_change(struct net *net)
1476 {
1477         struct net_device *dev;
1478         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1479
1480         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1481         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1482
1483         for_each_netdev(net, dev) {
1484                 struct in_device *in_dev;
1485                 if (on)
1486                         dev_disable_lro(dev);
1487                 rcu_read_lock();
1488                 in_dev = __in_dev_get_rcu(dev);
1489                 if (in_dev)
1490                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1491                 rcu_read_unlock();
1492         }
1493 }
1494
1495 static int devinet_conf_proc(ctl_table *ctl, int write,
1496                              void __user *buffer,
1497                              size_t *lenp, loff_t *ppos)
1498 {
1499         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1500
1501         if (write) {
1502                 struct ipv4_devconf *cnf = ctl->extra1;
1503                 struct net *net = ctl->extra2;
1504                 int i = (int *)ctl->data - cnf->data;
1505
1506                 set_bit(i, cnf->state);
1507
1508                 if (cnf == net->ipv4.devconf_dflt)
1509                         devinet_copy_dflt_conf(net, i);
1510         }
1511
1512         return ret;
1513 }
1514
1515 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1516                                   void __user *buffer,
1517                                   size_t *lenp, loff_t *ppos)
1518 {
1519         int *valp = ctl->data;
1520         int val = *valp;
1521         loff_t pos = *ppos;
1522         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1523
1524         if (write && *valp != val) {
1525                 struct net *net = ctl->extra2;
1526
1527                 if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING)) {
1528                         if (!rtnl_trylock()) {
1529                                 /* Restore the original values before restarting */
1530                                 *valp = val;
1531                                 *ppos = pos;
1532                                 return restart_syscall();
1533                         }
1534                         if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING)) {
1535                                 inet_forward_change(net);
1536                         } else if (*valp) {
1537                                 struct ipv4_devconf *cnf = ctl->extra1;
1538                                 struct in_device *idev =
1539                                         container_of(cnf, struct in_device, cnf);
1540                                 dev_disable_lro(idev->dev);
1541                         }
1542                         rtnl_unlock();
1543                         rt_cache_flush(net, 0);
1544                 }
1545         }
1546
1547         return ret;
1548 }
1549
1550 static int ipv4_doint_and_flush(ctl_table *ctl, int write,
1551                                 void __user *buffer,
1552                                 size_t *lenp, loff_t *ppos)
1553 {
1554         int *valp = ctl->data;
1555         int val = *valp;
1556         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
1557         struct net *net = ctl->extra2;
1558
1559         if (write && *valp != val)
1560                 rt_cache_flush(net, 0);
1561
1562         return ret;
1563 }
1564
/*
 * DEVINET_SYSCTL_ENTRY - initializer for one ctl_table entry.
 *   attr: suffix of the IPV4_DEVCONF_* index; .data points at
 *         ipv4_devconf.data[IPV4_DEVCONF_<attr> - 1]
 *   name: procname of the entry
 *   mval: file mode
 *   proc: proc_handler
 * .extra1 is preset to &ipv4_devconf.
 */
1565 #define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc) \
1566         { \
1567                 .procname       = name, \
1568                 .data           = ipv4_devconf.data + \
1569                                   IPV4_DEVCONF_ ## attr - 1, \
1570                 .maxlen         = sizeof(int), \
1571                 .mode           = mval, \
1572                 .proc_handler   = proc, \
1573                 .extra1         = &ipv4_devconf, \
1574         }
1575
/* Read-write (0644) entry handled by devinet_conf_proc(). */
1576 #define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
1577         DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc)
1578
/* Read-only (0444) entry handled by devinet_conf_proc(). */
1579 #define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
1580         DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc)
1581
/* Read-write (0644) entry with a caller-supplied handler. */
1582 #define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc) \
1583         DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc)
1584
/* Read-write (0644) entry handled by ipv4_doint_and_flush(). */
1585 #define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
1586         DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush)
1587
/*
 * Template for the per-configuration sysctl table.
 *
 * __devinet_sysctl_register() duplicates this object with kmemdup() and
 * then adjusts .data/.extra1/.extra2 of the entries for the configuration
 * being registered, so the entries here all refer to the global
 * ipv4_devconf.
 *
 *   sysctl_header - handle returned by register_net_sysctl_table()
 *   devinet_vars  - the entries; the array has __IPV4_DEVCONF_MAX slots
 *   dev_name      - private copy of the directory name (kstrdup'ed)
 */
1588 static struct devinet_sysctl_table {
1589         struct ctl_table_header *sysctl_header;
1590         struct ctl_table devinet_vars[__IPV4_DEVCONF_MAX];
1591         char *dev_name;
1592 } devinet_sysctl = {
1593         .devinet_vars = {
1594                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1595                                              devinet_sysctl_forward),
1596                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1597
1598                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1599                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1600                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1601                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1602                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1603                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1604                                         "accept_source_route"),
1605                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"),
1606                 DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"),
1607                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1608                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1609                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1610                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1611                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1612                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1613                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1614                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1615                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1616                 DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"),
1617                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"),
1618
                     /* Entries below flush the route cache when changed
                      * (see ipv4_doint_and_flush()). */
1619                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1620                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1621                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1622                                               "force_igmp_version"),
1623                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1624                                               "promote_secondaries"),
1625         },
1626 };
1627
1628 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1629                                         struct ipv4_devconf *p)
1630 {
1631         int i;
1632         struct devinet_sysctl_table *t;
1633
1634 #define DEVINET_CTL_PATH_DEV    3
1635
1636         struct ctl_path devinet_ctl_path[] = {
1637                 { .procname = "net",  },
1638                 { .procname = "ipv4", },
1639                 { .procname = "conf", },
1640                 { /* to be set */ },
1641                 { },
1642         };
1643
1644         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1645         if (!t)
1646                 goto out;
1647
1648         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1649                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1650                 t->devinet_vars[i].extra1 = p;
1651                 t->devinet_vars[i].extra2 = net;
1652         }
1653
1654         /*
1655          * Make a copy of dev_name, because '.procname' is regarded as const
1656          * by sysctl and we wouldn't want anyone to change it under our feet
1657          * (see SIOCSIFNAME).
1658          */
1659         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1660         if (!t->dev_name)
1661                 goto free;
1662
1663         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1664
1665         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1666                         t->devinet_vars);
1667         if (!t->sysctl_header)
1668                 goto free_procname;
1669
1670         p->sysctl = t;
1671         return 0;
1672
1673 free_procname:
1674         kfree(t->dev_name);
1675 free:
1676         kfree(t);
1677 out:
1678         return -ENOBUFS;
1679 }
1680
1681 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1682 {
1683         struct devinet_sysctl_table *t = cnf->sysctl;
1684
1685         if (t == NULL)
1686                 return;
1687
1688         cnf->sysctl = NULL;
1689         unregister_net_sysctl_table(t->sysctl_header);
1690         kfree(t->dev_name);
1691         kfree(t);
1692 }
1693
1694 static void devinet_sysctl_register(struct in_device *idev)
1695 {
1696         neigh_sysctl_register(idev->dev, idev->arp_parms, "ipv4", NULL);
1697         __devinet_sysctl_register(dev_net(idev->dev), idev->dev->name,
1698                                         &idev->cnf);
1699 }
1700
1701 static void devinet_sysctl_unregister(struct in_device *idev)
1702 {
1703         __devinet_sysctl_unregister(&idev->cnf);
1704         neigh_sysctl_unregister(idev->arp_parms);
1705 }
1706
1707 static struct ctl_table ctl_forward_entry[] = {
1708         {
1709                 .procname       = "ip_forward",
1710                 .data           = &ipv4_devconf.data[
1711                                         IPV4_DEVCONF_FORWARDING - 1],
1712                 .maxlen         = sizeof(int),
1713                 .mode           = 0644,
1714                 .proc_handler   = devinet_sysctl_forward,
1715                 .extra1         = &ipv4_devconf,
1716                 .extra2         = &init_net,
1717         },
1718         { },
1719 };
1720
1721 static __net_initdata struct ctl_path net_ipv4_path[] = {
1722         { .procname = "net", },
1723         { .procname = "ipv4", },
1724         { },
1725 };
1726 #endif
1727
1728 static __net_init int devinet_init_net(struct net *net)
1729 {
1730         int err;
1731         struct ipv4_devconf *all, *dflt;
1732 #ifdef CONFIG_SYSCTL
1733         struct ctl_table *tbl = ctl_forward_entry;
1734         struct ctl_table_header *forw_hdr;
1735 #endif
1736
1737         err = -ENOMEM;
1738         all = &ipv4_devconf;
1739         dflt = &ipv4_devconf_dflt;
1740
1741         if (!net_eq(net, &init_net)) {
1742                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1743                 if (all == NULL)
1744                         goto err_alloc_all;
1745
1746                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1747                 if (dflt == NULL)
1748                         goto err_alloc_dflt;
1749
1750 #ifdef CONFIG_SYSCTL
1751                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1752                 if (tbl == NULL)
1753                         goto err_alloc_ctl;
1754
1755                 tbl[0].data = &all->data[IPV4_DEVCONF_FORWARDING - 1];
1756                 tbl[0].extra1 = all;
1757                 tbl[0].extra2 = net;
1758 #endif
1759         }
1760
1761 #ifdef CONFIG_SYSCTL
1762         err = __devinet_sysctl_register(net, "all", all);
1763         if (err < 0)
1764                 goto err_reg_all;
1765
1766         err = __devinet_sysctl_register(net, "default", dflt);
1767         if (err < 0)
1768                 goto err_reg_dflt;
1769
1770         err = -ENOMEM;
1771         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1772         if (forw_hdr == NULL)
1773                 goto err_reg_ctl;
1774         net->ipv4.forw_hdr = forw_hdr;
1775 #endif
1776
1777         net->ipv4.devconf_all = all;
1778         net->ipv4.devconf_dflt = dflt;
1779         return 0;
1780
1781 #ifdef CONFIG_SYSCTL
1782 err_reg_ctl:
1783         __devinet_sysctl_unregister(dflt);
1784 err_reg_dflt:
1785         __devinet_sysctl_unregister(all);
1786 err_reg_all:
1787         if (tbl != ctl_forward_entry)
1788                 kfree(tbl);
1789 err_alloc_ctl:
1790 #endif
1791         if (dflt != &ipv4_devconf_dflt)
1792                 kfree(dflt);
1793 err_alloc_dflt:
1794         if (all != &ipv4_devconf)
1795                 kfree(all);
1796 err_alloc_all:
1797         return err;
1798 }
1799
1800 static __net_exit void devinet_exit_net(struct net *net)
1801 {
1802 #ifdef CONFIG_SYSCTL
1803         struct ctl_table *tbl;
1804
1805         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1806         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1807         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1808         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1809         kfree(tbl);
1810 #endif
1811         kfree(net->ipv4.devconf_dflt);
1812         kfree(net->ipv4.devconf_all);
1813 }
1814
1815 static __net_initdata struct pernet_operations devinet_ops = {
1816         .init = devinet_init_net,
1817         .exit = devinet_exit_net,
1818 };
1819
1820 static struct rtnl_af_ops inet_af_ops = {
1821         .family           = AF_INET,
1822         .fill_link_af     = inet_fill_link_af,
1823         .get_link_af_size = inet_get_link_af_size,
1824         .validate_link_af = inet_validate_link_af,
1825         .set_link_af      = inet_set_link_af,
1826 };
1827
1828 void __init devinet_init(void)
1829 {
1830         int i;
1831
1832         for (i = 0; i < IN4_ADDR_HSIZE; i++)
1833                 INIT_HLIST_HEAD(&inet_addr_lst[i]);
1834
1835         register_pernet_subsys(&devinet_ops);
1836
1837         register_gifconf(PF_INET, inet_gifconf);
1838         register_netdevice_notifier(&ip_netdev_notifier);
1839
1840         rtnl_af_register(&inet_af_ops);
1841
1842         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL);
1843         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL);
1844         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL);
1845 }
1846