/*
 *	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 *		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *	    Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *	    Michael Chastain	:	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell	:	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

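/*
 * Illustrative sketch (not part of the original source): the bucket for
 * a handler is the low nibble of the host-order protocol value, so
 *
 *	0x8035 & PTYPE_HASH_MASK == 0x5		(RARP)
 *	0x0005 & PTYPE_HASH_MASK == 0x5		(SNAP)
 *	0x0805 & PTYPE_HASH_MASK == 0x5		(X.25)
 *
 * which is exactly the RARP/SNAP/X.25 overlap described above.
 */
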
static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client client;
	spinlock_t lock;
	cpumask_t channel_mask;
	struct dma_chan **channels;
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

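/*
 * Illustrative sketch (not part of the original source) of the reader
 * pattern described above; do_something_readonly() is a placeholder.
 * A pure reader takes only dev_base_lock:
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something_readonly(dev);
 *	read_unlock(&dev_base_lock);
 *
 * A writer instead holds the rtnl semaphore across the whole walk and
 * additionally takes dev_base_lock for writing around the update
 * itself, as list_netdevice() and unlist_netdevice() below do.
 */
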
#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

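/*
 * For illustration (not part of the original source): an Ethernet
 * device has dev->type == ARPHRD_ETHER, the second entry in
 * netdev_lock_type[], so netdev_lock_pos() returns 1 and the device's
 * locks are registered with lockdep under the class name "_xmit_ETHER".
 * A type missing from the table falls through to the final
 * "_xmit_NONE" class.
 */
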
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if a protocol handler that mangles packets
 *	were first on the list, it would have no way to sense that the
 *	packet is cloned and needs to be copied-on-write; it would
 *	change it and subsequent readers would get a broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep and therefore cannot guarantee that
 *	all CPUs that are in the middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}

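/*
 * Usage sketch (illustrative, not part of the original file); the
 * handler and its names are hypothetical:
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_ptype __read_mostly = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_ptype);
 *
 * This entry would land in ptype_base[0x0800 & PTYPE_HASH_MASK]; a
 * handler of type ETH_P_ALL would go on the ptype_all taps list instead.
 */
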
/**
 *	__dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	The packet type might still be in use by receivers
 *	and must not be freed until after all the CPU's have gone
 *	through a quiescent state.
 */
void __dev_remove_pack(struct packet_type *pt)
{
	struct list_head *head;
	struct packet_type *pt1;

	spin_lock_bh(&ptype_lock);

	if (pt->type == htons(ETH_P_ALL))
		head = &ptype_all;
	else
		head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];

	list_for_each_entry(pt1, head, list) {
		if (pt == pt1) {
			list_del_rcu(&pt->list);
			goto out;
		}
	}

	printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
out:
	spin_unlock_bh(&ptype_lock);
}
/**
 *	dev_remove_pack	 - remove packet handler
 *	@pt: packet type declaration
 *
 *	Remove a protocol handler that was previously added to the kernel
 *	protocol handlers by dev_add_pack(). The passed &packet_type is removed
 *	from the kernel lists and can be freed or reused once this function
 *	returns.
 *
 *	This call sleeps to guarantee that no CPU is looking at the packet
 *	type after return.
 */
void dev_remove_pack(struct packet_type *pt)
{
	__dev_remove_pack(pt);

	synchronize_net();
}

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add	- add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine
 *	for all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check	- check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base	- get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
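/*
 * Example (illustrative, not from the original file): booting with
 *
 *	netdev=5,0x300,0,0,eth0
 *
 * would parse irq=5 and base_addr=0x300 (mem_start and mem_end 0) and
 * file them under the name "eth0" via netdev_boot_setup_add(), to be
 * picked up later by netdev_boot_setup_check() during device probing.
 */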

/*******************************************************************************

			Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name	- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name		- find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

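/*
 * Illustrative usage sketch (not part of the original source):
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *	if (dev) {
 *		... use the device ...
 *		dev_put(dev);	(drop the reference taken above)
 *	}
 *
 * The __dev_get_by_name() variant skips the hold/put pair, but the
 * caller must then itself hold the RTNL semaphore or dev_base_lock.
 */
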
/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names
 *	to allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

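/*
 * For illustration (not part of the original source):
 *
 *	dev_valid_name("eth0")   -> 1
 *	dev_valid_name("")       -> 0	(empty)
 *	dev_valid_name(".")      -> 0	(reserved)
 *	dev_valid_name("my dev") -> 0	(whitespace)
 *	dev_valid_name("a/b")    -> 0	('/' would break sysfs paths)
 */
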
/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}

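/*
 * Illustrative sketch (not part of the original source): with "eth0"
 * and "eth1" already registered in dev's namespace,
 *
 *	dev_alloc_name(dev, "eth%d");
 *
 * writes "eth2" into dev->name and returns 2; a plain name with no '%'
 * is used verbatim if it is still free.
 */
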
/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change the name of a device; a format string such as "eth%d"
 *	can be passed for wildcarding.
 */
int dev_change_name(struct net_device *dev, char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
		strcpy(newname, dev->name);
	}
	else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	err = device_rename(&dev->dev, dev->name);
	if (err) {
		memcpy(dev->name, oldname, IFNAMSIZ);
		return err;
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err) {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		} else {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from @alias
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load	- load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/**
 *	dev_open	- prepare an interface for use.
 *	@dev:	device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (dev->validate_addr)
		ret = dev->validate_addr(dev);

	if (!ret && dev->open)
		ret = dev->open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can even be on a different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (dev->stop)
		dev->stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered, all registration and up events are replayed
 *	to the new notifier to allow it a race-free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}

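/*
 * Usage sketch (illustrative, not part of the original file); the
 * callback and its names are hypothetical:
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		if (event == NETDEV_UP)
 *			printk(KERN_INFO "%s is up\n", dev->name);
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_nb = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_nb);
 *
 * Because of the replay described above, my_netdev_event() would see a
 * NETDEV_REGISTER (and NETDEV_UP, where applicable) for every device
 * that already exists at registration time.
 */
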
/**
 *	unregister_netdevice_notifier - unregister a network notifier block
 *	@nb: notifier
 *
 *	Unregister a notifier previously registered by
 *	register_netdevice_notifier(). The notifier is unlinked from the
 *	kernel structures and may then be reused. A negative errno code
 *	is returned on a failure.
 */

int unregister_netdevice_notifier(struct notifier_block *nb)
{
	int err;

	rtnl_lock();
	err = raw_notifier_chain_unregister(&netdev_chain, nb);
	rtnl_unlock();
	return err;
}

/**
 *	call_netdevice_notifiers - call all network notifier blocks
 *	@val: value passed unmodified to notifier function
 *	@dev: net_device pointer passed unmodified to notifier function
 *
 *	Call all network notifier blocks.  Parameters and return value
 *	are as for raw_notifier_call_chain().
 */

int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
{
	return raw_notifier_call_chain(&netdev_chain, val, dev);
}

/* When > 0 there are consumers of rx skb time stamps */
static atomic_t netstamp_needed = ATOMIC_INIT(0);

void net_enable_timestamp(void)
{
	atomic_inc(&netstamp_needed);
}

void net_disable_timestamp(void)
{
	atomic_dec(&netstamp_needed);
}

static inline void net_timestamp(struct sk_buff *skb)
{
	if (atomic_read(&netstamp_needed))
		__net_timestamp(skb);
	else
		skb->tstamp.tv64 = 0;
}

/*
 *	Support routine. Sends outgoing frames to any network
 *	taps currently in use.
 */

static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
{
	struct packet_type *ptype;

	net_timestamp(skb);

	rcu_read_lock();
	list_for_each_entry_rcu(ptype, &ptype_all, list) {
		/* Never send packets back to the socket
		 * they originated from - MvS (miquels@drinkel.ow.org)
		 */
		if ((ptype->dev == dev || !ptype->dev) &&
		    (ptype->af_packet_priv == NULL ||
		     (struct sock *)ptype->af_packet_priv != skb->sk)) {
			struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
			if (!skb2)
				break;

			/* skb->nh should be correctly
			   set by sender, so that the second statement is
			   just protection against buggy protocols.
			 */
			skb_reset_mac_header(skb2);

			if (skb_network_header(skb2) < skb2->data ||
			    skb2->network_header > skb2->tail) {
				if (net_ratelimit())
					printk(KERN_CRIT "protocol %04x is "
					       "buggy, dev %s\n",
					       skb2->protocol, dev->name);
				skb_reset_network_header(skb2);
			}

			skb2->transport_header = skb2->network_header;
			skb2->pkt_type = PACKET_OUTGOING;
			ptype->func(skb2, skb->dev, ptype, skb->dev);
		}
	}
	rcu_read_unlock();
}


static inline void __netif_reschedule(struct Qdisc *q)
{
	struct softnet_data *sd;
	unsigned long flags;

	local_irq_save(flags);
	sd = &__get_cpu_var(softnet_data);
	q->next_sched = sd->output_queue;
	sd->output_queue = q;
	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_restore(flags);
}

void __netif_schedule(struct Qdisc *q)
{
	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
		__netif_reschedule(q);
}
EXPORT_SYMBOL(__netif_schedule);

void dev_kfree_skb_irq(struct sk_buff *skb)
{
	if (atomic_dec_and_test(&skb->users)) {
		struct softnet_data *sd;
		unsigned long flags;

		local_irq_save(flags);
		sd = &__get_cpu_var(softnet_data);
		skb->next = sd->completion_queue;
		sd->completion_queue = skb;
		raise_softirq_irqoff(NET_TX_SOFTIRQ);
		local_irq_restore(flags);
	}
}
EXPORT_SYMBOL(dev_kfree_skb_irq);

void dev_kfree_skb_any(struct sk_buff *skb)
{
	if (in_irq() || irqs_disabled())
		dev_kfree_skb_irq(skb);
	else
		dev_kfree_skb(skb);
}
EXPORT_SYMBOL(dev_kfree_skb_any);


Stephen Hemmingerbea33482007-10-03 16:41:36 -07001411/**
1412 * netif_device_detach - mark device as removed
1413 * @dev: network device
1414 *
1415 * Mark device as removed from system and therefore no longer available.
1416 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001417void netif_device_detach(struct net_device *dev)
1418{
1419 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1420 netif_running(dev)) {
1421 netif_stop_queue(dev);
1422 }
1423}
1424EXPORT_SYMBOL(netif_device_detach);
1425
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001426/**
1427 * netif_device_attach - mark device as attached
1428 * @dev: network device
1429 *
1430 * Mark device as attached from system and restart if needed.
1431 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001432void netif_device_attach(struct net_device *dev)
1433{
1434 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 netif_running(dev)) {
1436 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001437 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001438 }
1439}
1440EXPORT_SYMBOL(netif_device_attach);
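/*
 * Illustrative sketch (not part of the original file): the usual pairing
 * of netif_device_detach()/netif_device_attach() in a PCI driver's
 * suspend/resume path.  "foo_suspend" and "foo_resume" are hypothetical
 * names used only for this example.
 *
 *	static int foo_suspend(struct pci_dev *pdev, pm_message_t state)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_detach(dev);
 *		return 0;
 *	}
 *
 *	static int foo_resume(struct pci_dev *pdev)
 *	{
 *		struct net_device *dev = pci_get_drvdata(pdev);
 *
 *		netif_device_attach(dev);
 *		return 0;
 *	}
 */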
1441
Ben Hutchings6de329e2008-06-16 17:02:28 -07001442static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1443{
1444 return ((features & NETIF_F_GEN_CSUM) ||
1445 ((features & NETIF_F_IP_CSUM) &&
1446 protocol == htons(ETH_P_IP)) ||
1447 ((features & NETIF_F_IPV6_CSUM) &&
1448 protocol == htons(ETH_P_IPV6)));
1449}
1450
1451static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1452{
1453 if (can_checksum_protocol(dev->features, skb->protocol))
1454 return true;
1455
1456 if (skb->protocol == htons(ETH_P_8021Q)) {
1457 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1458 if (can_checksum_protocol(dev->features & dev->vlan_features,
1459 veh->h_vlan_encapsulated_proto))
1460 return true;
1461 }
1462
1463 return false;
1464}
Denis Vlasenko56079432006-03-29 15:57:29 -08001465
Linus Torvalds1da177e2005-04-16 15:20:36 -07001466/*
1467 * Invalidate hardware checksum when packet is to be mangled, and
1468 * complete checksum manually on outgoing path.
1469 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001470int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001471{
Al Virod3bc23e2006-11-14 21:24:49 -08001472 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001473 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001474
Patrick McHardy84fa7932006-08-29 16:44:56 -07001475 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001476 goto out_set_summed;
1477
1478 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001479 /* Let GSO fix up the checksum. */
1480 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481 }
1482
Herbert Xua0308472007-10-15 01:47:15 -07001483 offset = skb->csum_start - skb_headroom(skb);
1484 BUG_ON(offset >= skb_headlen(skb));
1485 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1486
1487 offset += skb->csum_offset;
1488 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1489
1490 if (skb_cloned(skb) &&
1491 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001492 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1493 if (ret)
1494 goto out;
1495 }
1496
Herbert Xua0308472007-10-15 01:47:15 -07001497 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001498out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001499 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001500out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001501 return ret;
1502}
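/*
 * Illustrative sketch (not part of the original file): the typical caller
 * pattern for skb_checksum_help(), mirroring what dev_queue_xmit() does
 * further down in this file.  "foo_xmit_prep" is a hypothetical helper
 * name used only for this example.
 *
 *	static int foo_xmit_prep(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		if (skb->ip_summed == CHECKSUM_PARTIAL &&
 *		    !dev_can_checksum(dev, skb))
 *			return skb_checksum_help(skb);
 *		return 0;
 *	}
 */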
1503
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001504/**
1505 * skb_gso_segment - Perform segmentation on skb.
1506 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001507 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001508 *
1509 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001510 *
1511 * It may return NULL if the skb requires no segmentation. This is
1512 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001513 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001514struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001515{
1516 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1517 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001518 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001519 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001520
1521 BUG_ON(skb_shinfo(skb)->frag_list);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001522
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001523 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001524 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001525 __skb_pull(skb, skb->mac_len);
1526
Herbert Xuf9d106a2007-04-23 22:36:13 -07001527 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001528 if (skb_header_cloned(skb) &&
1529 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1530 return ERR_PTR(err);
1531 }
1532
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001533 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001534 list_for_each_entry_rcu(ptype,
1535 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001536 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001537 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001538 err = ptype->gso_send_check(skb);
1539 segs = ERR_PTR(err);
1540 if (err || skb_gso_ok(skb, features))
1541 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001542 __skb_push(skb, (skb->data -
1543 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001544 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001545 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001546 break;
1547 }
1548 }
1549 rcu_read_unlock();
1550
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001551 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001552
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001553 return segs;
1554}
1555
1556EXPORT_SYMBOL(skb_gso_segment);
1557
Herbert Xufb286bb2005-11-10 13:01:24 -08001558/* Take action when hardware reception checksum errors are detected. */
1559#ifdef CONFIG_BUG
1560void netdev_rx_csum_fault(struct net_device *dev)
1561{
1562 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001563 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001564 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001565 dump_stack();
1566 }
1567}
1568EXPORT_SYMBOL(netdev_rx_csum_fault);
1569#endif
1570
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571/* Actually, we should eliminate this check as soon as we know that:
 1572 * 1. An IOMMU is present and can map all of the memory.
1573 * 2. No high memory really exists on this machine.
1574 */
1575
1576static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1577{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001578#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001579 int i;
1580
1581 if (dev->features & NETIF_F_HIGHDMA)
1582 return 0;
1583
1584 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1585 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1586 return 1;
1587
Herbert Xu3d3a8532006-06-27 13:33:10 -07001588#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001589 return 0;
1590}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001591
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001592struct dev_gso_cb {
1593 void (*destructor)(struct sk_buff *skb);
1594};
1595
1596#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1597
1598static void dev_gso_skb_destructor(struct sk_buff *skb)
1599{
1600 struct dev_gso_cb *cb;
1601
1602 do {
1603 struct sk_buff *nskb = skb->next;
1604
1605 skb->next = nskb->next;
1606 nskb->next = NULL;
1607 kfree_skb(nskb);
1608 } while (skb->next);
1609
1610 cb = DEV_GSO_CB(skb);
1611 if (cb->destructor)
1612 cb->destructor(skb);
1613}
1614
1615/**
1616 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1617 * @skb: buffer to segment
1618 *
1619 * This function segments the given skb and stores the list of segments
1620 * in skb->next.
1621 */
1622static int dev_gso_segment(struct sk_buff *skb)
1623{
1624 struct net_device *dev = skb->dev;
1625 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001626 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1627 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001628
Herbert Xu576a30e2006-06-27 13:22:38 -07001629 segs = skb_gso_segment(skb, features);
1630
1631 /* Verifying header integrity only. */
1632 if (!segs)
1633 return 0;
1634
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001635 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001636 return PTR_ERR(segs);
1637
1638 skb->next = segs;
1639 DEV_GSO_CB(skb)->destructor = skb->destructor;
1640 skb->destructor = dev_gso_skb_destructor;
1641
1642 return 0;
1643}
1644
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001645int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1646 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001647{
1648 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001649 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001650 dev_queue_xmit_nit(skb, dev);
1651
Herbert Xu576a30e2006-06-27 13:22:38 -07001652 if (netif_needs_gso(dev, skb)) {
1653 if (unlikely(dev_gso_segment(skb)))
1654 goto out_kfree_skb;
1655 if (skb->next)
1656 goto gso;
1657 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001658
Herbert Xu576a30e2006-06-27 13:22:38 -07001659 return dev->hard_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001660 }
1661
Herbert Xu576a30e2006-06-27 13:22:38 -07001662gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001663 do {
1664 struct sk_buff *nskb = skb->next;
1665 int rc;
1666
1667 skb->next = nskb->next;
1668 nskb->next = NULL;
1669 rc = dev->hard_start_xmit(nskb, dev);
1670 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001671 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001672 skb->next = nskb;
1673 return rc;
1674 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001675 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001676 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001677 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001678
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001679 skb->destructor = DEV_GSO_CB(skb)->destructor;
1680
1681out_kfree_skb:
1682 kfree_skb(skb);
1683 return 0;
1684}
1685
David S. Millerb6b2fed2008-07-21 09:48:06 -07001686static u32 simple_tx_hashrnd;
1687static int simple_tx_hashrnd_initialized = 0;
1688
David S. Miller8f0f2222008-07-15 03:47:03 -07001689static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1690{
David S. Millerb6b2fed2008-07-21 09:48:06 -07001691 u32 addr1, addr2, ports;
1692 u32 hash, ihl;
David S. Miller8f0f2222008-07-15 03:47:03 -07001693 u8 ip_proto;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001694
1695 if (unlikely(!simple_tx_hashrnd_initialized)) {
1696 get_random_bytes(&simple_tx_hashrnd, 4);
1697 simple_tx_hashrnd_initialized = 1;
1698 }
David S. Miller8f0f2222008-07-15 03:47:03 -07001699
1700 switch (skb->protocol) {
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001701 case htons(ETH_P_IP):
David S. Miller8f0f2222008-07-15 03:47:03 -07001702 ip_proto = ip_hdr(skb)->protocol;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001703 addr1 = ip_hdr(skb)->saddr;
1704 addr2 = ip_hdr(skb)->daddr;
David S. Miller8f0f2222008-07-15 03:47:03 -07001705 ihl = ip_hdr(skb)->ihl;
David S. Miller8f0f2222008-07-15 03:47:03 -07001706 break;
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001707 case htons(ETH_P_IPV6):
David S. Miller8f0f2222008-07-15 03:47:03 -07001708 ip_proto = ipv6_hdr(skb)->nexthdr;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001709 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1710 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
David S. Miller8f0f2222008-07-15 03:47:03 -07001711 ihl = (40 >> 2);
David S. Miller8f0f2222008-07-15 03:47:03 -07001712 break;
1713 default:
1714 return 0;
1715 }
1716
David S. Miller8f0f2222008-07-15 03:47:03 -07001717
1718 switch (ip_proto) {
1719 case IPPROTO_TCP:
1720 case IPPROTO_UDP:
1721 case IPPROTO_DCCP:
1722 case IPPROTO_ESP:
1723 case IPPROTO_AH:
1724 case IPPROTO_SCTP:
1725 case IPPROTO_UDPLITE:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001726 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
David S. Miller8f0f2222008-07-15 03:47:03 -07001727 break;
1728
1729 default:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001730 ports = 0;
David S. Miller8f0f2222008-07-15 03:47:03 -07001731 break;
1732 }
1733
David S. Millerb6b2fed2008-07-21 09:48:06 -07001734 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1735
1736 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001737}
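/*
 * Worked example (illustrative only): the final line above maps the
 * 32-bit jhash value onto the queue range without a modulo.  With
 * real_num_tx_queues == 4 and hash == 0x80000000, the 64-bit product is
 * 0x200000000 and shifting right by 32 yields queue 2; the hash space
 * [0, 2^32) is thus split into four equal bands, one per queue.
 */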
1738
David S. Millere8a04642008-07-17 00:34:19 -07001739static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1740 struct sk_buff *skb)
1741{
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001742 u16 queue_index = 0;
1743
David S. Millereae792b2008-07-15 03:03:33 -07001744 if (dev->select_queue)
1745 queue_index = dev->select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001746 else if (dev->real_num_tx_queues > 1)
1747 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001748
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001749 skb_set_queue_mapping(skb, queue_index);
1750 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001751}
1752
Dave Jonesd29f7492008-07-22 14:09:06 -07001753/**
1754 * dev_queue_xmit - transmit a buffer
1755 * @skb: buffer to transmit
1756 *
1757 * Queue a buffer for transmission to a network device. The caller must
1758 * have set the device and priority and built the buffer before calling
1759 * this function. The function can be called from an interrupt.
1760 *
1761 * A negative errno code is returned on a failure. A success does not
1762 * guarantee the frame will be transmitted as it may be dropped due
1763 * to congestion or traffic shaping.
1764 *
1765 * -----------------------------------------------------------------------------------
1766 * I notice this method can also return errors from the queue disciplines,
1767 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1768 * be positive.
1769 *
1770 * Regardless of the return value, the skb is consumed, so it is currently
1771 * difficult to retry a send to this method. (You can bump the ref count
1772 * before sending to hold a reference for retry if you are careful.)
1773 *
1774 * When calling this method, interrupts MUST be enabled. This is because
1775 * the BH enable code must have IRQs enabled so that it will not deadlock.
1776 * --BLG
1777 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001778int dev_queue_xmit(struct sk_buff *skb)
1779{
1780 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001781 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001782 struct Qdisc *q;
1783 int rc = -ENOMEM;
1784
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001785 /* GSO will handle the following emulations directly. */
1786 if (netif_needs_gso(dev, skb))
1787 goto gso;
1788
Linus Torvalds1da177e2005-04-16 15:20:36 -07001789 if (skb_shinfo(skb)->frag_list &&
1790 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001791 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001792 goto out_kfree_skb;
1793
1794	/* A fragmented skb is linearized if the device does not support SG,
1795	 * or if at least one of the fragments is in highmem and the device
1796	 * does not support DMA from it.
1797 */
1798 if (skb_shinfo(skb)->nr_frags &&
1799 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001800 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001801 goto out_kfree_skb;
1802
1803 /* If packet is not checksummed and device does not support
1804 * checksumming for this protocol, complete checksumming here.
1805 */
Herbert Xu663ead32007-04-09 11:59:07 -07001806 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1807 skb_set_transport_header(skb, skb->csum_start -
1808 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001809 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1810 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001811 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001812
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001813gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001814 /* Disable soft irqs for various locks below. Also
1815 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001816 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001817 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001818
David S. Millereae792b2008-07-15 03:03:33 -07001819 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001820 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001821
Linus Torvalds1da177e2005-04-16 15:20:36 -07001822#ifdef CONFIG_NET_CLS_ACT
1823 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1824#endif
1825 if (q->enqueue) {
David S. Miller5fb66222008-08-02 20:02:43 -07001826 spinlock_t *root_lock = qdisc_lock(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001827
David S. Miller37437bb2008-07-16 02:15:04 -07001828 spin_lock(root_lock);
1829
David S. Millera9312ae2008-08-17 21:51:03 -07001830 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
David S. Miller96d20312008-08-17 23:37:16 -07001831 kfree_skb(skb);
David S. Millera9312ae2008-08-17 21:51:03 -07001832 rc = NET_XMIT_DROP;
David S. Miller96d20312008-08-17 23:37:16 -07001833 } else {
1834 rc = qdisc_enqueue_root(skb, q);
1835 qdisc_run(q);
David S. Millera9312ae2008-08-17 21:51:03 -07001836 }
David S. Miller37437bb2008-07-16 02:15:04 -07001837 spin_unlock(root_lock);
1838
David S. Miller37437bb2008-07-16 02:15:04 -07001839 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840 }
1841
1842	/* The device has no queue. Common case for software devices:
1843	   loopback, all sorts of tunnels...
1844
Herbert Xu932ff272006-06-09 12:20:56 -07001845	   Really, it is unlikely that netif_tx_lock protection is necessary
1846	   here. (e.g. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001847	   counters.)
1848	   However, it is possible that they rely on the protection
1849	   we provide here.
1850
1851	   Check this and shoot the lock: it is not prone to deadlocks.
1852	   Or shoot the noqueue qdisc; that is even simpler 8)
1853 */
1854 if (dev->flags & IFF_UP) {
1855 int cpu = smp_processor_id(); /* ok because BHs are off */
1856
David S. Millerc773e842008-07-08 23:13:53 -07001857 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858
David S. Millerc773e842008-07-08 23:13:53 -07001859 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001860
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001861 if (!netif_tx_queue_stopped(txq)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862 rc = 0;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001863 if (!dev_hard_start_xmit(skb, dev, txq)) {
David S. Millerc773e842008-07-08 23:13:53 -07001864 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 goto out;
1866 }
1867 }
David S. Millerc773e842008-07-08 23:13:53 -07001868 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869 if (net_ratelimit())
1870 printk(KERN_CRIT "Virtual device %s asks to "
1871 "queue packet!\n", dev->name);
1872 } else {
1873 /* Recursion is detected! It is possible,
1874 * unfortunately */
1875 if (net_ratelimit())
1876 printk(KERN_CRIT "Dead loop on virtual device "
1877 "%s, fix it urgently!\n", dev->name);
1878 }
1879 }
1880
1881 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001882 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883
1884out_kfree_skb:
1885 kfree_skb(skb);
1886 return rc;
1887out:
Herbert Xud4828d82006-06-22 02:28:18 -07001888 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001889 return rc;
1890}
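/*
 * Illustrative sketch (not part of the original file): a minimal caller
 * of dev_queue_xmit(), along the lines of what packet sockets and tunnel
 * drivers do.  Error handling and the means of obtaining "dev" are
 * elided, and the function name is hypothetical.
 *
 *	static int foo_send(struct net_device *dev, struct sk_buff *skb)
 *	{
 *		skb->dev = dev;
 *		skb->priority = TC_PRIO_CONTROL;
 *		return dev_queue_xmit(skb);
 *	}
 *
 * Remember that the skb is consumed whatever the return value is.
 */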
1891
1892
1893/*=======================================================================
1894 Receiver routines
1895 =======================================================================*/
1896
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001897int netdev_max_backlog __read_mostly = 1000;
1898int netdev_budget __read_mostly = 300;
1899int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001900
1901DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1902
1903
Linus Torvalds1da177e2005-04-16 15:20:36 -07001904/**
1905 * netif_rx - post buffer to the network code
1906 * @skb: buffer to post
1907 *
1908 * This function receives a packet from a device driver and queues it for
1909 * the upper (protocol) levels to process. It always succeeds. The buffer
1910 * may be dropped during processing for congestion control or by the
1911 * protocol layers.
1912 *
1913 * return values:
1914 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915 * NET_RX_DROP (packet was dropped)
1916 *
1917 */
1918
1919int netif_rx(struct sk_buff *skb)
1920{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 struct softnet_data *queue;
1922 unsigned long flags;
1923
1924 /* if netpoll wants it, pretend we never saw it */
1925 if (netpoll_rx(skb))
1926 return NET_RX_DROP;
1927
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001928 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001929 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001930
1931 /*
1932	 * The code is arranged so that the path is shortest
1933	 * when the CPU is congested but still operating.
1934 */
1935 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001936 queue = &__get_cpu_var(softnet_data);
1937
1938 __get_cpu_var(netdev_rx_stat).total++;
1939 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1940 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001942 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001943 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001944 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001945 }
1946
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001947 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 goto enqueue;
1949 }
1950
Linus Torvalds1da177e2005-04-16 15:20:36 -07001951 __get_cpu_var(netdev_rx_stat).dropped++;
1952 local_irq_restore(flags);
1953
1954 kfree_skb(skb);
1955 return NET_RX_DROP;
1956}
1957
1958int netif_rx_ni(struct sk_buff *skb)
1959{
1960 int err;
1961
1962 preempt_disable();
1963 err = netif_rx(skb);
1964 if (local_softirq_pending())
1965 do_softirq();
1966 preempt_enable();
1967
1968 return err;
1969}
1970
1971EXPORT_SYMBOL(netif_rx_ni);
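/*
 * Illustrative sketch (not part of the original file): netif_rx() is
 * meant to be called from interrupt context, while netif_rx_ni() is the
 * process-context variant (it kicks any pending softirq itself, as shown
 * above).  A hypothetical tunnel-style receiver running in process
 * context would do:
 *
 *	skb->protocol = eth_type_trans(skb, dev);
 *	netif_rx_ni(skb);
 *
 * whereas a classic non-NAPI interrupt handler would call netif_rx()
 * directly.
 */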
1972
Linus Torvalds1da177e2005-04-16 15:20:36 -07001973static void net_tx_action(struct softirq_action *h)
1974{
1975 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1976
1977 if (sd->completion_queue) {
1978 struct sk_buff *clist;
1979
1980 local_irq_disable();
1981 clist = sd->completion_queue;
1982 sd->completion_queue = NULL;
1983 local_irq_enable();
1984
1985 while (clist) {
1986 struct sk_buff *skb = clist;
1987 clist = clist->next;
1988
Ilpo Järvinen547b7922008-07-25 21:43:18 -07001989 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001990 __kfree_skb(skb);
1991 }
1992 }
1993
1994 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07001995 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001996
1997 local_irq_disable();
1998 head = sd->output_queue;
1999 sd->output_queue = NULL;
2000 local_irq_enable();
2001
2002 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002003 struct Qdisc *q = head;
2004 spinlock_t *root_lock;
2005
Linus Torvalds1da177e2005-04-16 15:20:36 -07002006 head = head->next_sched;
2007
David S. Miller5fb66222008-08-02 20:02:43 -07002008 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002009 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002010 smp_mb__before_clear_bit();
2011 clear_bit(__QDISC_STATE_SCHED,
2012 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002013 qdisc_run(q);
2014 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002015 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002016 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002017 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002018 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002019 } else {
2020 smp_mb__before_clear_bit();
2021 clear_bit(__QDISC_STATE_SCHED,
2022 &q->state);
2023 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 }
2025 }
2026 }
2027}
2028
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002029static inline int deliver_skb(struct sk_buff *skb,
2030 struct packet_type *pt_prev,
2031 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002032{
2033 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002034 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002035}
2036
2037#if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002038/* These hooks are defined here for ATM. */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002039struct net_bridge;
2040struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2041 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002042void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002043
Stephen Hemminger6229e362007-03-21 13:38:47 -07002044/*
2045 * If the bridge module is loaded, call the bridging hook.
2046 * Returns NULL if the packet was consumed.
2047 */
2048struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2049 struct sk_buff *skb) __read_mostly;
2050static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2051 struct packet_type **pt_prev, int *ret,
2052 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053{
2054 struct net_bridge_port *port;
2055
Stephen Hemminger6229e362007-03-21 13:38:47 -07002056 if (skb->pkt_type == PACKET_LOOPBACK ||
2057 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2058 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002059
2060 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002061 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002062 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002063 }
2064
Stephen Hemminger6229e362007-03-21 13:38:47 -07002065 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002066}
2067#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002068#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002069#endif
2070
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002071#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2072struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2073EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2074
2075static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2076 struct packet_type **pt_prev,
2077 int *ret,
2078 struct net_device *orig_dev)
2079{
2080 if (skb->dev->macvlan_port == NULL)
2081 return skb;
2082
2083 if (*pt_prev) {
2084 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2085 *pt_prev = NULL;
2086 }
2087 return macvlan_handle_frame_hook(skb);
2088}
2089#else
2090#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2091#endif
2092
Linus Torvalds1da177e2005-04-16 15:20:36 -07002093#ifdef CONFIG_NET_CLS_ACT
2094/* TODO: Maybe we should just force sch_ingress to be compiled in
2095 * when CONFIG_NET_CLS_ACT is? Otherwise we execute some useless
2096 * instructions (a compare and two extra stores) right now if we
2097 * don't have it on but do have CONFIG_NET_CLS_ACT.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002098 * NOTE: This doesn't remove any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002099 * the ingress scheduler, you just can't add policies on ingress.
2100 *
2101 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002102static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002103{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002104 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002105 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002106 struct netdev_queue *rxq;
2107 int result = TC_ACT_OK;
2108 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002109
Herbert Xuf697c3e2007-10-14 00:38:47 -07002110 if (MAX_RED_LOOP < ttl++) {
2111 printk(KERN_WARNING
2112 "Redir loop detected Dropping packet (%d->%d)\n",
2113 skb->iif, dev->ifindex);
2114 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002115 }
2116
Herbert Xuf697c3e2007-10-14 00:38:47 -07002117 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2118 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2119
David S. Miller555353c2008-07-08 17:33:13 -07002120 rxq = &dev->rx_queue;
2121
David S. Miller83874002008-07-17 00:53:03 -07002122 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002123 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002124 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002125 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2126 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002127 spin_unlock(qdisc_lock(q));
2128 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002129
Linus Torvalds1da177e2005-04-16 15:20:36 -07002130 return result;
2131}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002132
2133static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2134 struct packet_type **pt_prev,
2135 int *ret, struct net_device *orig_dev)
2136{
David S. Miller8d50b532008-07-30 02:37:46 -07002137 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002138 goto out;
2139
2140 if (*pt_prev) {
2141 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2142 *pt_prev = NULL;
2143 } else {
2144 /* Huh? Why does turning on AF_PACKET affect this? */
2145 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2146 }
2147
2148 switch (ing_filter(skb)) {
2149 case TC_ACT_SHOT:
2150 case TC_ACT_STOLEN:
2151 kfree_skb(skb);
2152 return NULL;
2153 }
2154
2155out:
2156 skb->tc_verd = 0;
2157 return skb;
2158}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002159#endif
2160
Patrick McHardybc1d0412008-07-14 22:49:30 -07002161/*
2162 * netif_nit_deliver - deliver received packets to network taps
2163 * @skb: buffer
2164 *
2165 * This function is used to deliver incoming packets to network
2166 * taps. It should be used when the normal netif_receive_skb path
2167 * is bypassed, for example because of VLAN acceleration.
2168 */
2169void netif_nit_deliver(struct sk_buff *skb)
2170{
2171 struct packet_type *ptype;
2172
2173 if (list_empty(&ptype_all))
2174 return;
2175
2176 skb_reset_network_header(skb);
2177 skb_reset_transport_header(skb);
2178 skb->mac_len = skb->network_header - skb->mac_header;
2179
2180 rcu_read_lock();
2181 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2182 if (!ptype->dev || ptype->dev == skb->dev)
2183 deliver_skb(skb, ptype, skb->dev);
2184 }
2185 rcu_read_unlock();
2186}
2187
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002188/**
2189 * netif_receive_skb - process receive buffer from network
2190 * @skb: buffer to process
2191 *
2192 * netif_receive_skb() is the main receive data processing function.
2193 * It always succeeds. The buffer may be dropped during processing
2194 * for congestion control or by the protocol layers.
2195 *
2196 * This function may only be called from softirq context and interrupts
2197 * should be enabled.
2198 *
2199 * Return values (usually ignored):
2200 * NET_RX_SUCCESS: no congestion
2201 * NET_RX_DROP: packet was dropped
2202 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002203int netif_receive_skb(struct sk_buff *skb)
2204{
2205 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002206 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002207 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002208 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002209 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002210
2211 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002212 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002213 return NET_RX_DROP;
2214
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002215 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002216 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002217
Patrick McHardyc01003c2007-03-29 11:46:52 -07002218 if (!skb->iif)
2219 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002220
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002221 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002222 orig_dev = skb->dev;
2223 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002224 if (skb_bond_should_drop(skb))
2225 null_or_orig = orig_dev; /* deliver only exact match */
2226 else
2227 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002228 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002229
Linus Torvalds1da177e2005-04-16 15:20:36 -07002230 __get_cpu_var(netdev_rx_stat).total++;
2231
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002232 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002233 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002234 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002235
2236 pt_prev = NULL;
2237
2238 rcu_read_lock();
2239
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002240 /* Don't receive packets in an exiting network namespace */
2241 if (!net_alive(dev_net(skb->dev)))
2242 goto out;
2243
Linus Torvalds1da177e2005-04-16 15:20:36 -07002244#ifdef CONFIG_NET_CLS_ACT
2245 if (skb->tc_verd & TC_NCLS) {
2246 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2247 goto ncls;
2248 }
2249#endif
2250
2251 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002252 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2253 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002254 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002255 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256 pt_prev = ptype;
2257 }
2258 }
2259
2260#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002261 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2262 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002263 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002264ncls:
2265#endif
2266
Stephen Hemminger6229e362007-03-21 13:38:47 -07002267 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2268 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002269 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002270 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2271 if (!skb)
2272 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002273
2274 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002275 list_for_each_entry_rcu(ptype,
2276 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002277 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002278 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2279 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002280 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002281 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 pt_prev = ptype;
2283 }
2284 }
2285
2286 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002287 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002288 } else {
2289 kfree_skb(skb);
2290		/* Jamal, now you will not be able to escape explaining
2291		 * to me how you were going to use this. :-)
2292 */
2293 ret = NET_RX_DROP;
2294 }
2295
2296out:
2297 rcu_read_unlock();
2298 return ret;
2299}
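/*
 * Illustrative sketch (not part of the original file): the usual shape of
 * a NAPI ->poll() routine feeding packets to netif_receive_skb().  The
 * ring accessors ("foo_rx_pending", "foo_get_rx_skb") and the "foo_priv"
 * layout are made-up names for this example.
 *
 *	static int foo_poll(struct napi_struct *napi, int budget)
 *	{
 *		struct foo_priv *priv = container_of(napi, struct foo_priv, napi);
 *		int work = 0;
 *
 *		while (work < budget && foo_rx_pending(priv)) {
 *			netif_receive_skb(foo_get_rx_skb(priv));
 *			work++;
 *		}
 *		if (work < budget)
 *			netif_rx_complete(priv->netdev, napi);
 *		return work;
 *	}
 */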
2300
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002301/* Network device is going away, flush any packets still pending */
2302static void flush_backlog(void *arg)
2303{
2304 struct net_device *dev = arg;
2305 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2306 struct sk_buff *skb, *tmp;
2307
2308 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2309 if (skb->dev == dev) {
2310 __skb_unlink(skb, &queue->input_pkt_queue);
2311 kfree_skb(skb);
2312 }
2313}
2314
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002315static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002316{
2317 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002318 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2319 unsigned long start_time = jiffies;
2320
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002321 napi->weight = weight_p;
2322 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002323 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002324
2325 local_irq_disable();
2326 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002327 if (!skb) {
2328 __napi_complete(napi);
2329 local_irq_enable();
2330 break;
2331 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002332 local_irq_enable();
2333
Linus Torvalds1da177e2005-04-16 15:20:36 -07002334 netif_receive_skb(skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002335 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002337 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002338}
2339
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002340/**
2341 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002342 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002343 *
2344 * The entry's receive function will be scheduled to run
2345 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002346void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002347{
2348 unsigned long flags;
2349
2350 local_irq_save(flags);
2351 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2352 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2353 local_irq_restore(flags);
2354}
2355EXPORT_SYMBOL(__napi_schedule);
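/*
 * Illustrative sketch (not part of the original file): how a driver's
 * interrupt handler typically hands work to the softirq via the
 * scheduling helpers built on __napi_schedule().  "foo_priv" and the
 * IRQ-masking call are assumptions made up for this example.
 *
 *	static irqreturn_t foo_interrupt(int irq, void *dev_id)
 *	{
 *		struct foo_priv *priv = dev_id;
 *
 *		if (netif_rx_schedule_prep(priv->netdev, &priv->napi)) {
 *			foo_disable_rx_irq(priv);
 *			__netif_rx_schedule(priv->netdev, &priv->napi);
 *		}
 *		return IRQ_HANDLED;
 *	}
 */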
2356
2357
Linus Torvalds1da177e2005-04-16 15:20:36 -07002358static void net_rx_action(struct softirq_action *h)
2359{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002360 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361 unsigned long start_time = jiffies;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002362 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002363 void *have;
2364
Linus Torvalds1da177e2005-04-16 15:20:36 -07002365 local_irq_disable();
2366
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002367 while (!list_empty(list)) {
2368 struct napi_struct *n;
2369 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002370
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002371		/* If the softirq window is exhausted then punt.
2372 *
2373 * Note that this is a slight policy change from the
2374 * previous NAPI code, which would allow up to 2
2375 * jiffies to pass before breaking out. The test
2376 * used to be "jiffies - start_time > 1".
2377 */
2378 if (unlikely(budget <= 0 || jiffies != start_time))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002379 goto softnet_break;
2380
2381 local_irq_enable();
2382
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002383 /* Even though interrupts have been re-enabled, this
2384 * access is safe because interrupts can only add new
2385 * entries to the tail of this list, and only ->poll()
2386 * calls can remove this head entry from the list.
2387 */
2388 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002389
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002390 have = netpoll_poll_lock(n);
2391
2392 weight = n->weight;
2393
David S. Miller0a7606c2007-10-29 21:28:47 -07002394 /* This NAPI_STATE_SCHED test is for avoiding a race
2395 * with netpoll's poll_napi(). Only the entity which
2396 * obtains the lock and sees NAPI_STATE_SCHED set will
2397 * actually make the ->poll() call. Therefore we avoid
2398		 * accidentally calling ->poll() when NAPI is not scheduled.
2399 */
2400 work = 0;
2401 if (test_bit(NAPI_STATE_SCHED, &n->state))
2402 work = n->poll(n, weight);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002403
2404 WARN_ON_ONCE(work > weight);
2405
2406 budget -= work;
2407
2408 local_irq_disable();
2409
2410 /* Drivers must not modify the NAPI state if they
2411 * consume the entire weight. In such cases this code
2412 * still "owns" the NAPI instance and therefore can
2413 * move the instance around on the list at-will.
2414 */
David S. Millerfed17f32008-01-07 21:00:40 -08002415 if (unlikely(work == weight)) {
2416 if (unlikely(napi_disable_pending(n)))
2417 __napi_complete(n);
2418 else
2419 list_move_tail(&n->poll_list, list);
2420 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002421
2422 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 }
2424out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002425 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002426
Chris Leechdb217332006-06-17 21:24:58 -07002427#ifdef CONFIG_NET_DMA
2428 /*
2429 * There may not be any more sk_buffs coming right now, so push
2430 * any pending DMA copies to hardware
2431 */
Dan Williamsd379b012007-07-09 11:56:42 -07002432 if (!cpus_empty(net_dma.channel_mask)) {
2433 int chan_idx;
Mike Travis0e12f842008-05-12 21:21:13 +02002434 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07002435 struct dma_chan *chan = net_dma.channels[chan_idx];
2436 if (chan)
2437 dma_async_memcpy_issue_pending(chan);
2438 }
Chris Leechdb217332006-06-17 21:24:58 -07002439 }
2440#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002441
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 return;
2443
2444softnet_break:
2445 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2446 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2447 goto out;
2448}
2449
2450static gifconf_func_t * gifconf_list [NPROTO];
2451
2452/**
2453 * register_gifconf - register a SIOCGIF handler
2454 * @family: Address family
2455 * @gifconf: Function handler
2456 *
2457 * Register protocol dependent address dumping routines. The handler
2458 * that is passed must not be freed or reused until it has been replaced
2459 * by another handler.
2460 */
2461int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2462{
2463 if (family >= NPROTO)
2464 return -EINVAL;
2465 gifconf_list[family] = gifconf;
2466 return 0;
2467}
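/*
 * Illustrative sketch (not part of the original file): a protocol
 * registering its SIOCGIFCONF dumper at init time, as IPv4 does with
 * inet_gifconf().  "foo_gifconf" is a hypothetical handler and "PF_FOO"
 * a stand-in for a real address family below NPROTO.
 *
 *	static int foo_gifconf(struct net_device *dev, char __user *buf,
 *			       int len);
 *
 *	static int __init foo_init(void)
 *	{
 *		return register_gifconf(PF_FOO, foo_gifconf);
 *	}
 */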
2468
2469
2470/*
2471 * Map an interface index to its name (SIOCGIFNAME)
2472 */
2473
2474/*
2475 * We need this ioctl for efficient implementation of the
2476 * if_indextoname() function required by the IPv6 API. Without
2477 * it, we would have to search all the interfaces to find a
2478 * match. --pb
2479 */
2480
Eric W. Biederman881d9662007-09-17 11:56:21 -07002481static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002482{
2483 struct net_device *dev;
2484 struct ifreq ifr;
2485
2486 /*
2487 * Fetch the caller's info block.
2488 */
2489
2490 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2491 return -EFAULT;
2492
2493 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002494 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002495 if (!dev) {
2496 read_unlock(&dev_base_lock);
2497 return -ENODEV;
2498 }
2499
2500 strcpy(ifr.ifr_name, dev->name);
2501 read_unlock(&dev_base_lock);
2502
2503 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2504 return -EFAULT;
2505 return 0;
2506}
2507
2508/*
2509 * Perform a SIOCGIFCONF call. This structure will change
2510 * size eventually, and there is nothing I can do about it.
2511 * Thus we will need a 'compatibility mode'.
2512 */
2513
Eric W. Biederman881d9662007-09-17 11:56:21 -07002514static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515{
2516 struct ifconf ifc;
2517 struct net_device *dev;
2518 char __user *pos;
2519 int len;
2520 int total;
2521 int i;
2522
2523 /*
2524 * Fetch the caller's info block.
2525 */
2526
2527 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2528 return -EFAULT;
2529
2530 pos = ifc.ifc_buf;
2531 len = ifc.ifc_len;
2532
2533 /*
2534 * Loop over the interfaces, and write an info block for each.
2535 */
2536
2537 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002538 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002539 for (i = 0; i < NPROTO; i++) {
2540 if (gifconf_list[i]) {
2541 int done;
2542 if (!pos)
2543 done = gifconf_list[i](dev, NULL, 0);
2544 else
2545 done = gifconf_list[i](dev, pos + total,
2546 len - total);
2547 if (done < 0)
2548 return -EFAULT;
2549 total += done;
2550 }
2551 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002552 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002553
2554 /*
2555 * All done. Write the updated control block back to the caller.
2556 */
2557 ifc.ifc_len = total;
2558
2559 /*
2560 * Both BSD and Solaris return 0 here, so we do too.
2561 */
2562 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2563}
2564
2565#ifdef CONFIG_PROC_FS
2566/*
2567 * This is invoked by the /proc filesystem handler to display a device
2568 * in detail.
2569 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002570void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002571 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002572{
Denis V. Luneve372c412007-11-19 22:31:54 -08002573 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002574 loff_t off;
2575 struct net_device *dev;
2576
Linus Torvalds1da177e2005-04-16 15:20:36 -07002577 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002578 if (!*pos)
2579 return SEQ_START_TOKEN;
2580
2581 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002582 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002583 if (off++ == *pos)
2584 return dev;
2585
2586 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002587}
2588
2589void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2590{
Denis V. Luneve372c412007-11-19 22:31:54 -08002591 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002593 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002594 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002595}
2596
2597void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002598 __releases(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002599{
2600 read_unlock(&dev_base_lock);
2601}
2602
2603static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2604{
Rusty Russellc45d2862007-03-28 14:29:08 -07002605 struct net_device_stats *stats = dev->get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002606
Rusty Russell5a1b5892007-04-28 21:04:03 -07002607 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2608 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2609 dev->name, stats->rx_bytes, stats->rx_packets,
2610 stats->rx_errors,
2611 stats->rx_dropped + stats->rx_missed_errors,
2612 stats->rx_fifo_errors,
2613 stats->rx_length_errors + stats->rx_over_errors +
2614 stats->rx_crc_errors + stats->rx_frame_errors,
2615 stats->rx_compressed, stats->multicast,
2616 stats->tx_bytes, stats->tx_packets,
2617 stats->tx_errors, stats->tx_dropped,
2618 stats->tx_fifo_errors, stats->collisions,
2619 stats->tx_carrier_errors +
2620 stats->tx_aborted_errors +
2621 stats->tx_window_errors +
2622 stats->tx_heartbeat_errors,
2623 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002624}
2625
2626/*
2627 * Called from the PROCfs module. This now uses the new arbitrary-sized
2628 * /proc/net interface to create /proc/net/dev.
2629 */
2630static int dev_seq_show(struct seq_file *seq, void *v)
2631{
2632 if (v == SEQ_START_TOKEN)
2633 seq_puts(seq, "Inter-| Receive "
2634 " | Transmit\n"
2635 " face |bytes packets errs drop fifo frame "
2636 "compressed multicast|bytes packets errs "
2637 "drop fifo colls carrier compressed\n");
2638 else
2639 dev_seq_printf_stats(seq, v);
2640 return 0;
2641}
2642
2643static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2644{
2645 struct netif_rx_stats *rc = NULL;
2646
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002647 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002648 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002649 rc = &per_cpu(netdev_rx_stat, *pos);
2650 break;
2651 } else
2652 ++*pos;
2653 return rc;
2654}
2655
2656static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2657{
2658 return softnet_get_online(pos);
2659}
2660
2661static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2662{
2663 ++*pos;
2664 return softnet_get_online(pos);
2665}
2666
2667static void softnet_seq_stop(struct seq_file *seq, void *v)
2668{
2669}
2670
2671static int softnet_seq_show(struct seq_file *seq, void *v)
2672{
2673 struct netif_rx_stats *s = v;
2674
2675 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002676 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002677 0, 0, 0, 0, /* was fastroute */
2678 s->cpu_collision );
Linus Torvalds1da177e2005-04-16 15:20:36 -07002679 return 0;
2680}
2681
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002682static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002683 .start = dev_seq_start,
2684 .next = dev_seq_next,
2685 .stop = dev_seq_stop,
2686 .show = dev_seq_show,
2687};
2688
2689static int dev_seq_open(struct inode *inode, struct file *file)
2690{
Denis V. Luneve372c412007-11-19 22:31:54 -08002691 return seq_open_net(inode, file, &dev_seq_ops,
2692 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002693}
2694
Arjan van de Ven9a321442007-02-12 00:55:35 -08002695static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002696 .owner = THIS_MODULE,
2697 .open = dev_seq_open,
2698 .read = seq_read,
2699 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002700 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002701};
2702
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002703static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002704 .start = softnet_seq_start,
2705 .next = softnet_seq_next,
2706 .stop = softnet_seq_stop,
2707 .show = softnet_seq_show,
2708};
2709
2710static int softnet_seq_open(struct inode *inode, struct file *file)
2711{
2712 return seq_open(file, &softnet_seq_ops);
2713}
2714
Arjan van de Ven9a321442007-02-12 00:55:35 -08002715static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716 .owner = THIS_MODULE,
2717 .open = softnet_seq_open,
2718 .read = seq_read,
2719 .llseek = seq_lseek,
2720 .release = seq_release,
2721};
2722
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002723static void *ptype_get_idx(loff_t pos)
2724{
2725 struct packet_type *pt = NULL;
2726 loff_t i = 0;
2727 int t;
2728
	list_for_each_entry_rcu(pt, &ptype_all, list) {
		if (i == pos)
			return pt;
		++i;
	}

	for (t = 0; t < PTYPE_HASH_SIZE; t++) {
		list_for_each_entry_rcu(pt, &ptype_base[t], list) {
			if (i == pos)
				return pt;
			++i;
		}
	}
	return NULL;
}

static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	rcu_read_lock();
	return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
}

static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct packet_type *pt;
	struct list_head *nxt;
	int hash;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ptype_get_idx(0);

	pt = v;
	nxt = pt->list.next;
	if (pt->type == htons(ETH_P_ALL)) {
		if (nxt != &ptype_all)
			goto found;
		hash = 0;
		nxt = ptype_base[0].next;
	} else
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;

	while (nxt == &ptype_base[hash]) {
		if (++hash >= PTYPE_HASH_SIZE)
			return NULL;
		nxt = ptype_base[hash].next;
	}
found:
	return list_entry(nxt, struct packet_type, list);
}

static void ptype_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}

static void ptype_seq_decode(struct seq_file *seq, void *sym)
{
#ifdef CONFIG_KALLSYMS
	unsigned long offset = 0, symsize;
	const char *symname;
	char *modname;
	char namebuf[128];

	symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
				  &modname, namebuf);

	if (symname) {
		char *delim = ":";

		if (!modname)
			modname = delim = "";
		seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
			   symname, offset);
		return;
	}
#endif

	seq_printf(seq, "[%p]", sym);
}

static int ptype_seq_show(struct seq_file *seq, void *v)
{
	struct packet_type *pt = v;

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Type Device      Function\n");
	else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
		if (pt->type == htons(ETH_P_ALL))
			seq_puts(seq, "ALL ");
		else
			seq_printf(seq, "%04x", ntohs(pt->type));

		seq_printf(seq, " %-8s ",
			   pt->dev ? pt->dev->name : "");
		ptype_seq_decode(seq, pt->func);
		seq_putc(seq, '\n');
	}

	return 0;
}

static const struct seq_operations ptype_seq_ops = {
	.start = ptype_seq_start,
	.next  = ptype_seq_next,
	.stop  = ptype_seq_stop,
	.show  = ptype_seq_show,
};

static int ptype_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &ptype_seq_ops,
			sizeof(struct seq_net_private));
}

static const struct file_operations ptype_seq_fops = {
	.owner	 = THIS_MODULE,
	.open    = ptype_seq_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_net,
};
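
/*
 * Illustrative sketch (not part of the original file): with the seq_file
 * operations above, reading /proc/net/ptype yields one line per registered
 * packet handler in the reader's namespace. The device column is empty for
 * wildcard handlers, and the symbol names below are only examples:
 *
 *	Type Device      Function
 *	ALL           packet_rcv+0x0
 *	0800          ip_rcv+0x0
 *	0806          arp_rcv+0x0
 */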


static int __net_init dev_proc_net_init(struct net *net)
{
	int rc = -ENOMEM;

	if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
		goto out;
	if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
		goto out_dev;
	if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
		goto out_softnet;

	if (wext_proc_init(net))
		goto out_ptype;
	rc = 0;
out:
	return rc;
out_ptype:
	proc_net_remove(net, "ptype");
out_softnet:
	proc_net_remove(net, "softnet_stat");
out_dev:
	proc_net_remove(net, "dev");
	goto out;
}

static void __net_exit dev_proc_net_exit(struct net *net)
{
	wext_proc_exit(net);

	proc_net_remove(net, "ptype");
	proc_net_remove(net, "softnet_stat");
	proc_net_remove(net, "dev");
}

static struct pernet_operations __net_initdata dev_proc_ops = {
	.init = dev_proc_net_init,
	.exit = dev_proc_net_exit,
};

static int __init dev_proc_init(void)
{
	return register_pernet_subsys(&dev_proc_ops);
}
#else
#define dev_proc_init() 0
#endif	/* CONFIG_PROC_FS */


/**
 *	netdev_set_master	-	set up master/slave pair
 *	@slave: slave device
 *	@master: new master device
 *
 *	Changes the master device of the slave. Pass %NULL to break the
 *	bonding. The caller must hold the RTNL semaphore. On a failure
 *	a negative errno code is returned. On success the reference counts
 *	are adjusted, %RTM_NEWLINK is sent to the routing socket and the
 *	function returns zero.
 */
int netdev_set_master(struct net_device *slave, struct net_device *master)
{
	struct net_device *old = slave->master;

	ASSERT_RTNL();

	if (master) {
		if (old)
			return -EBUSY;
		dev_hold(master);
	}

	slave->master = master;

	synchronize_net();

	if (old)
		dev_put(old);

	if (master)
		slave->flags |= IFF_SLAVE;
	else
		slave->flags &= ~IFF_SLAVE;

	rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
	return 0;
}
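
/*
 * Usage sketch (illustrative only, not part of the original file): a
 * bonding-style driver pairing and unpairing a lower device under RTNL.
 * The identifiers bond_dev and slave_dev are hypothetical.
 *
 *	rtnl_lock();
 *	err = netdev_set_master(slave_dev, bond_dev);	enslave
 *	...
 *	err = netdev_set_master(slave_dev, NULL);	break the pair
 *	rtnl_unlock();
 */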

static int __dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_PROMISC;
	dev->promiscuity += inc;
	if (dev->promiscuity == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, leave promiscuity unchanged and
		 * return an error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_PROMISC;
		else {
			dev->promiscuity -= inc;
			printk(KERN_WARNING "%s: promiscuity touches roof, "
				"set promiscuity failed, promiscuity feature "
				"of device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags != old_flags) {
		printk(KERN_INFO "device %s %s promiscuous mode\n",
		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
								"left");
		if (audit_enabled)
			audit_log(current->audit_context, GFP_ATOMIC,
				AUDIT_ANOM_PROMISCUOUS,
				"dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
				dev->name, (dev->flags & IFF_PROMISC),
				(old_flags & IFF_PROMISC),
				audit_get_loginuid(current),
				current->uid, current->gid,
				audit_get_sessionid(current));

		if (dev->change_rx_flags)
			dev->change_rx_flags(dev, IFF_PROMISC);
	}
	return 0;
}

/**
 *	dev_set_promiscuity	- update promiscuity count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove promiscuity from a device. While the count in the device
 *	remains above zero the interface remains promiscuous. Once it hits zero
 *	the device reverts back to normal filtering operation. A negative inc
 *	value is used to drop promiscuity on the device.
 *	Return 0 if successful or a negative errno code on error.
 */
int dev_set_promiscuity(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;
	int err;

	err = __dev_set_promiscuity(dev, inc);
	if (err < 0)
		return err;
	if (dev->flags != old_flags)
		dev_set_rx_mode(dev);
	return err;
}
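
/*
 * Usage sketch (illustrative, not part of the original file): a packet
 * capture component holding an interface in promiscuous mode for the
 * lifetime of a capture, under RTNL; "dev" is a hypothetical held device.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	increment on capture start
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		matching decrement on stop
 *	rtnl_unlock();
 */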

/**
 *	dev_set_allmulti	- update allmulti count on a device
 *	@dev: device
 *	@inc: modifier
 *
 *	Add or remove reception of all multicast frames to a device. While the
 *	count in the device remains above zero the interface stays listening
 *	to all multicast frames. Once it hits zero the device reverts back to
 *	normal filtering operation. A negative @inc value is used to drop the
 *	counter when releasing a resource needing all multicasts.
 *	Return 0 if successful or a negative errno code on error.
 */

int dev_set_allmulti(struct net_device *dev, int inc)
{
	unsigned short old_flags = dev->flags;

	ASSERT_RTNL();

	dev->flags |= IFF_ALLMULTI;
	dev->allmulti += inc;
	if (dev->allmulti == 0) {
		/*
		 * Avoid overflow.
		 * If inc causes overflow, leave allmulti unchanged and
		 * return an error.
		 */
		if (inc < 0)
			dev->flags &= ~IFF_ALLMULTI;
		else {
			dev->allmulti -= inc;
			printk(KERN_WARNING "%s: allmulti touches roof, "
				"set allmulti failed, allmulti feature of "
				"device might be broken.\n", dev->name);
			return -EOVERFLOW;
		}
	}
	if (dev->flags ^ old_flags) {
		if (dev->change_rx_flags)
			dev->change_rx_flags(dev, IFF_ALLMULTI);
		dev_set_rx_mode(dev);
	}
	return 0;
}
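
/*
 * Usage sketch (illustrative): a component that needs every multicast
 * frame while a resource is active takes a reference on setup and drops
 * it on teardown; "dev" is hypothetical and RTNL is held by the caller.
 *
 *	err = dev_set_allmulti(dev, 1);
 *	...
 *	dev_set_allmulti(dev, -1);
 */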

/*
 *	Upload unicast and multicast address lists to device and
 *	configure RX filtering. When the device doesn't support unicast
 *	filtering it is put in promiscuous mode while unicast addresses
 *	are present.
 */
void __dev_set_rx_mode(struct net_device *dev)
{
	/* dev_open will call this function so the list will stay sane. */
	if (!(dev->flags&IFF_UP))
		return;

	if (!netif_device_present(dev))
		return;

	if (dev->set_rx_mode)
		dev->set_rx_mode(dev);
	else {
		/* Unicast address changes may only happen under the rtnl,
		 * therefore calling __dev_set_promiscuity here is safe.
		 */
		if (dev->uc_count > 0 && !dev->uc_promisc) {
			__dev_set_promiscuity(dev, 1);
			dev->uc_promisc = 1;
		} else if (dev->uc_count == 0 && dev->uc_promisc) {
			__dev_set_promiscuity(dev, -1);
			dev->uc_promisc = 0;
		}

		if (dev->set_multicast_list)
			dev->set_multicast_list(dev);
	}
}

void dev_set_rx_mode(struct net_device *dev)
{
	netif_addr_lock_bh(dev);
	__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
}

int __dev_addr_delete(struct dev_addr_list **list, int *count,
		      void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	for (; (da = *list) != NULL; list = &da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    alen == da->da_addrlen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 0;
				if (old_glbl == 0)
					break;
			}
			if (--da->da_users)
				return 0;

			*list = da->next;
			kfree(da);
			(*count)--;
			return 0;
		}
	}
	return -ENOENT;
}

int __dev_addr_add(struct dev_addr_list **list, int *count,
		   void *addr, int alen, int glbl)
{
	struct dev_addr_list *da;

	for (da = *list; da != NULL; da = da->next) {
		if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
		    da->da_addrlen == alen) {
			if (glbl) {
				int old_glbl = da->da_gusers;
				da->da_gusers = 1;
				if (old_glbl)
					return 0;
			}
			da->da_users++;
			return 0;
		}
	}

	da = kzalloc(sizeof(*da), GFP_ATOMIC);
	if (da == NULL)
		return -ENOMEM;
	memcpy(da->da_addr, addr, alen);
	da->da_addrlen = alen;
	da->da_users = 1;
	da->da_gusers = glbl ? 1 : 0;
	da->next = *list;
	*list = da;
	(*count)++;
	return 0;
}

/**
 *	dev_unicast_delete	- Release secondary unicast address.
 *	@dev: device
 *	@addr: address to delete
 *	@alen: length of @addr
 *
 *	Release reference to a secondary unicast address and remove it
 *	from the device if the reference count drops to zero.
 *
 * 	The caller must hold the rtnl_mutex.
 */
int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
{
	int err;

	ASSERT_RTNL();

	netif_addr_lock_bh(dev);
	err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
	if (!err)
		__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
	return err;
}
EXPORT_SYMBOL(dev_unicast_delete);

/**
 *	dev_unicast_add		- add a secondary unicast address
 *	@dev: device
 *	@addr: address to add
 *	@alen: length of @addr
 *
 *	Add a secondary unicast address to the device or increase
 *	the reference count if it already exists.
 *
 *	The caller must hold the rtnl_mutex.
 */
int dev_unicast_add(struct net_device *dev, void *addr, int alen)
{
	int err;

	ASSERT_RTNL();

	netif_addr_lock_bh(dev);
	err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
	if (!err)
		__dev_set_rx_mode(dev);
	netif_addr_unlock_bh(dev);
	return err;
}
EXPORT_SYMBOL(dev_unicast_add);
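
/*
 * Usage sketch (illustrative): a stacked driver registering an extra MAC
 * address it wants the lower device to accept; mac[] and lower_dev are
 * hypothetical names for this sketch.
 *
 *	u8 mac[ETH_ALEN] = { 0x02, 0x00, 0x00, 0x00, 0x00, 0x01 };
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(lower_dev, mac, ETH_ALEN);
 *	...
 *	dev_unicast_delete(lower_dev, mac, ETH_ALEN);
 *	rtnl_unlock();
 */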

int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
		    struct dev_addr_list **from, int *from_count)
{
	struct dev_addr_list *da, *next;
	int err = 0;

	da = *from;
	while (da != NULL) {
		next = da->next;
		if (!da->da_synced) {
			err = __dev_addr_add(to, to_count,
					     da->da_addr, da->da_addrlen, 0);
			if (err < 0)
				break;
			da->da_synced = 1;
			da->da_users++;
		} else if (da->da_users == 1) {
			__dev_addr_delete(to, to_count,
					  da->da_addr, da->da_addrlen, 0);
			__dev_addr_delete(from, from_count,
					  da->da_addr, da->da_addrlen, 0);
		}
		da = next;
	}
	return err;
}

void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
		       struct dev_addr_list **from, int *from_count)
{
	struct dev_addr_list *da, *next;

	da = *from;
	while (da != NULL) {
		next = da->next;
		if (da->da_synced) {
			__dev_addr_delete(to, to_count,
					  da->da_addr, da->da_addrlen, 0);
			da->da_synced = 0;
			__dev_addr_delete(from, from_count,
					  da->da_addr, da->da_addrlen, 0);
		}
		da = next;
	}
}

/**
 *	dev_unicast_sync - Synchronize device's unicast list to another device
 *	@to: destination device
 *	@from: source device
 *
 *	Add newly added addresses to the destination device and release
 *	addresses that have no users left. The source device must be
 *	locked by netif_addr_lock_bh.
 *
 *	This function is intended to be called from the dev->set_rx_mode
 *	function of layered software devices.
 */
int dev_unicast_sync(struct net_device *to, struct net_device *from)
{
	int err = 0;

	netif_addr_lock_bh(to);
	err = __dev_addr_sync(&to->uc_list, &to->uc_count,
			      &from->uc_list, &from->uc_count);
	if (!err)
		__dev_set_rx_mode(to);
	netif_addr_unlock_bh(to);
	return err;
}
EXPORT_SYMBOL(dev_unicast_sync);

/**
 *	dev_unicast_unsync - Remove synchronized addresses from the destination device
 *	@to: destination device
 *	@from: source device
 *
 *	Remove all addresses that were added to the destination device by
 *	dev_unicast_sync(). This function is intended to be called from the
 *	dev->stop function of layered software devices.
 */
void dev_unicast_unsync(struct net_device *to, struct net_device *from)
{
	netif_addr_lock_bh(from);
	netif_addr_lock(to);

	__dev_addr_unsync(&to->uc_list, &to->uc_count,
			  &from->uc_list, &from->uc_count);
	__dev_set_rx_mode(to);

	netif_addr_unlock(to);
	netif_addr_unlock_bh(from);
}
EXPORT_SYMBOL(dev_unicast_unsync);
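
/*
 * Usage sketch (illustrative): how a layered device such as a VLAN might
 * wire these helpers up; vlan_dev_set_rx_mode, vlan_dev_stop and real_dev
 * (the lower device, fetched from the vlan device's private data) are
 * hypothetical names for this sketch.
 *
 *	static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 *	{
 *		dev_unicast_sync(real_dev, vlan_dev);
 *	}
 *
 *	static int vlan_dev_stop(struct net_device *vlan_dev)
 *	{
 *		dev_unicast_unsync(real_dev, vlan_dev);
 *		return 0;
 *	}
 */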

static void __dev_addr_discard(struct dev_addr_list **list)
{
	struct dev_addr_list *tmp;

	while (*list != NULL) {
		tmp = *list;
		*list = tmp->next;
		if (tmp->da_users > tmp->da_gusers)
			printk("__dev_addr_discard: address leakage! "
			       "da_users=%d\n", tmp->da_users);
		kfree(tmp);
	}
}

static void dev_addr_discard(struct net_device *dev)
{
	netif_addr_lock_bh(dev);

	__dev_addr_discard(&dev->uc_list);
	dev->uc_count = 0;

	__dev_addr_discard(&dev->mc_list);
	dev->mc_count = 0;

	netif_addr_unlock_bh(dev);
}

unsigned dev_get_flags(const struct net_device *dev)
{
	unsigned flags;

	flags = (dev->flags & ~(IFF_PROMISC |
				IFF_ALLMULTI |
				IFF_RUNNING |
				IFF_LOWER_UP |
				IFF_DORMANT)) |
		(dev->gflags & (IFF_PROMISC |
				IFF_ALLMULTI));

	if (netif_running(dev)) {
		if (netif_oper_up(dev))
			flags |= IFF_RUNNING;
		if (netif_carrier_ok(dev))
			flags |= IFF_LOWER_UP;
		if (netif_dormant(dev))
			flags |= IFF_DORMANT;
	}

	return flags;
}

int dev_change_flags(struct net_device *dev, unsigned flags)
{
	int ret, changes;
	int old_flags = dev->flags;

	ASSERT_RTNL();

	/*
	 *	Set the flags on our device.
	 */

	dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
			       IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
			       IFF_AUTOMEDIA)) |
		     (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
				    IFF_ALLMULTI));

	/*
	 *	Load in the correct multicast list now the flags have changed.
	 */

	if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
		dev->change_rx_flags(dev, IFF_MULTICAST);

	dev_set_rx_mode(dev);

	/*
	 *	Have we downed the interface? We handle IFF_UP ourselves
	 *	according to user attempts to set it, rather than blindly
	 *	setting it.
	 */

	ret = 0;
	if ((old_flags ^ flags) & IFF_UP) {	/* Bit is different  ? */
		ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);

		if (!ret)
			dev_set_rx_mode(dev);
	}

	if (dev->flags & IFF_UP &&
	    ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
					  IFF_VOLATILE)))
		call_netdevice_notifiers(NETDEV_CHANGE, dev);

	if ((flags ^ dev->gflags) & IFF_PROMISC) {
		int inc = (flags & IFF_PROMISC) ? +1 : -1;
		dev->gflags ^= IFF_PROMISC;
		dev_set_promiscuity(dev, inc);
	}

	/* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
	   is important. Some (broken) drivers set IFF_PROMISC when
	   IFF_ALLMULTI is requested, without asking us and without
	   reporting it. */
	if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
		int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
		dev->gflags ^= IFF_ALLMULTI;
		dev_set_allmulti(dev, inc);
	}

	/* Exclude state transition flags, already notified */
	changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
	if (changes)
		rtmsg_ifinfo(RTM_NEWLINK, dev, changes);

	return ret;
}
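
/*
 * Usage sketch (illustrative): administratively bringing an interface up
 * without disturbing its other flags, mirroring the SIOCGIFFLAGS then
 * SIOCSIFFLAGS pattern; "dev" is hypothetical and RTNL is taken here.
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
 *	rtnl_unlock();
 */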

int dev_set_mtu(struct net_device *dev, int new_mtu)
{
	int err;

	if (new_mtu == dev->mtu)
		return 0;

	/*	MTU must be positive.	 */
	if (new_mtu < 0)
		return -EINVAL;

	if (!netif_device_present(dev))
		return -ENODEV;

	err = 0;
	if (dev->change_mtu)
		err = dev->change_mtu(dev, new_mtu);
	else
		dev->mtu = new_mtu;
	if (!err && dev->flags & IFF_UP)
		call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
	return err;
}
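
/*
 * Usage sketch (illustrative): a tunnel driver shrinking its device MTU
 * to leave room for encapsulation headers; "dev" and the 80-byte overhead
 * are hypothetical. On success, NETDEV_CHANGEMTU is sent if the device
 * is up.
 *
 *	err = dev_set_mtu(dev, dev->mtu - 80);
 *	if (err)
 *		return err;
 */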

int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
{
	int err;

	if (!dev->set_mac_address)
		return -EOPNOTSUPP;
	if (sa->sa_family != dev->type)
		return -EINVAL;
	if (!netif_device_present(dev))
		return -ENODEV;
	err = dev->set_mac_address(dev, sa);
	if (!err)
		call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
 */
static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCGIFFLAGS:	/* Get interface flags */
			ifr->ifr_flags = dev_get_flags(dev);
			return 0;

		case SIOCGIFMETRIC:	/* Get the metric on the interface
					   (currently unused) */
			ifr->ifr_metric = 0;
			return 0;

		case SIOCGIFMTU:	/* Get the MTU of a device */
			ifr->ifr_mtu = dev->mtu;
			return 0;

		case SIOCGIFHWADDR:
			if (!dev->addr_len)
				memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
			else
				memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
				       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			ifr->ifr_hwaddr.sa_family = dev->type;
			return 0;

		case SIOCGIFSLAVE:
			err = -EINVAL;
			break;

		case SIOCGIFMAP:
			ifr->ifr_map.mem_start = dev->mem_start;
			ifr->ifr_map.mem_end   = dev->mem_end;
			ifr->ifr_map.base_addr = dev->base_addr;
			ifr->ifr_map.irq       = dev->irq;
			ifr->ifr_map.dma       = dev->dma;
			ifr->ifr_map.port      = dev->if_port;
			return 0;

		case SIOCGIFINDEX:
			ifr->ifr_ifindex = dev->ifindex;
			return 0;

		case SIOCGIFTXQLEN:
			ifr->ifr_qlen = dev->tx_queue_len;
			return 0;

		default:
			/* dev_ioctl() should ensure this case
			 * is never reached
			 */
			WARN_ON(1);
			err = -EINVAL;
			break;

	}
	return err;
}

/*
 *	Perform the SIOCxIFxxx calls, inside rtnl_lock()
 */
static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
{
	int err;
	struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);

	if (!dev)
		return -ENODEV;

	switch (cmd) {
		case SIOCSIFFLAGS:	/* Set interface flags */
			return dev_change_flags(dev, ifr->ifr_flags);

		case SIOCSIFMETRIC:	/* Set the metric on the interface
					   (currently unused) */
			return -EOPNOTSUPP;

		case SIOCSIFMTU:	/* Set the MTU of a device */
			return dev_set_mtu(dev, ifr->ifr_mtu);

		case SIOCSIFHWADDR:
			return dev_set_mac_address(dev, &ifr->ifr_hwaddr);

		case SIOCSIFHWBROADCAST:
			if (ifr->ifr_hwaddr.sa_family != dev->type)
				return -EINVAL;
			memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
			       min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
			call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
			return 0;

		case SIOCSIFMAP:
			if (dev->set_config) {
				if (!netif_device_present(dev))
					return -ENODEV;
				return dev->set_config(dev, &ifr->ifr_map);
			}
			return -EOPNOTSUPP;

		case SIOCADDMULTI:
			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
					  dev->addr_len, 1);

		case SIOCDELMULTI:
			if ((!dev->set_multicast_list && !dev->set_rx_mode) ||
			    ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
				return -EINVAL;
			if (!netif_device_present(dev))
				return -ENODEV;
			return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
					     dev->addr_len, 1);

		case SIOCSIFTXQLEN:
			if (ifr->ifr_qlen < 0)
				return -EINVAL;
			dev->tx_queue_len = ifr->ifr_qlen;
			return 0;

		case SIOCSIFNAME:
			ifr->ifr_newname[IFNAMSIZ-1] = '\0';
			return dev_change_name(dev, ifr->ifr_newname);

		/*
		 *	Unknown or private ioctl
		 */

		default:
			if ((cmd >= SIOCDEVPRIVATE &&
			    cmd <= SIOCDEVPRIVATE + 15) ||
			    cmd == SIOCBONDENSLAVE ||
			    cmd == SIOCBONDRELEASE ||
			    cmd == SIOCBONDSETHWADDR ||
			    cmd == SIOCBONDSLAVEINFOQUERY ||
			    cmd == SIOCBONDINFOQUERY ||
			    cmd == SIOCBONDCHANGEACTIVE ||
			    cmd == SIOCGMIIPHY ||
			    cmd == SIOCGMIIREG ||
			    cmd == SIOCSMIIREG ||
			    cmd == SIOCBRADDIF ||
			    cmd == SIOCBRDELIF ||
			    cmd == SIOCWANDEV) {
				err = -EOPNOTSUPP;
				if (dev->do_ioctl) {
					if (netif_device_present(dev))
						err = dev->do_ioctl(dev, ifr,
								    cmd);
					else
						err = -ENODEV;
				}
			} else
				err = -EINVAL;

	}
	return err;
}

/*
 *	This function handles all "interface"-type I/O control requests. The
 *	actual 'doing' part of this is dev_ifsioc above.
 */

/**
 *	dev_ioctl	-	network device ioctl
 *	@net: the applicable net namespace
 *	@cmd: command to issue
 *	@arg: pointer to a struct ifreq in user space
 *
 *	Issue ioctl functions to devices. This is normally called by the
 *	user space syscall interfaces but can sometimes be useful for
 *	other purposes. The return value is the return from the syscall if
 *	positive or a negative errno code on error.
 */

int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
{
	struct ifreq ifr;
	int ret;
	char *colon;

	/* One special case: SIOCGIFCONF takes ifconf argument
	   and requires shared lock, because it sleeps writing
	   to user space.
	 */

	if (cmd == SIOCGIFCONF) {
		rtnl_lock();
		ret = dev_ifconf(net, (char __user *) arg);
		rtnl_unlock();
		return ret;
	}
	if (cmd == SIOCGIFNAME)
		return dev_ifname(net, (struct ifreq __user *)arg);

	if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
		return -EFAULT;

	ifr.ifr_name[IFNAMSIZ-1] = 0;

	colon = strchr(ifr.ifr_name, ':');
	if (colon)
		*colon = 0;

	/*
	 *	See which interface the caller is talking about.
	 */

	switch (cmd) {
		/*
		 *	These ioctl calls:
		 *	- can be done by all.
		 *	- atomic and do not require locking.
		 *	- return a value
		 */
		case SIOCGIFFLAGS:
		case SIOCGIFMETRIC:
		case SIOCGIFMTU:
		case SIOCGIFHWADDR:
		case SIOCGIFSLAVE:
		case SIOCGIFMAP:
		case SIOCGIFINDEX:
		case SIOCGIFTXQLEN:
			dev_load(net, ifr.ifr_name);
			read_lock(&dev_base_lock);
			ret = dev_ifsioc_locked(net, &ifr, cmd);
			read_unlock(&dev_base_lock);
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		case SIOCETHTOOL:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ethtool(net, &ifr);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- return a value
		 */
		case SIOCGMIIPHY:
		case SIOCGMIIREG:
		case SIOCSIFNAME:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			if (!ret) {
				if (colon)
					*colon = ':';
				if (copy_to_user(arg, &ifr,
						 sizeof(struct ifreq)))
					ret = -EFAULT;
			}
			return ret;

		/*
		 *	These ioctl calls:
		 *	- require superuser power.
		 *	- require strict serialization.
		 *	- do not return a value
		 */
		case SIOCSIFFLAGS:
		case SIOCSIFMETRIC:
		case SIOCSIFMTU:
		case SIOCSIFMAP:
		case SIOCSIFHWADDR:
		case SIOCSIFSLAVE:
		case SIOCADDMULTI:
		case SIOCDELMULTI:
		case SIOCSIFHWBROADCAST:
		case SIOCSIFTXQLEN:
		case SIOCSMIIREG:
		case SIOCBONDENSLAVE:
		case SIOCBONDRELEASE:
		case SIOCBONDSETHWADDR:
		case SIOCBONDCHANGEACTIVE:
		case SIOCBRADDIF:
		case SIOCBRDELIF:
			if (!capable(CAP_NET_ADMIN))
				return -EPERM;
			/* fall through */
		case SIOCBONDSLAVEINFOQUERY:
		case SIOCBONDINFOQUERY:
			dev_load(net, ifr.ifr_name);
			rtnl_lock();
			ret = dev_ifsioc(net, &ifr, cmd);
			rtnl_unlock();
			return ret;

		case SIOCGIFMEM:
			/* Get the per device memory space. We can add this but
			 * currently do not support it */
		case SIOCSIFMEM:
			/* Set the per device memory buffer space.
			 * Not applicable in our case */
		case SIOCSIFLINK:
			return -EINVAL;

		/*
		 *	Unknown or private ioctl.
		 */
		default:
			if (cmd == SIOCWANDEV ||
			    (cmd >= SIOCDEVPRIVATE &&
			     cmd <= SIOCDEVPRIVATE + 15)) {
				dev_load(net, ifr.ifr_name);
				rtnl_lock();
				ret = dev_ifsioc(net, &ifr, cmd);
				rtnl_unlock();
				if (!ret && copy_to_user(arg, &ifr,
							 sizeof(struct ifreq)))
					ret = -EFAULT;
				return ret;
			}
			/* Take care of Wireless Extensions */
			if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
				return wext_handle_ioctl(net, &ifr, cmd, arg);
			return -EINVAL;
	}
}
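
/*
 * Illustrative user-space counterpart (sketch, not part of this file):
 * the typical path into dev_ioctl() via an AF_INET socket, here reading
 * an interface MTU with SIOCGIFMTU.
 *
 *	#include <string.h>
 *	#include <unistd.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *
 *	int get_mtu(const char *name)
 *	{
 *		struct ifreq ifr;
 *		int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *		if (fd < 0)
 *			return -1;
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, name, IFNAMSIZ - 1);
 *		if (ioctl(fd, SIOCGIFMTU, &ifr) < 0)
 *			ifr.ifr_mtu = -1;
 *		close(fd);
 *		return ifr.ifr_mtu;
 *	}
 */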


/**
 *	dev_new_index	-	allocate an ifindex
 *	@net: the applicable net namespace
 *
 *	Returns a suitable unique value for a new device interface
 *	number.  The caller must hold the rtnl semaphore or the
 *	dev_base_lock to be sure it remains unique.
 */
static int dev_new_index(struct net *net)
{
	static int ifindex;
	for (;;) {
		if (++ifindex <= 0)
			ifindex = 1;
		if (!__dev_get_by_index(net, ifindex))
			return ifindex;
	}
}

/* Delayed registration/unregistration */
static DEFINE_SPINLOCK(net_todo_list_lock);
static LIST_HEAD(net_todo_list);

static void net_set_todo(struct net_device *dev)
{
	spin_lock(&net_todo_list_lock);
	list_add_tail(&dev->todo_list, &net_todo_list);
	spin_unlock(&net_todo_list_lock);
}

static void rollback_registered(struct net_device *dev)
{
	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	/* Some devices call without registering for initialization unwind. */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
				  "was registered\n", dev->name, dev);

		WARN_ON(1);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_REGISTERED);

	/* If device is running, close it first. */
	dev_close(dev);

	/* And unlink it from device chain. */
	unlist_netdevice(dev);

	dev->reg_state = NETREG_UNREGISTERING;

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);


	/* Notify protocols, that we are about to destroy
	   this device. They should clean all the things.
	*/
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 *	Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	if (dev->uninit)
		dev->uninit(dev);

	/* Notifier chain MUST detach us from master device. */
	WARN_ON(dev->master);

	/* Remove entries from kobject tree */
	netdev_unregister_kobject(dev);

	synchronize_net();

	dev_put(dev);
}

static void __netdev_init_queue_locks_one(struct net_device *dev,
					  struct netdev_queue *dev_queue,
					  void *_unused)
{
	spin_lock_init(&dev_queue->_xmit_lock);
	netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
	dev_queue->xmit_lock_owner = -1;
}

static void netdev_init_queue_locks(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
	__netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
}

/**
 *	register_netdevice	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	register_netdev() instead of this.
 *
 *	BUGS:
 *	The locking appears insufficient to guarantee two parallel registers
 *	will not get the same name.
 */

int register_netdevice(struct net_device *dev)
{
	struct hlist_head *head;
	struct hlist_node *p;
	int ret;
	struct net *net;

	BUG_ON(dev_boot_phase);
	ASSERT_RTNL();

	might_sleep();

	/* When net_device's are persistent, this will be fatal. */
	BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
	BUG_ON(!dev_net(dev));
	net = dev_net(dev);

	spin_lock_init(&dev->addr_list_lock);
	netdev_set_addr_lockdep_class(dev);
	netdev_init_queue_locks(dev);

	dev->iflink = -1;

	/* Init, if this function is available */
	if (dev->init) {
		ret = dev->init(dev);
		if (ret) {
			if (ret > 0)
				ret = -EIO;
			goto out;
		}
	}

	if (!dev_valid_name(dev->name)) {
		ret = -EINVAL;
		goto err_uninit;
	}

	dev->ifindex = dev_new_index(net);
	if (dev->iflink == -1)
		dev->iflink = dev->ifindex;

	/* Check for existence of name */
	head = dev_name_hash(net, dev->name);
	hlist_for_each(p, head) {
		struct net_device *d
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
			ret = -EEXIST;
			goto err_uninit;
		}
	}

	/* Fix illegal checksum combinations */
	if ((dev->features & NETIF_F_HW_CSUM) &&
	    (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
	}

	if ((dev->features & NETIF_F_NO_CSUM) &&
	    (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
		printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
		       dev->name);
		dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
	}


	/* Fix illegal SG+CSUM combinations. */
	if ((dev->features & NETIF_F_SG) &&
	    !(dev->features & NETIF_F_ALL_CSUM)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_SG;
	}

	/* TSO requires that SG is present as well. */
	if ((dev->features & NETIF_F_TSO) &&
	    !(dev->features & NETIF_F_SG)) {
		printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
		       dev->name);
		dev->features &= ~NETIF_F_TSO;
	}
	if (dev->features & NETIF_F_UFO) {
		if (!(dev->features & NETIF_F_HW_CSUM)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_HW_CSUM feature.\n",
					dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
		if (!(dev->features & NETIF_F_SG)) {
			printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
					"NETIF_F_SG feature.\n",
					dev->name);
			dev->features &= ~NETIF_F_UFO;
		}
	}

	/* Enable software GSO if SG is supported. */
	if (dev->features & NETIF_F_SG)
		dev->features |= NETIF_F_GSO;

	netdev_initialize_kobject(dev);
	ret = netdev_register_kobject(dev);
	if (ret)
		goto err_uninit;
	dev->reg_state = NETREG_REGISTERED;

	/*
	 *	Default initial state at registry is that the
	 *	device is present.
	 */

	set_bit(__LINK_STATE_PRESENT, &dev->state);

	dev_init_scheduler(dev);
	dev_hold(dev);
	list_netdevice(dev);

	/* Notify protocols, that a new device appeared. */
	ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
	ret = notifier_to_errno(ret);
	if (ret) {
		rollback_registered(dev);
		dev->reg_state = NETREG_UNREGISTERED;
	}

out:
	return ret;

err_uninit:
	if (dev->uninit)
		dev->uninit(dev);
	goto out;
}

/**
 *	register_netdev	- register a network device
 *	@dev: device to register
 *
 *	Take a completed network device structure and add it to the kernel
 *	interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
 *	chain. 0 is returned on success. A negative errno code is returned
 *	on a failure to set up the device, or if the name is a duplicate.
 *
 *	This is a wrapper around register_netdevice that takes the rtnl semaphore
 *	and expands the device name if you passed a format string to
 *	alloc_netdev.
 */
int register_netdev(struct net_device *dev)
{
	int err;

	rtnl_lock();

	/*
	 * If the name is a format string the caller wants us to do a
	 * name allocation.
	 */
	if (strchr(dev->name, '%')) {
		err = dev_alloc_name(dev, dev->name);
		if (err < 0)
			goto out;
	}

	err = register_netdevice(dev);
out:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(register_netdev);
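
/*
 * Lifecycle sketch (illustrative): the usual driver-side pairing of the
 * registration APIs above with the teardown APIs later in this file.
 * struct my_priv and my_setup are hypothetical; register_netdev() takes
 * the rtnl semaphore itself and expands the "myeth%d" format string.
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "myeth%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 *	...
 *	unregister_netdev(dev);
 *	free_netdev(dev);
 */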

/*
 * netdev_wait_allrefs - wait until all references are gone.
 *
 * This is called when unregistering network devices.
 *
 * Any protocol or device that holds a reference should register
 * for netdevice notification, and cleanup and put back the
 * reference if they receive an UNREGISTER event.
 * We can get stuck here if buggy protocols don't correctly
 * call dev_put.
 */
static void netdev_wait_allrefs(struct net_device *dev)
{
	unsigned long rebroadcast_time, warning_time;

	rebroadcast_time = warning_time = jiffies;
	while (atomic_read(&dev->refcnt) != 0) {
		if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
			rtnl_lock();

			/* Rebroadcast unregister notification */
			call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

			if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
				     &dev->state)) {
				/* We must not have linkwatch events
				 * pending on unregister. If this
				 * happens, we simply run the queue
				 * unscheduled, resulting in a noop
				 * for this device.
				 */
				linkwatch_run_queue();
			}

			__rtnl_unlock();

			rebroadcast_time = jiffies;
		}

		msleep(250);

		if (time_after(jiffies, warning_time + 10 * HZ)) {
			printk(KERN_EMERG "unregister_netdevice: "
			       "waiting for %s to become free. Usage "
			       "count = %d\n",
			       dev->name, atomic_read(&dev->refcnt));
			warning_time = jiffies;
		}
	}
}

/* The sequence is:
 *
 *	rtnl_lock();
 *	...
 *	register_netdevice(x1);
 *	register_netdevice(x2);
 *	...
 *	unregister_netdevice(y1);
 *	unregister_netdevice(y2);
 *	...
 *	rtnl_unlock();
 *	free_netdev(y1);
 *	free_netdev(y2);
 *
 * We are invoked by rtnl_unlock() after it drops the semaphore.
 * This allows us to deal with problems:
 * 1) We can delete sysfs objects which invoke hotplug
 *    without deadlocking with linkwatch via keventd.
 * 2) Since we run with the RTNL semaphore not held, we can sleep
 *    safely in order to wait for the netdev refcnt to drop to zero.
 */
static DEFINE_MUTEX(net_todo_run_mutex);
void netdev_run_todo(void)
{
	struct list_head list;

	/* Need to guard against multiple CPUs getting out of order. */
	mutex_lock(&net_todo_run_mutex);

	/* Not safe to do outside the semaphore.  We must not return
	 * until all unregister events invoked by the local processor
	 * have been completed (either by this todo run, or one on
	 * another cpu).
	 */
	if (list_empty(&net_todo_list))
		goto out;

	/* Snapshot list, allow later requests */
	spin_lock(&net_todo_list_lock);
	list_replace_init(&net_todo_list, &list);
	spin_unlock(&net_todo_list_lock);

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}

out:
	mutex_unlock(&net_todo_run_mutex);
}

static struct net_device_stats *internal_stats(struct net_device *dev)
{
	return &dev->stats;
}

static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}

static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}

/**
 *	alloc_netdev_mq - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@queue_count:	the number of subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization.  Also allocates subqueue structs
 *	for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	if (sizeof_priv) {
		dev->priv = ((char *)dev +
			     ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
			      & ~NETDEV_ALIGN_CONST));
	}

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	dev->get_stats = internal_stats;
	netpoll_netdev_init(dev);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004318
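/*
 * Illustrative usage sketch (not part of this file): a hypothetical
 * driver allocating a four-queue ethernet device. "struct my_priv"
 * and my_probe() are made-up names; ether_setup() and
 * register_netdev() are the usual helpers:
 *
 *	#include <linux/netdevice.h>
 *	#include <linux/etherdevice.h>
 *
 *	struct my_priv {
 *		spinlock_t lock;
 *	};
 *
 *	static int my_probe(void)
 *	{
 *		struct net_device *dev;
 *		int err;
 *
 *		dev = alloc_netdev_mq(sizeof(struct my_priv), "eth%d",
 *				      ether_setup, 4);
 *		if (!dev)
 *			return -ENOMEM;
 *		err = register_netdev(dev);
 *		if (err)
 *			free_netdev(dev);
 *		return err;
 *	}
 */
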
4319/**
4320 * free_netdev - free network device
4321 * @dev: device
4322 *
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004323 * This function does the last stage of destroying an allocated device
4324 * interface. The reference to the device object is released.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004325 * If this is the last reference then it will be freed.
4326 */
4327void free_netdev(struct net_device *dev)
4328{
Denis V. Lunevf3005d72008-04-16 02:02:18 -07004329 release_net(dev_net(dev));
4330
David S. Millere8a04642008-07-17 00:34:19 -07004331 kfree(dev->_tx);
4332
Stephen Hemminger3041a062006-05-26 13:25:24 -07004333 /* Compatibility with error handling in drivers */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004334 if (dev->reg_state == NETREG_UNINITIALIZED) {
4335 kfree((char *)dev - dev->padded);
4336 return;
4337 }
4338
4339 BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
4340 dev->reg_state = NETREG_RELEASED;
4341
Greg Kroah-Hartman43cb76d2002-04-09 12:14:34 -07004342 /* will free via device release */
4343 put_device(&dev->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004344}
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004345
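/*
 * Illustrative teardown sketch (not part of this file). For a device
 * that was successfully registered, unregister it before freeing; for
 * one that never reached register_netdevice(), free_netdev() alone
 * releases the memory (the NETREG_UNINITIALIZED branch above).
 * "my_dev" is a hypothetical device pointer:
 *
 *	unregister_netdev(my_dev);
 *	free_netdev(my_dev);
 */
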
Linus Torvalds1da177e2005-04-16 15:20:36 -07004346/* Synchronize with packet receive processing. */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004347void synchronize_net(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004348{
4349 might_sleep();
Paul E. McKenneyfbd568a3e2005-05-01 08:59:04 -07004350 synchronize_rcu();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004351}
4352
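/*
 * Illustrative sketch (not part of this file): a typical use is to
 * unhook a packet handler and wait out any receive path still running
 * before freeing its state. "my_ptype" and "my_state" are
 * hypothetical:
 *
 *	dev_remove_pack(&my_ptype);
 *	synchronize_net();
 *	kfree(my_state);
 */
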
4353/**
4354 * unregister_netdevice - remove device from the kernel
4355 * @dev: device
4356 *
4357 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08004358 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004359 *
4360 * Callers must hold the rtnl semaphore. You may want
4361 * unregister_netdev() instead of this.
4362 */
4363
Stephen Hemminger22f8cde2007-02-07 00:09:58 -08004364void unregister_netdevice(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004365{
Herbert Xua6620712007-12-12 19:21:56 -08004366 ASSERT_RTNL();
4367
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004368 rollback_registered(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004369 /* Finish processing unregister after unlock */
4370 net_set_todo(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004371}
4372
4373/**
4374 * unregister_netdev - remove device from the kernel
4375 * @dev: device
4376 *
4377 * This function shuts down a device interface and removes it
Wang Chend59b54b2007-12-11 02:28:03 -08004378 * from the kernel tables.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004379 *
4380 * This is just a wrapper for unregister_netdevice that takes
4381 * the rtnl semaphore. In general you want to use this and not
4382 * unregister_netdevice.
4383 */
4384void unregister_netdev(struct net_device *dev)
4385{
4386 rtnl_lock();
4387 unregister_netdevice(dev);
4388 rtnl_unlock();
4389}
4390
4391EXPORT_SYMBOL(unregister_netdev);
4392
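/*
 * Illustrative module-exit sketch (not part of this file); "my_dev"
 * is a hypothetical driver-global pointer:
 *
 *	static void __exit my_exit(void)
 *	{
 *		unregister_netdev(my_dev);
 *		free_netdev(my_dev);
 *	}
 *	module_exit(my_exit);
 */
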
Eric W. Biedermance286d32007-09-12 13:53:49 +02004393/**
4394 * dev_change_net_namespace - move device to different nethost namespace
4395 * @dev: device
4396 * @net: network namespace
4397 * @pat: if not NULL, name pattern to try if the current device name
4398 * is already taken in the destination network namespace.
4399 *
4400 * This function shuts down a device interface and moves it
4401 * to a new network namespace. On success 0 is returned, on
4402 * failure a negative errno code is returned.
4403 *
4404 * Callers must hold the rtnl semaphore.
4405 */
4406
4407int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
4408{
4409 char buf[IFNAMSIZ];
4410 const char *destname;
4411 int err;
4412
4413 ASSERT_RTNL();
4414
4415 /* Don't allow namespace local devices to be moved. */
4416 err = -EINVAL;
4417 if (dev->features & NETIF_F_NETNS_LOCAL)
4418 goto out;
4419
4420 /* Ensure the device has been registered */
4421 err = -EINVAL;
4422 if (dev->reg_state != NETREG_REGISTERED)
4423 goto out;
4424
4425 /* Get out if there is nothing to do */
4426 err = 0;
YOSHIFUJI Hideaki878628f2008-03-26 03:57:35 +09004427 if (net_eq(dev_net(dev), net))
Eric W. Biedermance286d32007-09-12 13:53:49 +02004428 goto out;
4429
4430 /* Pick the destination device name, and ensure
4431 * we can use it in the destination network namespace.
4432 */
4433 err = -EEXIST;
4434 destname = dev->name;
4435 if (__dev_get_by_name(net, destname)) {
4436 /* We get here if we can't use the current device name */
4437 if (!pat)
4438 goto out;
4439 if (!dev_valid_name(pat))
4440 goto out;
4441 if (strchr(pat, '%')) {
4442 if (__dev_alloc_name(net, pat, buf) < 0)
4443 goto out;
4444 destname = buf;
4445 } else
4446 destname = pat;
4447 if (__dev_get_by_name(net, destname))
4448 goto out;
4449 }
4450
4451 /*
4452 * And now a mini version of register_netdevice and unregister_netdevice.
4453 */
4454
4455 /* If device is running close it first. */
Pavel Emelyanov9b772652007-10-10 02:49:09 -07004456 dev_close(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004457
4458 /* And unlink it from device chain */
4459 err = -ENODEV;
4460 unlist_netdevice(dev);
4461
4462 synchronize_net();
4463
4464 /* Shutdown queueing discipline. */
4465 dev_shutdown(dev);
4466
4467 /* Notify protocols that we are about to destroy
4468 this device. They should clean up all of their state.
4469 */
4470 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
4471
4472 /*
4473 * Flush the unicast and multicast chains
4474 */
4475 dev_addr_discard(dev);
4476
4477 /* Actually switch the network namespace */
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09004478 dev_net_set(dev, net);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004479
4480 /* Assign the new device name */
4481 if (destname != dev->name)
4482 strcpy(dev->name, destname);
4483
4484 /* If there is an ifindex conflict assign a new one */
4485 if (__dev_get_by_index(net, dev->ifindex)) {
4486 int iflink = (dev->iflink == dev->ifindex);
4487 dev->ifindex = dev_new_index(net);
4488 if (iflink)
4489 dev->iflink = dev->ifindex;
4490 }
4491
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004492 /* Fixup kobjects */
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004493 netdev_unregister_kobject(dev);
4494 err = netdev_register_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004495 WARN_ON(err);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004496
4497 /* Add the device back in the hashes */
4498 list_netdevice(dev);
4499
4500 /* Notify protocols that a new device has appeared. */
4501 call_netdevice_notifiers(NETDEV_REGISTER, dev);
4502
4503 synchronize_net();
4504 err = 0;
4505out:
4506 return err;
4507}
4508
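/*
 * Illustrative sketch (not part of this file): moving a device into a
 * hypothetical namespace "other_net", with a '%d' pattern so a fresh
 * name is picked if the current one collides:
 *
 *	rtnl_lock();
 *	err = dev_change_net_namespace(dev, other_net, "eth%d");
 *	rtnl_unlock();
 */
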
Linus Torvalds1da177e2005-04-16 15:20:36 -07004509static int dev_cpu_callback(struct notifier_block *nfb,
4510 unsigned long action,
4511 void *ocpu)
4512{
4513 struct sk_buff **list_skb;
David S. Miller37437bb2008-07-16 02:15:04 -07004514 struct Qdisc **list_net;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004515 struct sk_buff *skb;
4516 unsigned int cpu, oldcpu = (unsigned long)ocpu;
4517 struct softnet_data *sd, *oldsd;
4518
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -07004519 if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004520 return NOTIFY_OK;
4521
4522 local_irq_disable();
4523 cpu = smp_processor_id();
4524 sd = &per_cpu(softnet_data, cpu);
4525 oldsd = &per_cpu(softnet_data, oldcpu);
4526
4527 /* Find end of our completion_queue. */
4528 list_skb = &sd->completion_queue;
4529 while (*list_skb)
4530 list_skb = &(*list_skb)->next;
4531 /* Append completion queue from offline CPU. */
4532 *list_skb = oldsd->completion_queue;
4533 oldsd->completion_queue = NULL;
4534
4535 /* Find end of our output_queue. */
4536 list_net = &sd->output_queue;
4537 while (*list_net)
4538 list_net = &(*list_net)->next_sched;
4539 /* Append output queue from offline CPU. */
4540 *list_net = oldsd->output_queue;
4541 oldsd->output_queue = NULL;
4542
4543 raise_softirq_irqoff(NET_TX_SOFTIRQ);
4544 local_irq_enable();
4545
4546 /* Process offline CPU's input_pkt_queue */
4547 while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
4548 netif_rx(skb);
4549
4550 return NOTIFY_OK;
4551}
Linus Torvalds1da177e2005-04-16 15:20:36 -07004552
Chris Leechdb217332006-06-17 21:24:58 -07004553#ifdef CONFIG_NET_DMA
4554/**
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004555 * net_dma_rebalance - try to maintain one DMA channel per CPU
4556 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
4557 *
4558 * This is called when the number of channels allocated to the net_dma client
4559 * changes. The net_dma client tries to have one DMA channel per CPU.
Chris Leechdb217332006-06-17 21:24:58 -07004560 */
Dan Williamsd379b012007-07-09 11:56:42 -07004561
4562static void net_dma_rebalance(struct net_dma *net_dma)
Chris Leechdb217332006-06-17 21:24:58 -07004563{
Dan Williamsd379b012007-07-09 11:56:42 -07004564 unsigned int cpu, i, n, chan_idx;
Chris Leechdb217332006-06-17 21:24:58 -07004565 struct dma_chan *chan;
4566
Dan Williamsd379b012007-07-09 11:56:42 -07004567 if (cpus_empty(net_dma->channel_mask)) {
Chris Leechdb217332006-06-17 21:24:58 -07004568 for_each_online_cpu(cpu)
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004569 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
Chris Leechdb217332006-06-17 21:24:58 -07004570 return;
4571 }
4572
4573 i = 0;
4574 cpu = first_cpu(cpu_online_map);
4575
Mike Travis0e12f842008-05-12 21:21:13 +02004576 for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07004577 chan = net_dma->channels[chan_idx];
4578
4579 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
4580 + (i < (num_online_cpus() %
4581 cpus_weight(net_dma->channel_mask)) ? 1 : 0));
Chris Leechdb217332006-06-17 21:24:58 -07004582
4583 while(n) {
Alexey Dobriyan29bbd722006-08-02 15:02:31 -07004584 per_cpu(softnet_data, cpu).net_dma = chan;
Chris Leechdb217332006-06-17 21:24:58 -07004585 cpu = next_cpu(cpu, cpu_online_map);
4586 n--;
4587 }
4588 i++;
4589 }
Chris Leechdb217332006-06-17 21:24:58 -07004590}
4591
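/*
 * Worked example of the split above (illustrative): with 8 online
 * CPUs and 3 channels set in channel_mask, 8 / 3 = 2 remainder 2, so
 * channels 0 and 1 each serve 2 + 1 = 3 CPUs and channel 2 serves 2,
 * covering all 8.
 */
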
4592/**
4593 * netdev_dma_event - event callback for the net_dma_client
4594 * @client: should always be net_dma_client
Randy Dunlapf4b8ea72006-06-22 16:00:11 -07004595 * @chan: DMA channel for the event
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07004596 * @state: DMA state to be handled
Chris Leechdb217332006-06-17 21:24:58 -07004597 */
Dan Williamsd379b012007-07-09 11:56:42 -07004598static enum dma_state_client
4599netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
4600 enum dma_state state)
Chris Leechdb217332006-06-17 21:24:58 -07004601{
Dan Williamsd379b012007-07-09 11:56:42 -07004602 int i, found = 0, pos = -1;
4603 struct net_dma *net_dma =
4604 container_of(client, struct net_dma, client);
4605 enum dma_state_client ack = DMA_DUP; /* default: take no action */
4606
4607 spin_lock(&net_dma->lock);
4608 switch (state) {
4609 case DMA_RESOURCE_AVAILABLE:
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004610 for (i = 0; i < nr_cpu_ids; i++)
Dan Williamsd379b012007-07-09 11:56:42 -07004611 if (net_dma->channels[i] == chan) {
4612 found = 1;
4613 break;
4614 } else if (net_dma->channels[i] == NULL && pos < 0)
4615 pos = i;
4616
4617 if (!found && pos >= 0) {
4618 ack = DMA_ACK;
4619 net_dma->channels[pos] = chan;
4620 cpu_set(pos, net_dma->channel_mask);
4621 net_dma_rebalance(net_dma);
4622 }
Chris Leechdb217332006-06-17 21:24:58 -07004623 break;
4624 case DMA_RESOURCE_REMOVED:
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004625 for (i = 0; i < nr_cpu_ids; i++)
Dan Williamsd379b012007-07-09 11:56:42 -07004626 if (net_dma->channels[i] == chan) {
4627 found = 1;
4628 pos = i;
4629 break;
4630 }
4631
4632 if (found) {
4633 ack = DMA_ACK;
4634 cpu_clear(pos, net_dma->channel_mask);
4635 net_dma->channels[pos] = NULL;
4636 net_dma_rebalance(net_dma);
4637 }
Chris Leechdb217332006-06-17 21:24:58 -07004638 break;
4639 default:
4640 break;
4641 }
Dan Williamsd379b012007-07-09 11:56:42 -07004642 spin_unlock(&net_dma->lock);
4643
4644 return ack;
Chris Leechdb217332006-06-17 21:24:58 -07004645}
4646
4647/**
4648 * netdev_dma_register - register the networking subsystem as a DMA client
4649 */
4650static int __init netdev_dma_register(void)
4651{
Mike Travis0c0b0ac2008-05-02 16:43:08 -07004652 net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
4653 GFP_KERNEL);
4654 if (unlikely(!net_dma.channels)) {
4655 printk(KERN_NOTICE
4656 "netdev_dma: no memory for net_dma.channels\n");
4657 return -ENOMEM;
4658 }
Dan Williamsd379b012007-07-09 11:56:42 -07004659 spin_lock_init(&net_dma.lock);
4660 dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
4661 dma_async_client_register(&net_dma.client);
4662 dma_async_client_chan_request(&net_dma.client);
Chris Leechdb217332006-06-17 21:24:58 -07004663 return 0;
4664}
4665
4666#else
4667static int __init netdev_dma_register(void) { return -ENODEV; }
4668#endif /* CONFIG_NET_DMA */
Linus Torvalds1da177e2005-04-16 15:20:36 -07004669
Herbert Xu7f353bf2007-08-10 15:47:58 -07004670/**
4671 * netdev_compute_features - compute conjunction of two feature sets
4672 * @all: first feature set
4673 * @one: second feature set
4674 *
4675 * Computes a new feature set after adding a device with feature set
4676 * @one to the master device with current feature set @all. Returns
4677 * the new feature set.
4678 */
4679int netdev_compute_features(unsigned long all, unsigned long one)
4680{
4681 /* if device needs checksumming, downgrade to hw checksumming */
4682 if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4683 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4684
4685 /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4686 if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4687 all ^= NETIF_F_HW_CSUM
4688 | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4689
4690 if (one & NETIF_F_GSO)
4691 one |= NETIF_F_GSO_SOFTWARE;
4692 one |= NETIF_F_GSO;
4693
Herbert Xue2a6b852008-09-08 16:10:02 -07004694 /*
4695 * If even one device supports a GSO protocol with software fallback,
4696 * enable it for all.
4697 */
4698 all |= one & NETIF_F_GSO_SOFTWARE;
4699
Herbert Xu7f353bf2007-08-10 15:47:58 -07004700 /* If even one device supports robust GSO, enable it for all. */
4701 if (one & NETIF_F_GSO_ROBUST)
4702 all |= NETIF_F_GSO_ROBUST;
4703
4704 all &= one | NETIF_F_LLTX;
4705
4706 if (!(all & NETIF_F_ALL_CSUM))
4707 all &= ~NETIF_F_SG;
4708 if (!(all & NETIF_F_SG))
4709 all &= ~NETIF_F_GSO_MASK;
4710
4711 return all;
4712}
4713EXPORT_SYMBOL(netdev_compute_features);
4714
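/*
 * Worked example (illustrative, assuming the usual NETIF_F_* bit
 * definitions): joining a slave with @one = NETIF_F_IP_CSUM |
 * NETIF_F_SG to a master with @all = NETIF_F_NO_CSUM | NETIF_F_SG |
 * NETIF_F_TSO first downgrades NO_CSUM to HW_CSUM, then HW_CSUM to
 * IP_CSUM | IPV6_CSUM; the final mask against @one | NETIF_F_LLTX
 * leaves NETIF_F_IP_CSUM | NETIF_F_SG.
 */
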
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004715static struct hlist_head *netdev_create_hash(void)
4716{
4717 int i;
4718 struct hlist_head *hash;
4719
4720 hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
4721 if (hash != NULL)
4722 for (i = 0; i < NETDEV_HASHENTRIES; i++)
4723 INIT_HLIST_HEAD(&hash[i]);
4724
4725 return hash;
4726}
4727
Eric W. Biederman881d9662007-09-17 11:56:21 -07004728/* Initialize per network namespace state */
Pavel Emelyanov46650792007-10-08 20:38:39 -07004729static int __net_init netdev_init(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004730{
Eric W. Biederman881d9662007-09-17 11:56:21 -07004731 INIT_LIST_HEAD(&net->dev_base_head);
Eric W. Biederman881d9662007-09-17 11:56:21 -07004732
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004733 net->dev_name_head = netdev_create_hash();
4734 if (net->dev_name_head == NULL)
4735 goto err_name;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004736
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004737 net->dev_index_head = netdev_create_hash();
4738 if (net->dev_index_head == NULL)
4739 goto err_idx;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004740
4741 return 0;
Pavel Emelyanov30d97d32007-09-16 15:40:33 -07004742
4743err_idx:
4744 kfree(net->dev_name_head);
4745err_name:
4746 return -ENOMEM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07004747}
4748
Arjan van de Ven6579e572008-07-21 13:31:48 -07004749char *netdev_drivername(struct net_device *dev, char *buffer, int len)
4750{
4751 struct device_driver *driver;
4752 struct device *parent;
4753
4754 if (len <= 0 || !buffer)
4755 return buffer;
4756 buffer[0] = 0;
4757
4758 parent = dev->dev.parent;
4759
4760 if (!parent)
4761 return buffer;
4762
4763 driver = parent->driver;
4764 if (driver && driver->name)
4765 strlcpy(buffer, driver->name, len);
4766 return buffer;
4767}
4768
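/*
 * Illustrative sketch (not part of this file): the transmit watchdog
 * uses this helper to name the offending driver in its timeout
 * message, roughly:
 *
 *	char drivername[64];
 *	printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
 *	       dev->name, netdev_drivername(dev, drivername, 64));
 */
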
Pavel Emelyanov46650792007-10-08 20:38:39 -07004769static void __net_exit netdev_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07004770{
4771 kfree(net->dev_name_head);
4772 kfree(net->dev_index_head);
4773}
4774
Denis V. Lunev022cbae2007-11-13 03:23:50 -08004775static struct pernet_operations __net_initdata netdev_net_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07004776 .init = netdev_init,
4777 .exit = netdev_exit,
4778};
4779
Pavel Emelyanov46650792007-10-08 20:38:39 -07004780static void __net_exit default_device_exit(struct net *net)
Eric W. Biedermance286d32007-09-12 13:53:49 +02004781{
4782 struct net_device *dev, *next;
4783 /*
4784 * Push all migratable network devices back to the
4785 * initial network namespace
4786 */
4787 rtnl_lock();
4788 for_each_netdev_safe(net, dev, next) {
4789 int err;
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004790 char fb_name[IFNAMSIZ];
Eric W. Biedermance286d32007-09-12 13:53:49 +02004791
4792 /* Ignore unmovable devices (e.g. loopback) */
4793 if (dev->features & NETIF_F_NETNS_LOCAL)
4794 continue;
4795
4796 /* Push remaining network devices to init_net */
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004797 snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
4798 err = dev_change_net_namespace(dev, &init_net, fb_name);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004799 if (err) {
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004800 printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
Eric W. Biedermance286d32007-09-12 13:53:49 +02004801 __func__, dev->name, err);
Pavel Emelyanovaca51392008-05-08 01:24:25 -07004802 BUG();
Eric W. Biedermance286d32007-09-12 13:53:49 +02004803 }
4804 }
4805 rtnl_unlock();
4806}
4807
Denis V. Lunev022cbae2007-11-13 03:23:50 -08004808static struct pernet_operations __net_initdata default_device_ops = {
Eric W. Biedermance286d32007-09-12 13:53:49 +02004809 .exit = default_device_exit,
4810};
4811
Linus Torvalds1da177e2005-04-16 15:20:36 -07004812/*
4813 * Initialize the DEV module. At boot time this walks the device list and
4814 * unhooks any devices that fail to initialise (normally hardware not
4815 * present) and leaves us with a valid list of present and active devices.
4816 *
4817 */
4818
4819/*
4820 * This is called single threaded during boot, so no need
4821 * to take the rtnl semaphore.
4822 */
4823static int __init net_dev_init(void)
4824{
4825 int i, rc = -ENOMEM;
4826
4827 BUG_ON(!dev_boot_phase);
4828
Linus Torvalds1da177e2005-04-16 15:20:36 -07004829 if (dev_proc_init())
4830 goto out;
4831
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004832 if (netdev_kobject_init())
Linus Torvalds1da177e2005-04-16 15:20:36 -07004833 goto out;
4834
4835 INIT_LIST_HEAD(&ptype_all);
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08004836 for (i = 0; i < PTYPE_HASH_SIZE; i++)
Linus Torvalds1da177e2005-04-16 15:20:36 -07004837 INIT_LIST_HEAD(&ptype_base[i]);
4838
Eric W. Biederman881d9662007-09-17 11:56:21 -07004839 if (register_pernet_subsys(&netdev_net_ops))
4840 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004841
Eric W. Biedermance286d32007-09-12 13:53:49 +02004842 if (register_pernet_device(&default_device_ops))
4843 goto out;
4844
Linus Torvalds1da177e2005-04-16 15:20:36 -07004845 /*
4846 * Initialise the packet receive queues.
4847 */
4848
KAMEZAWA Hiroyuki6f912042006-04-10 22:52:50 -07004849 for_each_possible_cpu(i) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07004850 struct softnet_data *queue;
4851
4852 queue = &per_cpu(softnet_data, i);
4853 skb_queue_head_init(&queue->input_pkt_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004854 queue->completion_queue = NULL;
4855 INIT_LIST_HEAD(&queue->poll_list);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07004856
4857 queue->backlog.poll = process_backlog;
4858 queue->backlog.weight = weight_p;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004859 }
4860
Chris Leechdb217332006-06-17 21:24:58 -07004861 netdev_dma_register();
4862
Linus Torvalds1da177e2005-04-16 15:20:36 -07004863 dev_boot_phase = 0;
4864
Carlos R. Mafra962cf362008-05-15 11:15:37 -03004865 open_softirq(NET_TX_SOFTIRQ, net_tx_action);
4866 open_softirq(NET_RX_SOFTIRQ, net_rx_action);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004867
4868 hotcpu_notifier(dev_cpu_callback, 0);
4869 dst_init();
4870 dev_mcast_init();
4871 rc = 0;
4872out:
4873 return rc;
4874}
4875
4876subsys_initcall(net_dev_init);
4877
4878EXPORT_SYMBOL(__dev_get_by_index);
4879EXPORT_SYMBOL(__dev_get_by_name);
4880EXPORT_SYMBOL(__dev_remove_pack);
Mitch Williamsc2373ee2005-11-09 10:34:45 -08004881EXPORT_SYMBOL(dev_valid_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004882EXPORT_SYMBOL(dev_add_pack);
4883EXPORT_SYMBOL(dev_alloc_name);
4884EXPORT_SYMBOL(dev_close);
4885EXPORT_SYMBOL(dev_get_by_flags);
4886EXPORT_SYMBOL(dev_get_by_index);
4887EXPORT_SYMBOL(dev_get_by_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004888EXPORT_SYMBOL(dev_open);
4889EXPORT_SYMBOL(dev_queue_xmit);
4890EXPORT_SYMBOL(dev_remove_pack);
4891EXPORT_SYMBOL(dev_set_allmulti);
4892EXPORT_SYMBOL(dev_set_promiscuity);
4893EXPORT_SYMBOL(dev_change_flags);
4894EXPORT_SYMBOL(dev_set_mtu);
4895EXPORT_SYMBOL(dev_set_mac_address);
4896EXPORT_SYMBOL(free_netdev);
4897EXPORT_SYMBOL(netdev_boot_setup_check);
4898EXPORT_SYMBOL(netdev_set_master);
4899EXPORT_SYMBOL(netdev_state_change);
4900EXPORT_SYMBOL(netif_receive_skb);
4901EXPORT_SYMBOL(netif_rx);
4902EXPORT_SYMBOL(register_gifconf);
4903EXPORT_SYMBOL(register_netdevice);
4904EXPORT_SYMBOL(register_netdevice_notifier);
4905EXPORT_SYMBOL(skb_checksum_help);
4906EXPORT_SYMBOL(synchronize_net);
4907EXPORT_SYMBOL(unregister_netdevice);
4908EXPORT_SYMBOL(unregister_netdevice_notifier);
4909EXPORT_SYMBOL(net_enable_timestamp);
4910EXPORT_SYMBOL(net_disable_timestamp);
4911EXPORT_SYMBOL(dev_get_flags);
4912
4913#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4914EXPORT_SYMBOL(br_handle_frame_hook);
4915EXPORT_SYMBOL(br_fdb_get_hook);
4916EXPORT_SYMBOL(br_fdb_put_hook);
4917#endif
4918
4919#ifdef CONFIG_KMOD
4920EXPORT_SYMBOL(dev_load);
4921#endif
4922
4923EXPORT_PER_CPU_SYMBOL(softnet_data);