/*
 * 	NET3	Protocol independent device support routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 *	Derived from the non IP parts of dev.c 1.0.19
 * 		Authors:	Ross Biro
 *				Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *				Mark Evans, <evansmp@uhura.aston.ac.uk>
 *
 *	Additional Authors:
 *		Florian la Roche <rzsfl@rz.uni-sb.de>
 *		Alan Cox <gw4pts@gw4pts.ampr.org>
 *		David Hinds <dahinds@users.sourceforge.net>
 *		Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
 *		Adam Sulmicki <adam@cfar.umd.edu>
 *		Pekka Riikonen <priikone@poesidon.pspt.fi>
 *
 *	Changes:
 *		D.J. Barrow	:	Fixed bug where dev->refcnt gets set
 *					to 2 if register_netdev gets called
 *					before net_dev_init & also removed a
 *					few lines of code in the process.
 *		Alan Cox	:	device private ioctl copies fields back.
 *		Alan Cox	:	Transmit queue code does relevant
 *					stunts to keep the queue safe.
 *		Alan Cox	:	Fixed double lock.
 *		Alan Cox	:	Fixed promisc NULL pointer trap
 *		????????	:	Support the full private ioctl range
 *		Alan Cox	:	Moved ioctl permission check into
 *					drivers
 *		Tim Kordas	:	SIOCADDMULTI/SIOCDELMULTI
 *		Alan Cox	:	100 backlog just doesn't cut it when
 *					you start doing multicast video 8)
 *		Alan Cox	:	Rewrote net_bh and list manager.
 *		Alan Cox	:	Fix ETH_P_ALL echoback lengths.
 *		Alan Cox	:	Took out transmit every packet pass
 *					Saved a few bytes in the ioctl handler
 *		Alan Cox	:	Network driver sets packet type before
 *					calling netif_rx. Saves a function
 *					call a packet.
 *		Alan Cox	:	Hashed net_bh()
 *		Richard Kooijman:	Timestamp fixes.
 *		Alan Cox	:	Wrong field in SIOCGIFDSTADDR
 *		Alan Cox	:	Device lock protection.
 *		Alan Cox	:	Fixed nasty side effect of device close
 *					changes.
 *		Rudi Cilibrasi	:	Pass the right thing to
 *					set_mac_address()
 *		Dave Miller	:	32bit quantity for the device lock to
 *					make it work out on a Sparc.
 *		Bjorn Ekwall	:	Added KERNELD hack.
 *		Alan Cox	:	Cleaned up the backlog initialise.
 *		Craig Metz	:	SIOCGIFCONF fix if space for under
 *					1 device.
 *		Thomas Bogendoerfer :	Return ENODEV for dev_open, if there
 *					is no device open function.
 *		Andi Kleen	:	Fix error reporting for SIOCGIFCONF
 *		Michael Chastain :	Fix signed/unsigned for SIOCGIFCONF
 *		Cyrus Durgin	:	Cleaned for KMOD
 *		Adam Sulmicki	:	Bug Fix : Network Device Unload
 *					A network device unload needs to purge
 *					the backlog queue.
 *		Paul Rusty Russell :	SIOCSIFNAME
 *		Pekka Riikonen	:	Netdev boot-time settings code
 *		Andrew Morton	:	Make unregister_netdevice wait
 *					indefinitely on dev->refcnt
 *		J Hadi Salim	:	- Backlog queue sampling
 *					- netif_rx() feedback
 */

#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/capability.h>
#include <linux/cpu.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
#include <linux/notifier.h>
#include <linux/skbuff.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <linux/rtnetlink.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stat.h>
#include <linux/if_bridge.h>
#include <linux/if_macvlan.h>
#include <net/dst.h>
#include <net/pkt_sched.h>
#include <net/checksum.h>
#include <linux/highmem.h>
#include <linux/init.h>
#include <linux/kmod.h>
#include <linux/module.h>
#include <linux/netpoll.h>
#include <linux/rcupdate.h>
#include <linux/delay.h>
#include <net/wext.h>
#include <net/iw_handler.h>
#include <asm/current.h>
#include <linux/audit.h>
#include <linux/dmaengine.h>
#include <linux/err.h>
#include <linux/ctype.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/ip.h>
#include <net/ip.h>
#include <linux/ipv6.h>
#include <linux/in.h>
#include <linux/jhash.h>
#include <linux/random.h>

#include "net-sysfs.h"

/*
 *	The list of packet types we will receive (as opposed to discard)
 *	and the routines to invoke.
 *
 *	Why 16. Because with 16 the only overlap we get on a hash of the
 *	low nibble of the protocol value is RARP/SNAP/X.25.
 *
 *	NOTE:  That is no longer true with the addition of VLAN tags.  Not
 *	       sure which should go first, but I bet it won't make much
 *	       difference if we are running VLANs.  The good news is that
 *	       this protocol won't be in the list unless compiled in, so
 *	       the average user (w/out VLANs) will not be adversely affected.
 *	       --BLG
 *
 *		0800	IP
 *		8100	802.1Q VLAN
 *		0001	802.3
 *		0002	AX.25
 *		0004	802.2
 *		8035	RARP
 *		0005	SNAP
 *		0805	X.25
 *		0806	ARP
 *		8137	IPX
 *		0009	Localtalk
 *		86DD	IPv6
 */

#define PTYPE_HASH_SIZE	(16)
#define PTYPE_HASH_MASK	(PTYPE_HASH_SIZE - 1)

static DEFINE_SPINLOCK(ptype_lock);
static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
static struct list_head ptype_all __read_mostly;	/* Taps */

#ifdef CONFIG_NET_DMA
struct net_dma {
	struct dma_client client;
	spinlock_t lock;
	cpumask_t channel_mask;
	struct dma_chan **channels;
};

static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state);

static struct net_dma net_dma = {
	.client = {
		.event_callback = netdev_dma_event,
	},
};
#endif

/*
 * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 * semaphore.
 *
 * Pure readers hold dev_base_lock for reading.
 *
 * Writers must hold the rtnl semaphore while they loop through the
 * dev_base_head list, and hold dev_base_lock for writing when they do the
 * actual updates.  This allows pure readers to access the list even
 * while a writer is preparing to update it.
 *
 * To put it another way, dev_base_lock is held for writing only to
 * protect against pure readers; the rtnl semaphore provides the
 * protection against other writers.
 *
 * See, for example usages, register_netdevice() and
 * unregister_netdevice(), which must be called with the rtnl
 * semaphore held.
 */
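
/*
 * For illustration only (a sketch, not part of the original file): a
 * pure reader following the rules above would walk the list like this;
 * do_something() is a hypothetical helper.
 *
 *	struct net_device *dev;
 *
 *	read_lock(&dev_base_lock);
 *	for_each_netdev(net, dev)
 *		do_something(dev);
 *	read_unlock(&dev_base_lock);
 */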
DEFINE_RWLOCK(dev_base_lock);

EXPORT_SYMBOL(dev_base_lock);

#define NETDEV_HASHBITS	8
#define NETDEV_HASHENTRIES (1 << NETDEV_HASHBITS)

static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
	unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
	return &net->dev_name_head[hash & ((1 << NETDEV_HASHBITS) - 1)];
}

static inline struct hlist_head *dev_index_hash(struct net *net, int ifindex)
{
	return &net->dev_index_head[ifindex & ((1 << NETDEV_HASHBITS) - 1)];
}

/* Device list insertion */
static int list_netdevice(struct net_device *dev)
{
	struct net *net = dev_net(dev);

	ASSERT_RTNL();

	write_lock_bh(&dev_base_lock);
	list_add_tail(&dev->dev_list, &net->dev_base_head);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	hlist_add_head(&dev->index_hlist, dev_index_hash(net, dev->ifindex));
	write_unlock_bh(&dev_base_lock);
	return 0;
}

/* Device list removal */
static void unlist_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	/* Unlink dev from the device chain */
	write_lock_bh(&dev_base_lock);
	list_del(&dev->dev_list);
	hlist_del(&dev->name_hlist);
	hlist_del(&dev->index_hlist);
	write_unlock_bh(&dev_base_lock);
}

/*
 *	Our notifier list
 */

static RAW_NOTIFIER_HEAD(netdev_chain);

/*
 *	Device drivers call our routines to queue packets here. We empty the
 *	queue in the local softnet handler.
 */

DEFINE_PER_CPU(struct softnet_data, softnet_data);

#ifdef CONFIG_LOCKDEP
/*
 * register_netdevice() inits txq->_xmit_lock and sets lockdep class
 * according to dev->type
 */
static const unsigned short netdev_lock_type[] =
	{ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
	 ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
	 ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
	 ARPHRD_NONE};

static const char *netdev_lock_name[] =
	{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
	 "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
	 "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
	 "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
	 "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
	 "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
	 "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
	 "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
	 "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
	 "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
	 "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
	 "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
	 "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
	 "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
	 "_xmit_NONE"};

static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];

static inline unsigned short netdev_lock_pos(unsigned short dev_type)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
		if (netdev_lock_type[i] == dev_type)
			return i;
	/* the last key is used by default */
	return ARRAY_SIZE(netdev_lock_type) - 1;
}

static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
	int i;

	i = netdev_lock_pos(dev_type);
	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
				   netdev_lock_name[i]);
}

static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
	int i;

	i = netdev_lock_pos(dev->type);
	lockdep_set_class_and_name(&dev->addr_list_lock,
				   &netdev_addr_lock_key[i],
				   netdev_lock_name[i]);
}
#else
static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
						 unsigned short dev_type)
{
}
static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
{
}
#endif

/*******************************************************************************

		Protocol management and registration routines

*******************************************************************************/

/*
 *	Add a protocol ID to the list. Now that the input handler is
 *	smarter we can dispense with all the messy stuff that used to be
 *	here.
 *
 *	BEWARE!!! Protocol handlers, mangling input packets,
 *	MUST BE last in hash buckets and checking protocol handlers
 *	MUST start from promiscuous ptype_all chain in net_bh.
 *	It is true now, do not change it.
 *	Explanation follows: if protocol handler, mangling packet, will
 *	be the first on list, it is not able to sense, that packet
 *	is cloned and should be copied-on-write, so that it will
 *	change it and subsequent readers will get broken packet.
 *							--ANK (980803)
 */

/**
 *	dev_add_pack - add packet handler
 *	@pt: packet type declaration
 *
 *	Add a protocol handler to the networking stack. The passed &packet_type
 *	is linked into kernel lists and may not be freed until it has been
 *	removed from the kernel lists.
 *
 *	This call does not sleep, therefore it cannot guarantee that
 *	all CPUs that are in the middle of receiving packets
 *	will see the new packet type (until the next received packet).
 */

void dev_add_pack(struct packet_type *pt)
{
	int hash;

	spin_lock_bh(&ptype_lock);
	if (pt->type == htons(ETH_P_ALL))
		list_add_rcu(&pt->list, &ptype_all);
	else {
		hash = ntohs(pt->type) & PTYPE_HASH_MASK;
		list_add_rcu(&pt->list, &ptype_base[hash]);
	}
	spin_unlock_bh(&ptype_lock);
}
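
/*
 * Usage sketch (illustrative, not part of this file): a protocol module
 * typically registers its receive handler roughly as below; my_rcv and
 * my_packet_type are hypothetical names.
 *
 *	static int my_rcv(struct sk_buff *skb, struct net_device *dev,
 *			  struct packet_type *pt, struct net_device *orig_dev)
 *	{
 *		... process or inspect skb ...
 *		kfree_skb(skb);
 *		return NET_RX_SUCCESS;
 *	}
 *
 *	static struct packet_type my_packet_type = {
 *		.type = __constant_htons(ETH_P_IP),
 *		.func = my_rcv,
 *	};
 *
 *	dev_add_pack(&my_packet_type);
 *
 * Leaving .dev NULL taps every device; the handler must treat the skb
 * as shared (see the --ANK note above).
 */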
394
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395/**
396 * __dev_remove_pack - remove packet handler
397 * @pt: packet type declaration
398 *
399 * Remove a protocol handler that was previously added to the kernel
400 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
401 * from the kernel lists and can be freed or reused once this function
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900402 * returns.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 *
404 * The packet type might still be in use by receivers
405 * and must not be freed until after all the CPU's have gone
406 * through a quiescent state.
407 */
408void __dev_remove_pack(struct packet_type *pt)
409{
410 struct list_head *head;
411 struct packet_type *pt1;
412
413 spin_lock_bh(&ptype_lock);
414
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700415 if (pt->type == htons(ETH_P_ALL))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416 head = &ptype_all;
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -0700417 else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +0800418 head = &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419
420 list_for_each_entry(pt1, head, list) {
421 if (pt == pt1) {
422 list_del_rcu(&pt->list);
423 goto out;
424 }
425 }
426
427 printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
428out:
429 spin_unlock_bh(&ptype_lock);
430}
431/**
432 * dev_remove_pack - remove packet handler
433 * @pt: packet type declaration
434 *
435 * Remove a protocol handler that was previously added to the kernel
436 * protocol handlers by dev_add_pack(). The passed &packet_type is removed
437 * from the kernel lists and can be freed or reused once this function
438 * returns.
439 *
440 * This call sleeps to guarantee that no CPU is looking at the packet
441 * type after return.
442 */
443void dev_remove_pack(struct packet_type *pt)
444{
445 __dev_remove_pack(pt);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +0900446
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447 synchronize_net();
448}

/******************************************************************************

		      Device Boot-time Settings Routines

*******************************************************************************/

/* Boot time configuration table */
static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];

/**
 *	netdev_boot_setup_add - add new setup entry
 *	@name: name of the device
 *	@map: configured settings for the device
 *
 *	Adds new setup entry to the dev_boot_setup list.  The function
 *	returns 0 on error and 1 on success.  This is a generic routine for
 *	all netdevices.
 */
static int netdev_boot_setup_add(char *name, struct ifmap *map)
{
	struct netdev_boot_setup *s;
	int i;

	s = dev_boot_setup;
	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
			memset(s[i].name, 0, sizeof(s[i].name));
			strlcpy(s[i].name, name, IFNAMSIZ);
			memcpy(&s[i].map, map, sizeof(s[i].map));
			break;
		}
	}

	return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
}

/**
 *	netdev_boot_setup_check - check boot time settings
 *	@dev: the netdevice
 *
 *	Check boot time settings for the device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found, 1 if they are.
 */
int netdev_boot_setup_check(struct net_device *dev)
{
	struct netdev_boot_setup *s = dev_boot_setup;
	int i;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
		if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
		    !strcmp(dev->name, s[i].name)) {
			dev->irq	= s[i].map.irq;
			dev->base_addr	= s[i].map.base_addr;
			dev->mem_start	= s[i].map.mem_start;
			dev->mem_end	= s[i].map.mem_end;
			return 1;
		}
	}
	return 0;
}


/**
 *	netdev_boot_base - get address from boot time settings
 *	@prefix: prefix for network device
 *	@unit: id for network device
 *
 *	Check boot time settings for the base address of device.
 *	The found settings are set for the device to be used
 *	later in the device probing.
 *	Returns 0 if no settings are found.
 */
unsigned long netdev_boot_base(const char *prefix, int unit)
{
	const struct netdev_boot_setup *s = dev_boot_setup;
	char name[IFNAMSIZ];
	int i;

	sprintf(name, "%s%d", prefix, unit);

	/*
	 * If device already registered then return base of 1
	 * to indicate not to probe for this interface
	 */
	if (__dev_get_by_name(&init_net, name))
		return 1;

	for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
		if (!strcmp(name, s[i].name))
			return s[i].map.base_addr;
	return 0;
}

/*
 * Saves at boot time configured settings for any netdevice.
 */
int __init netdev_boot_setup(char *str)
{
	int ints[5];
	struct ifmap map;

	str = get_options(str, ARRAY_SIZE(ints), ints);
	if (!str || !*str)
		return 0;

	/* Save settings */
	memset(&map, 0, sizeof(map));
	if (ints[0] > 0)
		map.irq = ints[1];
	if (ints[0] > 1)
		map.base_addr = ints[2];
	if (ints[0] > 2)
		map.mem_start = ints[3];
	if (ints[0] > 3)
		map.mem_end = ints[4];

	/* Add new entry to the list */
	return netdev_boot_setup_add(str, &map);
}

__setup("netdev=", netdev_boot_setup);
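
/*
 * The accepted option format, matching the get_options() parse above,
 * is up to four integers followed by the device name; the values below
 * are illustrative only:
 *
 *	netdev=5,0x340,0,0,eth0
 *
 * which records IRQ 5 and I/O base 0x340 for eth0 before it is probed.
 */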

/*******************************************************************************

		Device Interface Subroutines

*******************************************************************************/

/**
 *	__dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. Must be called under RTNL semaphore
 *	or @dev_base_lock. If the name is found a pointer to the device
 *	is returned. If the name is not found then %NULL is returned. The
 *	reference counters are not incremented so the caller must be
 *	careful with locks.
 */

struct net_device *__dev_get_by_name(struct net *net, const char *name)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_name_hash(net, name)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, name_hlist);
		if (!strncmp(dev->name, name, IFNAMSIZ))
			return dev;
	}
	return NULL;
}

/**
 *	dev_get_by_name - find a device by its name
 *	@net: the applicable net namespace
 *	@name: name to find
 *
 *	Find an interface by name. This can be called from any
 *	context and does its own locking. The returned handle has
 *	the usage count incremented and the caller must use dev_put() to
 *	release it when it is no longer needed. %NULL is returned if no
 *	matching device is found.
 */

struct net_device *dev_get_by_name(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}
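
/*
 * Usage sketch (illustrative): a caller that needs the device beyond
 * the lookup takes the reference returned here and drops it when done.
 * The "eth0" name is just an example.
 *
 *	struct net_device *dev = dev_get_by_name(&init_net, "eth0");
 *
 *	if (dev) {
 *		... use dev ...
 *		dev_put(dev);
 *	}
 */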

/**
 *	__dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns %NULL if the device
 *	is not found or a pointer to the device. The device has not
 *	had its reference counter increased so the caller must be careful
 *	about locking. The caller must hold either the RTNL semaphore
 *	or @dev_base_lock.
 */

struct net_device *__dev_get_by_index(struct net *net, int ifindex)
{
	struct hlist_node *p;

	hlist_for_each(p, dev_index_hash(net, ifindex)) {
		struct net_device *dev
			= hlist_entry(p, struct net_device, index_hlist);
		if (dev->ifindex == ifindex)
			return dev;
	}
	return NULL;
}


/**
 *	dev_get_by_index - find a device by its ifindex
 *	@net: the applicable net namespace
 *	@ifindex: index of device
 *
 *	Search for an interface by index. Returns NULL if the device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_index(struct net *net, int ifindex)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_index(net, ifindex);
	if (dev)
		dev_hold(dev);
	read_unlock(&dev_base_lock);
	return dev;
}

/**
 *	dev_getbyhwaddr - find a device by its hardware address
 *	@net: the applicable net namespace
 *	@type: media type of device
 *	@ha: hardware address
 *
 *	Search for an interface by MAC address. Returns NULL if the device
 *	is not found or a pointer to the device. The caller must hold the
 *	rtnl semaphore. The returned device has not had its ref count increased
 *	and the caller must therefore be careful about locking.
 *
 *	BUGS:
 *	If the API was consistent this would be __dev_get_by_hwaddr
 */

struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
{
	struct net_device *dev;

	ASSERT_RTNL();

	for_each_netdev(net, dev)
		if (dev->type == type &&
		    !memcmp(dev->dev_addr, ha, dev->addr_len))
			return dev;

	return NULL;
}

EXPORT_SYMBOL(dev_getbyhwaddr);

struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	ASSERT_RTNL();
	for_each_netdev(net, dev)
		if (dev->type == type)
			return dev;

	return NULL;
}

EXPORT_SYMBOL(__dev_getfirstbyhwtype);

struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
{
	struct net_device *dev;

	rtnl_lock();
	dev = __dev_getfirstbyhwtype(net, type);
	if (dev)
		dev_hold(dev);
	rtnl_unlock();
	return dev;
}

EXPORT_SYMBOL(dev_getfirstbyhwtype);

/**
 *	dev_get_by_flags - find any device with given flags
 *	@net: the applicable net namespace
 *	@if_flags: IFF_* values
 *	@mask: bitmask of bits in if_flags to check
 *
 *	Search for any interface with the given flags. Returns NULL if a device
 *	is not found or a pointer to the device. The device returned has
 *	had a reference added and the pointer is safe until the user calls
 *	dev_put to indicate they have finished with it.
 */

struct net_device *dev_get_by_flags(struct net *net, unsigned short if_flags, unsigned short mask)
{
	struct net_device *dev, *ret;

	ret = NULL;
	read_lock(&dev_base_lock);
	for_each_netdev(net, dev) {
		if (((dev->flags ^ if_flags) & mask) == 0) {
			dev_hold(dev);
			ret = dev;
			break;
		}
	}
	read_unlock(&dev_base_lock);
	return ret;
}

/**
 *	dev_valid_name - check if name is okay for network device
 *	@name: name string
 *
 *	Network device names need to be valid file names to
 *	allow sysfs to work.  We also disallow any kind of
 *	whitespace.
 */
int dev_valid_name(const char *name)
{
	if (*name == '\0')
		return 0;
	if (strlen(name) >= IFNAMSIZ)
		return 0;
	if (!strcmp(name, ".") || !strcmp(name, ".."))
		return 0;

	while (*name) {
		if (*name == '/' || isspace(*name))
			return 0;
		name++;
	}
	return 1;
}

/**
 *	__dev_alloc_name - allocate a name for a device
 *	@net: network namespace to allocate the device name in
 *	@name: name format string
 *	@buf:  scratch buffer and result name string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

static int __dev_alloc_name(struct net *net, const char *name, char *buf)
{
	int i = 0;
	const char *p;
	const int max_netdevices = 8*PAGE_SIZE;
	unsigned long *inuse;
	struct net_device *d;

	p = strnchr(name, IFNAMSIZ-1, '%');
	if (p) {
		/*
		 * Verify the string as this thing may have come from
		 * the user.  There must be either one "%d" and no other "%"
		 * characters.
		 */
		if (p[1] != 'd' || strchr(p + 2, '%'))
			return -EINVAL;

		/* Use one page as a bit array of possible slots */
		inuse = (unsigned long *) get_zeroed_page(GFP_ATOMIC);
		if (!inuse)
			return -ENOMEM;

		for_each_netdev(net, d) {
			if (!sscanf(d->name, name, &i))
				continue;
			if (i < 0 || i >= max_netdevices)
				continue;

			/* avoid cases where sscanf is not exact inverse of printf */
			snprintf(buf, IFNAMSIZ, name, i);
			if (!strncmp(buf, d->name, IFNAMSIZ))
				set_bit(i, inuse);
		}

		i = find_first_zero_bit(inuse, max_netdevices);
		free_page((unsigned long) inuse);
	}

	snprintf(buf, IFNAMSIZ, name, i);
	if (!__dev_get_by_name(net, buf))
		return i;

	/* It is possible to run out of possible slots
	 * when the name is long and there isn't enough space left
	 * for the digits, or if all bits are used.
	 */
	return -ENFILE;
}

/**
 *	dev_alloc_name - allocate a name for a device
 *	@dev: device
 *	@name: name format string
 *
 *	Passed a format string - eg "lt%d" it will try and find a suitable
 *	id. It scans list of devices to build up a free map, then chooses
 *	the first empty slot. The caller must hold the dev_base or rtnl lock
 *	while allocating the name and adding the device in order to avoid
 *	duplicates.
 *	Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 *	Returns the number of the unit assigned or a negative errno code.
 */

int dev_alloc_name(struct net_device *dev, const char *name)
{
	char buf[IFNAMSIZ];
	struct net *net;
	int ret;

	BUG_ON(!dev_net(dev));
	net = dev_net(dev);
	ret = __dev_alloc_name(net, name, buf);
	if (ret >= 0)
		strlcpy(dev->name, buf, IFNAMSIZ);
	return ret;
}
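
/*
 * Usage sketch (illustrative): drivers commonly pick the next free unit
 * number for a name pattern; the error label is hypothetical.
 *
 *	err = dev_alloc_name(dev, "eth%d");
 *	if (err < 0)
 *		goto out;
 *
 * On success dev->name holds something like "eth2" and err is the unit
 * number that was assigned.
 */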


/**
 *	dev_change_name - change name of a device
 *	@dev: device
 *	@newname: name (or format string) must be at least IFNAMSIZ
 *
 *	Change name of a device, can pass format strings "eth%d"
 *	for wildcarding.
 */
int dev_change_name(struct net_device *dev, const char *newname)
{
	char oldname[IFNAMSIZ];
	int err = 0;
	int ret;
	struct net *net;

	ASSERT_RTNL();
	BUG_ON(!dev_net(dev));

	net = dev_net(dev);
	if (dev->flags & IFF_UP)
		return -EBUSY;

	if (!dev_valid_name(newname))
		return -EINVAL;

	if (strncmp(newname, dev->name, IFNAMSIZ) == 0)
		return 0;

	memcpy(oldname, dev->name, IFNAMSIZ);

	if (strchr(newname, '%')) {
		err = dev_alloc_name(dev, newname);
		if (err < 0)
			return err;
	}
	else if (__dev_get_by_name(net, newname))
		return -EEXIST;
	else
		strlcpy(dev->name, newname, IFNAMSIZ);

rollback:
	/* For now only devices in the initial network namespace
	 * are in sysfs.
	 */
	if (net == &init_net) {
		ret = device_rename(&dev->dev, dev->name);
		if (ret) {
			memcpy(dev->name, oldname, IFNAMSIZ);
			return ret;
		}
	}

	write_lock_bh(&dev_base_lock);
	hlist_del(&dev->name_hlist);
	hlist_add_head(&dev->name_hlist, dev_name_hash(net, dev->name));
	write_unlock_bh(&dev_base_lock);

	ret = call_netdevice_notifiers(NETDEV_CHANGENAME, dev);
	ret = notifier_to_errno(ret);

	if (ret) {
		if (err) {
			printk(KERN_ERR
			       "%s: name change rollback failed: %d.\n",
			       dev->name, ret);
		} else {
			err = ret;
			memcpy(dev->name, oldname, IFNAMSIZ);
			goto rollback;
		}
	}

	return err;
}

/**
 *	dev_set_alias - change ifalias of a device
 *	@dev: device
 *	@alias: name up to IFALIASZ
 *	@len: limit of bytes to copy from info
 *
 *	Set ifalias for a device.
 */
int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
{
	ASSERT_RTNL();

	if (len >= IFALIASZ)
		return -EINVAL;

	if (!len) {
		if (dev->ifalias) {
			kfree(dev->ifalias);
			dev->ifalias = NULL;
		}
		return 0;
	}

	dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
	if (!dev->ifalias)
		return -ENOMEM;

	strlcpy(dev->ifalias, alias, len+1);
	return len;
}


/**
 *	netdev_features_change - device changes features
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed features.
 */
void netdev_features_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_FEAT_CHANGE, dev);
}
EXPORT_SYMBOL(netdev_features_change);

/**
 *	netdev_state_change - device changes state
 *	@dev: device to cause notification
 *
 *	Called to indicate a device has changed state. This function calls
 *	the notifier chains for netdev_chain and sends a NEWLINK message
 *	to the routing socket.
 */
void netdev_state_change(struct net_device *dev)
{
	if (dev->flags & IFF_UP) {
		call_netdevice_notifiers(NETDEV_CHANGE, dev);
		rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
	}
}

void netdev_bonding_change(struct net_device *dev)
{
	call_netdevice_notifiers(NETDEV_BONDING_FAILOVER, dev);
}
EXPORT_SYMBOL(netdev_bonding_change);

/**
 *	dev_load - load a network module
 *	@net: the applicable net namespace
 *	@name: name of interface
 *
 *	If a network interface is not present and the process has suitable
 *	privileges this function loads the module. If module loading is not
 *	available in this kernel then it becomes a nop.
 */

void dev_load(struct net *net, const char *name)
{
	struct net_device *dev;

	read_lock(&dev_base_lock);
	dev = __dev_get_by_name(net, name);
	read_unlock(&dev_base_lock);

	if (!dev && capable(CAP_SYS_MODULE))
		request_module("%s", name);
}

/**
 *	dev_open - prepare an interface for use.
 *	@dev: device to open
 *
 *	Takes a device from down to up state. The device's private open
 *	function is invoked and then the multicast lists are loaded. Finally
 *	the device is moved into the up state and a %NETDEV_UP message is
 *	sent to the netdev notifier chain.
 *
 *	Calling this function on an active interface is a nop. On a failure
 *	a negative errno code is returned.
 */
int dev_open(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	int ret = 0;

	ASSERT_RTNL();

	/*
	 *	Is it already up?
	 */

	if (dev->flags & IFF_UP)
		return 0;

	/*
	 *	Is it even present?
	 */
	if (!netif_device_present(dev))
		return -ENODEV;

	/*
	 *	Call device private open method
	 */
	set_bit(__LINK_STATE_START, &dev->state);

	if (ops->ndo_validate_addr)
		ret = ops->ndo_validate_addr(dev);

	if (!ret && ops->ndo_open)
		ret = ops->ndo_open(dev);

	/*
	 *	If it went open OK then:
	 */

	if (ret)
		clear_bit(__LINK_STATE_START, &dev->state);
	else {
		/*
		 *	Set the flags.
		 */
		dev->flags |= IFF_UP;

		/*
		 *	Initialize multicasting status
		 */
		dev_set_rx_mode(dev);

		/*
		 *	Wakeup transmit queue engine
		 */
		dev_activate(dev);

		/*
		 *	... and announce new interface.
		 */
		call_netdevice_notifiers(NETDEV_UP, dev);
	}

	return ret;
}

/**
 *	dev_close - shutdown an interface.
 *	@dev: device to shutdown
 *
 *	This function moves an active device into down state. A
 *	%NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
 *	is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
 *	chain.
 */
int dev_close(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	ASSERT_RTNL();

	might_sleep();

	if (!(dev->flags & IFF_UP))
		return 0;

	/*
	 *	Tell people we are going down, so that they can
	 *	prepare for death while the device is still operating.
	 */
	call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);

	clear_bit(__LINK_STATE_START, &dev->state);

	/* Synchronize to scheduled poll. We cannot touch poll list,
	 * it can be even on different cpu. So just clear netif_running().
	 *
	 * dev->stop() will invoke napi_disable() on all of its
	 * napi_struct instances on this device.
	 */
	smp_mb__after_clear_bit(); /* Commit netif_running(). */

	dev_deactivate(dev);

	/*
	 *	Call the device specific close. This cannot fail.
	 *	Only if device is UP
	 *
	 *	We allow it to be called even after a DETACH hot-plug
	 *	event.
	 */
	if (ops->ndo_stop)
		ops->ndo_stop(dev);

	/*
	 *	Device is now down.
	 */

	dev->flags &= ~IFF_UP;

	/*
	 *	Tell people we are down
	 */
	call_netdevice_notifiers(NETDEV_DOWN, dev);

	return 0;
}
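
/*
 * Sketch (illustrative): the ndo_open/ndo_stop/ndo_validate_addr hooks
 * invoked by dev_open() and dev_close() above are supplied by the
 * driver through its net_device_ops before registration; my_open,
 * my_stop and my_netdev_ops are hypothetical names.
 *
 *	static const struct net_device_ops my_netdev_ops = {
 *		.ndo_open	   = my_open,
 *		.ndo_stop	   = my_stop,
 *		.ndo_validate_addr = eth_validate_addr,
 *	};
 *
 *	dev->netdev_ops = &my_netdev_ops;
 */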


/**
 *	dev_disable_lro - disable Large Receive Offload on a device
 *	@dev: device
 *
 *	Disable Large Receive Offload (LRO) on a net device.  Must be
 *	called under RTNL.  This is needed if received packets may be
 *	forwarded to another interface.
 */
void dev_disable_lro(struct net_device *dev)
{
	if (dev->ethtool_ops && dev->ethtool_ops->get_flags &&
	    dev->ethtool_ops->set_flags) {
		u32 flags = dev->ethtool_ops->get_flags(dev);
		if (flags & ETH_FLAG_LRO) {
			flags &= ~ETH_FLAG_LRO;
			dev->ethtool_ops->set_flags(dev, flags);
		}
	}
	WARN_ON(dev->features & NETIF_F_LRO);
}
EXPORT_SYMBOL(dev_disable_lro);


static int dev_boot_phase = 1;

/*
 *	Device change register/unregister. These are not inline or static
 *	as we export them to the world.
 */

/**
 *	register_netdevice_notifier - register a network notifier block
 *	@nb: notifier
 *
 *	Register a notifier to be called when network device events occur.
 *	The notifier passed is linked into the kernel structures and must
 *	not be reused until it has been unregistered. A negative errno code
 *	is returned on a failure.
 *
 *	When registered all registration and up events are replayed
 *	to the new notifier to allow the device to have a race-free
 *	view of the network device list.
 */

int register_netdevice_notifier(struct notifier_block *nb)
{
	struct net_device *dev;
	struct net_device *last;
	struct net *net;
	int err;

	rtnl_lock();
	err = raw_notifier_chain_register(&netdev_chain, nb);
	if (err)
		goto unlock;
	if (dev_boot_phase)
		goto unlock;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
			err = notifier_to_errno(err);
			if (err)
				goto rollback;

			if (!(dev->flags & IFF_UP))
				continue;

			nb->notifier_call(nb, NETDEV_UP, dev);
		}
	}

unlock:
	rtnl_unlock();
	return err;

rollback:
	last = dev;
	for_each_net(net) {
		for_each_netdev(net, dev) {
			if (dev == last)
				break;

			if (dev->flags & IFF_UP) {
				nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
				nb->notifier_call(nb, NETDEV_DOWN, dev);
			}
			nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
		}
	}

	raw_notifier_chain_unregister(&netdev_chain, nb);
	goto unlock;
}
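
/*
 * Usage sketch (illustrative; my_netdev_event and my_notifier are
 * hypothetical): a subsystem watching device events registers a
 * notifier_block whose callback receives the net_device as @ptr.
 *
 *	static int my_netdev_event(struct notifier_block *nb,
 *				   unsigned long event, void *ptr)
 *	{
 *		struct net_device *dev = ptr;
 *
 *		switch (event) {
 *		case NETDEV_UP:
 *			... device went up ...
 *			break;
 *		case NETDEV_GOING_DOWN:
 *			... device is about to go down ...
 *			break;
 *		}
 *		return NOTIFY_DONE;
 *	}
 *
 *	static struct notifier_block my_notifier = {
 *		.notifier_call = my_netdev_event,
 *	};
 *
 *	register_netdevice_notifier(&my_notifier);
 */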
1277
1278/**
1279 * unregister_netdevice_notifier - unregister a network notifier block
1280 * @nb: notifier
1281 *
1282 * Unregister a notifier previously registered by
1283 * register_netdevice_notifier(). The notifier is unlinked into the
1284 * kernel structures and may then be reused. A negative errno code
1285 * is returned on a failure.
1286 */
1287
1288int unregister_netdevice_notifier(struct notifier_block *nb)
1289{
Herbert Xu9f514952006-03-25 01:24:25 -08001290 int err;
1291
1292 rtnl_lock();
Alan Sternf07d5b92006-05-09 15:23:03 -07001293 err = raw_notifier_chain_unregister(&netdev_chain, nb);
Herbert Xu9f514952006-03-25 01:24:25 -08001294 rtnl_unlock();
1295 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001296}
1297
1298/**
1299 * call_netdevice_notifiers - call all network notifier blocks
1300 * @val: value passed unmodified to notifier function
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07001301 * @dev: net_device pointer passed unmodified to notifier function
Linus Torvalds1da177e2005-04-16 15:20:36 -07001302 *
1303 * Call all network notifier blocks. Parameters and return value
Alan Sternf07d5b92006-05-09 15:23:03 -07001304 * are as for raw_notifier_call_chain().
Linus Torvalds1da177e2005-04-16 15:20:36 -07001305 */
1306
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001307int call_netdevice_notifiers(unsigned long val, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001308{
Eric W. Biedermanad7379d2007-09-16 15:33:32 -07001309 return raw_notifier_call_chain(&netdev_chain, val, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001310}
1311
1312/* When > 0 there are consumers of rx skb time stamps */
1313static atomic_t netstamp_needed = ATOMIC_INIT(0);
1314
1315void net_enable_timestamp(void)
1316{
1317 atomic_inc(&netstamp_needed);
1318}
1319
1320void net_disable_timestamp(void)
1321{
1322 atomic_dec(&netstamp_needed);
1323}
1324
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001325static inline void net_timestamp(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001326{
1327 if (atomic_read(&netstamp_needed))
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001328 __net_timestamp(skb);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001329 else
1330 skb->tstamp.tv64 = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001331}
1332
1333/*
1334 * Support routine. Sends outgoing frames to any network
1335 * taps currently in use.
1336 */
1337
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001338static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001339{
1340 struct packet_type *ptype;
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001341
1342 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001343
1344 rcu_read_lock();
1345 list_for_each_entry_rcu(ptype, &ptype_all, list) {
1346 /* Never send packets back to the socket
1347 * they originated from - MvS (miquels@drinkel.ow.org)
1348 */
1349 if ((ptype->dev == dev || !ptype->dev) &&
1350 (ptype->af_packet_priv == NULL ||
1351 (struct sock *)ptype->af_packet_priv != skb->sk)) {
1352 struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1353 if (!skb2)
1354 break;
1355
1356 /* skb->nh should be correctly
1357 set by sender, so that the second statement is
1358 just protection against buggy protocols.
1359 */
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001360 skb_reset_mac_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001361
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001362 if (skb_network_header(skb2) < skb2->data ||
Arnaldo Carvalho de Melo27a884d2007-04-19 20:29:13 -07001363 skb2->network_header > skb2->tail) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001364 if (net_ratelimit())
1365 printk(KERN_CRIT "protocol %04x is "
1366 "buggy, dev %s\n",
1367 skb2->protocol, dev->name);
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07001368 skb_reset_network_header(skb2);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001369 }
1370
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001371 skb2->transport_header = skb2->network_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 skb2->pkt_type = PACKET_OUTGOING;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07001373 ptype->func(skb2, skb->dev, ptype, skb->dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001374 }
1375 }
1376 rcu_read_unlock();
1377}
1378
Denis Vlasenko56079432006-03-29 15:57:29 -08001379
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001380static inline void __netif_reschedule(struct Qdisc *q)
1381{
1382 struct softnet_data *sd;
1383 unsigned long flags;
1384
1385 local_irq_save(flags);
1386 sd = &__get_cpu_var(softnet_data);
1387 q->next_sched = sd->output_queue;
1388 sd->output_queue = q;
1389 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1390 local_irq_restore(flags);
1391}
1392
David S. Miller37437bb2008-07-16 02:15:04 -07001393void __netif_schedule(struct Qdisc *q)
Denis Vlasenko56079432006-03-29 15:57:29 -08001394{
Jarek Poplawskidef82a12008-08-17 21:54:43 -07001395 if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
1396 __netif_reschedule(q);
Denis Vlasenko56079432006-03-29 15:57:29 -08001397}
1398EXPORT_SYMBOL(__netif_schedule);
1399
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001400void dev_kfree_skb_irq(struct sk_buff *skb)
Denis Vlasenko56079432006-03-29 15:57:29 -08001401{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001402 if (atomic_dec_and_test(&skb->users)) {
1403 struct softnet_data *sd;
1404 unsigned long flags;
Denis Vlasenko56079432006-03-29 15:57:29 -08001405
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001406 local_irq_save(flags);
1407 sd = &__get_cpu_var(softnet_data);
1408 skb->next = sd->completion_queue;
1409 sd->completion_queue = skb;
1410 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1411 local_irq_restore(flags);
1412 }
Denis Vlasenko56079432006-03-29 15:57:29 -08001413}
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001414EXPORT_SYMBOL(dev_kfree_skb_irq);
Denis Vlasenko56079432006-03-29 15:57:29 -08001415
1416void dev_kfree_skb_any(struct sk_buff *skb)
1417{
1418 if (in_irq() || irqs_disabled())
1419 dev_kfree_skb_irq(skb);
1420 else
1421 dev_kfree_skb(skb);
1422}
1423EXPORT_SYMBOL(dev_kfree_skb_any);
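
/*
 * Illustrative sketch, not part of the original file: a driver completion
 * path that may run in hardirq, softirq or process context frees skbs with
 * dev_kfree_skb_any() and lets the helper above pick the safe variant.
 * example_free_tx_skbs() is a hypothetical name, not a kernel API.
 */
static void example_free_tx_skbs(struct sk_buff *list)
{
	while (list) {
		struct sk_buff *next = list->next;

		list->next = NULL;
		dev_kfree_skb_any(list);	/* safe in any context */
		list = next;
	}
}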
1424
1425
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001426/**
1427 * netif_device_detach - mark device as removed
1428 * @dev: network device
1429 *
1430 * Mark device as removed from system and therefore no longer available.
1431 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001432void netif_device_detach(struct net_device *dev)
1433{
1434 if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1435 netif_running(dev)) {
1436 netif_stop_queue(dev);
1437 }
1438}
1439EXPORT_SYMBOL(netif_device_detach);
1440
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001441/**
1442 * netif_device_attach - mark device as attached
1443 * @dev: network device
1444 *
 1445 * Mark device as attached to the system and restart it if needed.
1446 */
Denis Vlasenko56079432006-03-29 15:57:29 -08001447void netif_device_attach(struct net_device *dev)
1448{
1449 if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1450 netif_running(dev)) {
1451 netif_wake_queue(dev);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001452 __netdev_watchdog_up(dev);
Denis Vlasenko56079432006-03-29 15:57:29 -08001453 }
1454}
1455EXPORT_SYMBOL(netif_device_attach);
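
/*
 * Illustrative sketch, assuming <linux/pci.h>: the usual pairing of the two
 * helpers above in a hypothetical PCI driver's power management hooks.
 * example_pci_suspend()/example_pci_resume() are assumptions, not real code.
 */
static int example_pci_suspend(struct pci_dev *pdev, pm_message_t state)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	netif_device_detach(dev);	/* stop the queue, clear __LINK_STATE_PRESENT */
	/* ... quiesce and power down the hardware here ... */
	return 0;
}

static int example_pci_resume(struct pci_dev *pdev)
{
	struct net_device *dev = pci_get_drvdata(pdev);

	/* ... re-initialize the hardware here ... */
	netif_device_attach(dev);	/* set __LINK_STATE_PRESENT, wake the queue */
	return 0;
}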
1456
Ben Hutchings6de329e2008-06-16 17:02:28 -07001457static bool can_checksum_protocol(unsigned long features, __be16 protocol)
1458{
1459 return ((features & NETIF_F_GEN_CSUM) ||
1460 ((features & NETIF_F_IP_CSUM) &&
1461 protocol == htons(ETH_P_IP)) ||
1462 ((features & NETIF_F_IPV6_CSUM) &&
1463 protocol == htons(ETH_P_IPV6)));
1464}
1465
1466static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
1467{
1468 if (can_checksum_protocol(dev->features, skb->protocol))
1469 return true;
1470
1471 if (skb->protocol == htons(ETH_P_8021Q)) {
1472 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1473 if (can_checksum_protocol(dev->features & dev->vlan_features,
1474 veh->h_vlan_encapsulated_proto))
1475 return true;
1476 }
1477
1478 return false;
1479}
Denis Vlasenko56079432006-03-29 15:57:29 -08001480
Linus Torvalds1da177e2005-04-16 15:20:36 -07001481/*
1482 * Invalidate hardware checksum when packet is to be mangled, and
1483 * complete checksum manually on outgoing path.
1484 */
Patrick McHardy84fa7932006-08-29 16:44:56 -07001485int skb_checksum_help(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001486{
Al Virod3bc23e2006-11-14 21:24:49 -08001487 __wsum csum;
Herbert Xu663ead32007-04-09 11:59:07 -07001488 int ret = 0, offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001489
Patrick McHardy84fa7932006-08-29 16:44:56 -07001490 if (skb->ip_summed == CHECKSUM_COMPLETE)
Herbert Xua430a432006-07-08 13:34:56 -07001491 goto out_set_summed;
1492
1493 if (unlikely(skb_shinfo(skb)->gso_size)) {
Herbert Xua430a432006-07-08 13:34:56 -07001494 /* Let GSO fix up the checksum. */
1495 goto out_set_summed;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001496 }
1497
Herbert Xua0308472007-10-15 01:47:15 -07001498 offset = skb->csum_start - skb_headroom(skb);
1499 BUG_ON(offset >= skb_headlen(skb));
1500 csum = skb_checksum(skb, offset, skb->len - offset, 0);
1501
1502 offset += skb->csum_offset;
1503 BUG_ON(offset + sizeof(__sum16) > skb_headlen(skb));
1504
1505 if (skb_cloned(skb) &&
1506 !skb_clone_writable(skb, offset + sizeof(__sum16))) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001507 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1508 if (ret)
1509 goto out;
1510 }
1511
Herbert Xua0308472007-10-15 01:47:15 -07001512 *(__sum16 *)(skb->data + offset) = csum_fold(csum);
Herbert Xua430a432006-07-08 13:34:56 -07001513out_set_summed:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001514 skb->ip_summed = CHECKSUM_NONE;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001515out:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001516 return ret;
1517}
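
/*
 * Illustrative sketch, not part of the original file: how a transmit path
 * falls back to software checksumming when the device cannot handle the
 * protocol, condensed from the logic in dev_queue_xmit() below.
 * example_xmit_prepare() is a hypothetical name.
 */
static int example_xmit_prepare(struct sk_buff *skb, struct net_device *dev)
{
	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    !dev_can_checksum(dev, skb))
		return skb_checksum_help(skb);	/* complete csum in software */
	return 0;
}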
1518
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001519/**
1520 * skb_gso_segment - Perform segmentation on skb.
1521 * @skb: buffer to segment
Herbert Xu576a30e2006-06-27 13:22:38 -07001522 * @features: features for the output path (see dev->features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001523 *
1524 * This function segments the given skb and returns a list of segments.
Herbert Xu576a30e2006-06-27 13:22:38 -07001525 *
1526 * It may return NULL if the skb requires no segmentation. This is
1527 * only possible when GSO is used for verifying header integrity.
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001528 */
Herbert Xu576a30e2006-06-27 13:22:38 -07001529struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001530{
1531 struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1532 struct packet_type *ptype;
Al Viro252e3342006-11-14 20:48:11 -08001533 __be16 type = skb->protocol;
Herbert Xua430a432006-07-08 13:34:56 -07001534 int err;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001535
Arnaldo Carvalho de Melo459a98e2007-03-19 15:30:44 -07001536 skb_reset_mac_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07001537 skb->mac_len = skb->network_header - skb->mac_header;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001538 __skb_pull(skb, skb->mac_len);
1539
Herbert Xuf9d106a2007-04-23 22:36:13 -07001540 if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001541 if (skb_header_cloned(skb) &&
1542 (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1543 return ERR_PTR(err);
1544 }
1545
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001546 rcu_read_lock();
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08001547 list_for_each_entry_rcu(ptype,
1548 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001549 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
Patrick McHardy84fa7932006-08-29 16:44:56 -07001550 if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
Herbert Xua430a432006-07-08 13:34:56 -07001551 err = ptype->gso_send_check(skb);
1552 segs = ERR_PTR(err);
1553 if (err || skb_gso_ok(skb, features))
1554 break;
Arnaldo Carvalho de Melod56f90a2007-04-10 20:50:43 -07001555 __skb_push(skb, (skb->data -
1556 skb_network_header(skb)));
Herbert Xua430a432006-07-08 13:34:56 -07001557 }
Herbert Xu576a30e2006-06-27 13:22:38 -07001558 segs = ptype->gso_segment(skb, features);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001559 break;
1560 }
1561 }
1562 rcu_read_unlock();
1563
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -07001564 __skb_push(skb, skb->data - skb_mac_header(skb));
Herbert Xu576a30e2006-06-27 13:22:38 -07001565
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001566 return segs;
1567}
1568
1569EXPORT_SYMBOL(skb_gso_segment);
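
/*
 * Illustrative sketch, not part of the original file: the calling convention
 * of skb_gso_segment() - check IS_ERR(), treat NULL as "nothing to do", and
 * walk the ->next list. A real caller would transmit each segment instead of
 * freeing it; example_segment_and_count() is a hypothetical name.
 */
static int example_segment_and_count(struct sk_buff *skb, int features)
{
	struct sk_buff *segs = skb_gso_segment(skb, features);
	int n = 0;

	if (IS_ERR(segs))
		return PTR_ERR(segs);
	if (!segs)
		return 0;		/* header verification only */

	while (segs) {
		struct sk_buff *nskb = segs;

		segs = segs->next;
		nskb->next = NULL;
		n++;
		kfree_skb(nskb);	/* stand-in for the transmit step */
	}
	return n;
}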
1570
Herbert Xufb286bb2005-11-10 13:01:24 -08001571/* Take action when hardware reception checksum errors are detected. */
1572#ifdef CONFIG_BUG
1573void netdev_rx_csum_fault(struct net_device *dev)
1574{
1575 if (net_ratelimit()) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001576 printk(KERN_ERR "%s: hw csum failure.\n",
Stephen Hemminger246a4212005-12-08 15:21:39 -08001577 dev ? dev->name : "<unknown>");
Herbert Xufb286bb2005-11-10 13:01:24 -08001578 dump_stack();
1579 }
1580}
1581EXPORT_SYMBOL(netdev_rx_csum_fault);
1582#endif
1583
Linus Torvalds1da177e2005-04-16 15:20:36 -07001584/* Actually, we should eliminate this check as soon as we know that:
 1585 * 1. An IOMMU is present and can map all of the memory.
1586 * 2. No high memory really exists on this machine.
1587 */
1588
1589static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1590{
Herbert Xu3d3a8532006-06-27 13:33:10 -07001591#ifdef CONFIG_HIGHMEM
Linus Torvalds1da177e2005-04-16 15:20:36 -07001592 int i;
1593
1594 if (dev->features & NETIF_F_HIGHDMA)
1595 return 0;
1596
1597 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1598 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1599 return 1;
1600
Herbert Xu3d3a8532006-06-27 13:33:10 -07001601#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 return 0;
1603}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001604
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001605struct dev_gso_cb {
1606 void (*destructor)(struct sk_buff *skb);
1607};
1608
1609#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1610
1611static void dev_gso_skb_destructor(struct sk_buff *skb)
1612{
1613 struct dev_gso_cb *cb;
1614
1615 do {
1616 struct sk_buff *nskb = skb->next;
1617
1618 skb->next = nskb->next;
1619 nskb->next = NULL;
1620 kfree_skb(nskb);
1621 } while (skb->next);
1622
1623 cb = DEV_GSO_CB(skb);
1624 if (cb->destructor)
1625 cb->destructor(skb);
1626}
1627
1628/**
1629 * dev_gso_segment - Perform emulated hardware segmentation on skb.
1630 * @skb: buffer to segment
1631 *
1632 * This function segments the given skb and stores the list of segments
1633 * in skb->next.
1634 */
1635static int dev_gso_segment(struct sk_buff *skb)
1636{
1637 struct net_device *dev = skb->dev;
1638 struct sk_buff *segs;
Herbert Xu576a30e2006-06-27 13:22:38 -07001639 int features = dev->features & ~(illegal_highdma(dev, skb) ?
1640 NETIF_F_SG : 0);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001641
Herbert Xu576a30e2006-06-27 13:22:38 -07001642 segs = skb_gso_segment(skb, features);
1643
1644 /* Verifying header integrity only. */
1645 if (!segs)
1646 return 0;
1647
Hirofumi Nakagawa801678c2008-04-29 01:03:09 -07001648 if (IS_ERR(segs))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001649 return PTR_ERR(segs);
1650
1651 skb->next = segs;
1652 DEV_GSO_CB(skb)->destructor = skb->destructor;
1653 skb->destructor = dev_gso_skb_destructor;
1654
1655 return 0;
1656}
1657
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001658int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
1659 struct netdev_queue *txq)
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001660{
Stephen Hemminger00829822008-11-20 20:14:53 -08001661 const struct net_device_ops *ops = dev->netdev_ops;
1662
1663 prefetch(&dev->netdev_ops->ndo_start_xmit);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001664 if (likely(!skb->next)) {
Stephen Hemminger9be9a6b2007-04-20 17:02:45 -07001665 if (!list_empty(&ptype_all))
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001666 dev_queue_xmit_nit(skb, dev);
1667
Herbert Xu576a30e2006-06-27 13:22:38 -07001668 if (netif_needs_gso(dev, skb)) {
1669 if (unlikely(dev_gso_segment(skb)))
1670 goto out_kfree_skb;
1671 if (skb->next)
1672 goto gso;
1673 }
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001674
Stephen Hemminger00829822008-11-20 20:14:53 -08001675 return ops->ndo_start_xmit(skb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001676 }
1677
Herbert Xu576a30e2006-06-27 13:22:38 -07001678gso:
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001679 do {
1680 struct sk_buff *nskb = skb->next;
1681 int rc;
1682
1683 skb->next = nskb->next;
1684 nskb->next = NULL;
Stephen Hemminger00829822008-11-20 20:14:53 -08001685 rc = ops->ndo_start_xmit(nskb, dev);
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001686 if (unlikely(rc)) {
Michael Chanf54d9e82006-06-25 23:57:04 -07001687 nskb->next = skb->next;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001688 skb->next = nskb;
1689 return rc;
1690 }
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001691 if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
Michael Chanf54d9e82006-06-25 23:57:04 -07001692 return NETDEV_TX_BUSY;
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001693 } while (skb->next);
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001694
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001695 skb->destructor = DEV_GSO_CB(skb)->destructor;
1696
1697out_kfree_skb:
1698 kfree_skb(skb);
1699 return 0;
1700}
1701
David S. Millerb6b2fed2008-07-21 09:48:06 -07001702static u32 simple_tx_hashrnd;
 1703static int simple_tx_hashrnd_initialized;
1704
David S. Miller8f0f2222008-07-15 03:47:03 -07001705static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
1706{
David S. Millerb6b2fed2008-07-21 09:48:06 -07001707 u32 addr1, addr2, ports;
1708 u32 hash, ihl;
Alexander Duyckad55dca2008-09-20 22:05:50 -07001709 u8 ip_proto = 0;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001710
1711 if (unlikely(!simple_tx_hashrnd_initialized)) {
1712 get_random_bytes(&simple_tx_hashrnd, 4);
1713 simple_tx_hashrnd_initialized = 1;
1714 }
David S. Miller8f0f2222008-07-15 03:47:03 -07001715
1716 switch (skb->protocol) {
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001717 case htons(ETH_P_IP):
Alexander Duyckad55dca2008-09-20 22:05:50 -07001718 if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
1719 ip_proto = ip_hdr(skb)->protocol;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001720 addr1 = ip_hdr(skb)->saddr;
1721 addr2 = ip_hdr(skb)->daddr;
David S. Miller8f0f2222008-07-15 03:47:03 -07001722 ihl = ip_hdr(skb)->ihl;
David S. Miller8f0f2222008-07-15 03:47:03 -07001723 break;
Arnaldo Carvalho de Melo60678042008-09-20 22:20:49 -07001724 case htons(ETH_P_IPV6):
David S. Miller8f0f2222008-07-15 03:47:03 -07001725 ip_proto = ipv6_hdr(skb)->nexthdr;
David S. Millerb6b2fed2008-07-21 09:48:06 -07001726 addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
1727 addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
David S. Miller8f0f2222008-07-15 03:47:03 -07001728 ihl = (40 >> 2);
David S. Miller8f0f2222008-07-15 03:47:03 -07001729 break;
1730 default:
1731 return 0;
1732 }
1733
David S. Miller8f0f2222008-07-15 03:47:03 -07001734
1735 switch (ip_proto) {
1736 case IPPROTO_TCP:
1737 case IPPROTO_UDP:
1738 case IPPROTO_DCCP:
1739 case IPPROTO_ESP:
1740 case IPPROTO_AH:
1741 case IPPROTO_SCTP:
1742 case IPPROTO_UDPLITE:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001743 ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
David S. Miller8f0f2222008-07-15 03:47:03 -07001744 break;
1745
1746 default:
David S. Millerb6b2fed2008-07-21 09:48:06 -07001747 ports = 0;
David S. Miller8f0f2222008-07-15 03:47:03 -07001748 break;
1749 }
1750
David S. Millerb6b2fed2008-07-21 09:48:06 -07001751 hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
1752
1753 return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
David S. Miller8f0f2222008-07-15 03:47:03 -07001754}
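
/*
 * Editorial note on the scaling step above: ((u64) hash * n) >> 32 maps a
 * uniform 32-bit hash onto [0, n) without a modulo, because hash * n lies
 * in [0, n * 2^32) and the top 32 bits select the bucket. For example,
 * hash = 0x80000000 with n = 4 tx queues gives (0x80000000ULL * 4) >> 32 = 2.
 */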
1755
David S. Millere8a04642008-07-17 00:34:19 -07001756static struct netdev_queue *dev_pick_tx(struct net_device *dev,
1757 struct sk_buff *skb)
1758{
Stephen Hemminger00829822008-11-20 20:14:53 -08001759 const struct net_device_ops *ops = dev->netdev_ops;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001760 u16 queue_index = 0;
1761
Stephen Hemminger00829822008-11-20 20:14:53 -08001762 if (ops->ndo_select_queue)
1763 queue_index = ops->ndo_select_queue(dev, skb);
David S. Miller8f0f2222008-07-15 03:47:03 -07001764 else if (dev->real_num_tx_queues > 1)
1765 queue_index = simple_tx_hash(dev, skb);
David S. Millereae792b2008-07-15 03:03:33 -07001766
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001767 skb_set_queue_mapping(skb, queue_index);
1768 return netdev_get_tx_queue(dev, queue_index);
David S. Millere8a04642008-07-17 00:34:19 -07001769}
1770
Dave Jonesd29f7492008-07-22 14:09:06 -07001771/**
1772 * dev_queue_xmit - transmit a buffer
1773 * @skb: buffer to transmit
1774 *
1775 * Queue a buffer for transmission to a network device. The caller must
1776 * have set the device and priority and built the buffer before calling
1777 * this function. The function can be called from an interrupt.
1778 *
1779 * A negative errno code is returned on a failure. A success does not
1780 * guarantee the frame will be transmitted as it may be dropped due
1781 * to congestion or traffic shaping.
1782 *
1783 * -----------------------------------------------------------------------------------
1784 * I notice this method can also return errors from the queue disciplines,
1785 * including NET_XMIT_DROP, which is a positive value. So, errors can also
1786 * be positive.
1787 *
1788 * Regardless of the return value, the skb is consumed, so it is currently
1789 * difficult to retry a send to this method. (You can bump the ref count
1790 * before sending to hold a reference for retry if you are careful.)
1791 *
1792 * When calling this method, interrupts MUST be enabled. This is because
1793 * the BH enable code must have IRQs enabled so that it will not deadlock.
1794 * --BLG
1795 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001796int dev_queue_xmit(struct sk_buff *skb)
1797{
1798 struct net_device *dev = skb->dev;
David S. Millerdc2b4842008-07-08 17:18:23 -07001799 struct netdev_queue *txq;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001800 struct Qdisc *q;
1801 int rc = -ENOMEM;
1802
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001803 /* GSO will handle the following emulations directly. */
1804 if (netif_needs_gso(dev, skb))
1805 goto gso;
1806
Linus Torvalds1da177e2005-04-16 15:20:36 -07001807 if (skb_shinfo(skb)->frag_list &&
1808 !(dev->features & NETIF_F_FRAGLIST) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001809 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 goto out_kfree_skb;
1811
1812 /* Fragmented skb is linearized if device does not support SG,
1813 * or if at least one of fragments is in highmem and device
1814 * does not support DMA from it.
1815 */
1816 if (skb_shinfo(skb)->nr_frags &&
1817 (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
Herbert Xu364c6ba2006-06-09 16:10:40 -07001818 __skb_linearize(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 goto out_kfree_skb;
1820
1821 /* If packet is not checksummed and device does not support
1822 * checksumming for this protocol, complete checksumming here.
1823 */
Herbert Xu663ead32007-04-09 11:59:07 -07001824 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1825 skb_set_transport_header(skb, skb->csum_start -
1826 skb_headroom(skb));
Ben Hutchings6de329e2008-06-16 17:02:28 -07001827 if (!dev_can_checksum(dev, skb) && skb_checksum_help(skb))
1828 goto out_kfree_skb;
Herbert Xu663ead32007-04-09 11:59:07 -07001829 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830
Herbert Xuf6a78bf2006-06-22 02:57:17 -07001831gso:
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001832 /* Disable soft irqs for various locks below. Also
1833 * stops preemption for RCU.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09001835 rcu_read_lock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001836
David S. Millereae792b2008-07-15 03:03:33 -07001837 txq = dev_pick_tx(dev, skb);
David S. Millerb0e1e642008-07-08 17:42:10 -07001838 q = rcu_dereference(txq->qdisc);
David S. Miller37437bb2008-07-16 02:15:04 -07001839
Linus Torvalds1da177e2005-04-16 15:20:36 -07001840#ifdef CONFIG_NET_CLS_ACT
 1841 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
1842#endif
1843 if (q->enqueue) {
David S. Miller5fb66222008-08-02 20:02:43 -07001844 spinlock_t *root_lock = qdisc_lock(q);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001845
David S. Miller37437bb2008-07-16 02:15:04 -07001846 spin_lock(root_lock);
1847
David S. Millera9312ae2008-08-17 21:51:03 -07001848 if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
David S. Miller96d20312008-08-17 23:37:16 -07001849 kfree_skb(skb);
David S. Millera9312ae2008-08-17 21:51:03 -07001850 rc = NET_XMIT_DROP;
David S. Miller96d20312008-08-17 23:37:16 -07001851 } else {
1852 rc = qdisc_enqueue_root(skb, q);
1853 qdisc_run(q);
David S. Millera9312ae2008-08-17 21:51:03 -07001854 }
David S. Miller37437bb2008-07-16 02:15:04 -07001855 spin_unlock(root_lock);
1856
David S. Miller37437bb2008-07-16 02:15:04 -07001857 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001858 }
1859
1860 /* The device has no queue. Common case for software devices:
 1861 loopback, all sorts of tunnels...
1862
Herbert Xu932ff272006-06-09 12:20:56 -07001863 Really, it is unlikely that netif_tx_lock protection is necessary
 1864 here. (f.e. loopback and IP tunnels are clean, ignoring statistics
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 counters.)
 1866 However, it is possible that they rely on the protection
 1867 made by us here.
1868
 1869 Check this and shoot the lock. It is not prone to deadlocks.
 1870 Or shoot the noqueue qdisc; it is even simpler 8)
1871 */
1872 if (dev->flags & IFF_UP) {
1873 int cpu = smp_processor_id(); /* ok because BHs are off */
1874
David S. Millerc773e842008-07-08 23:13:53 -07001875 if (txq->xmit_lock_owner != cpu) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001876
David S. Millerc773e842008-07-08 23:13:53 -07001877 HARD_TX_LOCK(dev, txq, cpu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001878
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001879 if (!netif_tx_queue_stopped(txq)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001880 rc = 0;
David S. Millerfd2ea0a2008-07-17 01:56:23 -07001881 if (!dev_hard_start_xmit(skb, dev, txq)) {
David S. Millerc773e842008-07-08 23:13:53 -07001882 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001883 goto out;
1884 }
1885 }
David S. Millerc773e842008-07-08 23:13:53 -07001886 HARD_TX_UNLOCK(dev, txq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001887 if (net_ratelimit())
1888 printk(KERN_CRIT "Virtual device %s asks to "
1889 "queue packet!\n", dev->name);
1890 } else {
1891 /* Recursion is detected! It is possible,
1892 * unfortunately */
1893 if (net_ratelimit())
1894 printk(KERN_CRIT "Dead loop on virtual device "
1895 "%s, fix it urgently!\n", dev->name);
1896 }
1897 }
1898
1899 rc = -ENETDOWN;
Herbert Xud4828d82006-06-22 02:28:18 -07001900 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001901
1902out_kfree_skb:
1903 kfree_skb(skb);
1904 return rc;
1905out:
Herbert Xud4828d82006-06-22 02:28:18 -07001906 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001907 return rc;
1908}
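
/*
 * Illustrative sketch, not part of the original file: handing a frame that
 * already carries its link-layer header to dev_queue_xmit(), roughly what a
 * packet socket does. example_send_raw_frame() is a hypothetical name; note
 * that the skb is consumed even when an error is returned.
 */
static int example_send_raw_frame(struct net_device *dev,
				  const void *frame, unsigned int len)
{
	struct sk_buff *skb = alloc_skb(len, GFP_ATOMIC);

	if (!skb)
		return -ENOMEM;
	memcpy(skb_put(skb, len), frame, len);
	skb_reset_mac_header(skb);
	skb->dev = dev;
	skb->protocol = eth_hdr(skb)->h_proto;	/* frame includes the L2 header */
	return dev_queue_xmit(skb);		/* consumes skb, even on error */
}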
1909
1910
1911/*=======================================================================
1912 Receiver routines
1913 =======================================================================*/
1914
Stephen Hemminger6b2bedc2007-03-12 14:33:50 -07001915int netdev_max_backlog __read_mostly = 1000;
1916int netdev_budget __read_mostly = 300;
1917int weight_p __read_mostly = 64; /* old backlog weight */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918
1919DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1920
1921
Linus Torvalds1da177e2005-04-16 15:20:36 -07001922/**
1923 * netif_rx - post buffer to the network code
1924 * @skb: buffer to post
1925 *
1926 * This function receives a packet from a device driver and queues it for
1927 * the upper (protocol) levels to process. It always succeeds. The buffer
1928 * may be dropped during processing for congestion control or by the
1929 * protocol layers.
1930 *
1931 * return values:
1932 * NET_RX_SUCCESS (no congestion)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001933 * NET_RX_DROP (packet was dropped)
1934 *
1935 */
1936
1937int netif_rx(struct sk_buff *skb)
1938{
Linus Torvalds1da177e2005-04-16 15:20:36 -07001939 struct softnet_data *queue;
1940 unsigned long flags;
1941
1942 /* if netpoll wants it, pretend we never saw it */
1943 if (netpoll_rx(skb))
1944 return NET_RX_DROP;
1945
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07001946 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07001947 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948
1949 /*
 1950 * The code is rearranged so that the path is shortest
 1951 * when the CPU is congested but still operating.
1952 */
1953 local_irq_save(flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 queue = &__get_cpu_var(softnet_data);
1955
1956 __get_cpu_var(netdev_rx_stat).total++;
1957 if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1958 if (queue->input_pkt_queue.qlen) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001959enqueue:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 __skb_queue_tail(&queue->input_pkt_queue, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001961 local_irq_restore(flags);
Stephen Hemminger34008d82005-06-23 20:10:00 -07001962 return NET_RX_SUCCESS;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001963 }
1964
Stephen Hemmingerbea33482007-10-03 16:41:36 -07001965 napi_schedule(&queue->backlog);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 goto enqueue;
1967 }
1968
Linus Torvalds1da177e2005-04-16 15:20:36 -07001969 __get_cpu_var(netdev_rx_stat).dropped++;
1970 local_irq_restore(flags);
1971
1972 kfree_skb(skb);
1973 return NET_RX_DROP;
1974}
1975
1976int netif_rx_ni(struct sk_buff *skb)
1977{
1978 int err;
1979
1980 preempt_disable();
1981 err = netif_rx(skb);
1982 if (local_softirq_pending())
1983 do_softirq();
1984 preempt_enable();
1985
1986 return err;
1987}
1988
1989EXPORT_SYMBOL(netif_rx_ni);
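
/*
 * Illustrative sketch, not part of the original file: a hypothetical
 * non-NAPI driver's receive interrupt copies the frame out of hardware
 * and posts it with netif_rx(). eth_type_trans() fills in skb->protocol
 * and skb->pkt_type for Ethernet devices.
 */
static void example_rx_interrupt(struct net_device *dev,
				 const void *frame, unsigned int len)
{
	struct sk_buff *skb = dev_alloc_skb(len + NET_IP_ALIGN);

	if (!skb) {
		dev->stats.rx_dropped++;
		return;
	}
	skb_reserve(skb, NET_IP_ALIGN);		/* align the IP header */
	memcpy(skb_put(skb, len), frame, len);
	skb->protocol = eth_type_trans(skb, dev);
	netif_rx(skb);				/* queue for protocol layers */
}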
1990
Linus Torvalds1da177e2005-04-16 15:20:36 -07001991static void net_tx_action(struct softirq_action *h)
1992{
1993 struct softnet_data *sd = &__get_cpu_var(softnet_data);
1994
1995 if (sd->completion_queue) {
1996 struct sk_buff *clist;
1997
1998 local_irq_disable();
1999 clist = sd->completion_queue;
2000 sd->completion_queue = NULL;
2001 local_irq_enable();
2002
2003 while (clist) {
2004 struct sk_buff *skb = clist;
2005 clist = clist->next;
2006
Ilpo Järvinen547b7922008-07-25 21:43:18 -07002007 WARN_ON(atomic_read(&skb->users));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002008 __kfree_skb(skb);
2009 }
2010 }
2011
2012 if (sd->output_queue) {
David S. Miller37437bb2008-07-16 02:15:04 -07002013 struct Qdisc *head;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002014
2015 local_irq_disable();
2016 head = sd->output_queue;
2017 sd->output_queue = NULL;
2018 local_irq_enable();
2019
2020 while (head) {
David S. Miller37437bb2008-07-16 02:15:04 -07002021 struct Qdisc *q = head;
2022 spinlock_t *root_lock;
2023
Linus Torvalds1da177e2005-04-16 15:20:36 -07002024 head = head->next_sched;
2025
David S. Miller5fb66222008-08-02 20:02:43 -07002026 root_lock = qdisc_lock(q);
David S. Miller37437bb2008-07-16 02:15:04 -07002027 if (spin_trylock(root_lock)) {
Jarek Poplawskidef82a12008-08-17 21:54:43 -07002028 smp_mb__before_clear_bit();
2029 clear_bit(__QDISC_STATE_SCHED,
2030 &q->state);
David S. Miller37437bb2008-07-16 02:15:04 -07002031 qdisc_run(q);
2032 spin_unlock(root_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002033 } else {
David S. Miller195648b2008-08-19 04:00:36 -07002034 if (!test_bit(__QDISC_STATE_DEACTIVATED,
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002035 &q->state)) {
David S. Miller195648b2008-08-19 04:00:36 -07002036 __netif_reschedule(q);
Jarek Poplawskie8a83e12008-09-07 18:41:21 -07002037 } else {
2038 smp_mb__before_clear_bit();
2039 clear_bit(__QDISC_STATE_SCHED,
2040 &q->state);
2041 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002042 }
2043 }
2044 }
2045}
2046
Stephen Hemminger6f05f622007-03-08 20:46:03 -08002047static inline int deliver_skb(struct sk_buff *skb,
2048 struct packet_type *pt_prev,
2049 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002050{
2051 atomic_inc(&skb->users);
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002052 return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002053}
2054
 2055#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
Stephen Hemminger6229e362007-03-21 13:38:47 -07002056/* These hooks defined here for ATM */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002057struct net_bridge;
2058struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
2059 unsigned char *addr);
Stephen Hemminger6229e362007-03-21 13:38:47 -07002060void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002061
Stephen Hemminger6229e362007-03-21 13:38:47 -07002062/*
2063 * If bridge module is loaded call bridging hook.
2064 * returns NULL if packet was consumed.
2065 */
2066struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
2067 struct sk_buff *skb) __read_mostly;
2068static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
2069 struct packet_type **pt_prev, int *ret,
2070 struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002071{
2072 struct net_bridge_port *port;
2073
Stephen Hemminger6229e362007-03-21 13:38:47 -07002074 if (skb->pkt_type == PACKET_LOOPBACK ||
2075 (port = rcu_dereference(skb->dev->br_port)) == NULL)
2076 return skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002077
2078 if (*pt_prev) {
Stephen Hemminger6229e362007-03-21 13:38:47 -07002079 *ret = deliver_skb(skb, *pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 *pt_prev = NULL;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002081 }
2082
Stephen Hemminger6229e362007-03-21 13:38:47 -07002083 return br_handle_frame_hook(port, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084}
2085#else
Stephen Hemminger6229e362007-03-21 13:38:47 -07002086#define handle_bridge(skb, pt_prev, ret, orig_dev) (skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002087#endif
2088
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002089#if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
2090struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
2091EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
2092
2093static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
2094 struct packet_type **pt_prev,
2095 int *ret,
2096 struct net_device *orig_dev)
2097{
2098 if (skb->dev->macvlan_port == NULL)
2099 return skb;
2100
2101 if (*pt_prev) {
2102 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2103 *pt_prev = NULL;
2104 }
2105 return macvlan_handle_frame_hook(skb);
2106}
2107#else
2108#define handle_macvlan(skb, pt_prev, ret, orig_dev) (skb)
2109#endif
2110
Linus Torvalds1da177e2005-04-16 15:20:36 -07002111#ifdef CONFIG_NET_CLS_ACT
 2112/* TODO: Maybe we should just force sch_ingress to be compiled in
 2113 * whenever CONFIG_NET_CLS_ACT is? Otherwise we pay for some useless
 2114 * instructions (a compare and two extra stores) when it is off
 2115 * but CONFIG_NET_CLS_ACT is on.
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002116 * NOTE: This doesn't stop any functionality; if you don't have
Linus Torvalds1da177e2005-04-16 15:20:36 -07002117 * the ingress scheduler, you just can't add policies on ingress.
2118 *
2119 */
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002120static int ing_filter(struct sk_buff *skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002121{
Linus Torvalds1da177e2005-04-16 15:20:36 -07002122 struct net_device *dev = skb->dev;
Herbert Xuf697c3e2007-10-14 00:38:47 -07002123 u32 ttl = G_TC_RTTL(skb->tc_verd);
David S. Miller555353c2008-07-08 17:33:13 -07002124 struct netdev_queue *rxq;
2125 int result = TC_ACT_OK;
2126 struct Qdisc *q;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002127
Herbert Xuf697c3e2007-10-14 00:38:47 -07002128 if (MAX_RED_LOOP < ttl++) {
2129 printk(KERN_WARNING
 2130 "Redir loop detected, dropping packet (%d->%d)\n",
2131 skb->iif, dev->ifindex);
2132 return TC_ACT_SHOT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002133 }
2134
Herbert Xuf697c3e2007-10-14 00:38:47 -07002135 skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
2136 skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
2137
David S. Miller555353c2008-07-08 17:33:13 -07002138 rxq = &dev->rx_queue;
2139
David S. Miller83874002008-07-17 00:53:03 -07002140 q = rxq->qdisc;
David S. Miller8d50b532008-07-30 02:37:46 -07002141 if (q != &noop_qdisc) {
David S. Miller83874002008-07-17 00:53:03 -07002142 spin_lock(qdisc_lock(q));
David S. Millera9312ae2008-08-17 21:51:03 -07002143 if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
2144 result = qdisc_enqueue_root(skb, q);
David S. Miller83874002008-07-17 00:53:03 -07002145 spin_unlock(qdisc_lock(q));
2146 }
Herbert Xuf697c3e2007-10-14 00:38:47 -07002147
Linus Torvalds1da177e2005-04-16 15:20:36 -07002148 return result;
2149}
Herbert Xuf697c3e2007-10-14 00:38:47 -07002150
2151static inline struct sk_buff *handle_ing(struct sk_buff *skb,
2152 struct packet_type **pt_prev,
2153 int *ret, struct net_device *orig_dev)
2154{
David S. Miller8d50b532008-07-30 02:37:46 -07002155 if (skb->dev->rx_queue.qdisc == &noop_qdisc)
Herbert Xuf697c3e2007-10-14 00:38:47 -07002156 goto out;
2157
2158 if (*pt_prev) {
2159 *ret = deliver_skb(skb, *pt_prev, orig_dev);
2160 *pt_prev = NULL;
2161 } else {
2162 /* Huh? Why does turning on AF_PACKET affect this? */
2163 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
2164 }
2165
2166 switch (ing_filter(skb)) {
2167 case TC_ACT_SHOT:
2168 case TC_ACT_STOLEN:
2169 kfree_skb(skb);
2170 return NULL;
2171 }
2172
2173out:
2174 skb->tc_verd = 0;
2175 return skb;
2176}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177#endif
2178
Patrick McHardybc1d0412008-07-14 22:49:30 -07002179/*
2180 * netif_nit_deliver - deliver received packets to network taps
2181 * @skb: buffer
2182 *
2183 * This function is used to deliver incoming packets to network
2184 * taps. It should be used when the normal netif_receive_skb path
2185 * is bypassed, for example because of VLAN acceleration.
2186 */
2187void netif_nit_deliver(struct sk_buff *skb)
2188{
2189 struct packet_type *ptype;
2190
2191 if (list_empty(&ptype_all))
2192 return;
2193
2194 skb_reset_network_header(skb);
2195 skb_reset_transport_header(skb);
2196 skb->mac_len = skb->network_header - skb->mac_header;
2197
2198 rcu_read_lock();
2199 list_for_each_entry_rcu(ptype, &ptype_all, list) {
2200 if (!ptype->dev || ptype->dev == skb->dev)
2201 deliver_skb(skb, ptype, skb->dev);
2202 }
2203 rcu_read_unlock();
2204}
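
/*
 * Illustrative sketch, not part of the original file: a receive path that
 * bypasses netif_receive_skb() (e.g. hardware-accelerated VLAN) calls
 * netif_nit_deliver() first so taps such as packet sockets still see the
 * frame. example_vlan_hwaccel_rx() and the delivery step are assumptions.
 */
static void example_vlan_hwaccel_rx(struct sk_buff *skb)
{
	netif_nit_deliver(skb);	/* let taps observe the frame */
	/* ... deliver directly to the VLAN device here ... */
	kfree_skb(skb);		/* stand-in for the real delivery step */
}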
2205
Stephen Hemminger3b582cc2007-11-01 02:21:47 -07002206/**
2207 * netif_receive_skb - process receive buffer from network
2208 * @skb: buffer to process
2209 *
2210 * netif_receive_skb() is the main receive data processing function.
2211 * It always succeeds. The buffer may be dropped during processing
2212 * for congestion control or by the protocol layers.
2213 *
2214 * This function may only be called from softirq context and interrupts
2215 * should be enabled.
2216 *
2217 * Return values (usually ignored):
2218 * NET_RX_SUCCESS: no congestion
2219 * NET_RX_DROP: packet was dropped
2220 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002221int netif_receive_skb(struct sk_buff *skb)
2222{
2223 struct packet_type *ptype, *pt_prev;
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002224 struct net_device *orig_dev;
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002225 struct net_device *null_or_orig;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002226 int ret = NET_RX_DROP;
Al Viro252e3342006-11-14 20:48:11 -08002227 __be16 type;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002228
Patrick McHardy9b22ea52008-11-04 14:49:57 -08002229 if (skb->vlan_tci && vlan_hwaccel_do_receive(skb))
2230 return NET_RX_SUCCESS;
2231
Linus Torvalds1da177e2005-04-16 15:20:36 -07002232 /* if we've gotten here through NAPI, check netpoll */
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002233 if (netpoll_receive_skb(skb))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002234 return NET_RX_DROP;
2235
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -07002236 if (!skb->tstamp.tv64)
Patrick McHardya61bbcf2005-08-14 17:24:31 -07002237 net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002238
Patrick McHardyc01003c2007-03-29 11:46:52 -07002239 if (!skb->iif)
2240 skb->iif = skb->dev->ifindex;
David S. Miller86e65da2005-08-09 19:36:29 -07002241
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002242 null_or_orig = NULL;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002243 orig_dev = skb->dev;
2244 if (orig_dev->master) {
Joe Eykholt0d7a3682008-07-02 18:22:01 -07002245 if (skb_bond_should_drop(skb))
2246 null_or_orig = orig_dev; /* deliver only exact match */
2247 else
2248 skb->dev = orig_dev->master;
Joe Eykholtcc9bd5c2008-07-02 18:22:00 -07002249 }
Jay Vosburgh8f903c72006-02-21 16:36:44 -08002250
Linus Torvalds1da177e2005-04-16 15:20:36 -07002251 __get_cpu_var(netdev_rx_stat).total++;
2252
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -07002253 skb_reset_network_header(skb);
Arnaldo Carvalho de Melobadff6d2007-03-13 13:06:52 -03002254 skb_reset_transport_header(skb);
Arnaldo Carvalho de Melob0e380b2007-04-10 21:21:55 -07002255 skb->mac_len = skb->network_header - skb->mac_header;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002256
2257 pt_prev = NULL;
2258
2259 rcu_read_lock();
2260
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002261 /* Don't receive packets in an exiting network namespace */
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002262 if (!net_alive(dev_net(skb->dev))) {
2263 kfree_skb(skb);
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002264 goto out;
Eric W. Biederman0a36b342008-11-05 16:00:24 -08002265 }
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -07002266
Linus Torvalds1da177e2005-04-16 15:20:36 -07002267#ifdef CONFIG_NET_CLS_ACT
2268 if (skb->tc_verd & TC_NCLS) {
2269 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
2270 goto ncls;
2271 }
2272#endif
2273
2274 list_for_each_entry_rcu(ptype, &ptype_all, list) {
Joe Eykholtf9823072008-07-02 18:22:02 -07002275 if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2276 ptype->dev == orig_dev) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002277 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002278 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002279 pt_prev = ptype;
2280 }
2281 }
2282
2283#ifdef CONFIG_NET_CLS_ACT
Herbert Xuf697c3e2007-10-14 00:38:47 -07002284 skb = handle_ing(skb, &pt_prev, &ret, orig_dev);
2285 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002286 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287ncls:
2288#endif
2289
Stephen Hemminger6229e362007-03-21 13:38:47 -07002290 skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
2291 if (!skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002292 goto out;
Patrick McHardyb863ceb2007-07-14 18:55:06 -07002293 skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
2294 if (!skb)
2295 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296
2297 type = skb->protocol;
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002298 list_for_each_entry_rcu(ptype,
2299 &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002300 if (ptype->type == type &&
Joe Eykholtf9823072008-07-02 18:22:02 -07002301 (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
2302 ptype->dev == orig_dev)) {
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002303 if (pt_prev)
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002304 ret = deliver_skb(skb, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 pt_prev = ptype;
2306 }
2307 }
2308
2309 if (pt_prev) {
David S. Millerf2ccd8f2005-08-09 19:34:12 -07002310 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002311 } else {
2312 kfree_skb(skb);
 2313 /* Jamal, now you will not be able to escape explaining
 2314 * to me how you were going to use this. :-)
2315 */
2316 ret = NET_RX_DROP;
2317 }
2318
2319out:
2320 rcu_read_unlock();
2321 return ret;
2322}
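
/*
 * Illustrative sketch, compiled out because example_fetch_rx_skb() is an
 * assumed driver-specific helper: the typical NAPI ->poll() loop that feeds
 * netif_receive_skb() from softirq context and completes under budget.
 */
#if 0
static int example_poll(struct napi_struct *napi, int budget)
{
	int work = 0;
	struct sk_buff *skb;

	while (work < budget &&
	       (skb = example_fetch_rx_skb(napi)) != NULL) {
		netif_receive_skb(skb);	/* softirq context, irqs enabled */
		work++;
	}
	if (work < budget)
		napi_complete(napi);	/* done; re-enable rx interrupts */
	return work;
}
#endif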
2323
Stephen Hemminger6e583ce2008-08-03 21:29:57 -07002324/* Network device is going away, flush any packets still pending */
2325static void flush_backlog(void *arg)
2326{
2327 struct net_device *dev = arg;
2328 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2329 struct sk_buff *skb, *tmp;
2330
2331 skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
2332 if (skb->dev == dev) {
2333 __skb_unlink(skb, &queue->input_pkt_queue);
2334 kfree_skb(skb);
2335 }
2336}
2337
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002338static int process_backlog(struct napi_struct *napi, int quota)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002339{
2340 int work = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002341 struct softnet_data *queue = &__get_cpu_var(softnet_data);
2342 unsigned long start_time = jiffies;
2343
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002344 napi->weight = weight_p;
2345 do {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002346 struct sk_buff *skb;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002347
2348 local_irq_disable();
2349 skb = __skb_dequeue(&queue->input_pkt_queue);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002350 if (!skb) {
2351 __napi_complete(napi);
2352 local_irq_enable();
2353 break;
2354 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002355 local_irq_enable();
2356
Linus Torvalds1da177e2005-04-16 15:20:36 -07002357 netif_receive_skb(skb);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002358 } while (++work < quota && jiffies == start_time);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002359
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002360 return work;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002361}
2362
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002363/**
2364 * __napi_schedule - schedule for receive
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07002365 * @n: entry to schedule
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002366 *
2367 * The entry's receive function will be scheduled to run
2368 */
Harvey Harrisonb5606c22008-02-13 15:03:16 -08002369void __napi_schedule(struct napi_struct *n)
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002370{
2371 unsigned long flags;
2372
2373 local_irq_save(flags);
2374 list_add_tail(&n->poll_list, &__get_cpu_var(softnet_data).poll_list);
2375 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2376 local_irq_restore(flags);
2377}
2378EXPORT_SYMBOL(__napi_schedule);
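
/*
 * Illustrative sketch, assuming <linux/interrupt.h>, of how a hypothetical
 * driver interrupt handler reaches __napi_schedule(): napi_schedule_prep()
 * performs the NAPI_STATE_SCHED test so an instance is never queued twice.
 */
static irqreturn_t example_interrupt(int irq, void *dev_id)
{
	struct napi_struct *napi = dev_id;	/* assumed irq cookie */

	/* ... mask the device's rx interrupt here ... */
	if (napi_schedule_prep(napi))
		__napi_schedule(napi);
	return IRQ_HANDLED;
}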
2379
2380
Linus Torvalds1da177e2005-04-16 15:20:36 -07002381static void net_rx_action(struct softirq_action *h)
2382{
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002383 struct list_head *list = &__get_cpu_var(softnet_data).poll_list;
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002384 unsigned long time_limit = jiffies + 2;
Stephen Hemminger51b0bde2005-06-23 20:14:40 -07002385 int budget = netdev_budget;
Matt Mackall53fb95d2005-08-11 19:27:43 -07002386 void *have;
2387
Linus Torvalds1da177e2005-04-16 15:20:36 -07002388 local_irq_disable();
2389
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002390 while (!list_empty(list)) {
2391 struct napi_struct *n;
2392 int work, weight;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002393
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002394 /* If the softirq window is exhausted then punt.
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002395 * Allow this to run for 2 jiffies, which allows
 2396 * an average latency of 1.5/HZ.
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002397 */
Stephen Hemminger24f8b232008-11-03 17:14:38 -08002398 if (unlikely(budget <= 0 || time_after(jiffies, time_limit)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002399 goto softnet_break;
2400
2401 local_irq_enable();
2402
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002403 /* Even though interrupts have been re-enabled, this
2404 * access is safe because interrupts can only add new
2405 * entries to the tail of this list, and only ->poll()
2406 * calls can remove this head entry from the list.
2407 */
2408 n = list_entry(list->next, struct napi_struct, poll_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002409
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002410 have = netpoll_poll_lock(n);
2411
2412 weight = n->weight;
2413
David S. Miller0a7606c2007-10-29 21:28:47 -07002414 /* This NAPI_STATE_SCHED test is for avoiding a race
2415 * with netpoll's poll_napi(). Only the entity which
2416 * obtains the lock and sees NAPI_STATE_SCHED set will
2417 * actually make the ->poll() call. Therefore we avoid
 2418 * accidentally calling ->poll() when NAPI is not scheduled.
2419 */
2420 work = 0;
2421 if (test_bit(NAPI_STATE_SCHED, &n->state))
2422 work = n->poll(n, weight);
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002423
2424 WARN_ON_ONCE(work > weight);
2425
2426 budget -= work;
2427
2428 local_irq_disable();
2429
2430 /* Drivers must not modify the NAPI state if they
2431 * consume the entire weight. In such cases this code
2432 * still "owns" the NAPI instance and therefore can
2433 * move the instance around on the list at-will.
2434 */
David S. Millerfed17f32008-01-07 21:00:40 -08002435 if (unlikely(work == weight)) {
2436 if (unlikely(napi_disable_pending(n)))
2437 __napi_complete(n);
2438 else
2439 list_move_tail(&n->poll_list, list);
2440 }
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002441
2442 netpoll_poll_unlock(have);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002443 }
2444out:
Shannon Nelson515e06c2007-06-23 23:09:23 -07002445 local_irq_enable();
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002446
Chris Leechdb217332006-06-17 21:24:58 -07002447#ifdef CONFIG_NET_DMA
2448 /*
2449 * There may not be any more sk_buffs coming right now, so push
2450 * any pending DMA copies to hardware
2451 */
Dan Williamsd379b012007-07-09 11:56:42 -07002452 if (!cpus_empty(net_dma.channel_mask)) {
2453 int chan_idx;
Mike Travis0e12f842008-05-12 21:21:13 +02002454 for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
Dan Williamsd379b012007-07-09 11:56:42 -07002455 struct dma_chan *chan = net_dma.channels[chan_idx];
2456 if (chan)
2457 dma_async_memcpy_issue_pending(chan);
2458 }
Chris Leechdb217332006-06-17 21:24:58 -07002459 }
2460#endif
Stephen Hemmingerbea33482007-10-03 16:41:36 -07002461
Linus Torvalds1da177e2005-04-16 15:20:36 -07002462 return;
2463
2464softnet_break:
2465 __get_cpu_var(netdev_rx_stat).time_squeeze++;
2466 __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2467 goto out;
2468}
2469
 2470static gifconf_func_t *gifconf_list[NPROTO];
2471
2472/**
 2473 * register_gifconf - register a SIOCGIFCONF handler
2474 * @family: Address family
2475 * @gifconf: Function handler
2476 *
2477 * Register protocol dependent address dumping routines. The handler
2478 * that is passed must not be freed or reused until it has been replaced
2479 * by another handler.
2480 */
2481int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2482{
2483 if (family >= NPROTO)
2484 return -EINVAL;
2485 gifconf_list[family] = gifconf;
2486 return 0;
2487}
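
/*
 * Registration pattern, modeled on the real IPv4 call
 * register_gifconf(PF_INET, inet_gifconf) in net/ipv4/devinet.c. The
 * handler below is a compiled-out sketch: PF_EXAMPLE and the (empty)
 * handler body are assumptions.
 */
#if 0
static int example_gifconf(struct net_device *dev, char __user *buf, int len)
{
	/* Write up to len bytes of ifreq entries for dev; when buf is
	 * NULL, return the number of bytes that would be needed. */
	return 0;
}

static int __init example_family_init(void)
{
	return register_gifconf(PF_EXAMPLE, example_gifconf);
}
#endif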
2488
2489
2490/*
2491 * Map an interface index to its name (SIOCGIFNAME)
2492 */
2493
2494/*
2495 * We need this ioctl for efficient implementation of the
2496 * if_indextoname() function required by the IPv6 API. Without
2497 * it, we would have to search all the interfaces to find a
2498 * match. --pb
2499 */
2500
Eric W. Biederman881d9662007-09-17 11:56:21 -07002501static int dev_ifname(struct net *net, struct ifreq __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002502{
2503 struct net_device *dev;
2504 struct ifreq ifr;
2505
2506 /*
2507 * Fetch the caller's info block.
2508 */
2509
2510 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2511 return -EFAULT;
2512
2513 read_lock(&dev_base_lock);
Eric W. Biederman881d9662007-09-17 11:56:21 -07002514 dev = __dev_get_by_index(net, ifr.ifr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002515 if (!dev) {
2516 read_unlock(&dev_base_lock);
2517 return -ENODEV;
2518 }
2519
2520 strcpy(ifr.ifr_name, dev->name);
2521 read_unlock(&dev_base_lock);
2522
2523 if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2524 return -EFAULT;
2525 return 0;
2526}
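
/*
 * Userspace view of the ioctl above (compiled out of the kernel build):
 * resolving an interface index to a name, essentially what glibc's
 * if_indextoname() does.
 */
#if 0	/* userspace example, not kernel code */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

static int print_ifname(int ifindex)
{
	struct ifreq ifr;
	int fd = socket(AF_INET, SOCK_DGRAM, 0);

	if (fd < 0)
		return -1;
	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_ifindex = ifindex;
	if (ioctl(fd, SIOCGIFNAME, &ifr) < 0) {	/* served by dev_ifname() */
		close(fd);
		return -1;
	}
	printf("%d -> %s\n", ifindex, ifr.ifr_name);
	close(fd);
	return 0;
}
#endif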
2527
2528/*
2529 * Perform a SIOCGIFCONF call. This structure will change
2530 * size eventually, and there is nothing I can do about it.
2531 * Thus we will need a 'compatibility mode'.
2532 */
2533
Eric W. Biederman881d9662007-09-17 11:56:21 -07002534static int dev_ifconf(struct net *net, char __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002535{
2536 struct ifconf ifc;
2537 struct net_device *dev;
2538 char __user *pos;
2539 int len;
2540 int total;
2541 int i;
2542
2543 /*
2544 * Fetch the caller's info block.
2545 */
2546
2547 if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2548 return -EFAULT;
2549
2550 pos = ifc.ifc_buf;
2551 len = ifc.ifc_len;
2552
2553 /*
2554 * Loop over the interfaces, and write an info block for each.
2555 */
2556
2557 total = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002558 for_each_netdev(net, dev) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002559 for (i = 0; i < NPROTO; i++) {
2560 if (gifconf_list[i]) {
2561 int done;
2562 if (!pos)
2563 done = gifconf_list[i](dev, NULL, 0);
2564 else
2565 done = gifconf_list[i](dev, pos + total,
2566 len - total);
2567 if (done < 0)
2568 return -EFAULT;
2569 total += done;
2570 }
2571 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002572 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002573
2574 /*
2575 * All done. Write the updated control block back to the caller.
2576 */
2577 ifc.ifc_len = total;
2578
2579 /*
2580 * Both BSD and Solaris return 0 here, so we do too.
2581 */
2582 return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2583}
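
/*
 * Userspace view of SIOCGIFCONF (compiled out of the kernel build): a
 * fixed-size buffer is filled with one ifreq per configured interface;
 * ifc_len reports how many bytes were written.
 */
#if 0	/* userspace example, not kernel code */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <unistd.h>

static int list_interfaces(void)
{
	struct ifreq ifr[32];
	struct ifconf ifc;
	int fd = socket(AF_INET, SOCK_DGRAM, 0), i, n;

	if (fd < 0)
		return -1;
	memset(&ifc, 0, sizeof(ifc));
	ifc.ifc_len = sizeof(ifr);
	ifc.ifc_req = ifr;
	if (ioctl(fd, SIOCGIFCONF, &ifc) < 0) {	/* served by dev_ifconf() */
		close(fd);
		return -1;
	}
	n = ifc.ifc_len / sizeof(struct ifreq);
	for (i = 0; i < n; i++)
		printf("%s\n", ifr[i].ifr_name);
	close(fd);
	return 0;
}
#endif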
2584
2585#ifdef CONFIG_PROC_FS
2586/*
2587 * This is invoked by the /proc filesystem handler to display a device
2588 * in detail.
2589 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07002590void *dev_seq_start(struct seq_file *seq, loff_t *pos)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002591 __acquires(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002592{
Denis V. Luneve372c412007-11-19 22:31:54 -08002593 struct net *net = seq_file_net(seq);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002594 loff_t off;
2595 struct net_device *dev;
2596
Linus Torvalds1da177e2005-04-16 15:20:36 -07002597 read_lock(&dev_base_lock);
Pavel Emelianov7562f872007-05-03 15:13:45 -07002598 if (!*pos)
2599 return SEQ_START_TOKEN;
2600
2601 off = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002602 for_each_netdev(net, dev)
Pavel Emelianov7562f872007-05-03 15:13:45 -07002603 if (off++ == *pos)
2604 return dev;
2605
2606 return NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002607}
2608
2609void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2610{
Denis V. Luneve372c412007-11-19 22:31:54 -08002611 struct net *net = seq_file_net(seq);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612 ++*pos;
Pavel Emelianov7562f872007-05-03 15:13:45 -07002613 return v == SEQ_START_TOKEN ?
Eric W. Biederman881d9662007-09-17 11:56:21 -07002614 first_net_device(net) : next_net_device((struct net_device *)v);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002615}
2616
2617void dev_seq_stop(struct seq_file *seq, void *v)
Eric Dumazet9a429c42008-01-01 21:58:02 -08002618 __releases(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002619{
2620 read_unlock(&dev_base_lock);
2621}
2622
2623static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2624{
Stephen Hemmingereeda3fd2008-11-19 21:40:23 -08002625 const struct net_device_stats *stats = dev_get_stats(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002626
Rusty Russell5a1b5892007-04-28 21:04:03 -07002627 seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2628 "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2629 dev->name, stats->rx_bytes, stats->rx_packets,
2630 stats->rx_errors,
2631 stats->rx_dropped + stats->rx_missed_errors,
2632 stats->rx_fifo_errors,
2633 stats->rx_length_errors + stats->rx_over_errors +
2634 stats->rx_crc_errors + stats->rx_frame_errors,
2635 stats->rx_compressed, stats->multicast,
2636 stats->tx_bytes, stats->tx_packets,
2637 stats->tx_errors, stats->tx_dropped,
2638 stats->tx_fifo_errors, stats->collisions,
2639 stats->tx_carrier_errors +
2640 stats->tx_aborted_errors +
2641 stats->tx_window_errors +
2642 stats->tx_heartbeat_errors,
2643 stats->tx_compressed);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002644}
2645
2646/*
2647 * Called from the PROCfs module. This now uses the new arbitrary sized
2648 * /proc/net interface to create /proc/net/dev
2649 */
2650static int dev_seq_show(struct seq_file *seq, void *v)
2651{
2652 if (v == SEQ_START_TOKEN)
2653 seq_puts(seq, "Inter-| Receive "
2654 " | Transmit\n"
2655 " face |bytes packets errs drop fifo frame "
2656 "compressed multicast|bytes packets errs "
2657 "drop fifo colls carrier compressed\n");
2658 else
2659 dev_seq_printf_stats(seq, v);
2660 return 0;
2661}
2662
2663static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2664{
2665 struct netif_rx_stats *rc = NULL;
2666
Mike Travis0c0b0ac2008-05-02 16:43:08 -07002667 while (*pos < nr_cpu_ids)
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002668 if (cpu_online(*pos)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002669 rc = &per_cpu(netdev_rx_stat, *pos);
2670 break;
2671 } else
2672 ++*pos;
2673 return rc;
2674}
2675
2676static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2677{
2678 return softnet_get_online(pos);
2679}
2680
2681static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2682{
2683 ++*pos;
2684 return softnet_get_online(pos);
2685}
2686
2687static void softnet_seq_stop(struct seq_file *seq, void *v)
2688{
2689}
2690
2691static int softnet_seq_show(struct seq_file *seq, void *v)
2692{
2693 struct netif_rx_stats *s = v;
2694
2695 seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
Stephen Hemminger31aa02c2005-06-23 20:12:48 -07002696 s->total, s->dropped, s->time_squeeze, 0,
Stephen Hemmingerc1ebcdb2005-06-23 20:08:59 -07002697 0, 0, 0, 0, /* was fastroute */
 2698 s->cpu_collision);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002699 return 0;
2700}
2701
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002702static const struct seq_operations dev_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002703 .start = dev_seq_start,
2704 .next = dev_seq_next,
2705 .stop = dev_seq_stop,
2706 .show = dev_seq_show,
2707};
2708
2709static int dev_seq_open(struct inode *inode, struct file *file)
2710{
Denis V. Luneve372c412007-11-19 22:31:54 -08002711 return seq_open_net(inode, file, &dev_seq_ops,
2712 sizeof(struct seq_net_private));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002713}
2714
Arjan van de Ven9a321442007-02-12 00:55:35 -08002715static const struct file_operations dev_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002716 .owner = THIS_MODULE,
2717 .open = dev_seq_open,
2718 .read = seq_read,
2719 .llseek = seq_lseek,
Denis V. Luneve372c412007-11-19 22:31:54 -08002720 .release = seq_release_net,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002721};
2722
Stephen Hemmingerf6908082007-03-12 14:34:29 -07002723static const struct seq_operations softnet_seq_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002724 .start = softnet_seq_start,
2725 .next = softnet_seq_next,
2726 .stop = softnet_seq_stop,
2727 .show = softnet_seq_show,
2728};
2729
2730static int softnet_seq_open(struct inode *inode, struct file *file)
2731{
2732 return seq_open(file, &softnet_seq_ops);
2733}
2734
Arjan van de Ven9a321442007-02-12 00:55:35 -08002735static const struct file_operations softnet_seq_fops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002736 .owner = THIS_MODULE,
2737 .open = softnet_seq_open,
2738 .read = seq_read,
2739 .llseek = seq_lseek,
2740 .release = seq_release,
2741};
2742
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002743static void *ptype_get_idx(loff_t pos)
2744{
2745 struct packet_type *pt = NULL;
2746 loff_t i = 0;
2747 int t;
2748
2749 list_for_each_entry_rcu(pt, &ptype_all, list) {
2750 if (i == pos)
2751 return pt;
2752 ++i;
2753 }
2754
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002755 for (t = 0; t < PTYPE_HASH_SIZE; t++) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002756 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2757 if (i == pos)
2758 return pt;
2759 ++i;
2760 }
2761 }
2762 return NULL;
2763}
2764
2765static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
Stephen Hemminger72348a42008-01-21 02:27:29 -08002766 __acquires(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002767{
2768 rcu_read_lock();
2769 return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2770}
2771
2772static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2773{
2774 struct packet_type *pt;
2775 struct list_head *nxt;
2776 int hash;
2777
2778 ++*pos;
2779 if (v == SEQ_START_TOKEN)
2780 return ptype_get_idx(0);
2781
2782 pt = v;
2783 nxt = pt->list.next;
2784 if (pt->type == htons(ETH_P_ALL)) {
2785 if (nxt != &ptype_all)
2786 goto found;
2787 hash = 0;
2788 nxt = ptype_base[0].next;
2789 } else
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002790 hash = ntohs(pt->type) & PTYPE_HASH_MASK;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002791
2792 while (nxt == &ptype_base[hash]) {
Pavel Emelyanov82d8a8672007-11-26 20:12:58 +08002793 if (++hash >= PTYPE_HASH_SIZE)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002794 return NULL;
2795 nxt = ptype_base[hash].next;
2796 }
2797found:
2798 return list_entry(nxt, struct packet_type, list);
2799}
2800
2801static void ptype_seq_stop(struct seq_file *seq, void *v)
Stephen Hemminger72348a42008-01-21 02:27:29 -08002802 __releases(RCU)
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002803{
2804 rcu_read_unlock();
2805}
2806
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002807static int ptype_seq_show(struct seq_file *seq, void *v)
2808{
2809 struct packet_type *pt = v;
2810
2811 if (v == SEQ_START_TOKEN)
2812 seq_puts(seq, "Type Device Function\n");
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002813 else if (pt->dev == NULL || dev_net(pt->dev) == seq_file_net(seq)) {
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002814 if (pt->type == htons(ETH_P_ALL))
2815 seq_puts(seq, "ALL ");
2816 else
2817 seq_printf(seq, "%04x", ntohs(pt->type));
2818
Alexey Dobriyan908cd2d2008-11-16 19:50:35 -08002819 seq_printf(seq, " %-8s %pF\n",
2820 pt->dev ? pt->dev->name : "", pt->func);
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002821 }
2822
2823 return 0;
2824}
2825
2826static const struct seq_operations ptype_seq_ops = {
2827 .start = ptype_seq_start,
2828 .next = ptype_seq_next,
2829 .stop = ptype_seq_stop,
2830 .show = ptype_seq_show,
2831};
2832
2833static int ptype_seq_open(struct inode *inode, struct file *file)
2834{
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07002835 return seq_open_net(inode, file, &ptype_seq_ops,
2836 sizeof(struct seq_net_private));
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002837}
2838
2839static const struct file_operations ptype_seq_fops = {
2840 .owner = THIS_MODULE,
2841 .open = ptype_seq_open,
2842 .read = seq_read,
2843 .llseek = seq_lseek,
Pavel Emelyanov2feb27d2008-03-24 14:57:45 -07002844 .release = seq_release_net,
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002845};
2846
2847
Pavel Emelyanov46650792007-10-08 20:38:39 -07002848static int __net_init dev_proc_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002849{
2850 int rc = -ENOMEM;
2851
Eric W. Biederman881d9662007-09-17 11:56:21 -07002852 if (!proc_net_fops_create(net, "dev", S_IRUGO, &dev_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002853 goto out;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002854 if (!proc_net_fops_create(net, "softnet_stat", S_IRUGO, &softnet_seq_fops))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002855 goto out_dev;
Eric W. Biederman881d9662007-09-17 11:56:21 -07002856 if (!proc_net_fops_create(net, "ptype", S_IRUGO, &ptype_seq_fops))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002857 goto out_softnet;
Stephen Hemminger0e1256f2007-03-12 14:35:37 -07002858
Eric W. Biederman881d9662007-09-17 11:56:21 -07002859 if (wext_proc_init(net))
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002860 goto out_ptype;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002861 rc = 0;
2862out:
2863 return rc;
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02002864out_ptype:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002865 proc_net_remove(net, "ptype");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002866out_softnet:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002867 proc_net_remove(net, "softnet_stat");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002868out_dev:
Eric W. Biederman881d9662007-09-17 11:56:21 -07002869 proc_net_remove(net, "dev");
Linus Torvalds1da177e2005-04-16 15:20:36 -07002870 goto out;
2871}
Eric W. Biederman881d9662007-09-17 11:56:21 -07002872
Pavel Emelyanov46650792007-10-08 20:38:39 -07002873static void __net_exit dev_proc_net_exit(struct net *net)
Eric W. Biederman881d9662007-09-17 11:56:21 -07002874{
2875 wext_proc_exit(net);
2876
2877 proc_net_remove(net, "ptype");
2878 proc_net_remove(net, "softnet_stat");
2879 proc_net_remove(net, "dev");
2880}
2881
Denis V. Lunev022cbae2007-11-13 03:23:50 -08002882static struct pernet_operations __net_initdata dev_proc_ops = {
Eric W. Biederman881d9662007-09-17 11:56:21 -07002883 .init = dev_proc_net_init,
2884 .exit = dev_proc_net_exit,
2885};
2886
2887static int __init dev_proc_init(void)
2888{
2889 return register_pernet_subsys(&dev_proc_ops);
2890}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002891#else
2892#define dev_proc_init() 0
2893#endif /* CONFIG_PROC_FS */
2894
2895
2896/**
2897 * netdev_set_master - set up master/slave pair
2898 * @slave: slave device
2899 * @master: new master device
2900 *
2901 * Changes the master device of the slave. Pass %NULL to break the
2902 * bonding. The caller must hold the RTNL semaphore. On a failure
2903 * a negative errno code is returned. On success the reference counts
2904 * are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2905 * function returns zero.
2906 */
2907int netdev_set_master(struct net_device *slave, struct net_device *master)
2908{
2909 struct net_device *old = slave->master;
2910
2911 ASSERT_RTNL();
2912
2913 if (master) {
2914 if (old)
2915 return -EBUSY;
2916 dev_hold(master);
2917 }
2918
2919 slave->master = master;
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09002920
Linus Torvalds1da177e2005-04-16 15:20:36 -07002921 synchronize_net();
2922
2923 if (old)
2924 dev_put(old);
2925
2926 if (master)
2927 slave->flags |= IFF_SLAVE;
2928 else
2929 slave->flags &= ~IFF_SLAVE;
2930
2931 rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2932 return 0;
2933}
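
/*
 * Illustrative sketch (not part of this file): a bonding-style driver
 * could enslave and release a device roughly like this. "bond_dev" and
 * "slave_dev" are hypothetical net_device pointers; the caller must
 * already hold the RTNL semaphore.
 *
 *	err = netdev_set_master(slave_dev, bond_dev);
 *	if (err)
 *		return err;
 *	...
 *	netdev_set_master(slave_dev, NULL);	release the slave again
 */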
2934
Patrick McHardyb6c40d62008-10-07 15:26:48 -07002935static void dev_change_rx_flags(struct net_device *dev, int flags)
2936{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08002937 const struct net_device_ops *ops = dev->netdev_ops;
2938
2939 if ((dev->flags & IFF_UP) && ops->ndo_change_rx_flags)
2940 ops->ndo_change_rx_flags(dev, flags);
Patrick McHardyb6c40d62008-10-07 15:26:48 -07002941}
2942
Wang Chendad9b332008-06-18 01:48:28 -07002943static int __dev_set_promiscuity(struct net_device *dev, int inc)
Patrick McHardy4417da62007-06-27 01:28:10 -07002944{
2945 unsigned short old_flags = dev->flags;
2946
Patrick McHardy24023452007-07-14 18:51:31 -07002947 ASSERT_RTNL();
2948
Wang Chendad9b332008-06-18 01:48:28 -07002949 dev->flags |= IFF_PROMISC;
2950 dev->promiscuity += inc;
2951 if (dev->promiscuity == 0) {
2952 /*
2953 * Avoid overflow.
2954		 * If inc would overflow the count, leave IFF_PROMISC untouched and return an error.
2955 */
2956 if (inc < 0)
2957 dev->flags &= ~IFF_PROMISC;
2958 else {
2959 dev->promiscuity -= inc;
2960			printk(KERN_WARNING "%s: promiscuity counter would "
2961			       "overflow, set promiscuity failed; promiscuity "
2962			       "feature of the device might be broken.\n", dev->name);
2963 return -EOVERFLOW;
2964 }
2965 }
Patrick McHardy4417da62007-06-27 01:28:10 -07002966 if (dev->flags != old_flags) {
2967 printk(KERN_INFO "device %s %s promiscuous mode\n",
2968 dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2969 "left");
Klaus Heinrich Kiwi7759db82008-01-23 22:57:45 -05002970 if (audit_enabled)
2971 audit_log(current->audit_context, GFP_ATOMIC,
2972 AUDIT_ANOM_PROMISCUOUS,
2973 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
2974 dev->name, (dev->flags & IFF_PROMISC),
2975 (old_flags & IFF_PROMISC),
2976 audit_get_loginuid(current),
2977 current->uid, current->gid,
2978 audit_get_sessionid(current));
Patrick McHardy24023452007-07-14 18:51:31 -07002979
Patrick McHardyb6c40d62008-10-07 15:26:48 -07002980 dev_change_rx_flags(dev, IFF_PROMISC);
Patrick McHardy4417da62007-06-27 01:28:10 -07002981 }
Wang Chendad9b332008-06-18 01:48:28 -07002982 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07002983}
2984
Linus Torvalds1da177e2005-04-16 15:20:36 -07002985/**
2986 * dev_set_promiscuity - update promiscuity count on a device
2987 * @dev: device
2988 * @inc: modifier
2989 *
Stephen Hemminger3041a062006-05-26 13:25:24 -07002990 * Add or remove promiscuity from a device. While the count in the device
Linus Torvalds1da177e2005-04-16 15:20:36 -07002991 * remains above zero the interface remains promiscuous. Once it hits zero
2992 * the device reverts back to normal filtering operation. A negative inc
2993 * value is used to drop promiscuity on the device.
Wang Chendad9b332008-06-18 01:48:28 -07002994 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07002995 */
Wang Chendad9b332008-06-18 01:48:28 -07002996int dev_set_promiscuity(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002997{
2998 unsigned short old_flags = dev->flags;
Wang Chendad9b332008-06-18 01:48:28 -07002999 int err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003000
Wang Chendad9b332008-06-18 01:48:28 -07003001 err = __dev_set_promiscuity(dev, inc);
Patrick McHardy4b5a6982008-07-06 15:49:08 -07003002 if (err < 0)
Wang Chendad9b332008-06-18 01:48:28 -07003003 return err;
Patrick McHardy4417da62007-06-27 01:28:10 -07003004 if (dev->flags != old_flags)
3005 dev_set_rx_mode(dev);
Wang Chendad9b332008-06-18 01:48:28 -07003006 return err;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003007}
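
/*
 * Illustrative sketch (not an excerpt from any driver): a packet-capture
 * style caller would typically bump and drop promiscuity like this,
 * under the RTNL; "dev" stands for the target device.
 *
 *	rtnl_lock();
 *	err = dev_set_promiscuity(dev, 1);	start sniffing
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_promiscuity(dev, -1);		stop sniffing
 *	rtnl_unlock();
 */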
3008
3009/**
3010 * dev_set_allmulti - update allmulti count on a device
3011 * @dev: device
3012 * @inc: modifier
3013 *
3014 * Add or remove reception of all multicast frames on a device. While the
3015 * count in the device remains above zero the interface remains listening
3016 * to all multicast frames. Once it hits zero the device reverts back to normal
3017 * filtering operation. A negative @inc value is used to drop the counter
3018 * when releasing a resource needing all multicasts.
Wang Chendad9b332008-06-18 01:48:28 -07003019 * Return 0 if successful or a negative errno code on error.
Linus Torvalds1da177e2005-04-16 15:20:36 -07003020 */
3021
Wang Chendad9b332008-06-18 01:48:28 -07003022int dev_set_allmulti(struct net_device *dev, int inc)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003023{
3024 unsigned short old_flags = dev->flags;
3025
Patrick McHardy24023452007-07-14 18:51:31 -07003026 ASSERT_RTNL();
3027
Linus Torvalds1da177e2005-04-16 15:20:36 -07003028 dev->flags |= IFF_ALLMULTI;
Wang Chendad9b332008-06-18 01:48:28 -07003029 dev->allmulti += inc;
3030 if (dev->allmulti == 0) {
3031 /*
3032 * Avoid overflow.
3033		 * If inc would overflow the count, leave IFF_ALLMULTI untouched and return an error.
3034 */
3035 if (inc < 0)
3036 dev->flags &= ~IFF_ALLMULTI;
3037 else {
3038 dev->allmulti -= inc;
3039			printk(KERN_WARNING "%s: allmulti counter would "
3040			       "overflow, set allmulti failed; allmulti feature "
3041			       "of the device might be broken.\n", dev->name);
3042 return -EOVERFLOW;
3043 }
3044 }
Patrick McHardy24023452007-07-14 18:51:31 -07003045 if (dev->flags ^ old_flags) {
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003046 dev_change_rx_flags(dev, IFF_ALLMULTI);
Patrick McHardy4417da62007-06-27 01:28:10 -07003047 dev_set_rx_mode(dev);
Patrick McHardy24023452007-07-14 18:51:31 -07003048 }
Wang Chendad9b332008-06-18 01:48:28 -07003049 return 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003050}
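
/*
 * Illustrative sketch (hypothetical caller): a protocol that needs to see
 * every multicast frame while it holds some resource pairs the calls the
 * same way as for promiscuity:
 *
 *	rtnl_lock();
 *	err = dev_set_allmulti(dev, 1);		while the resource is held
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_set_allmulti(dev, -1);		when it is released
 *	rtnl_unlock();
 */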
3051
3052/*
3053 * Upload unicast and multicast address lists to device and
3054 * configure RX filtering. When the device doesn't support unicast
Joe Perches53ccaae2007-12-20 14:02:06 -08003055 * filtering it is put in promiscuous mode while unicast addresses
Patrick McHardy4417da62007-06-27 01:28:10 -07003056 * are present.
3057 */
3058void __dev_set_rx_mode(struct net_device *dev)
3059{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003060 const struct net_device_ops *ops = dev->netdev_ops;
3061
Patrick McHardy4417da62007-06-27 01:28:10 -07003062 /* dev_open will call this function so the list will stay sane. */
3063 if (!(dev->flags&IFF_UP))
3064 return;
3065
3066 if (!netif_device_present(dev))
YOSHIFUJI Hideaki40b77c92007-07-19 10:43:23 +09003067 return;
Patrick McHardy4417da62007-06-27 01:28:10 -07003068
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003069 if (ops->ndo_set_rx_mode)
3070 ops->ndo_set_rx_mode(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003071 else {
3072 /* Unicast addresses changes may only happen under the rtnl,
3073 * therefore calling __dev_set_promiscuity here is safe.
3074 */
3075 if (dev->uc_count > 0 && !dev->uc_promisc) {
3076 __dev_set_promiscuity(dev, 1);
3077 dev->uc_promisc = 1;
3078 } else if (dev->uc_count == 0 && dev->uc_promisc) {
3079 __dev_set_promiscuity(dev, -1);
3080 dev->uc_promisc = 0;
3081 }
3082
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003083 if (ops->ndo_set_multicast_list)
3084 ops->ndo_set_multicast_list(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003085 }
3086}
3087
3088void dev_set_rx_mode(struct net_device *dev)
3089{
David S. Millerb9e40852008-07-15 00:15:08 -07003090 netif_addr_lock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003091 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003092 netif_addr_unlock_bh(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003093}
3094
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003095int __dev_addr_delete(struct dev_addr_list **list, int *count,
3096 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003097{
3098 struct dev_addr_list *da;
3099
3100 for (; (da = *list) != NULL; list = &da->next) {
3101 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3102 alen == da->da_addrlen) {
3103 if (glbl) {
3104 int old_glbl = da->da_gusers;
3105 da->da_gusers = 0;
3106 if (old_glbl == 0)
3107 break;
3108 }
3109 if (--da->da_users)
3110 return 0;
3111
3112 *list = da->next;
3113 kfree(da);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003114 (*count)--;
Patrick McHardybf742482007-06-27 01:26:19 -07003115 return 0;
3116 }
3117 }
3118 return -ENOENT;
3119}
3120
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003121int __dev_addr_add(struct dev_addr_list **list, int *count,
3122 void *addr, int alen, int glbl)
Patrick McHardybf742482007-06-27 01:26:19 -07003123{
3124 struct dev_addr_list *da;
3125
3126 for (da = *list; da != NULL; da = da->next) {
3127 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
3128 da->da_addrlen == alen) {
3129 if (glbl) {
3130 int old_glbl = da->da_gusers;
3131 da->da_gusers = 1;
3132 if (old_glbl)
3133 return 0;
3134 }
3135 da->da_users++;
3136 return 0;
3137 }
3138 }
3139
Jorge Boncompte [DTI2]12aa3432008-02-19 14:17:04 -08003140 da = kzalloc(sizeof(*da), GFP_ATOMIC);
Patrick McHardybf742482007-06-27 01:26:19 -07003141 if (da == NULL)
3142 return -ENOMEM;
3143 memcpy(da->da_addr, addr, alen);
3144 da->da_addrlen = alen;
3145 da->da_users = 1;
3146 da->da_gusers = glbl ? 1 : 0;
3147 da->next = *list;
3148 *list = da;
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003149 (*count)++;
Patrick McHardybf742482007-06-27 01:26:19 -07003150 return 0;
3151}
3152
Patrick McHardy4417da62007-06-27 01:28:10 -07003153/**
3154 * dev_unicast_delete - Release secondary unicast address.
3155 * @dev: device
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003156 * @addr: address to delete
3157 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003158 *
3159 * Release reference to a secondary unicast address and remove it
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003160 * from the device if the reference count drops to zero.
Patrick McHardy4417da62007-06-27 01:28:10 -07003161 *
3162 * The caller must hold the rtnl_mutex.
3163 */
3164int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
3165{
3166 int err;
3167
3168 ASSERT_RTNL();
3169
David S. Millerb9e40852008-07-15 00:15:08 -07003170 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003171 err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3172 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003173 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003174 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003175 return err;
3176}
3177EXPORT_SYMBOL(dev_unicast_delete);
3178
3179/**
3180 * dev_unicast_add - add a secondary unicast address
3181 * @dev: device
Wang Chen5dbaec52008-06-27 19:35:16 -07003182 * @addr: address to add
Randy Dunlap0ed72ec2007-07-26 00:03:29 -07003183 * @alen: length of @addr
Patrick McHardy4417da62007-06-27 01:28:10 -07003184 *
3185 * Add a secondary unicast address to the device or increase
3186 * the reference count if it already exists.
3187 *
3188 * The caller must hold the rtnl_mutex.
3189 */
3190int dev_unicast_add(struct net_device *dev, void *addr, int alen)
3191{
3192 int err;
3193
3194 ASSERT_RTNL();
3195
David S. Millerb9e40852008-07-15 00:15:08 -07003196 netif_addr_lock_bh(dev);
Patrick McHardy61cbc2f2007-06-30 13:35:52 -07003197 err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
3198 if (!err)
Patrick McHardy4417da62007-06-27 01:28:10 -07003199 __dev_set_rx_mode(dev);
David S. Millerb9e40852008-07-15 00:15:08 -07003200 netif_addr_unlock_bh(dev);
Patrick McHardy4417da62007-06-27 01:28:10 -07003201 return err;
3202}
3203EXPORT_SYMBOL(dev_unicast_add);
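
/*
 * Illustrative sketch (assumed values): accepting frames for one extra
 * unicast MAC address on an Ethernet device. The address below is made up.
 *
 *	static const u8 extra_mac[ETH_ALEN] =
 *		{ 0x00, 0x16, 0x3e, 0x12, 0x34, 0x56 };
 *
 *	rtnl_lock();
 *	err = dev_unicast_add(dev, (void *)extra_mac, ETH_ALEN);
 *	rtnl_unlock();
 *	...
 *	rtnl_lock();
 *	dev_unicast_delete(dev, (void *)extra_mac, ETH_ALEN);
 *	rtnl_unlock();
 */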
3204
Chris Leeche83a2ea2008-01-31 16:53:23 -08003205int __dev_addr_sync(struct dev_addr_list **to, int *to_count,
3206 struct dev_addr_list **from, int *from_count)
3207{
3208 struct dev_addr_list *da, *next;
3209 int err = 0;
3210
3211 da = *from;
3212 while (da != NULL) {
3213 next = da->next;
3214 if (!da->da_synced) {
3215 err = __dev_addr_add(to, to_count,
3216 da->da_addr, da->da_addrlen, 0);
3217 if (err < 0)
3218 break;
3219 da->da_synced = 1;
3220 da->da_users++;
3221 } else if (da->da_users == 1) {
3222 __dev_addr_delete(to, to_count,
3223 da->da_addr, da->da_addrlen, 0);
3224 __dev_addr_delete(from, from_count,
3225 da->da_addr, da->da_addrlen, 0);
3226 }
3227 da = next;
3228 }
3229 return err;
3230}
3231
3232void __dev_addr_unsync(struct dev_addr_list **to, int *to_count,
3233 struct dev_addr_list **from, int *from_count)
3234{
3235 struct dev_addr_list *da, *next;
3236
3237 da = *from;
3238 while (da != NULL) {
3239 next = da->next;
3240 if (da->da_synced) {
3241 __dev_addr_delete(to, to_count,
3242 da->da_addr, da->da_addrlen, 0);
3243 da->da_synced = 0;
3244 __dev_addr_delete(from, from_count,
3245 da->da_addr, da->da_addrlen, 0);
3246 }
3247 da = next;
3248 }
3249}
3250
3251/**
3252 * dev_unicast_sync - Synchronize device's unicast list to another device
3253 * @to: destination device
3254 * @from: source device
3255 *
3256 * Add newly added addresses to the destination device and release
3257 * addresses that have no users left. The source device must be
3258 * locked by netif_tx_lock_bh.
3259 * locked by netif_addr_lock_bh.
3260 * This function is intended to be called from the dev->set_rx_mode
3261 * function of layered software devices.
3262 */
3263int dev_unicast_sync(struct net_device *to, struct net_device *from)
3264{
3265 int err = 0;
3266
David S. Millerb9e40852008-07-15 00:15:08 -07003267 netif_addr_lock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003268 err = __dev_addr_sync(&to->uc_list, &to->uc_count,
3269 &from->uc_list, &from->uc_count);
3270 if (!err)
3271 __dev_set_rx_mode(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003272 netif_addr_unlock_bh(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003273 return err;
3274}
3275EXPORT_SYMBOL(dev_unicast_sync);
3276
3277/**
Randy Dunlapbc2cda12008-02-13 15:03:25 -08003278 * dev_unicast_unsync - Remove synchronized addresses from the destination device
Chris Leeche83a2ea2008-01-31 16:53:23 -08003279 * @to: destination device
3280 * @from: source device
3281 *
3282 * Remove all addresses that were added to the destination device by
3283 * dev_unicast_sync(). This function is intended to be called from the
3284 * dev->stop function of layered software devices.
3285 */
3286void dev_unicast_unsync(struct net_device *to, struct net_device *from)
3287{
David S. Millerb9e40852008-07-15 00:15:08 -07003288 netif_addr_lock_bh(from);
David S. Millere308a5d2008-07-15 00:13:44 -07003289 netif_addr_lock(to);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003290
3291 __dev_addr_unsync(&to->uc_list, &to->uc_count,
3292 &from->uc_list, &from->uc_count);
3293 __dev_set_rx_mode(to);
3294
David S. Millere308a5d2008-07-15 00:13:44 -07003295 netif_addr_unlock(to);
David S. Millerb9e40852008-07-15 00:15:08 -07003296 netif_addr_unlock_bh(from);
Chris Leeche83a2ea2008-01-31 16:53:23 -08003297}
3298EXPORT_SYMBOL(dev_unicast_unsync);
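
/*
 * Illustrative sketch of the intended sync/unsync pairing for a layered
 * device (compare the VLAN and macvlan drivers); "vlan", "real_dev" and
 * the my_* helpers are hypothetical names:
 *
 *	static void my_set_rx_mode(struct net_device *vlan)
 *	{
 *		struct net_device *real_dev = my_get_real_dev(vlan);
 *
 *		dev_unicast_sync(real_dev, vlan);
 *	}
 *
 *	static int my_stop(struct net_device *vlan)
 *	{
 *		struct net_device *real_dev = my_get_real_dev(vlan);
 *
 *		dev_unicast_unsync(real_dev, vlan);
 *		return 0;
 *	}
 */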
3299
Denis Cheng12972622007-07-18 02:12:56 -07003300static void __dev_addr_discard(struct dev_addr_list **list)
3301{
3302 struct dev_addr_list *tmp;
3303
3304 while (*list != NULL) {
3305 tmp = *list;
3306 *list = tmp->next;
3307 if (tmp->da_users > tmp->da_gusers)
3308			printk(KERN_ERR "__dev_addr_discard: address leakage! "
3309 "da_users=%d\n", tmp->da_users);
3310 kfree(tmp);
3311 }
3312}
3313
Denis Cheng26cc2522007-07-18 02:12:03 -07003314static void dev_addr_discard(struct net_device *dev)
Patrick McHardy4417da62007-06-27 01:28:10 -07003315{
David S. Millerb9e40852008-07-15 00:15:08 -07003316 netif_addr_lock_bh(dev);
Denis Cheng26cc2522007-07-18 02:12:03 -07003317
Patrick McHardy4417da62007-06-27 01:28:10 -07003318 __dev_addr_discard(&dev->uc_list);
3319 dev->uc_count = 0;
Patrick McHardy4417da62007-06-27 01:28:10 -07003320
Denis Cheng456ad752007-07-18 02:10:54 -07003321 __dev_addr_discard(&dev->mc_list);
3322 dev->mc_count = 0;
Denis Cheng26cc2522007-07-18 02:12:03 -07003323
David S. Millerb9e40852008-07-15 00:15:08 -07003324 netif_addr_unlock_bh(dev);
Denis Cheng456ad752007-07-18 02:10:54 -07003325}
3326
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003327/**
3328 * dev_get_flags - get flags reported to userspace
3329 * @dev: device
3330 *
3331 * Get the combination of flag bits exported through APIs to userspace.
3332 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003333unsigned dev_get_flags(const struct net_device *dev)
3334{
3335 unsigned flags;
3336
3337 flags = (dev->flags & ~(IFF_PROMISC |
3338 IFF_ALLMULTI |
Stefan Rompfb00055a2006-03-20 17:09:11 -08003339 IFF_RUNNING |
3340 IFF_LOWER_UP |
3341 IFF_DORMANT)) |
Linus Torvalds1da177e2005-04-16 15:20:36 -07003342 (dev->gflags & (IFF_PROMISC |
3343 IFF_ALLMULTI));
3344
Stefan Rompfb00055a2006-03-20 17:09:11 -08003345 if (netif_running(dev)) {
3346 if (netif_oper_up(dev))
3347 flags |= IFF_RUNNING;
3348 if (netif_carrier_ok(dev))
3349 flags |= IFF_LOWER_UP;
3350 if (netif_dormant(dev))
3351 flags |= IFF_DORMANT;
3352 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003353
3354 return flags;
3355}
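
/*
 * Illustrative sketch: a reader interested in operational state would
 * test the combined flags returned here rather than dev->flags directly:
 *
 *	unsigned flags = dev_get_flags(dev);
 *	bool operational = (flags & (IFF_UP | IFF_RUNNING))
 *				== (IFF_UP | IFF_RUNNING);
 */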
3356
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003357/**
3358 * dev_change_flags - change device settings
3359 * @dev: device
3360 * @flags: device state flags
3361 *
3362 * Change settings on device based state flags. The flags are
3363 * in the userspace exported format.
3364 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003365int dev_change_flags(struct net_device *dev, unsigned flags)
3366{
Thomas Graf7c355f52007-06-05 16:03:03 -07003367 int ret, changes;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003368 int old_flags = dev->flags;
3369
Patrick McHardy24023452007-07-14 18:51:31 -07003370 ASSERT_RTNL();
3371
Linus Torvalds1da177e2005-04-16 15:20:36 -07003372 /*
3373 * Set the flags on our device.
3374 */
3375
3376 dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
3377 IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
3378 IFF_AUTOMEDIA)) |
3379 (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
3380 IFF_ALLMULTI));
3381
3382 /*
3383 * Load in the correct multicast list now the flags have changed.
3384 */
3385
Patrick McHardyb6c40d62008-10-07 15:26:48 -07003386 if ((old_flags ^ flags) & IFF_MULTICAST)
3387 dev_change_rx_flags(dev, IFF_MULTICAST);
Patrick McHardy24023452007-07-14 18:51:31 -07003388
Patrick McHardy4417da62007-06-27 01:28:10 -07003389 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003390
3391 /*
3392	 * Have we downed the interface? We handle IFF_UP ourselves
3393 * according to user attempts to set it, rather than blindly
3394 * setting it.
3395 */
3396
3397 ret = 0;
3398 if ((old_flags ^ flags) & IFF_UP) { /* Bit is different ? */
3399 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
3400
3401 if (!ret)
Patrick McHardy4417da62007-06-27 01:28:10 -07003402 dev_set_rx_mode(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003403 }
3404
3405 if (dev->flags & IFF_UP &&
3406 ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
3407 IFF_VOLATILE)))
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003408 call_netdevice_notifiers(NETDEV_CHANGE, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003409
3410 if ((flags ^ dev->gflags) & IFF_PROMISC) {
3411 int inc = (flags & IFF_PROMISC) ? +1 : -1;
3412 dev->gflags ^= IFF_PROMISC;
3413 dev_set_promiscuity(dev, inc);
3414 }
3415
3416 /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
3417	   is important. Some (broken) drivers set IFF_PROMISC when
3418	   IFF_ALLMULTI is requested, without asking us and without reporting it.
3419 */
3420 if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
3421 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
3422 dev->gflags ^= IFF_ALLMULTI;
3423 dev_set_allmulti(dev, inc);
3424 }
3425
Thomas Graf7c355f52007-06-05 16:03:03 -07003426 /* Exclude state transition flags, already notified */
3427 changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
3428 if (changes)
3429 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003430
3431 return ret;
3432}
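
/*
 * Illustrative sketch (in-kernel caller assumed): bringing an interface
 * up is just setting IFF_UP in the userspace-format flags, under the RTNL:
 *
 *	rtnl_lock();
 *	err = dev_change_flags(dev, dev_get_flags(dev) | IFF_UP);
 *	rtnl_unlock();
 */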
3433
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003434/**
3435 * dev_set_mtu - Change maximum transfer unit
3436 * @dev: device
3437 * @new_mtu: new transfer unit
3438 *
3439 * Change the maximum transfer size of the network device.
3440 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003441int dev_set_mtu(struct net_device *dev, int new_mtu)
3442{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003443 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003444 int err;
3445
3446 if (new_mtu == dev->mtu)
3447 return 0;
3448
3449 /* MTU must be positive. */
3450 if (new_mtu < 0)
3451 return -EINVAL;
3452
3453 if (!netif_device_present(dev))
3454 return -ENODEV;
3455
3456 err = 0;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003457 if (ops->ndo_change_mtu)
3458 err = ops->ndo_change_mtu(dev, new_mtu);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003459 else
3460 dev->mtu = new_mtu;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003461
Linus Torvalds1da177e2005-04-16 15:20:36 -07003462 if (!err && dev->flags & IFF_UP)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003463 call_netdevice_notifiers(NETDEV_CHANGEMTU, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003464 return err;
3465}
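
/*
 * Illustrative sketch: switching a device to jumbo frames, assuming the
 * driver's ndo_change_mtu accepts the value. Callers normally hold the
 * RTNL, as the ioctl path in this file does.
 *
 *	rtnl_lock();
 *	err = dev_set_mtu(dev, 9000);
 *	rtnl_unlock();
 */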
3466
Stephen Hemmingerf0db2752008-09-30 02:23:58 -07003467/**
3468 * dev_set_mac_address - Change Media Access Control Address
3469 * @dev: device
3470 * @sa: new address
3471 *
3472 * Change the hardware (MAC) address of the device
3473 */
Linus Torvalds1da177e2005-04-16 15:20:36 -07003474int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
3475{
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003476 const struct net_device_ops *ops = dev->netdev_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -07003477 int err;
3478
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003479 if (!ops->ndo_set_mac_address)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003480 return -EOPNOTSUPP;
3481 if (sa->sa_family != dev->type)
3482 return -EINVAL;
3483 if (!netif_device_present(dev))
3484 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003485 err = ops->ndo_set_mac_address(dev, sa);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003486 if (!err)
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003487 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003488 return err;
3489}
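
/*
 * Illustrative sketch (made-up buffer name "new_mac"): the sockaddr
 * handed to dev_set_mac_address() must carry the device's own address
 * family or the call fails with -EINVAL.
 *
 *	struct sockaddr sa;
 *
 *	sa.sa_family = dev->type;
 *	memcpy(sa.sa_data, new_mac, dev->addr_len);
 *	rtnl_lock();
 *	err = dev_set_mac_address(dev, &sa);
 *	rtnl_unlock();
 */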
3490
3491/*
Jeff Garzik14e3e072007-10-08 00:06:32 -07003492 * Perform the SIOCxIFxxx calls, inside read_lock(dev_base_lock)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003493 */
Jeff Garzik14e3e072007-10-08 00:06:32 -07003494static int dev_ifsioc_locked(struct net *net, struct ifreq *ifr, unsigned int cmd)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003495{
3496 int err;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003497 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003498
3499 if (!dev)
3500 return -ENODEV;
3501
3502 switch (cmd) {
3503 case SIOCGIFFLAGS: /* Get interface flags */
3504 ifr->ifr_flags = dev_get_flags(dev);
3505 return 0;
3506
Linus Torvalds1da177e2005-04-16 15:20:36 -07003507 case SIOCGIFMETRIC: /* Get the metric on the interface
3508 (currently unused) */
3509 ifr->ifr_metric = 0;
3510 return 0;
3511
Linus Torvalds1da177e2005-04-16 15:20:36 -07003512 case SIOCGIFMTU: /* Get the MTU of a device */
3513 ifr->ifr_mtu = dev->mtu;
3514 return 0;
3515
Linus Torvalds1da177e2005-04-16 15:20:36 -07003516 case SIOCGIFHWADDR:
3517 if (!dev->addr_len)
3518 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3519 else
3520 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3521 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3522 ifr->ifr_hwaddr.sa_family = dev->type;
3523 return 0;
3524
Jeff Garzik14e3e072007-10-08 00:06:32 -07003525 case SIOCGIFSLAVE:
3526 err = -EINVAL;
3527 break;
3528
3529 case SIOCGIFMAP:
3530 ifr->ifr_map.mem_start = dev->mem_start;
3531 ifr->ifr_map.mem_end = dev->mem_end;
3532 ifr->ifr_map.base_addr = dev->base_addr;
3533 ifr->ifr_map.irq = dev->irq;
3534 ifr->ifr_map.dma = dev->dma;
3535 ifr->ifr_map.port = dev->if_port;
3536 return 0;
3537
3538 case SIOCGIFINDEX:
3539 ifr->ifr_ifindex = dev->ifindex;
3540 return 0;
3541
3542 case SIOCGIFTXQLEN:
3543 ifr->ifr_qlen = dev->tx_queue_len;
3544 return 0;
3545
3546 default:
3547 /* dev_ioctl() should ensure this case
3548 * is never reached
3549 */
3550 WARN_ON(1);
3551 err = -EINVAL;
3552 break;
3553
3554 }
3555 return err;
3556}
3557
3558/*
3559 * Perform the SIOCxIFxxx calls, inside rtnl_lock()
3560 */
3561static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
3562{
3563 int err;
3564 struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003565	const struct net_device_ops *ops;
Jeff Garzik14e3e072007-10-08 00:06:32 -07003566
3567	if (!dev)
3568		return -ENODEV;
3569	ops = dev->netdev_ops;

3570 switch (cmd) {
3571 case SIOCSIFFLAGS: /* Set interface flags */
3572 return dev_change_flags(dev, ifr->ifr_flags);
3573
3574 case SIOCSIFMETRIC: /* Set the metric on the interface
3575 (currently unused) */
3576 return -EOPNOTSUPP;
3577
3578 case SIOCSIFMTU: /* Set the MTU of a device */
3579 return dev_set_mtu(dev, ifr->ifr_mtu);
3580
Linus Torvalds1da177e2005-04-16 15:20:36 -07003581 case SIOCSIFHWADDR:
3582 return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3583
3584 case SIOCSIFHWBROADCAST:
3585 if (ifr->ifr_hwaddr.sa_family != dev->type)
3586 return -EINVAL;
3587 memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3588 min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
Pavel Emelyanov056925a2007-09-16 15:42:43 -07003589 call_netdevice_notifiers(NETDEV_CHANGEADDR, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003590 return 0;
3591
Linus Torvalds1da177e2005-04-16 15:20:36 -07003592 case SIOCSIFMAP:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003593 if (ops->ndo_set_config) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003594 if (!netif_device_present(dev))
3595 return -ENODEV;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003596 return ops->ndo_set_config(dev, &ifr->ifr_map);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003597 }
3598 return -EOPNOTSUPP;
3599
3600 case SIOCADDMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003601 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003602 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3603 return -EINVAL;
3604 if (!netif_device_present(dev))
3605 return -ENODEV;
3606 return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3607 dev->addr_len, 1);
3608
3609 case SIOCDELMULTI:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003610 if ((!ops->ndo_set_multicast_list && !ops->ndo_set_rx_mode) ||
Linus Torvalds1da177e2005-04-16 15:20:36 -07003611 ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3612 return -EINVAL;
3613 if (!netif_device_present(dev))
3614 return -ENODEV;
3615 return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3616 dev->addr_len, 1);
3617
Linus Torvalds1da177e2005-04-16 15:20:36 -07003618 case SIOCSIFTXQLEN:
3619 if (ifr->ifr_qlen < 0)
3620 return -EINVAL;
3621 dev->tx_queue_len = ifr->ifr_qlen;
3622 return 0;
3623
3624 case SIOCSIFNAME:
3625 ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3626 return dev_change_name(dev, ifr->ifr_newname);
3627
3628 /*
3629 * Unknown or private ioctl
3630 */
3631
3632 default:
3633 if ((cmd >= SIOCDEVPRIVATE &&
3634 cmd <= SIOCDEVPRIVATE + 15) ||
3635 cmd == SIOCBONDENSLAVE ||
3636 cmd == SIOCBONDRELEASE ||
3637 cmd == SIOCBONDSETHWADDR ||
3638 cmd == SIOCBONDSLAVEINFOQUERY ||
3639 cmd == SIOCBONDINFOQUERY ||
3640 cmd == SIOCBONDCHANGEACTIVE ||
3641 cmd == SIOCGMIIPHY ||
3642 cmd == SIOCGMIIREG ||
3643 cmd == SIOCSMIIREG ||
3644 cmd == SIOCBRADDIF ||
3645 cmd == SIOCBRDELIF ||
3646 cmd == SIOCWANDEV) {
3647 err = -EOPNOTSUPP;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003648 if (ops->ndo_do_ioctl) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07003649 if (netif_device_present(dev))
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003650 err = ops->ndo_do_ioctl(dev, ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003651 else
3652 err = -ENODEV;
3653 }
3654 } else
3655 err = -EINVAL;
3656
3657 }
3658 return err;
3659}
3660
3661/*
3662 * This function handles all "interface"-type I/O control requests. The actual
3663 * 'doing' part of this is dev_ifsioc above.
3664 */
3665
3666/**
3667 * dev_ioctl - network device ioctl
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003668 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003669 * @cmd: command to issue
3670 * @arg: pointer to a struct ifreq in user space
3671 *
3672 * Issue ioctl functions to devices. This is normally called by the
3673 * user space syscall interfaces but can sometimes be useful for
3674 * other purposes. The return value is the return from the syscall if
3675 * positive or a negative errno code on error.
3676 */
3677
Eric W. Biederman881d9662007-09-17 11:56:21 -07003678int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003679{
3680 struct ifreq ifr;
3681 int ret;
3682 char *colon;
3683
3684	/* One special case: SIOCGIFCONF takes an ifconf argument
3685	   and requires a shared lock, because it sleeps while writing
3686	   to user space.
3687 */
3688
3689 if (cmd == SIOCGIFCONF) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003690 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003691 ret = dev_ifconf(net, (char __user *) arg);
Stephen Hemminger6756ae42006-03-20 22:23:58 -08003692 rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07003693 return ret;
3694 }
3695 if (cmd == SIOCGIFNAME)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003696 return dev_ifname(net, (struct ifreq __user *)arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003697
3698 if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3699 return -EFAULT;
3700
3701 ifr.ifr_name[IFNAMSIZ-1] = 0;
3702
3703 colon = strchr(ifr.ifr_name, ':');
3704 if (colon)
3705 *colon = 0;
3706
3707 /*
3708 * See which interface the caller is talking about.
3709 */
3710
3711 switch (cmd) {
3712 /*
3713 * These ioctl calls:
3714 * - can be done by all.
3715 * - atomic and do not require locking.
3716 * - return a value
3717 */
3718 case SIOCGIFFLAGS:
3719 case SIOCGIFMETRIC:
3720 case SIOCGIFMTU:
3721 case SIOCGIFHWADDR:
3722 case SIOCGIFSLAVE:
3723 case SIOCGIFMAP:
3724 case SIOCGIFINDEX:
3725 case SIOCGIFTXQLEN:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003726 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003727 read_lock(&dev_base_lock);
Jeff Garzik14e3e072007-10-08 00:06:32 -07003728 ret = dev_ifsioc_locked(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003729 read_unlock(&dev_base_lock);
3730 if (!ret) {
3731 if (colon)
3732 *colon = ':';
3733 if (copy_to_user(arg, &ifr,
3734 sizeof(struct ifreq)))
3735 ret = -EFAULT;
3736 }
3737 return ret;
3738
3739 case SIOCETHTOOL:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003740 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003741 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003742 ret = dev_ethtool(net, &ifr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003743 rtnl_unlock();
3744 if (!ret) {
3745 if (colon)
3746 *colon = ':';
3747 if (copy_to_user(arg, &ifr,
3748 sizeof(struct ifreq)))
3749 ret = -EFAULT;
3750 }
3751 return ret;
3752
3753 /*
3754 * These ioctl calls:
3755 * - require superuser power.
3756 * - require strict serialization.
3757 * - return a value
3758 */
3759 case SIOCGMIIPHY:
3760 case SIOCGMIIREG:
3761 case SIOCSIFNAME:
3762 if (!capable(CAP_NET_ADMIN))
3763 return -EPERM;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003764 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003765 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003766 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003767 rtnl_unlock();
3768 if (!ret) {
3769 if (colon)
3770 *colon = ':';
3771 if (copy_to_user(arg, &ifr,
3772 sizeof(struct ifreq)))
3773 ret = -EFAULT;
3774 }
3775 return ret;
3776
3777 /*
3778 * These ioctl calls:
3779 * - require superuser power.
3780 * - require strict serialization.
3781 * - do not return a value
3782 */
3783 case SIOCSIFFLAGS:
3784 case SIOCSIFMETRIC:
3785 case SIOCSIFMTU:
3786 case SIOCSIFMAP:
3787 case SIOCSIFHWADDR:
3788 case SIOCSIFSLAVE:
3789 case SIOCADDMULTI:
3790 case SIOCDELMULTI:
3791 case SIOCSIFHWBROADCAST:
3792 case SIOCSIFTXQLEN:
3793 case SIOCSMIIREG:
3794 case SIOCBONDENSLAVE:
3795 case SIOCBONDRELEASE:
3796 case SIOCBONDSETHWADDR:
Linus Torvalds1da177e2005-04-16 15:20:36 -07003797 case SIOCBONDCHANGEACTIVE:
3798 case SIOCBRADDIF:
3799 case SIOCBRDELIF:
3800 if (!capable(CAP_NET_ADMIN))
3801 return -EPERM;
Thomas Grafcabcac02006-01-24 12:46:33 -08003802 /* fall through */
3803 case SIOCBONDSLAVEINFOQUERY:
3804 case SIOCBONDINFOQUERY:
Eric W. Biederman881d9662007-09-17 11:56:21 -07003805 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003806 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003807 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003808 rtnl_unlock();
3809 return ret;
3810
3811 case SIOCGIFMEM:
3812 /* Get the per device memory space. We can add this but
3813 * currently do not support it */
3814 case SIOCSIFMEM:
3815 /* Set the per device memory buffer space.
3816 * Not applicable in our case */
3817 case SIOCSIFLINK:
3818 return -EINVAL;
3819
3820 /*
3821 * Unknown or private ioctl.
3822 */
3823 default:
3824 if (cmd == SIOCWANDEV ||
3825 (cmd >= SIOCDEVPRIVATE &&
3826 cmd <= SIOCDEVPRIVATE + 15)) {
Eric W. Biederman881d9662007-09-17 11:56:21 -07003827 dev_load(net, ifr.ifr_name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003828 rtnl_lock();
Eric W. Biederman881d9662007-09-17 11:56:21 -07003829 ret = dev_ifsioc(net, &ifr, cmd);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003830 rtnl_unlock();
3831 if (!ret && copy_to_user(arg, &ifr,
3832 sizeof(struct ifreq)))
3833 ret = -EFAULT;
3834 return ret;
3835 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07003836 /* Take care of Wireless Extensions */
Johannes Berg295f4a12007-04-26 20:43:56 -07003837 if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
Eric W. Biederman881d9662007-09-17 11:56:21 -07003838 return wext_handle_ioctl(net, &ifr, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003839 return -EINVAL;
3840 }
3841}
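
/*
 * Illustrative sketch of the other side of this interface: how a user
 * space program (not kernel code) reaches dev_ioctl() to read an MTU.
 * Error handling elided.
 *
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_name[IFNAMSIZ - 1] = '\0';
 *	if (ioctl(fd, SIOCGIFMTU, &ifr) == 0)
 *		printf("mtu=%d\n", ifr.ifr_mtu);
 */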
3842
3843
3844/**
3845 * dev_new_index - allocate an ifindex
Randy Dunlapc4ea43c2007-10-12 21:17:49 -07003846 * @net: the applicable net namespace
Linus Torvalds1da177e2005-04-16 15:20:36 -07003847 *
3848 * Returns a suitable unique value for a new device interface
3849 * number. The caller must hold the rtnl semaphore or the
3850 * dev_base_lock to be sure it remains unique.
3851 */
Eric W. Biederman881d9662007-09-17 11:56:21 -07003852static int dev_new_index(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003853{
3854 static int ifindex;
3855 for (;;) {
3856 if (++ifindex <= 0)
3857 ifindex = 1;
Eric W. Biederman881d9662007-09-17 11:56:21 -07003858 if (!__dev_get_by_index(net, ifindex))
Linus Torvalds1da177e2005-04-16 15:20:36 -07003859 return ifindex;
3860 }
3861}
3862
Linus Torvalds1da177e2005-04-16 15:20:36 -07003863/* Delayed registration/unregistration */
Denis Cheng3b5b34f2007-12-07 00:49:17 -08003864static LIST_HEAD(net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003865
Stephen Hemminger6f05f622007-03-08 20:46:03 -08003866static void net_set_todo(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07003867{
Linus Torvalds1da177e2005-04-16 15:20:36 -07003868 list_add_tail(&dev->todo_list, &net_todo_list);
Linus Torvalds1da177e2005-04-16 15:20:36 -07003869}
3870
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003871static void rollback_registered(struct net_device *dev)
3872{
3873 BUG_ON(dev_boot_phase);
3874 ASSERT_RTNL();
3875
3876	/* Some devices call this without ever having registered, to unwind a failed initialization. */
3877 if (dev->reg_state == NETREG_UNINITIALIZED) {
3878 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3879 "was registered\n", dev->name, dev);
3880
3881 WARN_ON(1);
3882 return;
3883 }
3884
3885 BUG_ON(dev->reg_state != NETREG_REGISTERED);
3886
3887 /* If device is running, close it first. */
3888 dev_close(dev);
3889
3890 /* And unlink it from device chain. */
3891 unlist_netdevice(dev);
3892
3893 dev->reg_state = NETREG_UNREGISTERING;
3894
3895 synchronize_net();
3896
3897 /* Shutdown queueing discipline. */
3898 dev_shutdown(dev);
3899
3900
3901	/* Notify protocols that we are about to destroy
3902	   this device. They should clean up all of their state.
3903 */
3904 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
3905
3906 /*
3907 * Flush the unicast and multicast chains
3908 */
3909 dev_addr_discard(dev);
3910
Stephen Hemmingerd3147742008-11-19 21:32:24 -08003911 if (dev->netdev_ops->ndo_uninit)
3912 dev->netdev_ops->ndo_uninit(dev);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003913
3914 /* Notifier chain MUST detach us from master device. */
Ilpo Järvinen547b7922008-07-25 21:43:18 -07003915 WARN_ON(dev->master);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07003916
3917 /* Remove entries from kobject tree */
3918 netdev_unregister_kobject(dev);
3919
3920 synchronize_net();
3921
3922 dev_put(dev);
3923}
3924
David S. Millere8a04642008-07-17 00:34:19 -07003925static void __netdev_init_queue_locks_one(struct net_device *dev,
3926 struct netdev_queue *dev_queue,
3927 void *_unused)
David S. Millerc773e842008-07-08 23:13:53 -07003928{
3929 spin_lock_init(&dev_queue->_xmit_lock);
David S. Millercf508b12008-07-22 14:16:42 -07003930 netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
David S. Millerc773e842008-07-08 23:13:53 -07003931 dev_queue->xmit_lock_owner = -1;
3932}
3933
3934static void netdev_init_queue_locks(struct net_device *dev)
3935{
David S. Millere8a04642008-07-17 00:34:19 -07003936 netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
3937 __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
David S. Millerc773e842008-07-08 23:13:53 -07003938}
3939
Herbert Xub63365a2008-10-23 01:11:29 -07003940unsigned long netdev_fix_features(unsigned long features, const char *name)
3941{
3942 /* Fix illegal SG+CSUM combinations. */
3943 if ((features & NETIF_F_SG) &&
3944 !(features & NETIF_F_ALL_CSUM)) {
3945 if (name)
3946 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no "
3947 "checksum feature.\n", name);
3948 features &= ~NETIF_F_SG;
3949 }
3950
3951 /* TSO requires that SG is present as well. */
3952 if ((features & NETIF_F_TSO) && !(features & NETIF_F_SG)) {
3953 if (name)
3954 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no "
3955 "SG feature.\n", name);
3956 features &= ~NETIF_F_TSO;
3957 }
3958
3959 if (features & NETIF_F_UFO) {
3960 if (!(features & NETIF_F_GEN_CSUM)) {
3961 if (name)
3962 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3963 "since no NETIF_F_HW_CSUM feature.\n",
3964 name);
3965 features &= ~NETIF_F_UFO;
3966 }
3967
3968 if (!(features & NETIF_F_SG)) {
3969 if (name)
3970 printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
3971 "since no NETIF_F_SG feature.\n", name);
3972 features &= ~NETIF_F_UFO;
3973 }
3974 }
3975
3976 return features;
3977}
3978EXPORT_SYMBOL(netdev_fix_features);
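
/*
 * Illustrative sketch: a driver that lets ethtool toggle checksumming
 * could re-run the dependency check afterwards so SG/TSO/UFO never
 * outlive the features they require (the handler name is hypothetical):
 *
 *	static int my_set_tx_csum(struct net_device *dev, u32 data)
 *	{
 *		if (data)
 *			dev->features |= NETIF_F_IP_CSUM;
 *		else
 *			dev->features &= ~NETIF_F_IP_CSUM;
 *		dev->features = netdev_fix_features(dev->features,
 *						    dev->name);
 *		return 0;
 *	}
 */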
3979
Linus Torvalds1da177e2005-04-16 15:20:36 -07003980/**
3981 * register_netdevice - register a network device
3982 * @dev: device to register
3983 *
3984 * Take a completed network device structure and add it to the kernel
3985 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3986 * chain. 0 is returned on success. A negative errno code is returned
3987 * on a failure to set up the device, or if the name is a duplicate.
3988 *
3989 * Callers must hold the rtnl semaphore. You may want
3990 * register_netdev() instead of this.
3991 *
3992 * BUGS:
3993 * The locking appears insufficient to guarantee two parallel registers
3994 * will not get the same name.
3995 */
3996
3997int register_netdevice(struct net_device *dev)
3998{
3999 struct hlist_head *head;
4000 struct hlist_node *p;
4001 int ret;
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004002 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004003
4004 BUG_ON(dev_boot_phase);
4005 ASSERT_RTNL();
4006
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004007 might_sleep();
4008
Linus Torvalds1da177e2005-04-16 15:20:36 -07004009	/* When net_devices are persistent, this will be fatal. */
4010 BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004011 BUG_ON(!net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004012
David S. Millerf1f28aa2008-07-15 00:08:33 -07004013 spin_lock_init(&dev->addr_list_lock);
David S. Millercf508b12008-07-22 14:16:42 -07004014 netdev_set_addr_lockdep_class(dev);
David S. Millerc773e842008-07-08 23:13:53 -07004015 netdev_init_queue_locks(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004016
Linus Torvalds1da177e2005-04-16 15:20:36 -07004017 dev->iflink = -1;
4018
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004019#ifdef CONFIG_COMPAT_NET_DEV_OPS
4020	/* Netdevice_ops API compatibility support.
4021 * This is temporary until all network devices are converted.
4022 */
4023 if (dev->netdev_ops) {
4024 const struct net_device_ops *ops = dev->netdev_ops;
4025
4026 dev->init = ops->ndo_init;
4027 dev->uninit = ops->ndo_uninit;
4028 dev->open = ops->ndo_open;
4029 dev->change_rx_flags = ops->ndo_change_rx_flags;
4030 dev->set_rx_mode = ops->ndo_set_rx_mode;
4031 dev->set_multicast_list = ops->ndo_set_multicast_list;
4032 dev->set_mac_address = ops->ndo_set_mac_address;
4033 dev->validate_addr = ops->ndo_validate_addr;
4034 dev->do_ioctl = ops->ndo_do_ioctl;
4035 dev->set_config = ops->ndo_set_config;
4036 dev->change_mtu = ops->ndo_change_mtu;
4037 dev->tx_timeout = ops->ndo_tx_timeout;
4038 dev->get_stats = ops->ndo_get_stats;
4039 dev->vlan_rx_register = ops->ndo_vlan_rx_register;
4040 dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
4041 dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
4042#ifdef CONFIG_NET_POLL_CONTROLLER
4043 dev->poll_controller = ops->ndo_poll_controller;
4044#endif
4045 } else {
4046 char drivername[64];
4047 pr_info("%s (%s): not using net_device_ops yet\n",
4048 dev->name, netdev_drivername(dev, drivername, 64));
4049
4050 /* This works only because net_device_ops and the
4051	   compatibility structure are laid out the same. */
4052 dev->netdev_ops = (void *) &(dev->init);
4053 }
4054#endif
4055
Linus Torvalds1da177e2005-04-16 15:20:36 -07004056 /* Init, if this function is available */
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004057 if (dev->netdev_ops->ndo_init) {
4058 ret = dev->netdev_ops->ndo_init(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004059 if (ret) {
4060 if (ret > 0)
4061 ret = -EIO;
Adrian Bunk90833aa2006-11-13 16:02:22 -08004062 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004063 }
4064 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004065
Linus Torvalds1da177e2005-04-16 15:20:36 -07004066 if (!dev_valid_name(dev->name)) {
4067 ret = -EINVAL;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004068 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004069 }
4070
Eric W. Biederman881d9662007-09-17 11:56:21 -07004071 dev->ifindex = dev_new_index(net);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004072 if (dev->iflink == -1)
4073 dev->iflink = dev->ifindex;
4074
4075 /* Check for existence of name */
Eric W. Biederman881d9662007-09-17 11:56:21 -07004076 head = dev_name_hash(net, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004077 hlist_for_each(p, head) {
4078 struct net_device *d
4079 = hlist_entry(p, struct net_device, name_hlist);
4080 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
4081 ret = -EEXIST;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004082 goto err_uninit;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004083 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004084 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004085
Stephen Hemmingerd212f872007-06-27 00:47:37 -07004086 /* Fix illegal checksum combinations */
4087 if ((dev->features & NETIF_F_HW_CSUM) &&
4088 (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4089 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
4090 dev->name);
4091 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
4092 }
4093
4094 if ((dev->features & NETIF_F_NO_CSUM) &&
4095 (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
4096 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
4097 dev->name);
4098 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
4099 }
4100
Herbert Xub63365a2008-10-23 01:11:29 -07004101 dev->features = netdev_fix_features(dev->features, dev->name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004102
Lennert Buytenheke5a4a722008-08-03 01:23:10 -07004103 /* Enable software GSO if SG is supported. */
4104 if (dev->features & NETIF_F_SG)
4105 dev->features |= NETIF_F_GSO;
4106
Daniel Lezcanoaaf8cdc2008-05-02 17:00:58 -07004107 netdev_initialize_kobject(dev);
Eric W. Biederman8b41d182007-09-26 22:02:53 -07004108 ret = netdev_register_kobject(dev);
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004109 if (ret)
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004110 goto err_uninit;
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004111 dev->reg_state = NETREG_REGISTERED;
4112
Linus Torvalds1da177e2005-04-16 15:20:36 -07004113 /*
4114 * Default initial state at registry is that the
4115 * device is present.
4116 */
4117
4118 set_bit(__LINK_STATE_PRESENT, &dev->state);
4119
Linus Torvalds1da177e2005-04-16 15:20:36 -07004120 dev_init_scheduler(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004121 dev_hold(dev);
Eric W. Biedermance286d32007-09-12 13:53:49 +02004122 list_netdevice(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004123
4124	/* Notify protocols that a new device appeared. */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004125 ret = call_netdevice_notifiers(NETDEV_REGISTER, dev);
Herbert Xufcc5a032007-07-30 17:03:38 -07004126 ret = notifier_to_errno(ret);
Daniel Lezcano93ee31f2007-10-30 15:38:18 -07004127 if (ret) {
4128 rollback_registered(dev);
4129 dev->reg_state = NETREG_UNREGISTERED;
4130 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07004131
4132out:
4133 return ret;
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004134
4135err_uninit:
Stephen Hemmingerd3147742008-11-19 21:32:24 -08004136 if (dev->netdev_ops->ndo_uninit)
4137 dev->netdev_ops->ndo_uninit(dev);
Herbert Xu7ce1b0e2007-07-30 16:29:40 -07004138 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07004139}
4140
4141/**
4142 * register_netdev - register a network device
4143 * @dev: device to register
4144 *
4145 * Take a completed network device structure and add it to the kernel
4146 * interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
4147 * chain. 0 is returned on success. A negative errno code is returned
4148 * on a failure to set up the device, or if the name is a duplicate.
4149 *
Borislav Petkov38b4da32007-04-20 22:14:10 -07004150 * This is a wrapper around register_netdevice that takes the rtnl semaphore
Linus Torvalds1da177e2005-04-16 15:20:36 -07004151 * and expands the device name if you passed a format string to
4152 * alloc_netdev.
4153 */
4154int register_netdev(struct net_device *dev)
4155{
4156 int err;
4157
4158 rtnl_lock();
4159
4160 /*
4161 * If the name is a format string the caller wants us to do a
4162 * name allocation.
4163 */
4164 if (strchr(dev->name, '%')) {
4165 err = dev_alloc_name(dev, dev->name);
4166 if (err < 0)
4167 goto out;
4168 }
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004169
Linus Torvalds1da177e2005-04-16 15:20:36 -07004170 err = register_netdevice(dev);
4171out:
4172 rtnl_unlock();
4173 return err;
4174}
4175EXPORT_SYMBOL(register_netdev);
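
/*
 * Illustrative sketch of the usual driver probe sequence ("my_priv" and
 * "my_setup" are hypothetical):
 *
 *	struct net_device *dev;
 *	int err;
 *
 *	dev = alloc_netdev(sizeof(struct my_priv), "myeth%d", my_setup);
 *	if (!dev)
 *		return -ENOMEM;
 *	err = register_netdev(dev);
 *	if (err) {
 *		free_netdev(dev);
 *		return err;
 *	}
 */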
4176
4177/*
4178 * netdev_wait_allrefs - wait until all references are gone.
4179 *
4180 * This is called when unregistering network devices.
4181 *
4182 * Any protocol or device that holds a reference should register
4183 * for netdevice notification, and clean up and put back the
4184 * reference if they receive an UNREGISTER event.
4185 * We can get stuck here if buggy protocols don't correctly
YOSHIFUJI Hideaki4ec93ed2007-02-09 23:24:36 +09004186 * call dev_put.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004187 */
4188static void netdev_wait_allrefs(struct net_device *dev)
4189{
4190 unsigned long rebroadcast_time, warning_time;
4191
4192 rebroadcast_time = warning_time = jiffies;
4193 while (atomic_read(&dev->refcnt) != 0) {
4194 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004195 rtnl_lock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004196
4197 /* Rebroadcast unregister notification */
Pavel Emelyanov056925a2007-09-16 15:42:43 -07004198 call_netdevice_notifiers(NETDEV_UNREGISTER, dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07004199
4200 if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
4201 &dev->state)) {
4202 /* We must not have linkwatch events
4203 * pending on unregister. If this
4204 * happens, we simply run the queue
4205 * unscheduled, resulting in a noop
4206 * for this device.
4207 */
4208 linkwatch_run_queue();
4209 }
4210
Stephen Hemminger6756ae42006-03-20 22:23:58 -08004211 __rtnl_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07004212
4213 rebroadcast_time = jiffies;
4214 }
4215
4216 msleep(250);
4217
4218 if (time_after(jiffies, warning_time + 10 * HZ)) {
4219 printk(KERN_EMERG "unregister_netdevice: "
4220 "waiting for %s to become free. Usage "
4221 "count = %d\n",
4222 dev->name, atomic_read(&dev->refcnt));
4223 warning_time = jiffies;
4224 }
4225 }
4226}
4227
4228/* The sequence is:
4229 *
4230 * rtnl_lock();
4231 * ...
4232 * register_netdevice(x1);
4233 * register_netdevice(x2);
4234 * ...
4235 * unregister_netdevice(y1);
4236 * unregister_netdevice(y2);
4237 * ...
4238 * rtnl_unlock();
4239 * free_netdev(y1);
4240 * free_netdev(y2);
4241 *
Herbert Xu58ec3b42008-10-07 15:50:03 -07004242 * We are invoked by rtnl_unlock().
Linus Torvalds1da177e2005-04-16 15:20:36 -07004243 * This allows us to deal with problems:
Stephen Hemmingerb17a7c12006-05-10 13:21:17 -07004244 * 1) We can delete sysfs objects which invoke hotplug
Linus Torvalds1da177e2005-04-16 15:20:36 -07004245 * without deadlocking with linkwatch via keventd.
4246 * 2) Since we run with the RTNL semaphore not held, we can sleep
4247 * safely in order to wait for the netdev refcnt to drop to zero.
Herbert Xu58ec3b42008-10-07 15:50:03 -07004248 *
4249 * We must not return until all unregister events added during
4250 * the interval the lock was held have been completed.
Linus Torvalds1da177e2005-04-16 15:20:36 -07004251 */
void netdev_run_todo(void)
{
	struct list_head list;

	/* Snapshot list, allow later requests */
	list_replace_init(&net_todo_list, &list);

	__rtnl_unlock();

	while (!list_empty(&list)) {
		struct net_device *dev
			= list_entry(list.next, struct net_device, todo_list);
		list_del(&dev->todo_list);

		if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
			printk(KERN_ERR "network todo '%s' but state %d\n",
			       dev->name, dev->reg_state);
			dump_stack();
			continue;
		}

		dev->reg_state = NETREG_UNREGISTERED;

		on_each_cpu(flush_backlog, dev, 1);

		netdev_wait_allrefs(dev);

		/* paranoia */
		BUG_ON(atomic_read(&dev->refcnt));
		WARN_ON(dev->ip_ptr);
		WARN_ON(dev->ip6_ptr);
		WARN_ON(dev->dn_ptr);

		if (dev->destructor)
			dev->destructor(dev);

		/* Free network device */
		kobject_put(&dev->dev.kobj);
	}
}

/**
 *	dev_get_stats	- get network device statistics
 *	@dev: device to get statistics from
 *
 *	Get network statistics from device. The device driver may provide
 *	its own method by setting dev->netdev_ops->ndo_get_stats; otherwise
 *	the internal statistics structure is used.
 */
const struct net_device_stats *dev_get_stats(struct net_device *dev)
{
	const struct net_device_ops *ops = dev->netdev_ops;

	if (ops->ndo_get_stats)
		return ops->ndo_get_stats(dev);
	else
		return &dev->stats;
}
EXPORT_SYMBOL(dev_get_stats);

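/*
 * Illustrative sketch (hypothetical driver code): a driver that folds
 * private hardware counters into dev->stats and exposes them through
 * ndo_get_stats, which dev_get_stats() above prefers over the default.
 * struct example_priv and its field are assumptions for the example.
 */
struct example_priv {
	unsigned long rx_ring_overruns;
};

static struct net_device_stats *example_get_stats(struct net_device *dev)
{
	struct example_priv *priv = netdev_priv(dev);

	/* merge device-private counters into the generic structure */
	dev->stats.rx_fifo_errors = priv->rx_ring_overruns;
	return &dev->stats;
}
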
static void netdev_init_one_queue(struct net_device *dev,
				  struct netdev_queue *queue,
				  void *_unused)
{
	queue->dev = dev;
}

static void netdev_init_queues(struct net_device *dev)
{
	netdev_init_one_queue(dev, &dev->rx_queue, NULL);
	netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
	spin_lock_init(&dev->tx_global_lock);
}

/**
 *	alloc_netdev_mq - allocate network device
 *	@sizeof_priv:	size of private data to allocate space for
 *	@name:		device name format string
 *	@setup:		callback to initialize device
 *	@queue_count:	the number of subqueues to allocate
 *
 *	Allocates a struct net_device with private data area for driver use
 *	and performs basic initialization. Also allocates subqueue structs
 *	for each queue on the device at the end of the netdevice.
 */
struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
		void (*setup)(struct net_device *), unsigned int queue_count)
{
	struct netdev_queue *tx;
	struct net_device *dev;
	size_t alloc_size;
	void *p;

	BUG_ON(strlen(name) >= sizeof(dev->name));

	alloc_size = sizeof(struct net_device);
	if (sizeof_priv) {
		/* ensure 32-byte alignment of private area */
		alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
		alloc_size += sizeof_priv;
	}
	/* ensure 32-byte alignment of whole construct */
	alloc_size += NETDEV_ALIGN_CONST;

	p = kzalloc(alloc_size, GFP_KERNEL);
	if (!p) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
		return NULL;
	}

	tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
	if (!tx) {
		printk(KERN_ERR "alloc_netdev: Unable to allocate "
		       "tx qdiscs.\n");
		kfree(p);
		return NULL;
	}

	dev = (struct net_device *)
		(((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
	dev->padded = (char *)dev - (char *)p;
	dev_net_set(dev, &init_net);

	dev->_tx = tx;
	dev->num_tx_queues = queue_count;
	dev->real_num_tx_queues = queue_count;

	dev->gso_max_size = GSO_MAX_SIZE;

	netdev_init_queues(dev);

	netpoll_netdev_init(dev);
	setup(dev);
	strcpy(dev->name, name);
	return dev;
}
EXPORT_SYMBOL(alloc_netdev_mq);

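/*
 * Illustrative usage sketch (hypothetical): allocating a four-queue
 * Ethernet-style device. ether_setup() is the stock setup callback for
 * Ethernet devices; "example%d" asks the naming code for a free name,
 * and struct example_priv is the hypothetical private area from above.
 */
static struct net_device *example_alloc_device(void)
{
	struct net_device *dev;

	dev = alloc_netdev_mq(sizeof(struct example_priv), "example%d",
			      ether_setup, 4);
	if (!dev)
		return NULL;	/* allocation failed */

	/* fill in dev->netdev_ops, features, etc. before registering */
	return dev;
}
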
/**
 *	free_netdev - free network device
 *	@dev: device
 *
 *	This function does the last stage of destroying an allocated device
 *	interface. The reference to the device object is released.
 *	If this is the last reference then it will be freed.
 */
void free_netdev(struct net_device *dev)
{
	release_net(dev_net(dev));

	kfree(dev->_tx);

	/* Compatibility with error handling in drivers */
	if (dev->reg_state == NETREG_UNINITIALIZED) {
		kfree((char *)dev - dev->padded);
		return;
	}

	BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
	dev->reg_state = NETREG_RELEASED;

	/* will free via device release */
	put_device(&dev->dev);
}

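/*
 * Illustrative sketch (hypothetical driver code): the usual pairing with
 * register_netdev(). On the error path the device is still
 * NETREG_UNINITIALIZED, so free_netdev() takes the plain-kfree branch
 * above instead of going through the device release.
 */
static int example_register(struct net_device *dev)
{
	int err;

	err = register_netdev(dev);
	if (err) {
		free_netdev(dev);	/* never registered: direct free */
		return err;
	}
	return 0;
}
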
/**
 *	synchronize_net - Synchronize with packet receive processing
 *
 *	Wait for packets currently being received to be done.
 *	Does not block later packets from starting.
 */
void synchronize_net(void)
{
	might_sleep();
	synchronize_rcu();
}

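/*
 * Illustrative sketch (hypothetical): the classic use of
 * synchronize_net() when unhooking a packet handler, so that no CPU is
 * still inside the handler by the time its state is freed.
 */
static struct packet_type example_pt;	/* assumed registered earlier */

static void example_unhook(void)
{
	dev_remove_pack(&example_pt);
	synchronize_net();	/* wait for in-flight receive paths */
	/* now it is safe to free anything the handler used */
}
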
/**
 *	unregister_netdevice - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	Callers must hold the rtnl semaphore. You may want
 *	unregister_netdev() instead of this.
 */

void unregister_netdevice(struct net_device *dev)
{
	ASSERT_RTNL();

	rollback_registered(dev);
	/* Finish processing unregister after unlock */
	net_set_todo(dev);
}

/**
 *	unregister_netdev - remove device from the kernel
 *	@dev: device
 *
 *	This function shuts down a device interface and removes it
 *	from the kernel tables.
 *
 *	This is just a wrapper for unregister_netdevice that takes
 *	the rtnl semaphore. In general you want to use this and not
 *	unregister_netdevice.
 */
void unregister_netdev(struct net_device *dev)
{
	rtnl_lock();
	unregister_netdevice(dev);
	rtnl_unlock();
}

EXPORT_SYMBOL(unregister_netdev);

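/*
 * Illustrative sketch (hypothetical): a typical driver removal path.
 * unregister_netdev() takes the RTNL itself; free_netdev() must only
 * run once netdev_run_todo() has finished with the device, which the
 * call ordering below guarantees.
 */
static void example_remove(struct net_device *dev)
{
	unregister_netdev(dev);
	free_netdev(dev);
}
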
/**
 *	dev_change_net_namespace - move device to a different network namespace
 *	@dev: device
 *	@net: network namespace
 *	@pat: If not NULL, name pattern to try if the current device name
 *	      is already taken in the destination network namespace.
 *
 *	This function shuts down a device interface and moves it
 *	to a new network namespace. On success 0 is returned, on
 *	a failure a negative errno code is returned.
 *
 *	Callers must hold the rtnl semaphore.
 */

int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat)
{
	char buf[IFNAMSIZ];
	const char *destname;
	int err;

	ASSERT_RTNL();

	/* Don't allow namespace local devices to be moved. */
	err = -EINVAL;
	if (dev->features & NETIF_F_NETNS_LOCAL)
		goto out;

#ifdef CONFIG_SYSFS
	/* Don't allow real devices to be moved when sysfs
	 * is enabled.
	 */
	err = -EINVAL;
	if (dev->dev.parent)
		goto out;
#endif

	/* Ensure the device has been registered */
	err = -EINVAL;
	if (dev->reg_state != NETREG_REGISTERED)
		goto out;

	/* Get out if there is nothing to do */
	err = 0;
	if (net_eq(dev_net(dev), net))
		goto out;

	/* Pick the destination device name, and ensure
	 * we can use it in the destination network namespace.
	 */
	err = -EEXIST;
	destname = dev->name;
	if (__dev_get_by_name(net, destname)) {
		/* We get here if we can't use the current device name */
		if (!pat)
			goto out;
		if (!dev_valid_name(pat))
			goto out;
		if (strchr(pat, '%')) {
			if (__dev_alloc_name(net, pat, buf) < 0)
				goto out;
			destname = buf;
		} else
			destname = pat;
		if (__dev_get_by_name(net, destname))
			goto out;
	}

	/*
	 * And now a mini version of register_netdevice and unregister_netdevice.
	 */

	/* If device is running, close it first. */
	dev_close(dev);

	/* And unlink it from device chain */
	err = -ENODEV;
	unlist_netdevice(dev);

	synchronize_net();

	/* Shutdown queueing discipline. */
	dev_shutdown(dev);

	/* Notify protocols that we are about to destroy
	 * this device. They should clean all the things.
	 */
	call_netdevice_notifiers(NETDEV_UNREGISTER, dev);

	/*
	 * Flush the unicast and multicast chains
	 */
	dev_addr_discard(dev);

	netdev_unregister_kobject(dev);

	/* Actually switch the network namespace */
	dev_net_set(dev, net);

	/* Assign the new device name */
	if (destname != dev->name)
		strcpy(dev->name, destname);

	/* If there is an ifindex conflict assign a new one */
	if (__dev_get_by_index(net, dev->ifindex)) {
		int iflink = (dev->iflink == dev->ifindex);
		dev->ifindex = dev_new_index(net);
		if (iflink)
			dev->iflink = dev->ifindex;
	}

	/* Fixup kobjects */
	err = netdev_register_kobject(dev);
	WARN_ON(err);

	/* Add the device back in the hashes */
	list_netdevice(dev);

	/* Notify protocols that a new device appeared. */
	call_netdevice_notifiers(NETDEV_REGISTER, dev);

	synchronize_net();
	err = 0;
out:
	return err;
}

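/*
 * Illustrative sketch (hypothetical): moving a device into another
 * namespace under the RTNL, with "eth%d" as the fallback name pattern.
 * "veth1" is an assumed virtual device; with CONFIG_SYSFS only
 * parentless (virtual) devices can be moved, per the check above.
 */
static int example_move_to_ns(struct net *net)
{
	struct net_device *dev;
	int err;

	rtnl_lock();
	dev = __dev_get_by_name(&init_net, "veth1");
	err = dev ? dev_change_net_namespace(dev, net, "eth%d") : -ENODEV;
	rtnl_unlock();
	return err;
}
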
static int dev_cpu_callback(struct notifier_block *nfb,
			    unsigned long action,
			    void *ocpu)
{
	struct sk_buff **list_skb;
	struct Qdisc **list_net;
	struct sk_buff *skb;
	unsigned int cpu, oldcpu = (unsigned long)ocpu;
	struct softnet_data *sd, *oldsd;

	if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
		return NOTIFY_OK;

	local_irq_disable();
	cpu = smp_processor_id();
	sd = &per_cpu(softnet_data, cpu);
	oldsd = &per_cpu(softnet_data, oldcpu);

	/* Find end of our completion_queue. */
	list_skb = &sd->completion_queue;
	while (*list_skb)
		list_skb = &(*list_skb)->next;
	/* Append completion queue from offline CPU. */
	*list_skb = oldsd->completion_queue;
	oldsd->completion_queue = NULL;

	/* Find end of our output_queue. */
	list_net = &sd->output_queue;
	while (*list_net)
		list_net = &(*list_net)->next_sched;
	/* Append output queue from offline CPU. */
	*list_net = oldsd->output_queue;
	oldsd->output_queue = NULL;

	raise_softirq_irqoff(NET_TX_SOFTIRQ);
	local_irq_enable();

	/* Process offline CPU's input_pkt_queue */
	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
		netif_rx(skb);

	return NOTIFY_OK;
}

#ifdef CONFIG_NET_DMA
/**
 * net_dma_rebalance - try to maintain one DMA channel per CPU
 * @net_dma: DMA client and associated data (lock, channels, channel_mask)
 *
 * This is called when the number of channels allocated to the net_dma client
 * changes. The net_dma client tries to have one DMA channel per CPU.
 */

static void net_dma_rebalance(struct net_dma *net_dma)
{
	unsigned int cpu, i, n, chan_idx;
	struct dma_chan *chan;

	if (cpus_empty(net_dma->channel_mask)) {
		for_each_online_cpu(cpu)
			rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
		return;
	}

	i = 0;
	cpu = first_cpu(cpu_online_map);

	for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
		chan = net_dma->channels[chan_idx];

		n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
		   + (i < (num_online_cpus() %
			cpus_weight(net_dma->channel_mask)) ? 1 : 0));

		while (n) {
			per_cpu(softnet_data, cpu).net_dma = chan;
			cpu = next_cpu(cpu, cpu_online_map);
			n--;
		}
		i++;
	}
}

/**
 * netdev_dma_event - event callback for the net_dma_client
 * @client: should always be net_dma_client
 * @chan: DMA channel for the event
 * @state: DMA state to be handled
 */
static enum dma_state_client
netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
	enum dma_state state)
{
	int i, found = 0, pos = -1;
	struct net_dma *net_dma =
		container_of(client, struct net_dma, client);
	enum dma_state_client ack = DMA_DUP; /* default: take no action */

	spin_lock(&net_dma->lock);
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				break;
			} else if (net_dma->channels[i] == NULL && pos < 0)
				pos = i;

		if (!found && pos >= 0) {
			ack = DMA_ACK;
			net_dma->channels[pos] = chan;
			cpu_set(pos, net_dma->channel_mask);
			net_dma_rebalance(net_dma);
		}
		break;
	case DMA_RESOURCE_REMOVED:
		for (i = 0; i < nr_cpu_ids; i++)
			if (net_dma->channels[i] == chan) {
				found = 1;
				pos = i;
				break;
			}

		if (found) {
			ack = DMA_ACK;
			cpu_clear(pos, net_dma->channel_mask);
			net_dma->channels[i] = NULL;
			net_dma_rebalance(net_dma);
		}
		break;
	default:
		break;
	}
	spin_unlock(&net_dma->lock);

	return ack;
}

/**
 * netdev_dma_register - register the networking subsystem as a DMA client
 */
static int __init netdev_dma_register(void)
{
	/* one channel pointer per possible CPU */
	net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct dma_chan *),
				   GFP_KERNEL);
	if (unlikely(!net_dma.channels)) {
		printk(KERN_NOTICE
		       "netdev_dma: no memory for net_dma.channels\n");
		return -ENOMEM;
	}
	spin_lock_init(&net_dma.lock);
	dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
	dma_async_client_register(&net_dma.client);
	dma_async_client_chan_request(&net_dma.client);
	return 0;
}

#else
static int __init netdev_dma_register(void) { return -ENODEV; }
#endif /* CONFIG_NET_DMA */

/**
 *	netdev_increment_features - increment feature set by one
 *	@all: current feature set
 *	@one: new feature set
 *	@mask: mask feature set
 *
 *	Computes a new feature set after adding a device with feature set
 *	@one to the master device with current feature set @all. Will not
 *	enable anything that is off in @mask. Returns the new feature set.
 */
unsigned long netdev_increment_features(unsigned long all, unsigned long one,
					unsigned long mask)
{
	/* If device needs checksumming, downgrade to it. */
	if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
		all ^= NETIF_F_NO_CSUM | (one & NETIF_F_ALL_CSUM);
	else if (mask & NETIF_F_ALL_CSUM) {
		/* If one device supports v4/v6 checksumming, set for all. */
		if (one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM) &&
		    !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= one & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM);
		}

		/* If one device supports hw checksumming, set for all. */
		if (one & NETIF_F_GEN_CSUM && !(all & NETIF_F_GEN_CSUM)) {
			all &= ~NETIF_F_ALL_CSUM;
			all |= NETIF_F_HW_CSUM;
		}
	}

	one |= NETIF_F_ALL_CSUM;

	one |= all & NETIF_F_ONE_FOR_ALL;
	all &= one | NETIF_F_LLTX | NETIF_F_GSO;
	all |= one & mask & NETIF_F_ONE_FOR_ALL;

	return all;
}
EXPORT_SYMBOL(netdev_increment_features);

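/*
 * Illustrative sketch (hypothetical): a master device folding in the
 * feature sets of two slaves, one call per slave. The starting set and
 * the mask are assumptions for the example; real users (bonding,
 * bridging) choose their own.
 */
static unsigned long example_master_features(struct net_device *s1,
					     struct net_device *s2)
{
	unsigned long all = NETIF_F_SG | NETIF_F_HW_CSUM | NETIF_F_GSO;

	all = netdev_increment_features(all, s1->features,
					NETIF_F_ONE_FOR_ALL);
	all = netdev_increment_features(all, s2->features,
					NETIF_F_ONE_FOR_ALL);
	return all;
}
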
static struct hlist_head *netdev_create_hash(void)
{
	int i;
	struct hlist_head *hash;

	hash = kmalloc(sizeof(*hash) * NETDEV_HASHENTRIES, GFP_KERNEL);
	if (hash != NULL)
		for (i = 0; i < NETDEV_HASHENTRIES; i++)
			INIT_HLIST_HEAD(&hash[i]);

	return hash;
}

/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
	INIT_LIST_HEAD(&net->dev_base_head);

	net->dev_name_head = netdev_create_hash();
	if (net->dev_name_head == NULL)
		goto err_name;

	net->dev_index_head = netdev_create_hash();
	if (net->dev_index_head == NULL)
		goto err_idx;

	return 0;

err_idx:
	kfree(net->dev_name_head);
err_name:
	return -ENOMEM;
}

/**
 *	netdev_drivername - network driver for the device
 *	@dev: network device
 *	@buffer: buffer for resulting name
 *	@len: size of buffer
 *
 *	Determine network driver for device.
 */
char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
{
	const struct device_driver *driver;
	const struct device *parent;

	if (len <= 0 || !buffer)
		return buffer;
	buffer[0] = 0;

	parent = dev->dev.parent;

	if (!parent)
		return buffer;

	driver = parent->driver;
	if (driver && driver->name)
		strlcpy(buffer, driver->name, len);
	return buffer;
}

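/*
 * Illustrative sketch (hypothetical): naming the driver in a watchdog
 * message, much like a transmit-timeout path would.
 */
static void example_report_stall(struct net_device *dev)
{
	char drivername[64];

	printk(KERN_WARNING "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
	       dev->name, netdev_drivername(dev, drivername, 64));
}
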
static void __net_exit netdev_exit(struct net *net)
{
	kfree(net->dev_name_head);
	kfree(net->dev_index_head);
}

static struct pernet_operations __net_initdata netdev_net_ops = {
	.init = netdev_init,
	.exit = netdev_exit,
};

static void __net_exit default_device_exit(struct net *net)
{
	struct net_device *dev, *next;
	/*
	 * Push all migratable network devices back to the
	 * initial network namespace
	 */
	rtnl_lock();
	for_each_netdev_safe(net, dev, next) {
		int err;
		char fb_name[IFNAMSIZ];

		/* Ignore unmovable devices (i.e. loopback) */
		if (dev->features & NETIF_F_NETNS_LOCAL)
			continue;

		/* Delete virtual devices */
		if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
			dev->rtnl_link_ops->dellink(dev);
			continue;
		}

		/* Push remaining network devices to init_net */
		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
		err = dev_change_net_namespace(dev, &init_net, fb_name);
		if (err) {
			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
				__func__, dev->name, err);
			BUG();
		}
	}
	rtnl_unlock();
}

static struct pernet_operations __net_initdata default_device_ops = {
	.exit = default_device_exit,
};

/*
 *	Initialize the DEV module. At boot time this walks the device list and
 *	unhooks any devices that fail to initialise (normally hardware not
 *	present) and leaves us with a valid list of present and active devices.
 */

/*
 *	This is called single threaded during boot, so no need
 *	to take the rtnl semaphore.
 */
static int __init net_dev_init(void)
{
	int i, rc = -ENOMEM;

	BUG_ON(!dev_boot_phase);

	if (dev_proc_init())
		goto out;

	if (netdev_kobject_init())
		goto out;

	INIT_LIST_HEAD(&ptype_all);
	for (i = 0; i < PTYPE_HASH_SIZE; i++)
		INIT_LIST_HEAD(&ptype_base[i]);

	if (register_pernet_subsys(&netdev_net_ops))
		goto out;

	/*
	 *	Initialise the packet receive queues.
	 */

	for_each_possible_cpu(i) {
		struct softnet_data *queue;

		queue = &per_cpu(softnet_data, i);
		skb_queue_head_init(&queue->input_pkt_queue);
		queue->completion_queue = NULL;
		INIT_LIST_HEAD(&queue->poll_list);

		queue->backlog.poll = process_backlog;
		queue->backlog.weight = weight_p;
	}

	dev_boot_phase = 0;

	/* The loopback device is special: if any other network device
	 * is present in a network namespace, the loopback device must
	 * be present too. Since we now dynamically allocate and free
	 * the loopback device, ensure this invariant is maintained by
	 * keeping the loopback device as the first device on the list
	 * of network devices: the first device that appears and the
	 * last network device that disappears.
	 */
	if (register_pernet_device(&loopback_net_ops))
		goto out;

	if (register_pernet_device(&default_device_ops))
		goto out;

	netdev_dma_register();

	open_softirq(NET_TX_SOFTIRQ, net_tx_action);
	open_softirq(NET_RX_SOFTIRQ, net_rx_action);

	hotcpu_notifier(dev_cpu_callback, 0);
	dst_init();
	dev_mcast_init();
	rc = 0;
out:
	return rc;
}

subsys_initcall(net_dev_init);

EXPORT_SYMBOL(__dev_get_by_index);
EXPORT_SYMBOL(__dev_get_by_name);
EXPORT_SYMBOL(__dev_remove_pack);
EXPORT_SYMBOL(dev_valid_name);
EXPORT_SYMBOL(dev_add_pack);
EXPORT_SYMBOL(dev_alloc_name);
EXPORT_SYMBOL(dev_close);
EXPORT_SYMBOL(dev_get_by_flags);
EXPORT_SYMBOL(dev_get_by_index);
EXPORT_SYMBOL(dev_get_by_name);
EXPORT_SYMBOL(dev_open);
EXPORT_SYMBOL(dev_queue_xmit);
EXPORT_SYMBOL(dev_remove_pack);
EXPORT_SYMBOL(dev_set_allmulti);
EXPORT_SYMBOL(dev_set_promiscuity);
EXPORT_SYMBOL(dev_change_flags);
EXPORT_SYMBOL(dev_set_mtu);
EXPORT_SYMBOL(dev_set_mac_address);
EXPORT_SYMBOL(free_netdev);
EXPORT_SYMBOL(netdev_boot_setup_check);
EXPORT_SYMBOL(netdev_set_master);
EXPORT_SYMBOL(netdev_state_change);
EXPORT_SYMBOL(netif_receive_skb);
EXPORT_SYMBOL(netif_rx);
EXPORT_SYMBOL(register_gifconf);
EXPORT_SYMBOL(register_netdevice);
EXPORT_SYMBOL(register_netdevice_notifier);
EXPORT_SYMBOL(skb_checksum_help);
EXPORT_SYMBOL(synchronize_net);
EXPORT_SYMBOL(unregister_netdevice);
EXPORT_SYMBOL(unregister_netdevice_notifier);
EXPORT_SYMBOL(net_enable_timestamp);
EXPORT_SYMBOL(net_disable_timestamp);
EXPORT_SYMBOL(dev_get_flags);

#if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
EXPORT_SYMBOL(br_handle_frame_hook);
EXPORT_SYMBOL(br_fdb_get_hook);
EXPORT_SYMBOL(br_fdb_put_hook);
#endif

EXPORT_SYMBOL(dev_load);

EXPORT_PER_CPU_SYMBOL(softnet_data);