]> nv-tegra.nvidia Code Review - linux-2.6.git/blobdiff - net/core/dev.c
net: Remove CONFIG_KMOD from net/ (towards removing CONFIG_KMOD entirely)
[linux-2.6.git] / net / core / dev.c
index ce79c28d739ddfb98eb9d224f3fb3b5eee123444..868ec0ba8b77dc7088c28816238b2ee557316e01 100644 (file)
 #include <linux/ctype.h>
 #include <linux/if_arp.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <net/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/jhash.h>
+#include <linux/random.h>
 
 #include "net-sysfs.h"
 
@@ -256,9 +262,9 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 
 DEFINE_PER_CPU(struct softnet_data, softnet_data);
 
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
+#ifdef CONFIG_LOCKDEP
 /*
- * register_netdevice() inits dev->_xmit_lock and sets lockdep class
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
  * according to dev->type
  */
 static const unsigned short netdev_lock_type[] =
@@ -296,6 +302,7 @@ static const char *netdev_lock_name[] =
         "_xmit_NONE"};
 
 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
 
 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 {
@@ -308,8 +315,8 @@ static inline unsigned short netdev_lock_pos(unsigned short dev_type)
        return ARRAY_SIZE(netdev_lock_type) - 1;
 }
 
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-                                           unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+                                                unsigned short dev_type)
 {
        int i;
 
@@ -317,9 +324,22 @@ static inline void netdev_set_lockdep_class(spinlock_t *lock,
        lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
                                   netdev_lock_name[i]);
 }
+
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
+{
+       int i;
+
+       i = netdev_lock_pos(dev->type);
+       lockdep_set_class_and_name(&dev->addr_list_lock,
+                                  &netdev_addr_lock_key[i],
+                                  netdev_lock_name[i]);
+}
 #else
-static inline void netdev_set_lockdep_class(spinlock_t *lock,
-                                           unsigned short dev_type)
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+                                                unsigned short dev_type)
+{
+}
+static inline void netdev_set_addr_lockdep_class(struct net_device *dev)
 {
 }
 #endif
@@ -871,7 +891,7 @@ int dev_alloc_name(struct net_device *dev, const char *name)
  *     Change name of a device, can pass format strings "eth%d".
  *     for wildcarding.
  */
-int dev_change_name(struct net_device *dev, char *newname)
+int dev_change_name(struct net_device *dev, const char *newname)
 {
        char oldname[IFNAMSIZ];
        int err = 0;
@@ -897,7 +917,6 @@ int dev_change_name(struct net_device *dev, char *newname)
                err = dev_alloc_name(dev, newname);
                if (err < 0)
                        return err;
-               strcpy(newname, dev->name);
        }
        else if (__dev_get_by_name(net, newname))
                return -EEXIST;
@@ -934,6 +953,38 @@ rollback:
        return err;
 }
 
+/**
+ *     dev_set_alias - change ifalias of a device
+ *     @dev: device
+ *     @alias: name up to IFALIASZ
+ *     @len: limit of bytes to copy from info
+ *
+ *     Set ifalias for a device,
+ */
+int dev_set_alias(struct net_device *dev, const char *alias, size_t len)
+{
+       ASSERT_RTNL();
+
+       if (len >= IFALIASZ)
+               return -EINVAL;
+
+       if (!len) {
+               if (dev->ifalias) {
+                       kfree(dev->ifalias);
+                       dev->ifalias = NULL;
+               }
+               return 0;
+       }
+
+       dev->ifalias = krealloc(dev->ifalias, len+1, GFP_KERNEL);
+       if (!dev->ifalias)
+               return -ENOMEM;
+
+       strlcpy(dev->ifalias, alias, len+1);
+       return len;
+}
+
+
 /**
  *     netdev_features_change - device changes features
  *     @dev: device to cause notification
@@ -1320,19 +1371,23 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 }
 
 
-void __netif_schedule(struct net_device *dev)
+static inline void __netif_reschedule(struct Qdisc *q)
 {
-       if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
-               unsigned long flags;
-               struct softnet_data *sd;
+       struct softnet_data *sd;
+       unsigned long flags;
 
-               local_irq_save(flags);
-               sd = &__get_cpu_var(softnet_data);
-               dev->next_sched = sd->output_queue;
-               sd->output_queue = dev;
-               raise_softirq_irqoff(NET_TX_SOFTIRQ);
-               local_irq_restore(flags);
-       }
+       local_irq_save(flags);
+       sd = &__get_cpu_var(softnet_data);
+       q->next_sched = sd->output_queue;
+       sd->output_queue = q;
+       raise_softirq_irqoff(NET_TX_SOFTIRQ);
+       local_irq_restore(flags);
+}
+
+void __netif_schedule(struct Qdisc *q)
+{
+       if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+               __netif_reschedule(q);
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -1596,7 +1651,8 @@ static int dev_gso_segment(struct sk_buff *skb)
        return 0;
 }
 
-int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
+int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
+                       struct netdev_queue *txq)
 {
        if (likely(!skb->next)) {
                if (!list_empty(&ptype_all))
@@ -1625,9 +1681,7 @@ gso:
                        skb->next = nskb;
                        return rc;
                }
-               if (unlikely((netif_queue_stopped(dev) ||
-                            netif_subqueue_stopped(dev, skb)) &&
-                            skb->next))
+               if (unlikely(netif_tx_queue_stopped(txq) && skb->next))
                        return NETDEV_TX_BUSY;
        } while (skb->next);
 
@@ -1638,6 +1692,74 @@ out_kfree_skb:
        return 0;
 }
 
+static u32 simple_tx_hashrnd;
+static int simple_tx_hashrnd_initialized = 0;
+
+static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+{
+       u32 addr1, addr2, ports;
+       u32 hash, ihl;
+       u8 ip_proto = 0;
+
+       if (unlikely(!simple_tx_hashrnd_initialized)) {
+               get_random_bytes(&simple_tx_hashrnd, 4);
+               simple_tx_hashrnd_initialized = 1;
+       }
+
+       switch (skb->protocol) {
+       case htons(ETH_P_IP):
+               if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
+                       ip_proto = ip_hdr(skb)->protocol;
+               addr1 = ip_hdr(skb)->saddr;
+               addr2 = ip_hdr(skb)->daddr;
+               ihl = ip_hdr(skb)->ihl;
+               break;
+       case htons(ETH_P_IPV6):
+               ip_proto = ipv6_hdr(skb)->nexthdr;
+               addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
+               addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
+               ihl = (40 >> 2);
+               break;
+       default:
+               return 0;
+       }
+
+
+       switch (ip_proto) {
+       case IPPROTO_TCP:
+       case IPPROTO_UDP:
+       case IPPROTO_DCCP:
+       case IPPROTO_ESP:
+       case IPPROTO_AH:
+       case IPPROTO_SCTP:
+       case IPPROTO_UDPLITE:
+               ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
+               break;
+
+       default:
+               ports = 0;
+               break;
+       }
+
+       hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+
+       return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+}
+
+static struct netdev_queue *dev_pick_tx(struct net_device *dev,
+                                       struct sk_buff *skb)
+{
+       u16 queue_index = 0;
+
+       if (dev->select_queue)
+               queue_index = dev->select_queue(dev, skb);
+       else if (dev->real_num_tx_queues > 1)
+               queue_index = simple_tx_hash(dev, skb);
+
+       skb_set_queue_mapping(skb, queue_index);
+       return netdev_get_tx_queue(dev, queue_index);
+}
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -1663,7 +1785,6 @@ out_kfree_skb:
  *      the BH enable code must have IRQs enabled so that it will not deadlock.
  *          --BLG
  */
-
 int dev_queue_xmit(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
@@ -1700,45 +1821,32 @@ int dev_queue_xmit(struct sk_buff *skb)
        }
 
 gso:
-       txq = &dev->tx_queue;
-       spin_lock_prefetch(&txq->lock);
-
        /* Disable soft irqs for various locks below. Also
         * stops preemption for RCU.
         */
        rcu_read_lock_bh();
 
-       /* Updates of qdisc are serialized by queue->lock.
-        * The struct Qdisc which is pointed to by qdisc is now a
-        * rcu structure - it may be accessed without acquiring
-        * a lock (but the structure may be stale.) The freeing of the
-        * qdisc will be deferred until it's known that there are no
-        * more references to it.
-        *
-        * If the qdisc has an enqueue function, we still need to
-        * hold the queue->lock before calling it, since queue->lock
-        * also serializes access to the device queue.
-        */
-
+       txq = dev_pick_tx(dev, skb);
        q = rcu_dereference(txq->qdisc);
+
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
 #endif
        if (q->enqueue) {
-               /* Grab device queue */
-               spin_lock(&txq->lock);
-               q = txq->qdisc;
-               if (q->enqueue) {
-                       /* reset queue_mapping to zero */
-                       skb_set_queue_mapping(skb, 0);
-                       rc = q->enqueue(skb, q);
-                       qdisc_run(dev);
-                       spin_unlock(&txq->lock);
-
-                       rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
-                       goto out;
+               spinlock_t *root_lock = qdisc_lock(q);
+
+               spin_lock(root_lock);
+
+               if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) {
+                       kfree_skb(skb);
+                       rc = NET_XMIT_DROP;
+               } else {
+                       rc = qdisc_enqueue_root(skb, q);
+                       qdisc_run(q);
                }
-               spin_unlock(&txq->lock);
+               spin_unlock(root_lock);
+
+               goto out;
        }
 
        /* The device has no queue. Common case for software devices:
@@ -1756,19 +1864,18 @@ gso:
        if (dev->flags & IFF_UP) {
                int cpu = smp_processor_id(); /* ok because BHs are off */
 
-               if (dev->xmit_lock_owner != cpu) {
+               if (txq->xmit_lock_owner != cpu) {
 
-                       HARD_TX_LOCK(dev, cpu);
+                       HARD_TX_LOCK(dev, txq, cpu);
 
-                       if (!netif_queue_stopped(dev) &&
-                           !netif_subqueue_stopped(dev, skb)) {
+                       if (!netif_tx_queue_stopped(txq)) {
                                rc = 0;
-                               if (!dev_hard_start_xmit(skb, dev)) {
-                                       HARD_TX_UNLOCK(dev);
+                               if (!dev_hard_start_xmit(skb, dev, txq)) {
+                                       HARD_TX_UNLOCK(dev, txq);
                                        goto out;
                                }
                        }
-                       HARD_TX_UNLOCK(dev);
+                       HARD_TX_UNLOCK(dev, txq);
                        if (net_ratelimit())
                                printk(KERN_CRIT "Virtual device %s asks to "
                                       "queue packet!\n", dev->name);
@@ -1842,7 +1949,6 @@ int netif_rx(struct sk_buff *skb)
        if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
                if (queue->input_pkt_queue.qlen) {
 enqueue:
-                       dev_hold(skb->dev);
                        __skb_queue_tail(&queue->input_pkt_queue, skb);
                        local_irq_restore(flags);
                        return NET_RX_SUCCESS;
@@ -1874,22 +1980,6 @@ int netif_rx_ni(struct sk_buff *skb)
 
 EXPORT_SYMBOL(netif_rx_ni);
 
-static inline struct net_device *skb_bond(struct sk_buff *skb)
-{
-       struct net_device *dev = skb->dev;
-
-       if (dev->master) {
-               if (skb_bond_should_drop(skb)) {
-                       kfree_skb(skb);
-                       return NULL;
-               }
-               skb->dev = dev->master;
-       }
-
-       return dev;
-}
-
-
 static void net_tx_action(struct softirq_action *h)
 {
        struct softnet_data *sd = &__get_cpu_var(softnet_data);
@@ -1906,13 +1996,13 @@ static void net_tx_action(struct softirq_action *h)
                        struct sk_buff *skb = clist;
                        clist = clist->next;
 
-                       BUG_TRAP(!atomic_read(&skb->users));
+                       WARN_ON(atomic_read(&skb->users));
                        __kfree_skb(skb);
                }
        }
 
        if (sd->output_queue) {
-               struct net_device *head;
+               struct Qdisc *head;
 
                local_irq_disable();
                head = sd->output_queue;
@@ -1920,20 +2010,27 @@ static void net_tx_action(struct softirq_action *h)
                local_irq_enable();
 
                while (head) {
-                       struct net_device *dev = head;
-                       struct netdev_queue *txq;
-                       head = head->next_sched;
+                       struct Qdisc *q = head;
+                       spinlock_t *root_lock;
 
-                       txq = &dev->tx_queue;
-
-                       smp_mb__before_clear_bit();
-                       clear_bit(__LINK_STATE_SCHED, &dev->state);
+                       head = head->next_sched;
 
-                       if (spin_trylock(&txq->lock)) {
-                               qdisc_run(dev);
-                               spin_unlock(&txq->lock);
+                       root_lock = qdisc_lock(q);
+                       if (spin_trylock(root_lock)) {
+                               smp_mb__before_clear_bit();
+                               clear_bit(__QDISC_STATE_SCHED,
+                                         &q->state);
+                               qdisc_run(q);
+                               spin_unlock(root_lock);
                        } else {
-                               netif_schedule(dev);
+                               if (!test_bit(__QDISC_STATE_DEACTIVATED,
+                                             &q->state)) {
+                                       __netif_reschedule(q);
+                               } else {
+                                       smp_mb__before_clear_bit();
+                                       clear_bit(__QDISC_STATE_SCHED,
+                                                 &q->state);
+                               }
                        }
                }
        }
@@ -2032,10 +2129,13 @@ static int ing_filter(struct sk_buff *skb)
 
        rxq = &dev->rx_queue;
 
-       spin_lock(&rxq->lock);
-       if ((q = dev->qdisc_ingress) != NULL)
-               result = q->enqueue(skb, q);
-       spin_unlock(&rxq->lock);
+       q = rxq->qdisc;
+       if (q != &noop_qdisc) {
+               spin_lock(qdisc_lock(q));
+               if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state)))
+                       result = qdisc_enqueue_root(skb, q);
+               spin_unlock(qdisc_lock(q));
+       }
 
        return result;
 }
@@ -2044,7 +2144,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       if (!skb->dev->qdisc_ingress)
+       if (skb->dev->rx_queue.qdisc == &noop_qdisc)
                goto out;
 
        if (*pt_prev) {
@@ -2068,6 +2168,33 @@ out:
 }
 #endif
 
+/*
+ *     netif_nit_deliver - deliver received packets to network taps
+ *     @skb: buffer
+ *
+ *     This function is used to deliver incoming packets to network
+ *     taps. It should be used when the normal netif_receive_skb path
+ *     is bypassed, for example because of VLAN acceleration.
+ */
+void netif_nit_deliver(struct sk_buff *skb)
+{
+       struct packet_type *ptype;
+
+       if (list_empty(&ptype_all))
+               return;
+
+       skb_reset_network_header(skb);
+       skb_reset_transport_header(skb);
+       skb->mac_len = skb->network_header - skb->mac_header;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, &ptype_all, list) {
+               if (!ptype->dev || ptype->dev == skb->dev)
+                       deliver_skb(skb, ptype, skb->dev);
+       }
+       rcu_read_unlock();
+}
+
 /**
  *     netif_receive_skb - process receive buffer from network
  *     @skb: buffer to process
@@ -2087,6 +2214,7 @@ int netif_receive_skb(struct sk_buff *skb)
 {
        struct packet_type *ptype, *pt_prev;
        struct net_device *orig_dev;
+       struct net_device *null_or_orig;
        int ret = NET_RX_DROP;
        __be16 type;
 
@@ -2100,10 +2228,14 @@ int netif_receive_skb(struct sk_buff *skb)
        if (!skb->iif)
                skb->iif = skb->dev->ifindex;
 
-       orig_dev = skb_bond(skb);
-
-       if (!orig_dev)
-               return NET_RX_DROP;
+       null_or_orig = NULL;
+       orig_dev = skb->dev;
+       if (orig_dev->master) {
+               if (skb_bond_should_drop(skb))
+                       null_or_orig = orig_dev; /* deliver only exact match */
+               else
+                       skb->dev = orig_dev->master;
+       }
 
        __get_cpu_var(netdev_rx_stat).total++;
 
@@ -2127,7 +2259,8 @@ int netif_receive_skb(struct sk_buff *skb)
 #endif
 
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
-               if (!ptype->dev || ptype->dev == skb->dev) {
+               if (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+                   ptype->dev == orig_dev) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
@@ -2152,7 +2285,8 @@ ncls:
        list_for_each_entry_rcu(ptype,
                        &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
                if (ptype->type == type &&
-                   (!ptype->dev || ptype->dev == skb->dev)) {
+                   (ptype->dev == null_or_orig || ptype->dev == skb->dev ||
+                    ptype->dev == orig_dev)) {
                        if (pt_prev)
                                ret = deliver_skb(skb, pt_prev, orig_dev);
                        pt_prev = ptype;
@@ -2174,6 +2308,20 @@ out:
        return ret;
 }
 
+/* Network device is going away, flush any packets still pending  */
+static void flush_backlog(void *arg)
+{
+       struct net_device *dev = arg;
+       struct softnet_data *queue = &__get_cpu_var(softnet_data);
+       struct sk_buff *skb, *tmp;
+
+       skb_queue_walk_safe(&queue->input_pkt_queue, skb, tmp)
+               if (skb->dev == dev) {
+                       __skb_unlink(skb, &queue->input_pkt_queue);
+                       kfree_skb(skb);
+               }
+}
+
 static int process_backlog(struct napi_struct *napi, int quota)
 {
        int work = 0;
@@ -2183,7 +2331,6 @@ static int process_backlog(struct napi_struct *napi, int quota)
        napi->weight = weight_p;
        do {
                struct sk_buff *skb;
-               struct net_device *dev;
 
                local_irq_disable();
                skb = __skb_dequeue(&queue->input_pkt_queue);
@@ -2192,14 +2339,9 @@ static int process_backlog(struct napi_struct *napi, int quota)
                        local_irq_enable();
                        break;
                }
-
                local_irq_enable();
 
-               dev = skb->dev;
-
                netif_receive_skb(skb);
-
-               dev_put(dev);
        } while (++work < quota && jiffies == start_time);
 
        return work;
@@ -2299,7 +2441,7 @@ out:
         */
        if (!cpus_empty(net_dma.channel_mask)) {
                int chan_idx;
-               for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
+               for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
                        struct dma_chan *chan = net_dma.channels[chan_idx];
                        if (chan)
                                dma_async_memcpy_issue_pending(chan);
@@ -2807,6 +2949,12 @@ int netdev_set_master(struct net_device *slave, struct net_device *master)
        return 0;
 }
 
+static void dev_change_rx_flags(struct net_device *dev, int flags)
+{
+       if (dev->flags & IFF_UP && dev->change_rx_flags)
+               dev->change_rx_flags(dev, flags);
+}
+
 static int __dev_set_promiscuity(struct net_device *dev, int inc)
 {
        unsigned short old_flags = dev->flags;
@@ -2844,8 +2992,7 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
                                current->uid, current->gid,
                                audit_get_sessionid(current));
 
-               if (dev->change_rx_flags)
-                       dev->change_rx_flags(dev, IFF_PROMISC);
+               dev_change_rx_flags(dev, IFF_PROMISC);
        }
        return 0;
 }
@@ -2911,8 +3058,7 @@ int dev_set_allmulti(struct net_device *dev, int inc)
                }
        }
        if (dev->flags ^ old_flags) {
-               if (dev->change_rx_flags)
-                       dev->change_rx_flags(dev, IFF_ALLMULTI);
+               dev_change_rx_flags(dev, IFF_ALLMULTI);
                dev_set_rx_mode(dev);
        }
        return 0;
@@ -2954,9 +3100,9 @@ void __dev_set_rx_mode(struct net_device *dev)
 
 void dev_set_rx_mode(struct net_device *dev)
 {
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
        __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
 }
 
 int __dev_addr_delete(struct dev_addr_list **list, int *count,
@@ -3034,11 +3180,11 @@ int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
 
        ASSERT_RTNL();
 
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
        err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
        if (!err)
                __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
        return err;
 }
 EXPORT_SYMBOL(dev_unicast_delete);
@@ -3060,11 +3206,11 @@ int dev_unicast_add(struct net_device *dev, void *addr, int alen)
 
        ASSERT_RTNL();
 
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
        err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
        if (!err)
                __dev_set_rx_mode(dev);
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
        return err;
 }
 EXPORT_SYMBOL(dev_unicast_add);
@@ -3131,12 +3277,12 @@ int dev_unicast_sync(struct net_device *to, struct net_device *from)
 {
        int err = 0;
 
-       netif_tx_lock_bh(to);
+       netif_addr_lock_bh(to);
        err = __dev_addr_sync(&to->uc_list, &to->uc_count,
                              &from->uc_list, &from->uc_count);
        if (!err)
                __dev_set_rx_mode(to);
-       netif_tx_unlock_bh(to);
+       netif_addr_unlock_bh(to);
        return err;
 }
 EXPORT_SYMBOL(dev_unicast_sync);
@@ -3152,15 +3298,15 @@ EXPORT_SYMBOL(dev_unicast_sync);
  */
 void dev_unicast_unsync(struct net_device *to, struct net_device *from)
 {
-       netif_tx_lock_bh(from);
-       netif_tx_lock_bh(to);
+       netif_addr_lock_bh(from);
+       netif_addr_lock(to);
 
        __dev_addr_unsync(&to->uc_list, &to->uc_count,
                          &from->uc_list, &from->uc_count);
        __dev_set_rx_mode(to);
 
-       netif_tx_unlock_bh(to);
-       netif_tx_unlock_bh(from);
+       netif_addr_unlock(to);
+       netif_addr_unlock_bh(from);
 }
 EXPORT_SYMBOL(dev_unicast_unsync);
 
@@ -3180,7 +3326,7 @@ static void __dev_addr_discard(struct dev_addr_list **list)
 
 static void dev_addr_discard(struct net_device *dev)
 {
-       netif_tx_lock_bh(dev);
+       netif_addr_lock_bh(dev);
 
        __dev_addr_discard(&dev->uc_list);
        dev->uc_count = 0;
@@ -3188,9 +3334,15 @@ static void dev_addr_discard(struct net_device *dev)
        __dev_addr_discard(&dev->mc_list);
        dev->mc_count = 0;
 
-       netif_tx_unlock_bh(dev);
+       netif_addr_unlock_bh(dev);
 }
 
+/**
+ *     dev_get_flags - get flags reported to userspace
+ *     @dev: device
+ *
+ *     Get the combination of flag bits exported through APIs to userspace.
+ */
 unsigned dev_get_flags(const struct net_device *dev)
 {
        unsigned flags;
@@ -3215,6 +3367,14 @@ unsigned dev_get_flags(const struct net_device *dev)
        return flags;
 }
 
+/**
+ *     dev_change_flags - change device settings
+ *     @dev: device
+ *     @flags: device state flags
+ *
+ *     Change settings on device based state flags. The flags are
+ *     in the userspace exported format.
+ */
 int dev_change_flags(struct net_device *dev, unsigned flags)
 {
        int ret, changes;
@@ -3236,8 +3396,8 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
         *      Load in the correct multicast list now the flags have changed.
         */
 
-       if (dev->change_rx_flags && (old_flags ^ flags) & IFF_MULTICAST)
-               dev->change_rx_flags(dev, IFF_MULTICAST);
+       if ((old_flags ^ flags) & IFF_MULTICAST)
+               dev_change_rx_flags(dev, IFF_MULTICAST);
 
        dev_set_rx_mode(dev);
 
@@ -3284,6 +3444,13 @@ int dev_change_flags(struct net_device *dev, unsigned flags)
        return ret;
 }
 
+/**
+ *     dev_set_mtu - Change maximum transfer unit
+ *     @dev: device
+ *     @new_mtu: new transfer unit
+ *
+ *     Change the maximum transfer size of the network device.
+ */
 int dev_set_mtu(struct net_device *dev, int new_mtu)
 {
        int err;
@@ -3308,6 +3475,13 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
        return err;
 }
 
+/**
+ *     dev_set_mac_address - Change Media Access Control Address
+ *     @dev: device
+ *     @sa: new address
+ *
+ *     Change the hardware (MAC) address of the device
+ */
 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
 {
        int err;
@@ -3697,14 +3871,11 @@ static int dev_new_index(struct net *net)
 }
 
 /* Delayed registration/unregisteration */
-static DEFINE_SPINLOCK(net_todo_list_lock);
 static LIST_HEAD(net_todo_list);
 
 static void net_set_todo(struct net_device *dev)
 {
-       spin_lock(&net_todo_list_lock);
        list_add_tail(&dev->todo_list, &net_todo_list);
-       spin_unlock(&net_todo_list_lock);
 }
 
 static void rollback_registered(struct net_device *dev)
@@ -3751,7 +3922,7 @@ static void rollback_registered(struct net_device *dev)
                dev->uninit(dev);
 
        /* Notifier chain MUST detach us from master device. */
-       BUG_TRAP(!dev->master);
+       WARN_ON(dev->master);
 
        /* Remove entries from kobject tree */
        netdev_unregister_kobject(dev);
@@ -3761,6 +3932,21 @@ static void rollback_registered(struct net_device *dev)
        dev_put(dev);
 }
 
+static void __netdev_init_queue_locks_one(struct net_device *dev,
+                                         struct netdev_queue *dev_queue,
+                                         void *_unused)
+{
+       spin_lock_init(&dev_queue->_xmit_lock);
+       netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
+       dev_queue->xmit_lock_owner = -1;
+}
+
+static void netdev_init_queue_locks(struct net_device *dev)
+{
+       netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
+       __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
+}
+
 /**
  *     register_netdevice      - register a network device
  *     @dev: device to register
@@ -3795,9 +3981,9 @@ int register_netdevice(struct net_device *dev)
        BUG_ON(!dev_net(dev));
        net = dev_net(dev);
 
-       spin_lock_init(&dev->_xmit_lock);
-       netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
-       dev->xmit_lock_owner = -1;
+       spin_lock_init(&dev->addr_list_lock);
+       netdev_set_addr_lockdep_class(dev);
+       netdev_init_queue_locks(dev);
 
        dev->iflink = -1;
 
@@ -3877,6 +4063,10 @@ int register_netdevice(struct net_device *dev)
                }
        }
 
+       /* Enable software GSO if SG is supported. */
+       if (dev->features & NETIF_F_SG)
+               dev->features |= NETIF_F_GSO;
+
        netdev_initialize_kobject(dev);
        ret = netdev_register_kobject(dev);
        if (ret)
@@ -4012,33 +4202,24 @@ static void netdev_wait_allrefs(struct net_device *dev)
  *     free_netdev(y1);
  *     free_netdev(y2);
  *
- * We are invoked by rtnl_unlock() after it drops the semaphore.
+ * We are invoked by rtnl_unlock().
  * This allows us to deal with problems:
  * 1) We can delete sysfs objects which invoke hotplug
  *    without deadlocking with linkwatch via keventd.
  * 2) Since we run with the RTNL semaphore not held, we can sleep
  *    safely in order to wait for the netdev refcnt to drop to zero.
+ *
+ * We must not return until all unregister events added during
+ * the interval the lock was held have been completed.
  */
-static DEFINE_MUTEX(net_todo_run_mutex);
 void netdev_run_todo(void)
 {
        struct list_head list;
 
-       /* Need to guard against multiple cpu's getting out of order. */
-       mutex_lock(&net_todo_run_mutex);
-
-       /* Not safe to do outside the semaphore.  We must not return
-        * until all unregister events invoked by the local processor
-        * have been completed (either by this todo run, or one on
-        * another cpu).
-        */
-       if (list_empty(&net_todo_list))
-               goto out;
-
        /* Snapshot list, allow later requests */
-       spin_lock(&net_todo_list_lock);
        list_replace_init(&net_todo_list, &list);
-       spin_unlock(&net_todo_list_lock);
+
+       __rtnl_unlock();
 
        while (!list_empty(&list)) {
                struct net_device *dev
@@ -4054,13 +4235,15 @@ void netdev_run_todo(void)
 
                dev->reg_state = NETREG_UNREGISTERED;
 
+               on_each_cpu(flush_backlog, dev, 1);
+
                netdev_wait_allrefs(dev);
 
                /* paranoia */
                BUG_ON(atomic_read(&dev->refcnt));
-               BUG_TRAP(!dev->ip_ptr);
-               BUG_TRAP(!dev->ip6_ptr);
-               BUG_TRAP(!dev->dn_ptr);
+               WARN_ON(dev->ip_ptr);
+               WARN_ON(dev->ip6_ptr);
+               WARN_ON(dev->dn_ptr);
 
                if (dev->destructor)
                        dev->destructor(dev);
@@ -4068,9 +4251,6 @@ void netdev_run_todo(void)
                /* Free network device */
                kobject_put(&dev->dev.kobj);
        }
-
-out:
-       mutex_unlock(&net_todo_run_mutex);
 }
 
 static struct net_device_stats *internal_stats(struct net_device *dev)
@@ -4079,16 +4259,17 @@ static struct net_device_stats *internal_stats(struct net_device *dev)
 }
 
 static void netdev_init_one_queue(struct net_device *dev,
-                                 struct netdev_queue *queue)
+                                 struct netdev_queue *queue,
+                                 void *_unused)
 {
-       spin_lock_init(&queue->lock);
        queue->dev = dev;
 }
 
 static void netdev_init_queues(struct net_device *dev)
 {
-       netdev_init_one_queue(dev, &dev->rx_queue);
-       netdev_init_one_queue(dev, &dev->tx_queue);
+       netdev_init_one_queue(dev, &dev->rx_queue, NULL);
+       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+       spin_lock_init(&dev->tx_global_lock);
 }
 
 /**
@@ -4105,14 +4286,14 @@ static void netdev_init_queues(struct net_device *dev)
 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                void (*setup)(struct net_device *), unsigned int queue_count)
 {
-       void *p;
+       struct netdev_queue *tx;
        struct net_device *dev;
-       int alloc_size;
+       size_t alloc_size;
+       void *p;
 
        BUG_ON(strlen(name) >= sizeof(dev->name));
 
-       alloc_size = sizeof(struct net_device) +
-                    sizeof(struct net_device_subqueue) * (queue_count - 1);
+       alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
                /* ensure 32-byte alignment of private area */
                alloc_size = (alloc_size + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST;
@@ -4127,20 +4308,29 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                return NULL;
        }
 
+       tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
+       if (!tx) {
+               printk(KERN_ERR "alloc_netdev: Unable to allocate "
+                      "tx qdiscs.\n");
+               kfree(p);
+               return NULL;
+       }
+
        dev = (struct net_device *)
                (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
        dev->padded = (char *)dev - (char *)p;
        dev_net_set(dev, &init_net);
 
+       dev->_tx = tx;
+       dev->num_tx_queues = queue_count;
+       dev->real_num_tx_queues = queue_count;
+
        if (sizeof_priv) {
                dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) +
-                              (sizeof(struct net_device_subqueue) *
-                               (queue_count - 1)) + NETDEV_ALIGN_CONST)
+                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
                              & ~NETDEV_ALIGN_CONST));
        }
 
-       dev->egress_subqueue_count = queue_count;
        dev->gso_max_size = GSO_MAX_SIZE;
 
        netdev_init_queues(dev);
@@ -4165,6 +4355,8 @@ void free_netdev(struct net_device *dev)
 {
        release_net(dev_net(dev));
 
+       kfree(dev->_tx);
+
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                kfree((char *)dev - dev->padded);
@@ -4178,7 +4370,12 @@ void free_netdev(struct net_device *dev)
        put_device(&dev->dev);
 }
 
-/* Synchronize with packet receive processing. */
+/**
+ *     synchronize_net -  Synchronize with packet receive processing
+ *
+ *     Wait for packets currently being received to be done.
+ *     Does not block later packets from starting.
+ */
 void synchronize_net(void)
 {
        might_sleep();
@@ -4346,7 +4543,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
                            void *ocpu)
 {
        struct sk_buff **list_skb;
-       struct net_device **list_net;
+       struct Qdisc **list_net;
        struct sk_buff *skb;
        unsigned int cpu, oldcpu = (unsigned long)ocpu;
        struct softnet_data *sd, *oldsd;
@@ -4408,7 +4605,7 @@ static void net_dma_rebalance(struct net_dma *net_dma)
        i = 0;
        cpu = first_cpu(cpu_online_map);
 
-       for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
+       for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
                chan = net_dma->channels[chan_idx];
 
                n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
@@ -4480,7 +4677,7 @@ netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 }
 
 /**
- * netdev_dma_regiser - register the networking subsystem as a DMA client
+ * netdev_dma_register - register the networking subsystem as a DMA client
  */
 static int __init netdev_dma_register(void)
 {
@@ -4526,6 +4723,12 @@ int netdev_compute_features(unsigned long all, unsigned long one)
                one |= NETIF_F_GSO_SOFTWARE;
        one |= NETIF_F_GSO;
 
+       /*
+        * If even one device supports a GSO protocol with software fallback,
+        * enable it for all.
+        */
+       all |= one & NETIF_F_GSO_SOFTWARE;
+
        /* If even one device supports robust GSO, enable it for all. */
        if (one & NETIF_F_GSO_ROBUST)
                all |= NETIF_F_GSO_ROBUST;
@@ -4575,6 +4778,34 @@ err_name:
        return -ENOMEM;
 }
 
+/**
+ *     netdev_drivername - network driver for the device
+ *     @dev: network device
+ *     @buffer: buffer for resulting name
+ *     @len: size of buffer
+ *
+ *     Determine network driver for device.
+ */
+char *netdev_drivername(const struct net_device *dev, char *buffer, int len)
+{
+       const struct device_driver *driver;
+       const struct device *parent;
+
+       if (len <= 0 || !buffer)
+               return buffer;
+       buffer[0] = 0;
+
+       parent = dev->dev.parent;
+
+       if (!parent)
+               return buffer;
+
+       driver = parent->driver;
+       if (driver && driver->name)
+               strlcpy(buffer, driver->name, len);
+       return buffer;
+}
+
 static void __net_exit netdev_exit(struct net *net)
 {
        kfree(net->dev_name_head);
@@ -4671,8 +4902,8 @@ static int __init net_dev_init(void)
 
        dev_boot_phase = 0;
 
-       open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
-       open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
+       open_softirq(NET_TX_SOFTIRQ, net_tx_action);
+       open_softirq(NET_RX_SOFTIRQ, net_rx_action);
 
        hotcpu_notifier(dev_cpu_callback, 0);
        dst_init();
@@ -4725,8 +4956,6 @@ EXPORT_SYMBOL(br_fdb_get_hook);
 EXPORT_SYMBOL(br_fdb_put_hook);
 #endif
 
-#ifdef CONFIG_KMOD
 EXPORT_SYMBOL(dev_load);
-#endif
 
 EXPORT_PER_CPU_SYMBOL(softnet_data);