Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-2.6
diff --git a/net/core/dev.c b/net/core/dev.c
index 42b200f..7c6a46f 100644
 #include <linux/jhash.h>
 #include <linux/random.h>
 #include <trace/events/napi.h>
+#include <trace/events/net.h>
+#include <trace/events/skb.h>
 #include <linux/pci.h>
 #include <linux/inetdevice.h>
 
@@ -741,34 +743,31 @@ struct net_device *dev_get_by_index(struct net *net, int ifindex)
 EXPORT_SYMBOL(dev_get_by_index);
 
 /**
- *     dev_getbyhwaddr - find a device by its hardware address
+ *     dev_getbyhwaddr_rcu - find a device by its hardware address
  *     @net: the applicable net namespace
  *     @type: media type of device
  *     @ha: hardware address
  *
  *     Search for an interface by MAC address. Returns NULL if the device
- *     is not found or a pointer to the device. The caller must hold the
- *     rtnl semaphore. The returned device has not had its ref count increased
+ *     is not found or a pointer to the device. The caller must hold the
+ *     RCU read lock. The returned device has not had its ref count increased
  *     and the caller must therefore be careful about locking
  *
- *     BUGS:
- *     If the API was consistent this would be __dev_get_by_hwaddr
  */
 
-struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type, char *ha)
+struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+                                      const char *ha)
 {
        struct net_device *dev;
 
-       ASSERT_RTNL();
-
-       for_each_netdev(net, dev)
+       for_each_netdev_rcu(net, dev)
                if (dev->type == type &&
                    !memcmp(dev->dev_addr, ha, dev->addr_len))
                        return dev;
 
        return NULL;
 }
-EXPORT_SYMBOL(dev_getbyhwaddr);
+EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
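
A minimal caller sketch for context (hypothetical, not part of this diff):
the lookup and any use of the returned pointer must sit inside a single RCU
read-side critical section, since no reference is taken. do_something() and
mac are placeholders:

    rcu_read_lock();
    dev = dev_getbyhwaddr_rcu(net, ARPHRD_ETHER, mac);
    if (dev)
            do_something(dev);      /* must not sleep or stash dev */
    rcu_read_unlock();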
 
 struct net_device *__dev_getfirstbyhwtype(struct net *net, unsigned short type)
 {
@@ -1223,52 +1222,90 @@ int dev_open(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_open);
 
-static int __dev_close(struct net_device *dev)
+static int __dev_close_many(struct list_head *head)
 {
-       const struct net_device_ops *ops = dev->netdev_ops;
+       struct net_device *dev;
 
        ASSERT_RTNL();
        might_sleep();
 
-       /*
-        *      Tell people we are going down, so that they can
-        *      prepare to death, when device is still operating.
-        */
-       call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
+       list_for_each_entry(dev, head, unreg_list) {
+               /*
+                *      Tell people we are going down, so that they can
+                *      prepare for it while the device is still operating.
+                */
+               call_netdevice_notifiers(NETDEV_GOING_DOWN, dev);
 
-       clear_bit(__LINK_STATE_START, &dev->state);
+               clear_bit(__LINK_STATE_START, &dev->state);
 
-       /* Synchronize to scheduled poll. We cannot touch poll list,
-        * it can be even on different cpu. So just clear netif_running().
-        *
-        * dev->stop() will invoke napi_disable() on all of it's
-        * napi_struct instances on this device.
-        */
-       smp_mb__after_clear_bit(); /* Commit netif_running(). */
+               /* Synchronize to scheduled poll. We cannot touch poll list, it
+                * can be even on different cpu. So just clear netif_running().
+                *
+                * dev->stop() will invoke napi_disable() on all of its
+                * napi_struct instances on this device.
+                */
+               smp_mb__after_clear_bit(); /* Commit netif_running(). */
+       }
 
-       dev_deactivate(dev);
+       dev_deactivate_many(head);
 
-       /*
-        *      Call the device specific close. This cannot fail.
-        *      Only if device is UP
-        *
-        *      We allow it to be called even after a DETACH hot-plug
-        *      event.
-        */
-       if (ops->ndo_stop)
-               ops->ndo_stop(dev);
+       list_for_each_entry(dev, head, unreg_list) {
+               const struct net_device_ops *ops = dev->netdev_ops;
 
-       /*
-        *      Device is now down.
-        */
+               /*
+                *      Call the device specific close. This cannot fail
+                *      and is only invoked if the device is UP.
+                *
+                *      We allow it to be called even after a DETACH hot-plug
+                *      event.
+                */
+               if (ops->ndo_stop)
+                       ops->ndo_stop(dev);
+
+               /*
+                *      Device is now down.
+                */
+
+               dev->flags &= ~IFF_UP;
+
+               /*
+                *      Shutdown NET_DMA
+                */
+               net_dmaengine_put();
+       }
+
+       return 0;
+}
 
-       dev->flags &= ~IFF_UP;
+static int __dev_close(struct net_device *dev)
+{
+       LIST_HEAD(single);
+
+       list_add(&dev->unreg_list, &single);
+       return __dev_close_many(&single);
+}
+
+int dev_close_many(struct list_head *head)
+{
+       struct net_device *dev, *tmp;
+       LIST_HEAD(tmp_list);
+
+       list_for_each_entry_safe(dev, tmp, head, unreg_list)
+               if (!(dev->flags & IFF_UP))
+                       list_move(&dev->unreg_list, &tmp_list);
+
+       __dev_close_many(head);
 
        /*
-        *      Shutdown NET_DMA
+        * Tell people we are down
         */
-       net_dmaengine_put();
+       list_for_each_entry(dev, head, unreg_list) {
+               rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
+               call_netdevice_notifiers(NETDEV_DOWN, dev);
+       }
 
+       /* rollback_registered_many needs the complete original list */
+       list_splice(&tmp_list, head);
        return 0;
 }
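
An illustrative use of the new batching API (hypothetical caller holding
RTNL; dev_a and dev_b are placeholders). Devices are chained through
dev->unreg_list, so a device can sit on only one such list at a time:

    LIST_HEAD(close_list);

    list_add(&dev_a->unreg_list, &close_list);
    list_add(&dev_b->unreg_list, &close_list);
    dev_close_many(&close_list);        /* one notifier pass per phase */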
 
@@ -1283,16 +1320,10 @@ static int __dev_close(struct net_device *dev)
  */
 int dev_close(struct net_device *dev)
 {
-       if (!(dev->flags & IFF_UP))
-               return 0;
-
-       __dev_close(dev);
+       LIST_HEAD(single);
 
-       /*
-        * Tell people we are down
-        */
-       rtmsg_ifinfo(RTM_NEWLINK, dev, IFF_UP|IFF_RUNNING);
-       call_netdevice_notifiers(NETDEV_DOWN, dev);
+       list_add(&dev->unreg_list, &single);
+       dev_close_many(&single);
 
        return 0;
 }
@@ -1483,8 +1514,9 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
        skb_orphan(skb);
        nf_reset(skb);
 
-       if (!(dev->flags & IFF_UP) ||
-           (skb->len > (dev->mtu + dev->hard_header_len))) {
+       if (unlikely(!(dev->flags & IFF_UP) ||
+                    (skb->len > (dev->mtu + dev->hard_header_len + VLAN_HLEN)))) {
+               atomic_long_inc(&dev->rx_dropped);
                kfree_skb(skb);
                return NET_RX_DROP;
        }
@@ -1496,6 +1528,14 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 }
 EXPORT_SYMBOL_GPL(dev_forward_skb);
 
+static inline int deliver_skb(struct sk_buff *skb,
+                             struct packet_type *pt_prev,
+                             struct net_device *orig_dev)
+{
+       atomic_inc(&skb->users);
+       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
+}
+
 /*
  *     Support routine. Sends outgoing frames to any network
  *     taps currently in use.
@@ -1504,13 +1544,8 @@ EXPORT_SYMBOL_GPL(dev_forward_skb);
 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 {
        struct packet_type *ptype;
-
-#ifdef CONFIG_NET_CLS_ACT
-       if (!(skb->tstamp.tv64 && (G_TC_FROM(skb->tc_verd) & AT_INGRESS)))
-               net_timestamp_set(skb);
-#else
-       net_timestamp_set(skb);
-#endif
+       struct sk_buff *skb2 = NULL;
+       struct packet_type *pt_prev = NULL;
 
        rcu_read_lock();
        list_for_each_entry_rcu(ptype, &ptype_all, list) {
@@ -1520,10 +1555,18 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
                if ((ptype->dev == dev || !ptype->dev) &&
                    (ptype->af_packet_priv == NULL ||
                     (struct sock *)ptype->af_packet_priv != skb->sk)) {
-                       struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
+                       if (pt_prev) {
+                               deliver_skb(skb2, pt_prev, skb->dev);
+                               pt_prev = ptype;
+                               continue;
+                       }
+
+                       skb2 = skb_clone(skb, GFP_ATOMIC);
                        if (!skb2)
                                break;
 
+                       net_timestamp_set(skb2);
+
                        /* skb->nh should be correctly
                           set by the sender, so that the second statement is
                           just protection against buggy protocols.
@@ -1542,9 +1585,11 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 
                        skb2->transport_header = skb2->network_header;
                        skb2->pkt_type = PACKET_OUTGOING;
-                       ptype->func(skb2, skb->dev, ptype, skb->dev);
+                       pt_prev = ptype;
                }
        }
+       if (pt_prev)
+               pt_prev->func(skb2, skb->dev, pt_prev, skb->dev);
        rcu_read_unlock();
 }
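
The pt_prev handling above is the usual deferred-delivery pattern: every
matching tap except the last goes through deliver_skb(), which takes an
extra reference, while the final tap consumes the clone directly, saving
one atomic operation per packet. Schematically (for_each_tap() is a
made-up iterator, not a real kernel macro):

    pt_prev = NULL;
    for_each_tap(ptype) {
            if (pt_prev)
                    deliver_skb(skb2, pt_prev, dev);  /* takes a reference */
            pt_prev = ptype;
    }
    if (pt_prev)
            pt_prev->func(skb2, dev, pt_prev, dev);   /* consumes the last ref */

It also lets net_timestamp_set() run on the clone only when at least one
tap actually matched.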
 
@@ -1552,21 +1597,63 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues, stale skbs on the qdisc must be flushed.
  */
-void netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
+int netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq)
 {
-       unsigned int real_num = dev->real_num_tx_queues;
+       int rc;
 
-       if (unlikely(txq > dev->num_tx_queues))
-               ;
-       else if (txq > real_num)
-               dev->real_num_tx_queues = txq;
-       else if (txq < real_num) {
-               dev->real_num_tx_queues = txq;
-               qdisc_reset_all_tx_gt(dev, txq);
+       if (txq < 1 || txq > dev->num_tx_queues)
+               return -EINVAL;
+
+       if (dev->reg_state == NETREG_REGISTERED) {
+               ASSERT_RTNL();
+
+               rc = netdev_queue_update_kobjects(dev, dev->real_num_tx_queues,
+                                                 txq);
+               if (rc)
+                       return rc;
+
+               if (txq < dev->real_num_tx_queues)
+                       qdisc_reset_all_tx_gt(dev, txq);
        }
+
+       dev->real_num_tx_queues = txq;
+       return 0;
 }
 EXPORT_SYMBOL(netif_set_real_num_tx_queues);
 
+#ifdef CONFIG_RPS
+/**
+ *     netif_set_real_num_rx_queues - set actual number of RX queues used
+ *     @dev: Network device
+ *     @rxq: Actual number of RX queues
+ *
+ *     This must be called either with the rtnl_lock held or before
+ *     registration of the net device.  Returns 0 on success, or a
+ *     negative error code.  If called before registration, it always
+ *     succeeds.
+ */
+int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
+{
+       int rc;
+
+       if (rxq < 1 || rxq > dev->num_rx_queues)
+               return -EINVAL;
+
+       if (dev->reg_state == NETREG_REGISTERED) {
+               ASSERT_RTNL();
+
+               rc = net_rx_queue_update_kobjects(dev, dev->real_num_rx_queues,
+                                                 rxq);
+               if (rc)
+                       return rc;
+       }
+
+       dev->real_num_rx_queues = rxq;
+       return 0;
+}
+EXPORT_SYMBOL(netif_set_real_num_rx_queues);
+#endif
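
A sketch of how a driver might shrink its active queue counts after probing
the hardware (hypothetical counts; once the device is registered this must
run under rtnl_lock()):

    rtnl_lock();
    err = netif_set_real_num_tx_queues(dev, 4);
    if (!err)
            err = netif_set_real_num_rx_queues(dev, 4);
    rtnl_unlock();
    if (err)
            return err;     /* count out of range or kobject update failed */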
+
 static inline void __netif_reschedule(struct Qdisc *q)
 {
        struct softnet_data *sd;
@@ -1645,32 +1732,6 @@ void netif_device_attach(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_device_attach);
 
-static bool can_checksum_protocol(unsigned long features, __be16 protocol)
-{
-       return ((features & NETIF_F_GEN_CSUM) ||
-               ((features & NETIF_F_IP_CSUM) &&
-                protocol == htons(ETH_P_IP)) ||
-               ((features & NETIF_F_IPV6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)) ||
-               ((features & NETIF_F_FCOE_CRC) &&
-                protocol == htons(ETH_P_FCOE)));
-}
-
-static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
-{
-       if (can_checksum_protocol(dev->features, skb->protocol))
-               return true;
-
-       if (skb->protocol == htons(ETH_P_8021Q)) {
-               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-               if (can_checksum_protocol(dev->features & dev->vlan_features,
-                                         veh->h_vlan_encapsulated_proto))
-                       return true;
-       }
-
-       return false;
-}
-
 /**
  * skb_dev_set -- assign a new device to a buffer
  * @skb: buffer for the new device
@@ -1718,7 +1779,7 @@ int skb_checksum_help(struct sk_buff *skb)
                goto out_set_summed;
        }
 
-       offset = skb->csum_start - skb_headroom(skb);
+       offset = skb_checksum_start_offset(skb);
        BUG_ON(offset >= skb_headlen(skb));
        csum = skb_checksum(skb, offset, skb->len - offset, 0);
 
@@ -1755,8 +1816,20 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
        struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
        struct packet_type *ptype;
        __be16 type = skb->protocol;
+       int vlan_depth = ETH_HLEN;
        int err;
 
+       while (type == htons(ETH_P_8021Q)) {
+               struct vlan_hdr *vh;
+
+               if (unlikely(!pskb_may_pull(skb, vlan_depth + VLAN_HLEN)))
+                       return ERR_PTR(-EINVAL);
+
+               vh = (struct vlan_hdr *)(skb->data + vlan_depth);
+               type = vh->h_vlan_encapsulated_proto;
+               vlan_depth += VLAN_HLEN;
+       }
+
        skb_reset_mac_header(skb);
        skb->mac_len = skb->network_header - skb->mac_header;
        __skb_pull(skb, skb->mac_len);
@@ -1768,8 +1841,7 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
                if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
                        dev->ethtool_ops->get_drvinfo(dev, &info);
 
-               WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
-                       "ip_summed=%d",
+               WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d ip_summed=%d\n",
                     info.driver, dev ? dev->features : 0L,
                     skb->sk ? skb->sk->sk_route_caps : 0L,
                     skb->len, skb->data_len, skb->ip_summed);
@@ -1872,16 +1944,14 @@ static void dev_gso_skb_destructor(struct sk_buff *skb)
 /**
  *     dev_gso_segment - Perform emulated hardware segmentation on skb.
  *     @skb: buffer to segment
+ *     @features: device features as applicable to this skb
  *
  *     This function segments the given skb and stores the list of segments
  *     in skb->next.
  */
-static int dev_gso_segment(struct sk_buff *skb)
+static int dev_gso_segment(struct sk_buff *skb, int features)
 {
-       struct net_device *dev = skb->dev;
        struct sk_buff *segs;
-       int features = dev->features & ~(illegal_highdma(dev, skb) ?
-                                        NETIF_F_SG : 0);
 
        segs = skb_gso_segment(skb, features);
 
@@ -1918,6 +1988,53 @@ static inline void skb_orphan_try(struct sk_buff *skb)
        }
 }
 
+static bool can_checksum_protocol(unsigned long features, __be16 protocol)
+{
+       return ((features & NETIF_F_GEN_CSUM) ||
+               ((features & NETIF_F_V4_CSUM) &&
+                protocol == htons(ETH_P_IP)) ||
+               ((features & NETIF_F_V6_CSUM) &&
+                protocol == htons(ETH_P_IPV6)) ||
+               ((features & NETIF_F_FCOE_CRC) &&
+                protocol == htons(ETH_P_FCOE)));
+}
+
+static int harmonize_features(struct sk_buff *skb, __be16 protocol, int features)
+{
+       if (!can_checksum_protocol(features, protocol)) {
+               features &= ~NETIF_F_ALL_CSUM;
+               features &= ~NETIF_F_SG;
+       } else if (illegal_highdma(skb->dev, skb)) {
+               features &= ~NETIF_F_SG;
+       }
+
+       return features;
+}
+
+int netif_skb_features(struct sk_buff *skb)
+{
+       __be16 protocol = skb->protocol;
+       int features = skb->dev->features;
+
+       if (protocol == htons(ETH_P_8021Q)) {
+               struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+               protocol = veh->h_vlan_encapsulated_proto;
+       } else if (!vlan_tx_tag_present(skb)) {
+               return harmonize_features(skb, protocol, features);
+       }
+
+       features &= (skb->dev->vlan_features | NETIF_F_HW_VLAN_TX);
+
+       if (protocol != htons(ETH_P_8021Q)) {
+               return harmonize_features(skb, protocol, features);
+       } else {
+               features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_FRAGLIST |
+                               NETIF_F_GEN_CSUM | NETIF_F_HW_VLAN_TX;
+               return harmonize_features(skb, protocol, features);
+       }
+}
+EXPORT_SYMBOL(netif_skb_features);
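
The per-skb feature mask computed here is what gates the software fallbacks
later in dev_hard_start_xmit() (see the hunk below). Roughly:

    features = netif_skb_features(skb);
    if (skb->ip_summed == CHECKSUM_PARTIAL &&
        !(features & NETIF_F_ALL_CSUM))
            skb_checksum_help(skb);     /* checksum in software instead */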
+
 /*
  * Returns true if either:
  *     1. skb has frag_list and the device doesn't support FRAGLIST, or
@@ -1926,12 +2043,13 @@ static inline void skb_orphan_try(struct sk_buff *skb)
  *        support DMA from it.
  */
 static inline int skb_needs_linearize(struct sk_buff *skb,
-                                     struct net_device *dev)
+                                     int features)
 {
        return skb_is_nonlinear(skb) &&
-              ((skb_has_frag_list(skb) && !(dev->features & NETIF_F_FRAGLIST)) ||
-               (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) ||
-                                             illegal_highdma(dev, skb))));
+                       ((skb_has_frag_list(skb) &&
+                               !(features & NETIF_F_FRAGLIST)) ||
+                       (skb_shinfo(skb)->nr_frags &&
+                               !(features & NETIF_F_SG)));
 }
 
 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
@@ -1941,8 +2059,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
        int rc = NETDEV_TX_OK;
 
        if (likely(!skb->next)) {
-               if (!list_empty(&ptype_all))
-                       dev_queue_xmit_nit(skb, dev);
+               int features;
 
                /*
                 * If device doesn't need skb->dst, release it right now while
@@ -1951,15 +2068,29 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                if (dev->priv_flags & IFF_XMIT_DST_RELEASE)
                        skb_dst_drop(skb);
 
+               if (!list_empty(&ptype_all))
+                       dev_queue_xmit_nit(skb, dev);
+
                skb_orphan_try(skb);
 
-               if (netif_needs_gso(dev, skb)) {
-                       if (unlikely(dev_gso_segment(skb)))
+               features = netif_skb_features(skb);
+
+               if (vlan_tx_tag_present(skb) &&
+                   !(features & NETIF_F_HW_VLAN_TX)) {
+                       skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
+                       if (unlikely(!skb))
+                               goto out;
+
+                       skb->vlan_tci = 0;
+               }
+
+               if (netif_needs_gso(skb, features)) {
+                       if (unlikely(dev_gso_segment(skb, features)))
                                goto out_kfree_skb;
                        if (skb->next)
                                goto gso;
                } else {
-                       if (skb_needs_linearize(skb, dev) &&
+                       if (skb_needs_linearize(skb, features) &&
                            __skb_linearize(skb))
                                goto out_kfree_skb;
 
@@ -1968,15 +2099,16 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         * checksumming here.
                         */
                        if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                               skb_set_transport_header(skb, skb->csum_start -
-                                             skb_headroom(skb));
-                               if (!dev_can_checksum(dev, skb) &&
+                               skb_set_transport_header(skb,
+                                       skb_checksum_start_offset(skb));
+                               if (!(features & NETIF_F_ALL_CSUM) &&
                                     skb_checksum_help(skb))
                                        goto out_kfree_skb;
                        }
                }
 
                rc = ops->ndo_start_xmit(skb, dev);
+               trace_net_dev_xmit(skb, rc);
                if (rc == NETDEV_TX_OK)
                        txq_trans_update(txq);
                return rc;
@@ -1997,6 +2129,7 @@ gso:
                        skb_dst_drop(nskb);
 
                rc = ops->ndo_start_xmit(nskb, dev);
+               trace_net_dev_xmit(nskb, rc);
                if (unlikely(rc != NETDEV_TX_OK)) {
                        if (rc & ~NETDEV_TX_MASK)
                                goto out_kfree_gso_skb;
@@ -2014,19 +2147,25 @@ out_kfree_gso_skb:
                skb->destructor = DEV_GSO_CB(skb)->destructor;
 out_kfree_skb:
        kfree_skb(skb);
+out:
        return rc;
 }
 
 static u32 hashrnd __read_mostly;
 
-u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
+/*
+ * Returns a Tx hash based on the given packet descriptor and the number
+ * of Tx queues to be used as a distribution range.
+ */
+u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+                 unsigned int num_tx_queues)
 {
        u32 hash;
 
        if (skb_rx_queue_recorded(skb)) {
                hash = skb_get_rx_queue(skb);
-               while (unlikely(hash >= dev->real_num_tx_queues))
-                       hash -= dev->real_num_tx_queues;
+               while (unlikely(hash >= num_tx_queues))
+                       hash -= num_tx_queues;
                return hash;
        }
 
@@ -2036,9 +2175,9 @@ u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
                hash = (__force u16) skb->protocol ^ skb->rxhash;
        hash = jhash_1word(hash, hashrnd);
 
-       return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
+       return (u16) (((u64) hash * num_tx_queues) >> 32);
 }
-EXPORT_SYMBOL(skb_tx_hash);
+EXPORT_SYMBOL(__skb_tx_hash);
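
The final line is the standard multiply-shift trick for mapping a 32-bit
hash uniformly onto [0, num_tx_queues) without a division. A quick worked
example:

    /* hash = 0x80000000, num_tx_queues = 4:
     * ((u64)0x80000000 * 4) >> 32 == 2, so hashes in the upper half of
     * the 32-bit space land on queues 2 and 3.
     */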
 
 static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
 {
@@ -2053,26 +2192,70 @@ static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
        return queue_index;
 }
 
+static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
+{
+#ifdef CONFIG_XPS
+       struct xps_dev_maps *dev_maps;
+       struct xps_map *map;
+       int queue_index = -1;
+
+       rcu_read_lock();
+       dev_maps = rcu_dereference(dev->xps_maps);
+       if (dev_maps) {
+               map = rcu_dereference(
+                   dev_maps->cpu_map[raw_smp_processor_id()]);
+               if (map) {
+                       if (map->len == 1)
+                               queue_index = map->queues[0];
+                       else {
+                               u32 hash;
+                               if (skb->sk && skb->sk->sk_hash)
+                                       hash = skb->sk->sk_hash;
+                               else
+                                       hash = (__force u16) skb->protocol ^
+                                           skb->rxhash;
+                               hash = jhash_1word(hash, hashrnd);
+                               queue_index = map->queues[
+                                   ((u64)hash * map->len) >> 32];
+                       }
+                       if (unlikely(queue_index >= dev->real_num_tx_queues))
+                               queue_index = -1;
+               }
+       }
+       rcu_read_unlock();
+
+       return queue_index;
+#else
+       return -1;
+#endif
+}
+
 static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                        struct sk_buff *skb)
 {
        int queue_index;
        const struct net_device_ops *ops = dev->netdev_ops;
 
-       if (ops->ndo_select_queue) {
+       if (dev->real_num_tx_queues == 1)
+               queue_index = 0;
+       else if (ops->ndo_select_queue) {
                queue_index = ops->ndo_select_queue(dev, skb);
                queue_index = dev_cap_txqueue(dev, queue_index);
        } else {
                struct sock *sk = skb->sk;
                queue_index = sk_tx_queue_get(sk);
-               if (queue_index < 0) {
 
-                       queue_index = 0;
-                       if (dev->real_num_tx_queues > 1)
+               if (queue_index < 0 || skb->ooo_okay ||
+                   queue_index >= dev->real_num_tx_queues) {
+                       int old_index = queue_index;
+
+                       queue_index = get_xps_queue(dev, skb);
+                       if (queue_index < 0)
                                queue_index = skb_tx_hash(dev, skb);
 
-                       if (sk) {
-                               struct dst_entry *dst = rcu_dereference_check(sk->sk_dst_cache, 1);
+                       if (queue_index != old_index && sk) {
+                               struct dst_entry *dst =
+                                   rcu_dereference_check(sk->sk_dst_cache, 1);
 
                                if (dst && skb_dst(skb) == dst)
                                        sk_tx_queue_set(sk, queue_index);
@@ -2114,7 +2297,10 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
                 */
                if (!(dev->priv_flags & IFF_XMIT_DST_RELEASE))
                        skb_dst_force(skb);
-               __qdisc_update_bstats(q, skb->len);
+
+               qdisc_skb_cb(skb)->pkt_len = skb->len;
+               qdisc_bstats_update(q, skb);
+
                if (sch_direct_xmit(skb, q, dev, txq, root_lock)) {
                        if (unlikely(contended)) {
                                spin_unlock(&q->busylock);
@@ -2142,6 +2328,9 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
        return rc;
 }
 
+static DEFINE_PER_CPU(int, xmit_recursion);
+#define RECURSION_LIMIT 10
+
 /**
  *     dev_queue_xmit - transmit a buffer
  *     @skb: buffer to transmit
@@ -2185,6 +2374,7 @@ int dev_queue_xmit(struct sk_buff *skb)
 #ifdef CONFIG_NET_CLS_ACT
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_EGRESS);
 #endif
+       trace_net_dev_queue(skb);
        if (q->enqueue) {
                rc = __dev_xmit_skb(skb, q, dev, txq);
                goto out;
@@ -2207,10 +2397,15 @@ int dev_queue_xmit(struct sk_buff *skb)
 
                if (txq->xmit_lock_owner != cpu) {
 
+                       if (__this_cpu_read(xmit_recursion) > RECURSION_LIMIT)
+                               goto recursion_alert;
+
                        HARD_TX_LOCK(dev, txq, cpu);
 
                        if (!netif_tx_queue_stopped(txq)) {
+                               __this_cpu_inc(xmit_recursion);
                                rc = dev_hard_start_xmit(skb, dev, txq);
+                               __this_cpu_dec(xmit_recursion);
                                if (dev_xmit_complete(rc)) {
                                        HARD_TX_UNLOCK(dev, txq);
                                        goto out;
@@ -2222,7 +2417,9 @@ int dev_queue_xmit(struct sk_buff *skb)
                                       "queue packet!\n", dev->name);
                } else {
                        /* Recursion is detected! It is possible,
-                        * unfortunately */
+                        * unfortunately
+                        */
+recursion_alert:
                        if (net_ratelimit())
                                printk(KERN_CRIT "Dead loop on virtual device "
                                       "%s, fix it urgently!\n", dev->name);
@@ -2332,7 +2529,7 @@ EXPORT_SYMBOL(__skb_get_rxhash);
 #ifdef CONFIG_RPS
 
 /* One global table that all flow-based protocols share. */
-struct rps_sock_flow_table *rps_sock_flow_table __read_mostly;
+struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly;
 EXPORT_SYMBOL(rps_sock_flow_table);
 
 /*
@@ -2344,7 +2541,7 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
                       struct rps_dev_flow **rflowp)
 {
        struct netdev_rx_queue *rxqueue;
-       struct rps_map *map = NULL;
+       struct rps_map *map;
        struct rps_dev_flow_table *flow_table;
        struct rps_sock_flow_table *sock_flow_table;
        int cpu = -1;
@@ -2352,25 +2549,26 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb,
 
        if (skb_rx_queue_recorded(skb)) {
                u16 index = skb_get_rx_queue(skb);
-               if (unlikely(index >= dev->num_rx_queues)) {
-                       WARN_ONCE(dev->num_rx_queues > 1, "%s received packet "
-                               "on queue %u, but number of RX queues is %u\n",
-                               dev->name, index, dev->num_rx_queues);
+               if (unlikely(index >= dev->real_num_rx_queues)) {
+                       WARN_ONCE(dev->real_num_rx_queues > 1,
+                                 "%s received packet on queue %u, but number "
+                                 "of RX queues is %u\n",
+                                 dev->name, index, dev->real_num_rx_queues);
                        goto done;
                }
                rxqueue = dev->_rx + index;
        } else
                rxqueue = dev->_rx;
 
-       if (rxqueue->rps_map) {
-               map = rcu_dereference(rxqueue->rps_map);
-               if (map && map->len == 1) {
+       map = rcu_dereference(rxqueue->rps_map);
+       if (map) {
+               if (map->len == 1) {
                        tcpu = map->cpus[0];
                        if (cpu_online(tcpu))
                                cpu = tcpu;
                        goto done;
                }
-       } else if (!rxqueue->rps_flow_table) {
+       } else if (!rcu_dereference_raw(rxqueue->rps_flow_table)) {
                goto done;
        }
 
@@ -2502,6 +2700,7 @@ enqueue:
 
        local_irq_restore(flags);
 
+       atomic_long_inc(&skb->dev->rx_dropped);
        kfree_skb(skb);
        return NET_RX_DROP;
 }
@@ -2532,6 +2731,7 @@ int netif_rx(struct sk_buff *skb)
        if (netdev_tstamp_prequeue)
                net_timestamp_check(skb);
 
+       trace_netif_rx(skb);
 #ifdef CONFIG_RPS
        {
                struct rps_dev_flow voidflow, *rflow = &voidflow;
@@ -2591,6 +2791,7 @@ static void net_tx_action(struct softirq_action *h)
                        clist = clist->next;
 
                        WARN_ON(atomic_read(&skb->users));
+                       trace_kfree_skb(skb, net_tx_action);
                        __kfree_skb(skb);
                }
        }
@@ -2631,14 +2832,6 @@ static void net_tx_action(struct softirq_action *h)
        }
 }
 
-static inline int deliver_skb(struct sk_buff *skb,
-                             struct packet_type *pt_prev,
-                             struct net_device *orig_dev)
-{
-       atomic_inc(&skb->users);
-       return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
-}
-
 #if (defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)) && \
     (defined(CONFIG_ATM_LANE) || defined(CONFIG_ATM_LANE_MODULE))
 /* This hook is defined here for ATM LANE */
@@ -2656,11 +2849,10 @@ EXPORT_SYMBOL_GPL(br_fdb_test_addr_hook);
 * the ingress scheduler, you just can't add policies on ingress.
  *
  */
-static int ing_filter(struct sk_buff *skb)
+static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
 {
        struct net_device *dev = skb->dev;
        u32 ttl = G_TC_RTTL(skb->tc_verd);
-       struct netdev_queue *rxq;
        int result = TC_ACT_OK;
        struct Qdisc *q;
 
@@ -2674,8 +2866,6 @@ static int ing_filter(struct sk_buff *skb)
        skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl);
        skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS);
 
-       rxq = &dev->rx_queue;
-
        q = rxq->qdisc;
        if (q != &noop_qdisc) {
                spin_lock(qdisc_lock(q));
@@ -2691,7 +2881,9 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                                         struct packet_type **pt_prev,
                                         int *ret, struct net_device *orig_dev)
 {
-       if (skb->dev->rx_queue.qdisc == &noop_qdisc)
+       struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue);
+
+       if (!rxq || rxq->qdisc == &noop_qdisc)
                goto out;
 
        if (*pt_prev) {
@@ -2699,7 +2891,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb,
                *pt_prev = NULL;
        }
 
-       switch (ing_filter(skb)) {
+       switch (ing_filter(skb, rxq)) {
        case TC_ACT_SHOT:
        case TC_ACT_STOLEN:
                kfree_skb(skb);
@@ -2712,33 +2904,6 @@ out:
 }
 #endif
 
-/*
- *     netif_nit_deliver - deliver received packets to network taps
- *     @skb: buffer
- *
- *     This function is used to deliver incoming packets to network
- *     taps. It should be used when the normal netif_receive_skb path
- *     is bypassed, for example because of VLAN acceleration.
- */
-void netif_nit_deliver(struct sk_buff *skb)
-{
-       struct packet_type *ptype;
-
-       if (list_empty(&ptype_all))
-               return;
-
-       skb_reset_network_header(skb);
-       skb_reset_transport_header(skb);
-       skb->mac_len = skb->network_header - skb->mac_header;
-
-       rcu_read_lock();
-       list_for_each_entry_rcu(ptype, &ptype_all, list) {
-               if (!ptype->dev || ptype->dev == skb->dev)
-                       deliver_skb(skb, ptype, skb->dev);
-       }
-       rcu_read_unlock();
-}
-
 /**
  *     netdev_rx_handler_register - register receive handler
  *     @dev: device to register a handler for
@@ -2848,8 +3013,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
        if (!netdev_tstamp_prequeue)
                net_timestamp_check(skb);
 
-       if (vlan_tx_tag_present(skb))
-               vlan_hwaccel_do_receive(skb);
+       trace_netif_receive_skb(skb);
 
        /* if we've gotten here through NAPI, check netpoll */
        if (netpoll_receive_skb(skb))
@@ -2863,8 +3027,7 @@ static int __netif_receive_skb(struct sk_buff *skb)
         * be delivered to pkt handlers that are exact matches.  Also
         * the deliver_no_wcard flag will be set.  If packet handlers
         * are sensitive to duplicate packets these skbs will need to
-        * be dropped at the handler.  The vlan accel path may have
-        * already set the deliver_no_wcard flag.
+        * be dropped at the handler.
         */
        null_or_orig = NULL;
        orig_dev = skb->dev;
@@ -2923,6 +3086,18 @@ ncls:
                        goto out;
        }
 
+       if (vlan_tx_tag_present(skb)) {
+               if (pt_prev) {
+                       ret = deliver_skb(skb, pt_prev, orig_dev);
+                       pt_prev = NULL;
+               }
+               if (vlan_hwaccel_do_receive(&skb)) {
+                       ret = __netif_receive_skb(skb);
+                       goto out;
+               } else if (unlikely(!skb))
+                       goto out;
+       }
+
        /*
         * Make sure frames received on VLAN interfaces stacked on
         * bonding interfaces still make their way to any base bonding
@@ -2950,6 +3125,7 @@ ncls:
        if (pt_prev) {
                ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
        } else {
+               atomic_long_inc(&skb->dev->rx_dropped);
                kfree_skb(skb);
                /* Jamal, now you will not be able to escape explaining
                 * to me how you were going to use this. :-)
@@ -3186,6 +3362,7 @@ __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
                unsigned long diffs;
 
                diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev;
+               diffs |= p->vlan_tci ^ skb->vlan_tci;
                diffs |= compare_ether_header(skb_mac_header(p),
                                              skb_gro_mac_header(skb));
                NAPI_GRO_CB(p)->same_flow = !diffs;
@@ -3241,14 +3418,14 @@ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 }
 EXPORT_SYMBOL(napi_gro_receive);
 
-void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
 {
        __skb_pull(skb, skb_headlen(skb));
        skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+       skb->vlan_tci = 0;
 
        napi->skb = skb;
 }
-EXPORT_SYMBOL(napi_reuse_skb);
 
 struct sk_buff *napi_get_frags(struct napi_struct *napi)
 {
@@ -4822,10 +4999,12 @@ static void rollback_registered_many(struct list_head *head)
                }
 
                BUG_ON(dev->reg_state != NETREG_REGISTERED);
+       }
 
-               /* If device is running, close it first. */
-               dev_close(dev);
+       /* If device is running, close it first. */
+       dev_close_many(head);
 
+       list_for_each_entry(dev, head, unreg_list) {
                /* And unlink it from device chain. */
                unlist_netdevice(dev);
 
@@ -4882,21 +5061,6 @@ static void rollback_registered(struct net_device *dev)
        rollback_registered_many(&single);
 }
 
-static void __netdev_init_queue_locks_one(struct net_device *dev,
-                                         struct netdev_queue *dev_queue,
-                                         void *_unused)
-{
-       spin_lock_init(&dev_queue->_xmit_lock);
-       netdev_set_xmit_lockdep_class(&dev_queue->_xmit_lock, dev->type);
-       dev_queue->xmit_lock_owner = -1;
-}
-
-static void netdev_init_queue_locks(struct net_device *dev)
-{
-       netdev_for_each_tx_queue(dev, __netdev_init_queue_locks_one, NULL);
-       __netdev_init_queue_locks_one(dev, &dev->rx_queue, NULL);
-}
-
 unsigned long netdev_fix_features(unsigned long features, const char *name)
 {
        /* Fix illegal SG+CSUM combinations. */
@@ -4917,10 +5081,13 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
        }
 
        if (features & NETIF_F_UFO) {
-               if (!(features & NETIF_F_GEN_CSUM)) {
+               /* maybe split UFO into V4 and V6? */
+               if (!((features & NETIF_F_GEN_CSUM) ||
+                   (features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))
+                           == (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
                        if (name)
                                printk(KERN_ERR "%s: Dropping NETIF_F_UFO "
-                                      "since no NETIF_F_HW_CSUM feature.\n",
+                                      "since no checksum offload features.\n",
                                       name);
                        features &= ~NETIF_F_UFO;
                }
@@ -4964,31 +5131,56 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 }
 EXPORT_SYMBOL(netif_stacked_transfer_operstate);
 
+#ifdef CONFIG_RPS
 static int netif_alloc_rx_queues(struct net_device *dev)
 {
-#ifdef CONFIG_RPS
        unsigned int i, count = dev->num_rx_queues;
+       struct netdev_rx_queue *rx;
 
-       if (count) {
-               struct netdev_rx_queue *rx;
+       BUG_ON(count < 1);
 
-               rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
-               if (!rx) {
-                       pr_err("netdev: Unable to allocate %u rx queues.\n",
-                              count);
-                       return -ENOMEM;
-               }
-               dev->_rx = rx;
-               atomic_set(&rx->count, count);
-
-               /*
-                * Set a pointer to first element in the array which holds the
-                * reference count.
-                */
-               for (i = 0; i < count; i++)
-                       rx[i].first = rx;
+       rx = kcalloc(count, sizeof(struct netdev_rx_queue), GFP_KERNEL);
+       if (!rx) {
+               pr_err("netdev: Unable to allocate %u rx queues.\n", count);
+               return -ENOMEM;
        }
+       dev->_rx = rx;
+
+       for (i = 0; i < count; i++)
+               rx[i].dev = dev;
+       return 0;
+}
 #endif
+
+static void netdev_init_one_queue(struct net_device *dev,
+                                 struct netdev_queue *queue, void *_unused)
+{
+       /* Initialize queue lock */
+       spin_lock_init(&queue->_xmit_lock);
+       netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
+       queue->xmit_lock_owner = -1;
+       netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
+       queue->dev = dev;
+}
+
+static int netif_alloc_netdev_queues(struct net_device *dev)
+{
+       unsigned int count = dev->num_tx_queues;
+       struct netdev_queue *tx;
+
+       BUG_ON(count < 1);
+
+       tx = kcalloc(count, sizeof(struct netdev_queue), GFP_KERNEL);
+       if (!tx) {
+               pr_err("netdev: Unable to allocate %u tx queues.\n",
+                      count);
+               return -ENOMEM;
+       }
+       dev->_tx = tx;
+
+       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
+       spin_lock_init(&dev->tx_global_lock);
+
        return 0;
 }
 
@@ -5025,14 +5217,9 @@ int register_netdevice(struct net_device *dev)
 
        spin_lock_init(&dev->addr_list_lock);
        netdev_set_addr_lockdep_class(dev);
-       netdev_init_queue_locks(dev);
 
        dev->iflink = -1;
 
-       ret = netif_alloc_rx_queues(dev);
-       if (ret)
-               goto out;
-
        /* Init, if this function is available */
        if (dev->netdev_ops->ndo_init) {
                ret = dev->netdev_ops->ndo_init(dev);
@@ -5148,9 +5335,6 @@ int init_dummy_netdev(struct net_device *dev)
         */
        dev->reg_state = NETREG_DUMMY;
 
-       /* initialize the ref count */
-       atomic_set(&dev->refcnt, 1);
-
        /* NAPI wants this */
        INIT_LIST_HEAD(&dev->napi_list);
 
@@ -5158,6 +5342,11 @@ int init_dummy_netdev(struct net_device *dev)
        set_bit(__LINK_STATE_PRESENT, &dev->state);
        set_bit(__LINK_STATE_START, &dev->state);
 
+       /* Note : We don't allocate pcpu_refcnt for dummy devices,
+        * because users of this 'device' don't need to change
+        * its refcount.
+        */
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(init_dummy_netdev);
@@ -5199,6 +5388,16 @@ out:
 }
 EXPORT_SYMBOL(register_netdev);
 
+int netdev_refcnt_read(const struct net_device *dev)
+{
+       int i, refcnt = 0;
+
+       for_each_possible_cpu(i)
+               refcnt += *per_cpu_ptr(dev->pcpu_refcnt, i);
+       return refcnt;
+}
+EXPORT_SYMBOL(netdev_refcnt_read);
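
With the refcount now per-cpu, dev_hold()/dev_put() become cheap local
increments and only netdev_refcnt_read() pays for the cross-CPU sum.
Conceptually (a sketch of the idea, not the exact helpers in this tree):

    static inline void dev_hold_sketch(struct net_device *dev)
    {
            this_cpu_inc(*dev->pcpu_refcnt);    /* no shared cacheline */
    }

    static inline void dev_put_sketch(struct net_device *dev)
    {
            this_cpu_dec(*dev->pcpu_refcnt);    /* one cpu's count may go
                                                 * negative; only the sum
                                                 * is meaningful */
    }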
+
 /*
  * netdev_wait_allrefs - wait until all references are gone.
  *
@@ -5213,11 +5412,14 @@ EXPORT_SYMBOL(register_netdev);
 static void netdev_wait_allrefs(struct net_device *dev)
 {
        unsigned long rebroadcast_time, warning_time;
+       int refcnt;
 
        linkwatch_forget_dev(dev);
 
        rebroadcast_time = warning_time = jiffies;
-       while (atomic_read(&dev->refcnt) != 0) {
+       refcnt = netdev_refcnt_read(dev);
+
+       while (refcnt != 0) {
                if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
                        rtnl_lock();
 
@@ -5244,11 +5446,13 @@ static void netdev_wait_allrefs(struct net_device *dev)
 
                msleep(250);
 
+               refcnt = netdev_refcnt_read(dev);
+
                if (time_after(jiffies, warning_time + 10 * HZ)) {
                        printk(KERN_EMERG "unregister_netdevice: "
                               "waiting for %s to become free. Usage "
                               "count = %d\n",
-                              dev->name, atomic_read(&dev->refcnt));
+                              dev->name, refcnt);
                        warning_time = jiffies;
                }
        }
@@ -5306,9 +5510,9 @@ void netdev_run_todo(void)
                netdev_wait_allrefs(dev);
 
                /* paranoia */
-               BUG_ON(atomic_read(&dev->refcnt));
+               BUG_ON(netdev_refcnt_read(dev));
                WARN_ON(rcu_dereference_raw(dev->ip_ptr));
-               WARN_ON(dev->ip6_ptr);
+               WARN_ON(rcu_dereference_raw(dev->ip6_ptr));
                WARN_ON(dev->dn_ptr);
 
                if (dev->destructor)
@@ -5319,34 +5523,6 @@ void netdev_run_todo(void)
        }
 }
 
-/**
- *     dev_txq_stats_fold - fold tx_queues stats
- *     @dev: device to get statistics from
- *     @stats: struct rtnl_link_stats64 to hold results
- */
-void dev_txq_stats_fold(const struct net_device *dev,
-                       struct rtnl_link_stats64 *stats)
-{
-       u64 tx_bytes = 0, tx_packets = 0, tx_dropped = 0;
-       unsigned int i;
-       struct netdev_queue *txq;
-
-       for (i = 0; i < dev->num_tx_queues; i++) {
-               txq = netdev_get_tx_queue(dev, i);
-               spin_lock_bh(&txq->_xmit_lock);
-               tx_bytes   += txq->tx_bytes;
-               tx_packets += txq->tx_packets;
-               tx_dropped += txq->tx_dropped;
-               spin_unlock_bh(&txq->_xmit_lock);
-       }
-       if (tx_bytes || tx_packets || tx_dropped) {
-               stats->tx_bytes   = tx_bytes;
-               stats->tx_packets = tx_packets;
-               stats->tx_dropped = tx_dropped;
-       }
-}
-EXPORT_SYMBOL(dev_txq_stats_fold);
-
 /* Convert net_device_stats to rtnl_link_stats64.  They have the same
  * fields in the same order, with only the type differing.
  */
@@ -5385,53 +5561,71 @@ struct rtnl_link_stats64 *dev_get_stats(struct net_device *dev,
 
        if (ops->ndo_get_stats64) {
                memset(storage, 0, sizeof(*storage));
-               return ops->ndo_get_stats64(dev, storage);
-       }
-       if (ops->ndo_get_stats) {
+               ops->ndo_get_stats64(dev, storage);
+       } else if (ops->ndo_get_stats) {
                netdev_stats_to_stats64(storage, ops->ndo_get_stats(dev));
-               return storage;
+       } else {
+               netdev_stats_to_stats64(storage, &dev->stats);
        }
-       netdev_stats_to_stats64(storage, &dev->stats);
-       dev_txq_stats_fold(dev, storage);
+       storage->rx_dropped += atomic_long_read(&dev->rx_dropped);
        return storage;
 }
 EXPORT_SYMBOL(dev_get_stats);
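
Callers keep a single entry point; the fallback order is ndo_get_stats64,
then ndo_get_stats, then dev->stats, with the core's own rx_dropped count
folded in afterwards. Minimal usage:

    struct rtnl_link_stats64 stats;

    dev_get_stats(dev, &stats);
    pr_info("%s: %llu packets dropped on rx\n",
            dev->name, (unsigned long long)stats.rx_dropped);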
 
-static void netdev_init_one_queue(struct net_device *dev,
-                                 struct netdev_queue *queue,
-                                 void *_unused)
+struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 {
-       queue->dev = dev;
-}
+       struct netdev_queue *queue = dev_ingress_queue(dev);
 
-static void netdev_init_queues(struct net_device *dev)
-{
-       netdev_init_one_queue(dev, &dev->rx_queue, NULL);
-       netdev_for_each_tx_queue(dev, netdev_init_one_queue, NULL);
-       spin_lock_init(&dev->tx_global_lock);
+#ifdef CONFIG_NET_CLS_ACT
+       if (queue)
+               return queue;
+       queue = kzalloc(sizeof(*queue), GFP_KERNEL);
+       if (!queue)
+               return NULL;
+       netdev_init_one_queue(dev, queue, NULL);
+       queue->qdisc = &noop_qdisc;
+       queue->qdisc_sleeping = &noop_qdisc;
+       rcu_assign_pointer(dev->ingress_queue, queue);
+#endif
+       return queue;
 }
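
A hypothetical caller sketch: the qdisc setup path would create the ingress
queue lazily under RTNL, and a NULL return (allocation failure, or a kernel
built without CONFIG_NET_CLS_ACT) is treated as an error:

    ASSERT_RTNL();
    rxq = dev_ingress_queue_create(dev);
    if (!rxq)
            return -ENOMEM;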
 
 /**
- *     alloc_netdev_mq - allocate network device
+ *     alloc_netdev_mqs - allocate network device
  *     @sizeof_priv:   size of private data to allocate space for
  *     @name:          device name format string
  *     @setup:         callback to initialize device
- *     @queue_count:   the number of subqueues to allocate
+ *     @txqs:          the number of TX subqueues to allocate
+ *     @rxqs:          the number of RX subqueues to allocate
  *
  *     Allocates a struct net_device with private data area for driver use
  *     and performs basic initialization.  Also allocates subqueue structs
- *     for each queue on the device at the end of the netdevice.
+ *     for each queue on the device.
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-               void (*setup)(struct net_device *), unsigned int queue_count)
+struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
+               void (*setup)(struct net_device *),
+               unsigned int txqs, unsigned int rxqs)
 {
-       struct netdev_queue *tx;
        struct net_device *dev;
        size_t alloc_size;
        struct net_device *p;
 
        BUG_ON(strlen(name) >= sizeof(dev->name));
 
+       if (txqs < 1) {
+               pr_err("alloc_netdev: Unable to allocate device "
+                      "with zero queues.\n");
+               return NULL;
+       }
+
+#ifdef CONFIG_RPS
+       if (rxqs < 1) {
+               pr_err("alloc_netdev: Unable to allocate device "
+                      "with zero RX queues.\n");
+               return NULL;
+       }
+#endif
+
        alloc_size = sizeof(struct net_device);
        if (sizeof_priv) {
                /* ensure 32-byte alignment of private area */
@@ -5447,37 +5641,35 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
                return NULL;
        }
 
-       tx = kcalloc(queue_count, sizeof(struct netdev_queue), GFP_KERNEL);
-       if (!tx) {
-               printk(KERN_ERR "alloc_netdev: Unable to allocate "
-                      "tx qdiscs.\n");
-               goto free_p;
-       }
-
-
        dev = PTR_ALIGN(p, NETDEV_ALIGN);
        dev->padded = (char *)dev - (char *)p;
 
+       dev->pcpu_refcnt = alloc_percpu(int);
+       if (!dev->pcpu_refcnt)
+               goto free_p;
+
        if (dev_addr_init(dev))
-               goto free_tx;
+               goto free_pcpu;
 
        dev_mc_init(dev);
        dev_uc_init(dev);
 
        dev_net_set(dev, &init_net);
 
-       dev->_tx = tx;
-       dev->num_tx_queues = queue_count;
-       dev->real_num_tx_queues = queue_count;
+       dev->num_tx_queues = txqs;
+       dev->real_num_tx_queues = txqs;
+       if (netif_alloc_netdev_queues(dev))
+               goto free_pcpu;
 
 #ifdef CONFIG_RPS
-       dev->num_rx_queues = queue_count;
+       dev->num_rx_queues = rxqs;
+       dev->real_num_rx_queues = rxqs;
+       if (netif_alloc_rx_queues(dev))
+               goto free_pcpu;
 #endif
 
        dev->gso_max_size = GSO_MAX_SIZE;
 
-       netdev_init_queues(dev);
-
        INIT_LIST_HEAD(&dev->ethtool_ntuple_list.list);
        dev->ethtool_ntuple_list.count = 0;
        INIT_LIST_HEAD(&dev->napi_list);
@@ -5488,13 +5680,18 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
        strcpy(dev->name, name);
        return dev;
 
-free_tx:
-       kfree(tx);
+free_pcpu:
+       free_percpu(dev->pcpu_refcnt);
+       kfree(dev->_tx);
+#ifdef CONFIG_RPS
+       kfree(dev->_rx);
+#endif
+
 free_p:
        kfree(p);
        return NULL;
 }
-EXPORT_SYMBOL(alloc_netdev_mq);
+EXPORT_SYMBOL(alloc_netdev_mqs);
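
Driver-side usage sketch (hypothetical sizes, and struct my_priv is
assumed); ether_setup() is the usual Ethernet init callback:

    struct net_device *dev;

    dev = alloc_netdev_mqs(sizeof(struct my_priv), "eth%d",
                           ether_setup, 4, 2);  /* 4 TX queues, 2 RX queues */
    if (!dev)
            return -ENOMEM;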
 
 /**
  *     free_netdev - free network device
@@ -5511,6 +5708,11 @@ void free_netdev(struct net_device *dev)
        release_net(dev_net(dev));
 
        kfree(dev->_tx);
+#ifdef CONFIG_RPS
+       kfree(dev->_rx);
+#endif
+
+       kfree(rcu_dereference_raw(dev->ingress_queue));
 
        /* Flush device addresses */
        dev_addr_flush(dev);
@@ -5521,6 +5723,9 @@ void free_netdev(struct net_device *dev)
        list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
                netif_napi_del(p);
 
+       free_percpu(dev->pcpu_refcnt);
+       dev->pcpu_refcnt = NULL;
+
        /*  Compatibility with error handling in drivers */
        if (dev->reg_state == NETREG_UNINITIALIZED) {
                kfree((char *)dev - dev->padded);
@@ -5984,7 +6189,7 @@ static void __net_exit default_device_exit(struct net *net)
 static void __net_exit default_device_exit_batch(struct list_head *net_list)
 {
        /* At exit all network devices must be removed from a network
-        * namespace.  Do this in the reverse order of registeration.
+        * namespace.  Do this in the reverse order of registration.
         * Do this across as many network namespaces as possible to
         * improve batching efficiency.
         */