Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

[linux-2.6.git] / net / core / dev.c
diff --git a/net/core/dev.c b/net/core/dev.c

index ca14ab407b338172758bc409efd2baaf75bdee99..cfc614ec3719c2380eb10f571b4f57a6e85730ff 100644 (file)
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -129,6 +129,12 @@
  
  #include "net-sysfs.h"
  
+/* Instead of increasing this, you should create a hash table. */
+#define MAX_GRO_SKBS 8
+
+/* This should be increased if a protocol with a bigger head is added. */
+#define GRO_MAX_HEAD (MAX_HEADER + 128)
+
  /*
   *     The list of packet types we will receive (as opposed to discard)
   *     and the routines to invoke.
@@ -164,25 +170,6 @@ static DEFINE_SPINLOCK(ptype_lock);
  static struct list_head ptype_base[PTYPE_HASH_SIZE] __read_mostly;
  static struct list_head ptype_all __read_mostly;       /* Taps */
  
-#ifdef CONFIG_NET_DMA
-struct net_dma {
-       struct dma_client client;
-       spinlock_t lock;
-       cpumask_t channel_mask;
-       struct dma_chan **channels;
-};
-
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
-       enum dma_state state);
-
-static struct net_dma net_dma = {
-       .client = {
-               .event_callback = netdev_dma_event,
-       },
-};
-#endif
-
  /*
   * The @dev_base_head list is protected by @dev_base_lock and the rtnl
   * semaphore.
@@ -280,8 +267,8 @@ static const unsigned short netdev_lock_type[] =
          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
-        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
-        ARPHRD_NONE};
+        ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
+        ARPHRD_PHONET_PIPE, ARPHRD_VOID, ARPHRD_NONE};
  
  static const char *netdev_lock_name[] =
         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -297,8 +284,8 @@ static const char *netdev_lock_name[] =
          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
-        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
-        "_xmit_NONE"};
+        "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
+        "_xmit_PHONET_PIPE", "_xmit_VOID", "_xmit_NONE"};
  
  static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
  static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -1100,6 +1087,11 @@ int dev_open(struct net_device *dev)
                  */
                 dev->flags |= IFF_UP;
  
+               /*
+                *      Enable NET_DMA
+                */
+               net_dmaengine_get();
+
                 /*
                  *      Initialize multicasting status
                  */
@@ -1177,6 +1169,11 @@ int dev_close(struct net_device *dev)
          */
         call_netdevice_notifiers(NETDEV_DOWN, dev);
  
+       /*
+        *      Shutdown NET_DMA
+        */
+       net_dmaengine_put();
+
         return 0;
  }
  
@@ -1433,7 +1430,7 @@ void netif_device_detach(struct net_device *dev)
  {
         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
             netif_running(dev)) {
-               netif_stop_queue(dev);
+               netif_tx_stop_all_queues(dev);
         }
  }
  EXPORT_SYMBOL(netif_device_detach);
@@ -1448,7 +1445,7 @@ void netif_device_attach(struct net_device *dev)
  {
         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
             netif_running(dev)) {
-               netif_wake_queue(dev);
+               netif_tx_wake_all_queues(dev);
                 __netdev_watchdog_up(dev);
         }
  }
@@ -1460,7 +1457,9 @@ static bool can_checksum_protocol(unsigned long features, __be16 protocol)
                 ((features & NETIF_F_IP_CSUM) &&
                  protocol == htons(ETH_P_IP)) ||
                 ((features & NETIF_F_IPV6_CSUM) &&
-                protocol == htons(ETH_P_IPV6)));
+                protocol == htons(ETH_P_IPV6)) ||
+               ((features & NETIF_F_FCOE_CRC) &&
+                protocol == htons(ETH_P_FCOE)));
  }
  
  static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb)
@@ -1533,13 +1532,23 @@ struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
         __be16 type = skb->protocol;
         int err;
  
-       BUG_ON(skb_shinfo(skb)->frag_list);
-
         skb_reset_mac_header(skb);
         skb->mac_len = skb->network_header - skb->mac_header;
         __skb_pull(skb, skb->mac_len);
  
-       if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
+       if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
+               struct net_device *dev = skb->dev;
+               struct ethtool_drvinfo info = {};
+
+               if (dev && dev->ethtool_ops && dev->ethtool_ops->get_drvinfo)
+                       dev->ethtool_ops->get_drvinfo(dev, &info);
+
+               WARN(1, "%s: caps=(0x%lx, 0x%lx) len=%d data_len=%d "
+                       "ip_summed=%d",
+                    info.driver, dev ? dev->features : 0L,
+                    skb->sk ? skb->sk->sk_route_caps : 0L,
+                    skb->len, skb->data_len, skb->ip_summed);
+
                 if (skb_header_cloned(skb) &&
                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
                         return ERR_PTR(err);
@@ -1660,6 +1669,9 @@ static int dev_gso_segment(struct sk_buff *skb)
  int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                         struct netdev_queue *txq)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
+       int rc;
+
         if (likely(!skb->next)) {
                 if (!list_empty(&ptype_all))
                         dev_queue_xmit_nit(skb, dev);
@@ -1671,17 +1683,31 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
                                 goto gso;
                 }
  
-               return dev->hard_start_xmit(skb, dev);
+               rc = ops->ndo_start_xmit(skb, dev);
+               /*
+                * TODO: if skb_orphan() was called by
+                * dev->hard_start_xmit() (for example, the unmodified
+                * igb driver does that; bnx2 doesn't), then
+                * skb_tx_software_timestamp() will be unable to send
+                * back the time stamp.
+                *
+                * How can this be prevented? Always create another
+                * reference to the socket before calling
+                * dev->hard_start_xmit()? Prevent that skb_orphan()
+                * does anything in dev->hard_start_xmit() by clearing
+                * the skb destructor before the call and restoring it
+                * afterwards, then doing the skb_orphan() ourselves?
+                */
+               return rc;
         }
  
  gso:
         do {
                 struct sk_buff *nskb = skb->next;
-               int rc;
  
                 skb->next = nskb->next;
                 nskb->next = NULL;
-               rc = dev->hard_start_xmit(nskb, dev);
+               rc = ops->ndo_start_xmit(nskb, dev);
                 if (unlikely(rc)) {
                         nskb->next = skb->next;
                         skb->next = nskb;
@@ -1698,69 +1724,35 @@ out_kfree_skb:
         return 0;
  }
  
-static u32 simple_tx_hashrnd;
-static int simple_tx_hashrnd_initialized = 0;
+static u32 skb_tx_hashrnd;
  
-static u16 simple_tx_hash(struct net_device *dev, struct sk_buff *skb)
+u16 skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb)
  {
-       u32 addr1, addr2, ports;
-       u32 hash, ihl;
-       u8 ip_proto = 0;
-
-       if (unlikely(!simple_tx_hashrnd_initialized)) {
-               get_random_bytes(&simple_tx_hashrnd, 4);
-               simple_tx_hashrnd_initialized = 1;
-       }
+       u32 hash;
  
-       switch (skb->protocol) {
-       case htons(ETH_P_IP):
-               if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET)))
-                       ip_proto = ip_hdr(skb)->protocol;
-               addr1 = ip_hdr(skb)->saddr;
-               addr2 = ip_hdr(skb)->daddr;
-               ihl = ip_hdr(skb)->ihl;
-               break;
-       case htons(ETH_P_IPV6):
-               ip_proto = ipv6_hdr(skb)->nexthdr;
-               addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3];
-               addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3];
-               ihl = (40 >> 2);
-               break;
-       default:
-               return 0;
-       }
-
-
-       switch (ip_proto) {
-       case IPPROTO_TCP:
-       case IPPROTO_UDP:
-       case IPPROTO_DCCP:
-       case IPPROTO_ESP:
-       case IPPROTO_AH:
-       case IPPROTO_SCTP:
-       case IPPROTO_UDPLITE:
-               ports = *((u32 *) (skb_network_header(skb) + (ihl * 4)));
-               break;
-
-       default:
-               ports = 0;
-               break;
-       }
+       if (skb_rx_queue_recorded(skb)) {
+               hash = skb_get_rx_queue(skb);
+       } else if (skb->sk && skb->sk->sk_hash) {
+               hash = skb->sk->sk_hash;
+       } else
+               hash = skb->protocol;
  
-       hash = jhash_3words(addr1, addr2, ports, simple_tx_hashrnd);
+       hash = jhash_1word(hash, skb_tx_hashrnd);
  
         return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32);
  }
+EXPORT_SYMBOL(skb_tx_hash);
  
  static struct netdev_queue *dev_pick_tx(struct net_device *dev,
                                         struct sk_buff *skb)
  {
+       const struct net_device_ops *ops = dev->netdev_ops;
         u16 queue_index = 0;
  
-       if (dev->select_queue)
-               queue_index = dev->select_queue(dev, skb);
+       if (ops->ndo_select_queue)
+               queue_index = ops->ndo_select_queue(dev, skb);
         else if (dev->real_num_tx_queues > 1)
-               queue_index = simple_tx_hash(dev, skb);
+               queue_index = skb_tx_hash(dev, skb);
  
         skb_set_queue_mapping(skb, queue_index);
         return netdev_get_tx_queue(dev, queue_index);
@@ -2256,12 +2248,6 @@ int netif_receive_skb(struct sk_buff *skb)
  
         rcu_read_lock();
  
-       /* Don't receive packets in an exiting network namespace */
-       if (!net_alive(dev_net(skb->dev))) {
-               kfree_skb(skb);
-               goto out;
-       }
-
  #ifdef CONFIG_NET_CLS_ACT
         if (skb->tc_verd & TC_NCLS) {
                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
@@ -2292,6 +2278,8 @@ ncls:
         if (!skb)
                 goto out;
  
+       skb_orphan(skb);
+
         type = skb->protocol;
         list_for_each_entry_rcu(ptype,
                         &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) {
@@ -2333,6 +2321,294 @@ static void flush_backlog(void *arg)
                 }
  }
  
+static int napi_gro_complete(struct sk_buff *skb)
+{
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       int err = -ENOENT;
+
+       if (NAPI_GRO_CB(skb)->count == 1) {
+               skb_shinfo(skb)->gso_size = 0;
+               goto out;
+       }
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, head, list) {
+               if (ptype->type != type || ptype->dev || !ptype->gro_complete)
+                       continue;
+
+               err = ptype->gro_complete(skb);
+               break;
+       }
+       rcu_read_unlock();
+
+       if (err) {
+               WARN_ON(&ptype->list == head);
+               kfree_skb(skb);
+               return NET_RX_SUCCESS;
+       }
+
+out:
+       return netif_receive_skb(skb);
+}
+
+void napi_gro_flush(struct napi_struct *napi)
+{
+       struct sk_buff *skb, *next;
+
+       for (skb = napi->gro_list; skb; skb = next) {
+               next = skb->next;
+               skb->next = NULL;
+               napi_gro_complete(skb);
+       }
+
+       napi->gro_count = 0;
+       napi->gro_list = NULL;
+}
+EXPORT_SYMBOL(napi_gro_flush);
+
+void *skb_gro_header(struct sk_buff *skb, unsigned int hlen)
+{
+       unsigned int offset = skb_gro_offset(skb);
+
+       hlen += offset;
+       if (hlen <= skb_headlen(skb))
+               return skb->data + offset;
+
+       if (unlikely(!skb_shinfo(skb)->nr_frags ||
+                    skb_shinfo(skb)->frags[0].size <=
+                    hlen - skb_headlen(skb) ||
+                    PageHighMem(skb_shinfo(skb)->frags[0].page)))
+               return pskb_may_pull(skb, hlen) ? skb->data + offset : NULL;
+
+       return page_address(skb_shinfo(skb)->frags[0].page) +
+              skb_shinfo(skb)->frags[0].page_offset +
+              offset - skb_headlen(skb);
+}
+EXPORT_SYMBOL(skb_gro_header);
+
+int dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       struct sk_buff **pp = NULL;
+       struct packet_type *ptype;
+       __be16 type = skb->protocol;
+       struct list_head *head = &ptype_base[ntohs(type) & PTYPE_HASH_MASK];
+       int same_flow;
+       int mac_len;
+       int ret;
+
+       if (!(skb->dev->features & NETIF_F_GRO))
+               goto normal;
+
+       if (skb_is_gso(skb) || skb_shinfo(skb)->frag_list)
+               goto normal;
+
+       rcu_read_lock();
+       list_for_each_entry_rcu(ptype, head, list) {
+               if (ptype->type != type || ptype->dev || !ptype->gro_receive)
+                       continue;
+
+               skb_set_network_header(skb, skb_gro_offset(skb));
+               mac_len = skb->network_header - skb->mac_header;
+               skb->mac_len = mac_len;
+               NAPI_GRO_CB(skb)->same_flow = 0;
+               NAPI_GRO_CB(skb)->flush = 0;
+               NAPI_GRO_CB(skb)->free = 0;
+
+               pp = ptype->gro_receive(&napi->gro_list, skb);
+               break;
+       }
+       rcu_read_unlock();
+
+       if (&ptype->list == head)
+               goto normal;
+
+       same_flow = NAPI_GRO_CB(skb)->same_flow;
+       ret = NAPI_GRO_CB(skb)->free ? GRO_MERGED_FREE : GRO_MERGED;
+
+       if (pp) {
+               struct sk_buff *nskb = *pp;
+
+               *pp = nskb->next;
+               nskb->next = NULL;
+               napi_gro_complete(nskb);
+               napi->gro_count--;
+       }
+
+       if (same_flow)
+               goto ok;
+
+       if (NAPI_GRO_CB(skb)->flush || napi->gro_count >= MAX_GRO_SKBS)
+               goto normal;
+
+       napi->gro_count++;
+       NAPI_GRO_CB(skb)->count = 1;
+       skb_shinfo(skb)->gso_size = skb_gro_len(skb);
+       skb->next = napi->gro_list;
+       napi->gro_list = skb;
+       ret = GRO_HELD;
+
+pull:
+       if (unlikely(!pskb_may_pull(skb, skb_gro_offset(skb)))) {
+               if (napi->gro_list == skb)
+                       napi->gro_list = skb->next;
+               ret = GRO_DROP;
+       }
+
+ok:
+       return ret;
+
+normal:
+       ret = GRO_NORMAL;
+       goto pull;
+}
+EXPORT_SYMBOL(dev_gro_receive);
+
+static int __napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       struct sk_buff *p;
+
+       if (netpoll_rx_on(skb))
+               return GRO_NORMAL;
+
+       for (p = napi->gro_list; p; p = p->next) {
+               NAPI_GRO_CB(p)->same_flow = (p->dev == skb->dev)
+                       && !compare_ether_header(skb_mac_header(p),
+                                                skb_gro_mac_header(skb));
+               NAPI_GRO_CB(p)->flush = 0;
+       }
+
+       return dev_gro_receive(napi, skb);
+}
+
+int napi_skb_finish(int ret, struct sk_buff *skb)
+{
+       int err = NET_RX_SUCCESS;
+
+       switch (ret) {
+       case GRO_NORMAL:
+               return netif_receive_skb(skb);
+
+       case GRO_DROP:
+               err = NET_RX_DROP;
+               /* fall through */
+
+       case GRO_MERGED_FREE:
+               kfree_skb(skb);
+               break;
+       }
+
+       return err;
+}
+EXPORT_SYMBOL(napi_skb_finish);
+
+int napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
+{
+       skb_gro_reset_offset(skb);
+
+       return napi_skb_finish(__napi_gro_receive(napi, skb), skb);
+}
+EXPORT_SYMBOL(napi_gro_receive);
+
+void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb)
+{
+       __skb_pull(skb, skb_headlen(skb));
+       skb_reserve(skb, NET_IP_ALIGN - skb_headroom(skb));
+
+       napi->skb = skb;
+}
+EXPORT_SYMBOL(napi_reuse_skb);
+
+struct sk_buff *napi_get_frags(struct napi_struct *napi)
+{
+       struct net_device *dev = napi->dev;
+       struct sk_buff *skb = napi->skb;
+
+       if (!skb) {
+               skb = netdev_alloc_skb(dev, GRO_MAX_HEAD + NET_IP_ALIGN);
+               if (!skb)
+                       goto out;
+
+               skb_reserve(skb, NET_IP_ALIGN);
+
+               napi->skb = skb;
+       }
+
+out:
+       return skb;
+}
+EXPORT_SYMBOL(napi_get_frags);
+
+int napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb, int ret)
+{
+       int err = NET_RX_SUCCESS;
+
+       switch (ret) {
+       case GRO_NORMAL:
+       case GRO_HELD:
+               skb->protocol = eth_type_trans(skb, napi->dev);
+
+               if (ret == GRO_NORMAL)
+                       return netif_receive_skb(skb);
+
+               skb_gro_pull(skb, -ETH_HLEN);
+               break;
+
+       case GRO_DROP:
+               err = NET_RX_DROP;
+               /* fall through */
+
+       case GRO_MERGED_FREE:
+               napi_reuse_skb(napi, skb);
+               break;
+       }
+
+       return err;
+}
+EXPORT_SYMBOL(napi_frags_finish);
+
+struct sk_buff *napi_frags_skb(struct napi_struct *napi)
+{
+       struct sk_buff *skb = napi->skb;
+       struct ethhdr *eth;
+
+       napi->skb = NULL;
+
+       skb_reset_mac_header(skb);
+       skb_gro_reset_offset(skb);
+
+       eth = skb_gro_header(skb, sizeof(*eth));
+       if (!eth) {
+               napi_reuse_skb(napi, skb);
+               skb = NULL;
+               goto out;
+       }
+
+       skb_gro_pull(skb, sizeof(*eth));
+
+       /*
+        * This works because the only protocols we care about don't require
+        * special handling.  We'll fix it up properly at the end.
+        */
+       skb->protocol = eth->h_proto;
+
+out:
+       return skb;
+}
+EXPORT_SYMBOL(napi_frags_skb);
+
+int napi_gro_frags(struct napi_struct *napi)
+{
+       struct sk_buff *skb = napi_frags_skb(napi);
+
+       if (!skb)
+               return NET_RX_DROP;
+
+       return napi_frags_finish(napi, skb, __napi_gro_receive(napi, skb));
+}
+EXPORT_SYMBOL(napi_gro_frags);
+
  static int process_backlog(struct napi_struct *napi, int quota)
  {
         int work = 0;
@@ -2375,6 +2651,72 @@ void __napi_schedule(struct napi_struct *n)
  }
  EXPORT_SYMBOL(__napi_schedule);
  
+void __napi_complete(struct napi_struct *n)
+{
+       BUG_ON(!test_bit(NAPI_STATE_SCHED, &n->state));
+       BUG_ON(n->gro_list);
+
+       list_del(&n->poll_list);
+       smp_mb__before_clear_bit();
+       clear_bit(NAPI_STATE_SCHED, &n->state);
+}
+EXPORT_SYMBOL(__napi_complete);
+
+void napi_complete(struct napi_struct *n)
+{
+       unsigned long flags;
+
+       /*
+        * don't let napi dequeue from the cpu poll list
+        * just in case it's running on a different cpu
+        */
+       if (unlikely(test_bit(NAPI_STATE_NPSVC, &n->state)))
+               return;
+
+       napi_gro_flush(n);
+       local_irq_save(flags);
+       __napi_complete(n);
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL(napi_complete);
+
+void netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+                   int (*poll)(struct napi_struct *, int), int weight)
+{
+       INIT_LIST_HEAD(&napi->poll_list);
+       napi->gro_count = 0;
+       napi->gro_list = NULL;
+       napi->skb = NULL;
+       napi->poll = poll;
+       napi->weight = weight;
+       list_add(&napi->dev_list, &dev->napi_list);
+       napi->dev = dev;
+#ifdef CONFIG_NETPOLL
+       spin_lock_init(&napi->poll_lock);
+       napi->poll_owner = -1;
+#endif
+       set_bit(NAPI_STATE_SCHED, &napi->state);
+}
+EXPORT_SYMBOL(netif_napi_add);
+
+void netif_napi_del(struct napi_struct *napi)
+{
+       struct sk_buff *skb, *next;
+
+       list_del_init(&napi->dev_list);
+       napi_free_frags(napi);
+
+       for (skb = napi->gro_list; skb; skb = next) {
+               next = skb->next;
+               skb->next = NULL;
+               kfree_skb(skb);
+       }
+
+       napi->gro_list = NULL;
+       napi->gro_count = 0;
+}
+EXPORT_SYMBOL(netif_napi_del);
+
  
  static void net_rx_action(struct softirq_action *h)
  {
@@ -2447,14 +2789,7 @@ out:
          * There may not be any more sk_buffs coming right now, so push
          * any pending DMA copies to hardware
          */
-       if (!cpus_empty(net_dma.channel_mask)) {
-               int chan_idx;
-               for_each_cpu_mask_nr(chan_idx, net_dma.channel_mask) {
-                       struct dma_chan *chan = net_dma.channels[chan_idx];
-                       if (chan)
-                               dma_async_memcpy_issue_pending(chan);
-               }
-       }
+       dma_issue_pending_all();
  #endif
  
         return;
@@ -2620,7 +2955,7 @@ void dev_seq_stop(struct seq_file *seq, void *v)
  
  static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
  {
-       struct net_device_stats *stats = dev->get_stats(dev);
+       const struct net_device_stats *stats = dev_get_stats(dev);
  
         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
@@ -2941,6 +3276,8 @@ static void dev_change_rx_flags(struct net_device *dev, int flags)
  static int __dev_set_promiscuity(struct net_device *dev, int inc)
  {
         unsigned short old_flags = dev->flags;
+       uid_t uid;
+       gid_t gid;
  
         ASSERT_RTNL();
  
@@ -2965,15 +3302,17 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc)
                 printk(KERN_INFO "device %s %s promiscuous mode\n",
                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
                                                                "left");
-               if (audit_enabled)
+               if (audit_enabled) {
+                       current_uid_gid(&uid, &gid);
                         audit_log(current->audit_context, GFP_ATOMIC,
                                 AUDIT_ANOM_PROMISCUOUS,
                                 "dev=%s prom=%d old_prom=%d auid=%u uid=%u gid=%u ses=%u",
                                 dev->name, (dev->flags & IFF_PROMISC),
                                 (old_flags & IFF_PROMISC),
                                 audit_get_loginuid(current),
-                               current->uid, current->gid,
+                               uid, gid,
                                 audit_get_sessionid(current));
+               }
  
                 dev_change_rx_flags(dev, IFF_PROMISC);
         }
@@ -3560,11 +3899,13 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
  {
         int err;
         struct net_device *dev = __dev_get_by_name(net, ifr->ifr_name);
-       const struct net_device_ops *ops = dev->netdev_ops;
+       const struct net_device_ops *ops;
  
         if (!dev)
                 return -ENODEV;
  
+       ops = dev->netdev_ops;
+
         switch (cmd) {
                 case SIOCSIFFLAGS:      /* Set interface flags */
                         return dev_change_flags(dev, ifr->ifr_flags);
@@ -3641,6 +3982,7 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
                             cmd == SIOCSMIIREG ||
                             cmd == SIOCBRADDIF ||
                             cmd == SIOCBRDELIF ||
+                           cmd == SIOCSHWTSTAMP ||
                             cmd == SIOCWANDEV) {
                                 err = -EOPNOTSUPP;
                                 if (ops->ndo_do_ioctl) {
@@ -3795,6 +4137,7 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
                 case SIOCBONDCHANGEACTIVE:
                 case SIOCBRADDIF:
                 case SIOCBRDELIF:
+               case SIOCSHWTSTAMP:
                         if (!capable(CAP_NET_ADMIN))
                                 return -EPERM;
                         /* fall through */
@@ -3975,6 +4318,39 @@ unsigned long netdev_fix_features(unsigned long features, const char *name)
  }
  EXPORT_SYMBOL(netdev_fix_features);
  
+/* Some devices need to (re-)set their netdev_ops inside
+ * ->init() or similar.  If that happens, we have to setup
+ * the compat pointers again.
+ */
+void netdev_resync_ops(struct net_device *dev)
+{
+#ifdef CONFIG_COMPAT_NET_DEV_OPS
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       dev->init = ops->ndo_init;
+       dev->uninit = ops->ndo_uninit;
+       dev->open = ops->ndo_open;
+       dev->change_rx_flags = ops->ndo_change_rx_flags;
+       dev->set_rx_mode = ops->ndo_set_rx_mode;
+       dev->set_multicast_list = ops->ndo_set_multicast_list;
+       dev->set_mac_address = ops->ndo_set_mac_address;
+       dev->validate_addr = ops->ndo_validate_addr;
+       dev->do_ioctl = ops->ndo_do_ioctl;
+       dev->set_config = ops->ndo_set_config;
+       dev->change_mtu = ops->ndo_change_mtu;
+       dev->neigh_setup = ops->ndo_neigh_setup;
+       dev->tx_timeout = ops->ndo_tx_timeout;
+       dev->get_stats = ops->ndo_get_stats;
+       dev->vlan_rx_register = ops->ndo_vlan_rx_register;
+       dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
+       dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
+#ifdef CONFIG_NET_POLL_CONTROLLER
+       dev->poll_controller = ops->ndo_poll_controller;
+#endif
+#endif
+}
+EXPORT_SYMBOL(netdev_resync_ops);
+
  /**
   *     register_netdevice      - register a network device
   *     @dev: device to register
@@ -4019,27 +4395,7 @@ int register_netdevice(struct net_device *dev)
          * This is temporary until all network devices are converted.
          */
         if (dev->netdev_ops) {
-               const struct net_device_ops *ops = dev->netdev_ops;
-
-               dev->init = ops->ndo_init;
-               dev->uninit = ops->ndo_uninit;
-               dev->open = ops->ndo_open;
-               dev->change_rx_flags = ops->ndo_change_rx_flags;
-               dev->set_rx_mode = ops->ndo_set_rx_mode;
-               dev->set_multicast_list = ops->ndo_set_multicast_list;
-               dev->set_mac_address = ops->ndo_set_mac_address;
-               dev->validate_addr = ops->ndo_validate_addr;
-               dev->do_ioctl = ops->ndo_do_ioctl;
-               dev->set_config = ops->ndo_set_config;
-               dev->change_mtu = ops->ndo_change_mtu;
-               dev->tx_timeout = ops->ndo_tx_timeout;
-               dev->get_stats = ops->ndo_get_stats;
-               dev->vlan_rx_register = ops->ndo_vlan_rx_register;
-               dev->vlan_rx_add_vid = ops->ndo_vlan_rx_add_vid;
-               dev->vlan_rx_kill_vid = ops->ndo_vlan_rx_kill_vid;
-#ifdef CONFIG_NET_POLL_CONTROLLER
-               dev->poll_controller = ops->ndo_poll_controller;
-#endif
+               netdev_resync_ops(dev);
         } else {
                 char drivername[64];
                 pr_info("%s (%s): not using net_device_ops yet\n",
@@ -4136,6 +4492,45 @@ err_uninit:
         goto out;
  }
  
+/**
+ *     init_dummy_netdev       - init a dummy network device for NAPI
+ *     @dev: device to init
+ *
+ *     This takes a network device structure and initializes the minimum
+ *     amount of fields so it can be used to schedule NAPI polls without
+ *     registering a full blown interface. This is to be used by drivers
+ *     that need to tie several hardware interfaces to a single NAPI
+ *     poll scheduler due to HW limitations.
+ */
+int init_dummy_netdev(struct net_device *dev)
+{
+       /* Clear everything. Note we don't initialize spinlocks
+        * as they aren't supposed to be taken by any of the
+        * NAPI code and this dummy netdev is supposed to be
+        * only ever used for NAPI polls
+        */
+       memset(dev, 0, sizeof(struct net_device));
+
+       /* make sure we BUG if trying to hit standard
+        * register/unregister code path
+        */
+       dev->reg_state = NETREG_DUMMY;
+
+       /* initialize the ref count */
+       atomic_set(&dev->refcnt, 1);
+
+       /* NAPI wants this */
+       INIT_LIST_HEAD(&dev->napi_list);
+
+       /* a dummy interface is started by default */
+       set_bit(__LINK_STATE_PRESENT, &dev->state);
+       set_bit(__LINK_STATE_START, &dev->state);
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(init_dummy_netdev);
+
+
  /**
   *     register_netdev - register a network device
   *     @dev: device to register
@@ -4288,10 +4683,24 @@ void netdev_run_todo(void)
         }
  }
  
-static struct net_device_stats *internal_stats(struct net_device *dev)
-{
-       return &dev->stats;
+/**
+ *     dev_get_stats   - get network device statistics
+ *     @dev: device to get statistics from
+ *
+ *     Get network statistics from device. The device driver may provide
+ *     its own method by setting dev->netdev_ops->get_stats; otherwise
+ *     the internal statistics structure is used.
+ */
+const struct net_device_stats *dev_get_stats(struct net_device *dev)
+ {
+       const struct net_device_ops *ops = dev->netdev_ops;
+
+       if (ops->ndo_get_stats)
+               return ops->ndo_get_stats(dev);
+       else
+               return &dev->stats;
  }
+EXPORT_SYMBOL(dev_get_stats);
  
  static void netdev_init_one_queue(struct net_device *dev,
                                   struct netdev_queue *queue,
@@ -4360,18 +4769,11 @@ struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
         dev->num_tx_queues = queue_count;
         dev->real_num_tx_queues = queue_count;
  
-       if (sizeof_priv) {
-               dev->priv = ((char *)dev +
-                            ((sizeof(struct net_device) + NETDEV_ALIGN_CONST)
-                             & ~NETDEV_ALIGN_CONST));
-       }
-
         dev->gso_max_size = GSO_MAX_SIZE;
  
         netdev_init_queues(dev);
  
-       dev->get_stats = internal_stats;
-       netpoll_netdev_init(dev);
+       INIT_LIST_HEAD(&dev->napi_list);
         setup(dev);
         strcpy(dev->name, name);
         return dev;
@@ -4388,10 +4790,15 @@ EXPORT_SYMBOL(alloc_netdev_mq);
   */
  void free_netdev(struct net_device *dev)
  {
+       struct napi_struct *p, *n;
+
         release_net(dev_net(dev));
  
         kfree(dev->_tx);
  
+       list_for_each_entry_safe(p, n, &dev->napi_list, dev_list)
+               netif_napi_del(p);
+
         /*  Compatibility with error handling in drivers */
         if (dev->reg_state == NETREG_UNINITIALIZED) {
                 kfree((char *)dev - dev->padded);
@@ -4627,122 +5034,6 @@ static int dev_cpu_callback(struct notifier_block *nfb,
         return NOTIFY_OK;
  }
  
-#ifdef CONFIG_NET_DMA
-/**
- * net_dma_rebalance - try to maintain one DMA channel per CPU
- * @net_dma: DMA client and associated data (lock, channels, channel_mask)
- *
- * This is called when the number of channels allocated to the net_dma client
- * changes.  The net_dma client tries to have one DMA channel per CPU.
- */
-
-static void net_dma_rebalance(struct net_dma *net_dma)
-{
-       unsigned int cpu, i, n, chan_idx;
-       struct dma_chan *chan;
-
-       if (cpus_empty(net_dma->channel_mask)) {
-               for_each_online_cpu(cpu)
-                       rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
-               return;
-       }
-
-       i = 0;
-       cpu = first_cpu(cpu_online_map);
-
-       for_each_cpu_mask_nr(chan_idx, net_dma->channel_mask) {
-               chan = net_dma->channels[chan_idx];
-
-               n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
-                  + (i < (num_online_cpus() %
-                       cpus_weight(net_dma->channel_mask)) ? 1 : 0));
-
-               while(n) {
-                       per_cpu(softnet_data, cpu).net_dma = chan;
-                       cpu = next_cpu(cpu, cpu_online_map);
-                       n--;
-               }
-               i++;
-       }
-}
-
-/**
- * netdev_dma_event - event callback for the net_dma_client
- * @client: should always be net_dma_client
- * @chan: DMA channel for the event
- * @state: DMA state to be handled
- */
-static enum dma_state_client
-netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
-       enum dma_state state)
-{
-       int i, found = 0, pos = -1;
-       struct net_dma *net_dma =
-               container_of(client, struct net_dma, client);
-       enum dma_state_client ack = DMA_DUP; /* default: take no action */
-
-       spin_lock(&net_dma->lock);
-       switch (state) {
-       case DMA_RESOURCE_AVAILABLE:
-               for (i = 0; i < nr_cpu_ids; i++)
-                       if (net_dma->channels[i] == chan) {
-                               found = 1;
-                               break;
-                       } else if (net_dma->channels[i] == NULL && pos < 0)
-                               pos = i;
-
-               if (!found && pos >= 0) {
-                       ack = DMA_ACK;
-                       net_dma->channels[pos] = chan;
-                       cpu_set(pos, net_dma->channel_mask);
-                       net_dma_rebalance(net_dma);
-               }
-               break;
-       case DMA_RESOURCE_REMOVED:
-               for (i = 0; i < nr_cpu_ids; i++)
-                       if (net_dma->channels[i] == chan) {
-                               found = 1;
-                               pos = i;
-                               break;
-                       }
-
-               if (found) {
-                       ack = DMA_ACK;
-                       cpu_clear(pos, net_dma->channel_mask);
-                       net_dma->channels[i] = NULL;
-                       net_dma_rebalance(net_dma);
-               }
-               break;
-       default:
-               break;
-       }
-       spin_unlock(&net_dma->lock);
-
-       return ack;
-}
-
-/**
- * netdev_dma_register - register the networking subsystem as a DMA client
- */
-static int __init netdev_dma_register(void)
-{
-       net_dma.channels = kzalloc(nr_cpu_ids * sizeof(struct net_dma),
-                                                               GFP_KERNEL);
-       if (unlikely(!net_dma.channels)) {
-               printk(KERN_NOTICE
-                               "netdev_dma: no memory for net_dma.channels\n");
-               return -ENOMEM;
-       }
-       spin_lock_init(&net_dma.lock);
-       dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
-       dma_async_client_register(&net_dma.client);
-       dma_async_client_chan_request(&net_dma.client);
-       return 0;
-}
-
-#else
-static int __init netdev_dma_register(void) { return -ENODEV; }
-#endif /* CONFIG_NET_DMA */
  
  /**
   *     netdev_increment_features - increment feature set by one
@@ -4860,13 +5151,14 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
  
  static void __net_exit default_device_exit(struct net *net)
  {
-       struct net_device *dev, *next;
+       struct net_device *dev;
         /*
          * Push all migratable of the network devices back to the
          * initial network namespace
          */
         rtnl_lock();
-       for_each_netdev_safe(net, dev, next) {
+restart:
+       for_each_netdev(net, dev) {
                 int err;
                 char fb_name[IFNAMSIZ];
  
@@ -4877,7 +5169,7 @@ static void __net_exit default_device_exit(struct net *net)
                 /* Delete virtual devices */
                 if (dev->rtnl_link_ops && dev->rtnl_link_ops->dellink) {
                         dev->rtnl_link_ops->dellink(dev);
-                       continue;
+                       goto restart;
                 }
  
                 /* Push remaing network devices to init_net */
@@ -4888,6 +5180,7 @@ static void __net_exit default_device_exit(struct net *net)
                                 __func__, dev->name, err);
                         BUG();
                 }
+               goto restart;
         }
         rtnl_unlock();
  }
@@ -4940,6 +5233,8 @@ static int __init net_dev_init(void)
  
                 queue->backlog.poll = process_backlog;
                 queue->backlog.weight = weight_p;
+               queue->backlog.gro_list = NULL;
+               queue->backlog.gro_count = 0;
         }
  
         dev_boot_phase = 0;
@@ -4959,8 +5254,6 @@ static int __init net_dev_init(void)
         if (register_pernet_device(&default_device_ops))
                 goto out;
  
-       netdev_dma_register();
-
         open_softirq(NET_TX_SOFTIRQ, net_tx_action);
         open_softirq(NET_RX_SOFTIRQ, net_rx_action);
  
@@ -4974,6 +5267,14 @@ out:
  
  subsys_initcall(net_dev_init);
  
+static int __init initialize_hashrnd(void)
+{
+       get_random_bytes(&skb_tx_hashrnd, sizeof(skb_tx_hashrnd));
+       return 0;
+}
+/* Registered as its own initcall; all it does is randomize skb_tx_hashrnd. */
+late_initcall_sync(initialize_hashrnd);
+
  EXPORT_SYMBOL(__dev_get_by_index);
  EXPORT_SYMBOL(__dev_get_by_name);
  EXPORT_SYMBOL(__dev_remove_pack);