Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[linux-2.6.git] / drivers / net / vmxnet3 / vmxnet3_drv.c
index a4c97e7..fabcded 100644 (file)
@@ -35,7 +35,7 @@ char vmxnet3_driver_name[] = "vmxnet3";
  * PCI Device ID Table
  * Last entry must be all 0s
  */
-static const struct pci_device_id vmxnet3_pciid_table[] = {
+static DEFINE_PCI_DEVICE_TABLE(vmxnet3_pciid_table) = {
        {PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_VMXNET3)},
        {0}
 };
@@ -44,6 +44,12 @@ MODULE_DEVICE_TABLE(pci, vmxnet3_pciid_table);
 
 static atomic_t devices_found;
 
+#define VMXNET3_MAX_DEVICES 10
+static int enable_mq = 1;
+static int irq_share_mode;
+
+static void
+vmxnet3_write_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac);
 
 /*
  *    Enable/Disable the given intr
@@ -72,6 +78,8 @@ vmxnet3_enable_all_intrs(struct vmxnet3_adapter *adapter)
 
        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_enable_intr(adapter, i);
+       adapter->shared->devRead.intrConf.intrCtrl &=
+                                       cpu_to_le32(~VMXNET3_IC_DISABLE_ALL);
 }
 
 
@@ -80,6 +88,8 @@ vmxnet3_disable_all_intrs(struct vmxnet3_adapter *adapter)
 {
        int i;
 
+       adapter->shared->devRead.intrConf.intrCtrl |=
+                                       cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
        for (i = 0; i < adapter->intr.num_intrs; i++)
                vmxnet3_disable_intr(adapter, i);
 }
@@ -95,7 +105,7 @@ vmxnet3_ack_events(struct vmxnet3_adapter *adapter, u32 events)
 static bool
 vmxnet3_tq_stopped(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
-       return netif_queue_stopped(adapter->netdev);
+       return tq->stopped;
 }
 
 
@@ -103,7 +113,7 @@ static void
 vmxnet3_tq_start(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
        tq->stopped = false;
-       netif_start_queue(adapter->netdev);
+       netif_start_subqueue(adapter->netdev, tq - adapter->tx_queue);
 }
 
 
@@ -111,7 +121,7 @@ static void
 vmxnet3_tq_wake(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
        tq->stopped = false;
-       netif_wake_queue(adapter->netdev);
+       netif_wake_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 }
 
 
@@ -120,7 +130,7 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
 {
        tq->stopped = true;
        tq->num_stop++;
-       netif_stop_queue(adapter->netdev);
+       netif_stop_subqueue(adapter->netdev, (tq - adapter->tx_queue));
 }
 
 
@@ -128,12 +138,17 @@ vmxnet3_tq_stop(struct vmxnet3_tx_queue *tq, struct vmxnet3_adapter *adapter)
  * Check the link state. This may start or stop the tx queue.
  */
 static void
-vmxnet3_check_link(struct vmxnet3_adapter *adapter)
+vmxnet3_check_link(struct vmxnet3_adapter *adapter, bool affectTxQueue)
 {
        u32 ret;
+       int i;
+       unsigned long flags;
 
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_GET_LINK);
        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+
        adapter->link_speed = ret >> 16;
        if (ret & 1) { /* Link is up. */
                printk(KERN_INFO "%s: NIC Link is Up %d Mbps\n",
@@ -141,20 +156,29 @@ vmxnet3_check_link(struct vmxnet3_adapter *adapter)
                if (!netif_carrier_ok(adapter->netdev))
                        netif_carrier_on(adapter->netdev);
 
-               vmxnet3_tq_start(&adapter->tx_queue, adapter);
+               if (affectTxQueue) {
+                       for (i = 0; i < adapter->num_tx_queues; i++)
+                               vmxnet3_tq_start(&adapter->tx_queue[i],
+                                                adapter);
+               }
        } else {
                printk(KERN_INFO "%s: NIC Link is Down\n",
                       adapter->netdev->name);
                if (netif_carrier_ok(adapter->netdev))
                        netif_carrier_off(adapter->netdev);
 
-               vmxnet3_tq_stop(&adapter->tx_queue, adapter);
+               if (affectTxQueue) {
+                       for (i = 0; i < adapter->num_tx_queues; i++)
+                               vmxnet3_tq_stop(&adapter->tx_queue[i], adapter);
+               }
        }
 }
 
 static void
 vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 {
+       int i;
+       unsigned long flags;
        u32 events = le32_to_cpu(adapter->shared->ecr);
        if (!events)
                return;
@@ -163,23 +187,27 @@ vmxnet3_process_events(struct vmxnet3_adapter *adapter)
 
        /* Check if link state has changed */
        if (events & VMXNET3_ECR_LINK)
-               vmxnet3_check_link(adapter);
+               vmxnet3_check_link(adapter, true);
 
        /* Check if there is an error on xmit/recv queues */
        if (events & (VMXNET3_ECR_TQERR | VMXNET3_ECR_RQERR)) {
+               spin_lock_irqsave(&adapter->cmd_lock, flags);
                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                                       VMXNET3_CMD_GET_QUEUE_STATUS);
-
-               if (adapter->tqd_start->status.stopped) {
-                       printk(KERN_ERR "%s: tq error 0x%x\n",
-                              adapter->netdev->name,
-                              le32_to_cpu(adapter->tqd_start->status.error));
-               }
-               if (adapter->rqd_start->status.stopped) {
-                       printk(KERN_ERR "%s: rq error 0x%x\n",
-                              adapter->netdev->name,
-                              adapter->rqd_start->status.error);
-               }
+               spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+
+               for (i = 0; i < adapter->num_tx_queues; i++)
+                       if (adapter->tqd_start[i].status.stopped)
+                               dev_err(&adapter->netdev->dev,
+                                       "%s: tq[%d] error 0x%x\n",
+                                       adapter->netdev->name, i, le32_to_cpu(
+                                       adapter->tqd_start[i].status.error));
+               for (i = 0; i < adapter->num_rx_queues; i++)
+                       if (adapter->rqd_start[i].status.stopped)
+                               dev_err(&adapter->netdev->dev,
+                                       "%s: rq[%d] error 0x%x\n",
+                                       adapter->netdev->name, i,
+                                       adapter->rqd_start[i].status.error);
 
                schedule_work(&adapter->work);
        }
@@ -377,10 +405,8 @@ vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 
        while (tq->tx_ring.next2comp != tq->tx_ring.next2fill) {
                struct vmxnet3_tx_buf_info *tbi;
-               union Vmxnet3_GenericDesc *gdesc;
 
                tbi = tq->buf_info + tq->tx_ring.next2comp;
-               gdesc = tq->tx_ring.base + tq->tx_ring.next2comp;
 
                vmxnet3_unmap_tx_buf(tbi, adapter->pdev);
                if (tbi->skb) {
@@ -404,7 +430,7 @@ vmxnet3_tq_cleanup(struct vmxnet3_tx_queue *tq,
 }
 
 
-void
+static void
 vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
                   struct vmxnet3_adapter *adapter)
 {
@@ -431,6 +457,17 @@ vmxnet3_tq_destroy(struct vmxnet3_tx_queue *tq,
 }
 
 
+/* Call vmxnet3_tq_destroy() on each of the adapter's tx queues */
+void
+vmxnet3_tq_destroy_all(struct vmxnet3_adapter *adapter)
+{
+       int qidx = 0;
+
+       while (qidx < adapter->num_tx_queues)
+               vmxnet3_tq_destroy(&adapter->tx_queue[qidx++], adapter);
+}
+
+
 static void
 vmxnet3_tq_init(struct vmxnet3_tx_queue *tq,
                struct vmxnet3_adapter *adapter)
@@ -512,6 +549,14 @@ err:
        return -ENOMEM;
 }
 
+static void
+vmxnet3_tq_cleanup_all(struct vmxnet3_adapter *adapter)
+{
+       int qidx = 0;
+
+       while (qidx < adapter->num_tx_queues)   /* queues in index order */
+               vmxnet3_tq_cleanup(&adapter->tx_queue[qidx++], adapter);
+}
 
 /*
  *    starting from ring->next2fill, allocate rx buffers for the given ring
@@ -528,7 +573,7 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
        struct vmxnet3_cmd_ring *ring = &rq->rx_ring[ring_idx];
        u32 val;
 
-       while (num_allocated < num_to_alloc) {
+       while (num_allocated <= num_to_alloc) {
                struct vmxnet3_rx_buf_info *rbi;
                union Vmxnet3_GenericDesc *gd;
 
@@ -574,9 +619,15 @@ vmxnet3_rq_alloc_rx_buf(struct vmxnet3_rx_queue *rq, u32 ring_idx,
 
                BUG_ON(rbi->dma_addr == 0);
                gd->rxd.addr = cpu_to_le64(rbi->dma_addr);
-               gd->dword[2] = cpu_to_le32((ring->gen << VMXNET3_RXD_GEN_SHIFT)
+               gd->dword[2] = cpu_to_le32((!ring->gen << VMXNET3_RXD_GEN_SHIFT)
                                           | val | rbi->len);
 
+               /* Fill the last buffer but don't mark it ready, or else the
+                * device will think that the queue is full */
+               if (num_allocated == num_to_alloc)
+                       break;
+
+               gd->dword[2] |= cpu_to_le32(ring->gen << VMXNET3_RXD_GEN_SHIFT);
                num_allocated++;
                vmxnet3_cmd_ring_adv_next2fill(ring);
        }
@@ -658,8 +709,13 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
        while (len) {
                u32 buf_size;
 
-               buf_size = len > VMXNET3_MAX_TX_BUF_SIZE ?
-                          VMXNET3_MAX_TX_BUF_SIZE : len;
+               if (len < VMXNET3_MAX_TX_BUF_SIZE) {
+                       buf_size = len;
+                       dw2 |= len;
+               } else {
+                       buf_size = VMXNET3_MAX_TX_BUF_SIZE;
+                       /* spec says that for TxDesc.len, 0 == 2^14 */
+               }
 
                tbi = tq->buf_info + tq->tx_ring.next2fill;
                tbi->map_type = VMXNET3_MAP_SINGLE;
@@ -667,13 +723,13 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
                                skb->data + buf_offset, buf_size,
                                PCI_DMA_TODEVICE);
 
-               tbi->len = buf_size; /* this automatically convert 2^14 to 0 */
+               tbi->len = buf_size;
 
                gdesc = tq->tx_ring.base + tq->tx_ring.next2fill;
                BUG_ON(gdesc->txd.gen == tq->tx_ring.gen);
 
                gdesc->txd.addr = cpu_to_le64(tbi->dma_addr);
-               gdesc->dword[2] = cpu_to_le32(dw2 | buf_size);
+               gdesc->dword[2] = cpu_to_le32(dw2);
                gdesc->dword[3] = 0;
 
                dev_dbg(&adapter->netdev->dev,
@@ -721,6 +777,17 @@ vmxnet3_map_pkt(struct sk_buff *skb, struct vmxnet3_tx_ctx *ctx,
 }
 
 
+/* Run vmxnet3_tq_init() over tx_queue[0 .. num_tx_queues - 1] */
+static void
+vmxnet3_tq_init_all(struct vmxnet3_adapter *adapter)
+{
+       int qidx;
+
+       for (qidx = 0; qidx < adapter->num_tx_queues; ++qidx)
+               vmxnet3_tq_init(adapter->tx_queue + qidx, adapter);
+}
+
+
 /*
  *    parse and copy relevant protocol headers:
  *      For a tso pkt, relevant headers are L2/3/4 including options
@@ -745,36 +812,31 @@ vmxnet3_parse_and_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 {
        struct Vmxnet3_TxDataDesc *tdd;
 
-       if (ctx->mss) {
+       if (ctx->mss) { /* TSO */
                ctx->eth_ip_hdr_size = skb_transport_offset(skb);
                ctx->l4_hdr_size = ((struct tcphdr *)
                                   skb_transport_header(skb))->doff * 4;
                ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
        } else {
-               unsigned int pull_size;
-
                if (skb->ip_summed == CHECKSUM_PARTIAL) {
-                       ctx->eth_ip_hdr_size = skb_transport_offset(skb);
+                       ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
 
                        if (ctx->ipv4) {
                                struct iphdr *iph = (struct iphdr *)
                                                    skb_network_header(skb);
-                               if (iph->protocol == IPPROTO_TCP) {
-                                       pull_size = ctx->eth_ip_hdr_size +
-                                                   sizeof(struct tcphdr);
-
-                                       if (unlikely(!pskb_may_pull(skb,
-                                                               pull_size))) {
-                                               goto err;
-                                       }
+                               if (iph->protocol == IPPROTO_TCP)
                                        ctx->l4_hdr_size = ((struct tcphdr *)
                                           skb_transport_header(skb))->doff * 4;
-                               } else if (iph->protocol == IPPROTO_UDP) {
+                               else if (iph->protocol == IPPROTO_UDP)
+                                       /*
+                                        * Use tcp header size so that bytes to
+                                        * be copied are more than required by
+                                        * the device.
+                                        */
                                        ctx->l4_hdr_size =
-                                                       sizeof(struct udphdr);
-                               } else {
+                                                       sizeof(struct tcphdr);
+                               else
                                        ctx->l4_hdr_size = 0;
-                               }
                        } else {
                                /* for simplicity, don't copy L4 headers */
                                ctx->l4_hdr_size = 0;
@@ -835,7 +897,7 @@ vmxnet3_prepare_tso(struct sk_buff *skb,
  * Transmits a pkt thru a given tq
  * Returns:
  *    NETDEV_TX_OK:      descriptors are setup successfully
- *    NETDEV_TX_OK:      error occured, the pkt is dropped
+ *    NETDEV_TX_OK:      error occurred, the pkt is dropped
  *    NETDEV_TX_BUSY:    tx ring is full, queue is stopped
  *
  * Side-effects:
@@ -862,7 +924,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
        count = VMXNET3_TXD_NEEDED(skb_headlen(skb)) +
                skb_shinfo(skb)->nr_frags + 1;
 
-       ctx.ipv4 = (skb->protocol == __constant_ntohs(ETH_P_IP));
+       ctx.ipv4 = (vlan_get_protocol(skb) == cpu_to_be16(ETH_P_IP));
 
        ctx.mss = skb_shinfo(skb)->gso_size;
        if (ctx.mss) {
@@ -892,6 +954,21 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                }
        }
 
+       spin_lock_irqsave(&tq->tx_lock, flags);
+
+       if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
+               tq->stats.tx_ring_full++;
+               dev_dbg(&adapter->netdev->dev,
+                       "tx queue stopped on %s, next2comp %u"
+                       " next2fill %u\n", adapter->netdev->name,
+                       tq->tx_ring.next2comp, tq->tx_ring.next2fill);
+
+               vmxnet3_tq_stop(tq, adapter);
+               spin_unlock_irqrestore(&tq->tx_lock, flags);
+               return NETDEV_TX_BUSY;
+       }
+
+
        ret = vmxnet3_parse_and_copy_hdr(skb, tq, &ctx, adapter);
        if (ret >= 0) {
                BUG_ON(ret <= 0 && ctx.copy_size != 0);
@@ -912,21 +989,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
                }
        } else {
                tq->stats.drop_hdr_inspect_err++;
-               goto drop_pkt;
-       }
-
-       spin_lock_irqsave(&tq->tx_lock, flags);
-
-       if (count > vmxnet3_cmd_ring_desc_avail(&tq->tx_ring)) {
-               tq->stats.tx_ring_full++;
-               dev_dbg(&adapter->netdev->dev,
-                       "tx queue stopped on %s, next2comp %u"
-                       " next2fill %u\n", adapter->netdev->name,
-                       tq->tx_ring.next2comp, tq->tx_ring.next2fill);
-
-               vmxnet3_tq_stop(tq, adapter);
-               spin_unlock_irqrestore(&tq->tx_lock, flags);
-               return NETDEV_TX_BUSY;
+               goto unlock_drop_pkt;
        }
 
        /* fill tx descs related to addr & len */
@@ -989,15 +1052,17 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
        if (le32_to_cpu(tq->shared->txNumDeferred) >=
                                        le32_to_cpu(tq->shared->txThreshold)) {
                tq->shared->txNumDeferred = 0;
-               VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_TXPROD,
+               VMXNET3_WRITE_BAR0_REG(adapter,
+                                      VMXNET3_REG_TXPROD + tq->qid * 8,
                                       tq->tx_ring.next2fill);
        }
-       netdev->trans_start = jiffies;
 
        return NETDEV_TX_OK;
 
 hdr_too_big:
        tq->stats.drop_oversized_hdr++;
+unlock_drop_pkt:
+       spin_unlock_irqrestore(&tq->tx_lock, flags);
 drop_pkt:
        tq->stats.drop_total++;
        dev_kfree_skb(skb);
@@ -1010,7 +1075,10 @@ vmxnet3_xmit_frame(struct sk_buff *skb, struct net_device *netdev)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 
-       return vmxnet3_tq_xmit(skb, &adapter->tx_queue, adapter, netdev);
+               BUG_ON(skb->queue_mapping > adapter->num_tx_queues);
+               return vmxnet3_tq_xmit(skb,
+                                      &adapter->tx_queue[skb->queue_mapping],
+                                      adapter, netdev);
 }
 
 
@@ -1019,7 +1087,7 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
                struct sk_buff *skb,
                union Vmxnet3_GenericDesc *gdesc)
 {
-       if (!gdesc->rcd.cnc && adapter->rxcsum) {
+       if (!gdesc->rcd.cnc && adapter->netdev->features & NETIF_F_RXCSUM) {
                /* typical case: TCP/UDP over IP and both csums are correct */
                if ((le32_to_cpu(gdesc->dword[3]) & VMXNET3_RCD_CSUM_OK) ==
                                                        VMXNET3_RCD_CSUM_OK) {
@@ -1032,11 +1100,11 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
                                skb->csum = htons(gdesc->rcd.csum);
                                skb->ip_summed = CHECKSUM_PARTIAL;
                        } else {
-                               skb->ip_summed = CHECKSUM_NONE;
+                               skb_checksum_none_assert(skb);
                        }
                }
        } else {
-               skb->ip_summed = CHECKSUM_NONE;
+               skb_checksum_none_assert(skb);
        }
 }
 
@@ -1072,8 +1140,11 @@ static int
 vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                       struct vmxnet3_adapter *adapter, int quota)
 {
-       static u32 rxprod_reg[2] = {VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2};
+       static const u32 rxprod_reg[2] = {
+               VMXNET3_REG_RXPROD, VMXNET3_REG_RXPROD2
+       };
        u32 num_rxd = 0;
+       bool skip_page_frags = false;
        struct Vmxnet3_RxCompDesc *rcd;
        struct vmxnet3_rx_ctx *ctx = &rq->rx_ctx;
 #ifdef __BIG_ENDIAN_BITFIELD
@@ -1084,11 +1155,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                          &rxComp);
        while (rcd->gen == rq->comp_ring.gen) {
                struct vmxnet3_rx_buf_info *rbi;
-               struct sk_buff *skb;
+               struct sk_buff *skb, *new_skb = NULL;
+               struct page *new_page = NULL;
                int num_to_alloc;
                struct Vmxnet3_RxDesc *rxd;
                u32 idx, ring_idx;
-
+               struct vmxnet3_cmd_ring *ring = NULL;
                if (num_rxd >= quota) {
                        /* we may stop even before we see the EOP desc of
                         * the current pkt
@@ -1096,9 +1168,10 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                        break;
                }
                num_rxd++;
-
+               BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2);
                idx = rcd->rxdIdx;
-               ring_idx = rcd->rqID == rq->qid ? 0 : 1;
+               ring_idx = rcd->rqID < adapter->num_rx_queues ? 0 : 1;
+               ring = rq->rx_ring + ring_idx;
                vmxnet3_getRxDesc(rxd, &rq->rx_ring[ring_idx].base[idx].rxd,
                                  &rxCmdDesc);
                rbi = rq->buf_info[ring_idx] + idx;
@@ -1127,37 +1200,80 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                                goto rcd_done;
                        }
 
+                       skip_page_frags = false;
                        ctx->skb = rbi->skb;
-                       rbi->skb = NULL;
+                       new_skb = dev_alloc_skb(rbi->len + NET_IP_ALIGN);
+                       if (new_skb == NULL) {
+                               /* Skb allocation failed, do not hand this skb
+                                * over to the stack. Reuse it and drop the pkt.
+                                */
+                               rq->stats.rx_buf_alloc_failure++;
+                               ctx->skb = NULL;
+                               rq->stats.drop_total++;
+                               skip_page_frags = true;
+                               goto rcd_done;
+                       }
 
                        pci_unmap_single(adapter->pdev, rbi->dma_addr, rbi->len,
                                         PCI_DMA_FROMDEVICE);
 
                        skb_put(ctx->skb, rcd->len);
+
+                       /* Immediate refill */
+                       new_skb->dev = adapter->netdev;
+                       skb_reserve(new_skb, NET_IP_ALIGN);
+                       rbi->skb = new_skb;
+                       rbi->dma_addr = pci_map_single(adapter->pdev,
+                                       rbi->skb->data, rbi->len,
+                                       PCI_DMA_FROMDEVICE);
+                       rxd->addr = cpu_to_le64(rbi->dma_addr);
+                       rxd->len = rbi->len;
+
                } else {
-                       BUG_ON(ctx->skb == NULL);
+                       BUG_ON(ctx->skb == NULL && !skip_page_frags);
+
                        /* non SOP buffer must be type 1 in most cases */
-                       if (rbi->buf_type == VMXNET3_RX_BUF_PAGE) {
-                               BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
+                       BUG_ON(rbi->buf_type != VMXNET3_RX_BUF_PAGE);
+                       BUG_ON(rxd->btype != VMXNET3_RXD_BTYPE_BODY);
 
-                               if (rcd->len) {
-                                       pci_unmap_page(adapter->pdev,
-                                                      rbi->dma_addr, rbi->len,
-                                                      PCI_DMA_FROMDEVICE);
+                       /* If an sop buffer was dropped, skip all
+                        * following non-sop fragments. They will be reused.
+                        */
+                       if (skip_page_frags)
+                               goto rcd_done;
 
-                                       vmxnet3_append_frag(ctx->skb, rcd, rbi);
-                                       rbi->page = NULL;
-                               }
-                       } else {
-                               /*
-                                * The only time a non-SOP buffer is type 0 is
-                                * when it's EOP and error flag is raised, which
-                                * has already been handled.
+                       new_page = alloc_page(GFP_ATOMIC);
+                       if (unlikely(new_page == NULL)) {
+                               /* Replacement page frag could not be allocated.
+                                * Reuse this page. Drop the pkt and free the
+                                * skb which contained this page as a frag. Skip
+                                * processing all the following non-sop frags.
                                 */
-                               BUG_ON(true);
+                               rq->stats.rx_buf_alloc_failure++;
+                               dev_kfree_skb(ctx->skb);
+                               ctx->skb = NULL;
+                               skip_page_frags = true;
+                               goto rcd_done;
                        }
+
+                       if (rcd->len) {
+                               pci_unmap_page(adapter->pdev,
+                                              rbi->dma_addr, rbi->len,
+                                              PCI_DMA_FROMDEVICE);
+
+                               vmxnet3_append_frag(ctx->skb, rcd, rbi);
+                       }
+
+                       /* Immediate refill */
+                       rbi->page = new_page;
+                       rbi->dma_addr = pci_map_page(adapter->pdev, rbi->page,
+                                                    0, PAGE_SIZE,
+                                                    PCI_DMA_FROMDEVICE);
+                       rxd->addr = cpu_to_le64(rbi->dma_addr);
+                       rxd->len = rbi->len;
                }
 
+
                skb = ctx->skb;
                if (rcd->eop) {
                        skb->len += skb->data_len;
@@ -1167,38 +1283,39 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq,
                                        (union Vmxnet3_GenericDesc *)rcd);
                        skb->protocol = eth_type_trans(skb, adapter->netdev);
 
-                       if (unlikely(adapter->vlan_grp && rcd->ts)) {
-                               vlan_hwaccel_receive_skb(skb,
-                                               adapter->vlan_grp, rcd->tci);
-                       } else {
+                       if (unlikely(rcd->ts))
+                               __vlan_hwaccel_put_tag(skb, rcd->tci);
+
+                       if (adapter->netdev->features & NETIF_F_LRO)
                                netif_receive_skb(skb);
-                       }
+                       else
+                               napi_gro_receive(&rq->napi, skb);
 
-                       adapter->netdev->last_rx = jiffies;
                        ctx->skb = NULL;
                }
 
 rcd_done:
-               /* device may skip some rx descs */
-               rq->rx_ring[ring_idx].next2comp = idx;
-               VMXNET3_INC_RING_IDX_ONLY(rq->rx_ring[ring_idx].next2comp,
-                                         rq->rx_ring[ring_idx].size);
-
-               /* refill rx buffers frequently to avoid starving the h/w */
-               num_to_alloc = vmxnet3_cmd_ring_desc_avail(rq->rx_ring +
-                                                          ring_idx);
-               if (unlikely(num_to_alloc > VMXNET3_RX_ALLOC_THRESHOLD(rq,
-                                                       ring_idx, adapter))) {
-                       vmxnet3_rq_alloc_rx_buf(rq, ring_idx, num_to_alloc,
-                                               adapter);
-
-                       /* if needed, update the register */
-                       if (unlikely(rq->shared->updateRxProd)) {
-                               VMXNET3_WRITE_BAR0_REG(adapter,
-                                       rxprod_reg[ring_idx] + rq->qid * 8,
-                                       rq->rx_ring[ring_idx].next2fill);
-                               rq->uncommitted[ring_idx] = 0;
-                       }
+               /* device may have skipped some rx descs */
+               ring->next2comp = idx;
+               num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring);
+               ring = rq->rx_ring + ring_idx;
+               while (num_to_alloc) {
+                       vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd,
+                                         &rxCmdDesc);
+                       BUG_ON(!rxd->addr);
+
+                       /* Recv desc is ready to be used by the device */
+                       rxd->gen = ring->gen;
+                       vmxnet3_cmd_ring_adv_next2fill(ring);
+                       num_to_alloc--;
+               }
+
+               /* if needed, update the register */
+               if (unlikely(rq->shared->updateRxProd)) {
+                       VMXNET3_WRITE_BAR0_REG(adapter,
+                               rxprod_reg[ring_idx] + rq->qid * 8,
+                               ring->next2fill);
+                       rq->uncommitted[ring_idx] = 0;
                }
 
                vmxnet3_comp_ring_adv_next2proc(&rq->comp_ring);
@@ -1251,6 +1368,16 @@ vmxnet3_rq_cleanup(struct vmxnet3_rx_queue *rq,
 }
 
 
+static void
+vmxnet3_rq_cleanup_all(struct vmxnet3_adapter *adapter)
+{
+       int qidx;       /* rx queue index */
+
+       for (qidx = 0; qidx < adapter->num_rx_queues; ++qidx)
+               vmxnet3_rq_cleanup(adapter->rx_queue + qidx, adapter);
+}
+
+
 void vmxnet3_rq_destroy(struct vmxnet3_rx_queue *rq,
                        struct vmxnet3_adapter *adapter)
 {
@@ -1342,6 +1469,25 @@ vmxnet3_rq_init(struct vmxnet3_rx_queue *rq,
 
 
 static int
+vmxnet3_rq_init_all(struct vmxnet3_adapter *adapter)
+{
+       int i, err = 0;
+
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               err = vmxnet3_rq_init(&adapter->rx_queue[i], adapter);
+               if (unlikely(err)) {
+                       dev_err(&adapter->netdev->dev, "%s: failed to "
+                               "initialize rx queue%i\n",
+                               adapter->netdev->name, i);
+                       break;
+               }
+       }
+       return err;
+
+}
+
+
+static int
 vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
 {
        int i;
@@ -1371,13 +1517,12 @@ vmxnet3_rq_create(struct vmxnet3_rx_queue *rq, struct vmxnet3_adapter *adapter)
 
        sz = sizeof(struct vmxnet3_rx_buf_info) * (rq->rx_ring[0].size +
                                                   rq->rx_ring[1].size);
-       bi = kmalloc(sz, GFP_KERNEL);
+       bi = kzalloc(sz, GFP_KERNEL);
        if (!bi) {
                printk(KERN_ERR "%s: failed to allocate rx bufinfo\n",
                       adapter->netdev->name);
                goto err;
        }
-       memset(bi, 0, sz);
        rq->buf_info[0] = bi;
        rq->buf_info[1] = bi + rq->rx_ring[0].size;
 
@@ -1390,32 +1535,176 @@ err:
 
 
 static int
+vmxnet3_rq_create_all(struct vmxnet3_adapter *adapter)
+{
+       int i, err = 0;
+
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               err = vmxnet3_rq_create(&adapter->rx_queue[i], adapter);
+               if (unlikely(err)) {
+                       dev_err(&adapter->netdev->dev,
+                               "%s: failed to create rx queue%i\n",
+                               adapter->netdev->name, i);
+                       goto err_out;
+               }
+       }
+       return err;
+err_out:
+       vmxnet3_rq_destroy_all(adapter);
+       return err;
+
+}
+
+/* Multiple queue aware polling function for tx and rx */
+
+static int
 vmxnet3_do_poll(struct vmxnet3_adapter *adapter, int budget)
 {
+       int rcd_done = 0, i;
        if (unlikely(adapter->shared->ecr))
                vmxnet3_process_events(adapter);
+       for (i = 0; i < adapter->num_tx_queues; i++)
+               vmxnet3_tq_tx_complete(&adapter->tx_queue[i], adapter);
 
-       vmxnet3_tq_tx_complete(&adapter->tx_queue, adapter);
-       return vmxnet3_rq_rx_complete(&adapter->rx_queue, adapter, budget);
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               rcd_done += vmxnet3_rq_rx_complete(&adapter->rx_queue[i],
+                                                  adapter, budget);
+       return rcd_done;
 }
 
 
 static int
 vmxnet3_poll(struct napi_struct *napi, int budget)
 {
-       struct vmxnet3_adapter *adapter = container_of(napi,
-                                         struct vmxnet3_adapter, napi);
+       struct vmxnet3_rx_queue *rx_queue = container_of(napi,
+                                         struct vmxnet3_rx_queue, napi);
        int rxd_done;
 
-       rxd_done = vmxnet3_do_poll(adapter, budget);
+       rxd_done = vmxnet3_do_poll(rx_queue->adapter, budget);
 
        if (rxd_done < budget) {
                napi_complete(napi);
-               vmxnet3_enable_intr(adapter, 0);
+               vmxnet3_enable_all_intrs(rx_queue->adapter);
        }
        return rxd_done;
 }
 
+/*
+ * NAPI poll handler for a single rx queue (MSI-X mode, multiple rx queues).
+ * Returns the number of rx descriptors processed (the NAPI budget consumed).
+ */
+
+static int
+vmxnet3_poll_rx_only(struct napi_struct *napi, int budget)
+{
+       struct vmxnet3_rx_queue *rq = container_of(napi,
+                                               struct vmxnet3_rx_queue, napi);
+       struct vmxnet3_adapter *adapter = rq->adapter;
+       int rxd_done;
+
+       /* In BUDDYSHARE mode the tx queue with the same index shares this
+        * rx queue's interrupt, so process its tx completions here as well.
+        */
+       if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE) {
+               struct vmxnet3_tx_queue *tq =
+                               &adapter->tx_queue[rq - adapter->rx_queue];
+               vmxnet3_tq_tx_complete(tq, adapter);
+       }
+
+       rxd_done = vmxnet3_rq_rx_complete(rq, adapter, budget);
+
+       if (rxd_done < budget) {
+               napi_complete(napi);
+               vmxnet3_enable_intr(adapter, rq->comp_ring.intr_idx);
+       }
+       return rxd_done;
+}
+
+
+#ifdef CONFIG_PCI_MSI
+
+/*
+ * MSI-X handler for tx completions; data points to a struct vmxnet3_tx_queue.
+ * Completions are processed directly here. Always returns IRQ_HANDLED.
+ */
+
+static irqreturn_t
+vmxnet3_msix_tx(int irq, void *data)
+{
+       struct vmxnet3_tx_queue *tq = data;
+       struct vmxnet3_adapter *adapter = tq->adapter;
+
+       if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+               vmxnet3_disable_intr(adapter, tq->comp_ring.intr_idx);
+
+       /* TXSHARE: one irq serves all tx queues, so process every tx queue */
+       if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
+               int i;
+               for (i = 0; i < adapter->num_tx_queues; i++) {
+                       struct vmxnet3_tx_queue *txq = &adapter->tx_queue[i];
+                       vmxnet3_tq_tx_complete(txq, adapter);
+               }
+       } else {
+               vmxnet3_tq_tx_complete(tq, adapter);
+       }
+       vmxnet3_enable_intr(adapter, tq->comp_ring.intr_idx);
+
+       return IRQ_HANDLED;
+}
+
+
+/*
+ * MSI-X handler for rx completions; data points to a struct vmxnet3_rx_queue.
+ * Only schedules that queue's NAPI context. Always returns IRQ_HANDLED.
+ */
+
+static irqreturn_t
+vmxnet3_msix_rx(int irq, void *data)
+{
+       struct vmxnet3_rx_queue *rq = data;
+       struct vmxnet3_adapter *adapter = rq->adapter;
+
+       /* in VMXNET3_IMM_ACTIVE mask mode, disable this queue's intr here */
+       if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+               vmxnet3_disable_intr(adapter, rq->comp_ring.intr_idx);
+       napi_schedule(&rq->napi);
+
+       return IRQ_HANDLED;
+}
+
+/*
+ *----------------------------------------------------------------------------
+ *
+ * vmxnet3_msix_event --
+ *
+ *    MSI-X handler for the event vector: runs vmxnet3_process_events() if
+ *    shared->ecr is non-zero, then re-enables the vector. data is the netdev.
+ *
+ * Result:
+ *    IRQ_HANDLED, unconditionally
+ *----------------------------------------------------------------------------
+ */
+
+static irqreturn_t
+vmxnet3_msix_event(int irq, void *data)
+{
+       struct net_device *dev = data;
+       struct vmxnet3_adapter *adapter = netdev_priv(dev);
+
+       /* in VMXNET3_IMM_ACTIVE mask mode, disable the event intr here */
+       if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+               vmxnet3_disable_intr(adapter, adapter->intr.event_intr_idx);
+
+       if (adapter->shared->ecr)
+               vmxnet3_process_events(adapter);
+
+       vmxnet3_enable_intr(adapter, adapter->intr.event_intr_idx);
+
+       return IRQ_HANDLED;
+}
+
+#endif /* CONFIG_PCI_MSI */
+
 
 /* Interrupt handler for vmxnet3  */
 static irqreturn_t
@@ -1424,7 +1713,7 @@ vmxnet3_intr(int irq, void *dev_id)
        struct net_device *dev = dev_id;
        struct vmxnet3_adapter *adapter = netdev_priv(dev);
 
-       if (unlikely(adapter->intr.type == VMXNET3_IT_INTX)) {
+       if (adapter->intr.type == VMXNET3_IT_INTX) {
                u32 icr = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_ICR);
                if (unlikely(icr == 0))
                        /* not ours */
@@ -1434,77 +1723,144 @@ vmxnet3_intr(int irq, void *dev_id)
 
        /* disable intr if needed */
        if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
-               vmxnet3_disable_intr(adapter, 0);
+               vmxnet3_disable_all_intrs(adapter);
 
-       napi_schedule(&adapter->napi);
+       napi_schedule(&adapter->rx_queue[0].napi);
 
        return IRQ_HANDLED;
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
 
-
 /* netpoll callback. */
 static void
 vmxnet3_netpoll(struct net_device *netdev)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       int irq;
 
-#ifdef CONFIG_PCI_MSI
-       if (adapter->intr.type == VMXNET3_IT_MSIX)
-               irq = adapter->intr.msix_entries[0].vector;
-       else
-#endif
-               irq = adapter->pdev->irq;
+       if (adapter->intr.mask_mode == VMXNET3_IMM_ACTIVE)
+               vmxnet3_disable_all_intrs(adapter);
+
+       vmxnet3_do_poll(adapter, adapter->rx_queue[0].rx_ring[0].size);
+       vmxnet3_enable_all_intrs(adapter);
 
-       disable_irq(irq);
-       vmxnet3_intr(irq, netdev);
-       enable_irq(irq);
 }
-#endif
+#endif /* CONFIG_NET_POLL_CONTROLLER */
 
 static int
 vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
 {
-       int err;
+       struct vmxnet3_intr *intr = &adapter->intr;
+       int err = 0, i;
+       int vector = 0;
 
 #ifdef CONFIG_PCI_MSI
        if (adapter->intr.type == VMXNET3_IT_MSIX) {
-               /* we only use 1 MSI-X vector */
-               err = request_irq(adapter->intr.msix_entries[0].vector,
-                                 vmxnet3_intr, 0, adapter->netdev->name,
-                                 adapter->netdev);
-       } else if (adapter->intr.type == VMXNET3_IT_MSI) {
+               for (i = 0; i < adapter->num_tx_queues; i++) {
+                       if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
+                               sprintf(adapter->tx_queue[i].name, "%s-tx-%d",
+                                       adapter->netdev->name, vector);
+                               err = request_irq(
+                                             intr->msix_entries[vector].vector,
+                                             vmxnet3_msix_tx, 0,
+                                             adapter->tx_queue[i].name,
+                                             &adapter->tx_queue[i]);
+                       } else {
+                               sprintf(adapter->tx_queue[i].name, "%s-rxtx-%d",
+                                       adapter->netdev->name, vector);
+                       }
+                       if (err) {
+                               dev_err(&adapter->netdev->dev,
+                                       "Failed to request irq for MSIX, %s, "
+                                       "error %d\n",
+                                       adapter->tx_queue[i].name, err);
+                               return err;
+                       }
+
+                       /* Handle the case where only one MSI-X vector was
+                        * allocated for all tx queues */
+                       if (adapter->share_intr == VMXNET3_INTR_TXSHARE) {
+                               for (; i < adapter->num_tx_queues; i++)
+                                       adapter->tx_queue[i].comp_ring.intr_idx
+                                                               = vector;
+                               vector++;
+                               break;
+                       } else {
+                               adapter->tx_queue[i].comp_ring.intr_idx
+                                                               = vector++;
+                       }
+               }
+               if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE)
+                       vector = 0;
+
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE)
+                               sprintf(adapter->rx_queue[i].name, "%s-rx-%d",
+                                       adapter->netdev->name, vector);
+                       else
+                               sprintf(adapter->rx_queue[i].name, "%s-rxtx-%d",
+                                       adapter->netdev->name, vector);
+                       err = request_irq(intr->msix_entries[vector].vector,
+                                         vmxnet3_msix_rx, 0,
+                                         adapter->rx_queue[i].name,
+                                         &(adapter->rx_queue[i]));
+                       if (err) {
+                               printk(KERN_ERR "Failed to request irq for MSIX"
+                                      ", %s, error %d\n",
+                                      adapter->rx_queue[i].name, err);
+                               return err;
+                       }
+
+                       adapter->rx_queue[i].comp_ring.intr_idx = vector++;
+               }
+
+               sprintf(intr->event_msi_vector_name, "%s-event-%d",
+                       adapter->netdev->name, vector);
+               err = request_irq(intr->msix_entries[vector].vector,
+                                 vmxnet3_msix_event, 0,
+                                 intr->event_msi_vector_name, adapter->netdev);
+               intr->event_intr_idx = vector;
+
+       } else if (intr->type == VMXNET3_IT_MSI) {
+               adapter->num_rx_queues = 1;
                err = request_irq(adapter->pdev->irq, vmxnet3_intr, 0,
                                  adapter->netdev->name, adapter->netdev);
-       } else
+       } else {
 #endif
-       {
+               adapter->num_rx_queues = 1;
                err = request_irq(adapter->pdev->irq, vmxnet3_intr,
                                  IRQF_SHARED, adapter->netdev->name,
                                  adapter->netdev);
+#ifdef CONFIG_PCI_MSI
        }
-
-       if (err)
+#endif
+       intr->num_intrs = vector + 1;
+       if (err) {
                printk(KERN_ERR "Failed to request irq %s (intr type:%d), error"
-                      ":%d\n", adapter->netdev->name, adapter->intr.type, err);
+                      ":%d\n", adapter->netdev->name, intr->type, err);
+       } else {
+               /* Number of rx queues will not change after this */
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+                       rq->qid = i;
+                       rq->qid2 = i + adapter->num_rx_queues;
+               }
 
 
-       if (!err) {
-               int i;
-               /* init our intr settings */
-               for (i = 0; i < adapter->intr.num_intrs; i++)
-                       adapter->intr.mod_levels[i] = UPT1_IML_ADAPTIVE;
 
-               /* next setup intr index for all intr sources */
-               adapter->tx_queue.comp_ring.intr_idx = 0;
-               adapter->rx_queue.comp_ring.intr_idx = 0;
-               adapter->intr.event_intr_idx = 0;
+               /* init our intr settings */
+               for (i = 0; i < intr->num_intrs; i++)
+                       intr->mod_levels[i] = UPT1_IML_ADAPTIVE;
+               if (adapter->intr.type != VMXNET3_IT_MSIX) {
+                       adapter->intr.event_intr_idx = 0;
+                       for (i = 0; i < adapter->num_tx_queues; i++)
+                               adapter->tx_queue[i].comp_ring.intr_idx = 0;
+                       adapter->rx_queue[0].comp_ring.intr_idx = 0;
+               }
 
                printk(KERN_INFO "%s: intr type %u, mode %u, %u vectors "
-                      "allocated\n", adapter->netdev->name, adapter->intr.type,
-                      adapter->intr.mask_mode, adapter->intr.num_intrs);
+                      "allocated\n", adapter->netdev->name, intr->type,
+                      intr->mask_mode, intr->num_intrs);
        }
 
        return err;
@@ -1514,18 +1870,32 @@ vmxnet3_request_irqs(struct vmxnet3_adapter *adapter)
 static void
 vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
 {
-       BUG_ON(adapter->intr.type == VMXNET3_IT_AUTO ||
-              adapter->intr.num_intrs <= 0);
+       struct vmxnet3_intr *intr = &adapter->intr;
+       BUG_ON(intr->type == VMXNET3_IT_AUTO || intr->num_intrs <= 0);
 
-       switch (adapter->intr.type) {
+       switch (intr->type) {
 #ifdef CONFIG_PCI_MSI
        case VMXNET3_IT_MSIX:
        {
-               int i;
+               int i, vector = 0;
 
-               for (i = 0; i < adapter->intr.num_intrs; i++)
-                       free_irq(adapter->intr.msix_entries[i].vector,
-                                adapter->netdev);
+               if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE) {
+                       for (i = 0; i < adapter->num_tx_queues; i++) {
+                               free_irq(intr->msix_entries[vector++].vector,
+                                        &(adapter->tx_queue[i]));
+                               if (adapter->share_intr == VMXNET3_INTR_TXSHARE)
+                                       break;
+                       }
+               }
+
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       free_irq(intr->msix_entries[vector++].vector,
+                                &(adapter->rx_queue[i]));
+               }
+
+               free_irq(intr->msix_entries[vector].vector,
+                        adapter->netdev);
+               BUG_ON(vector >= intr->num_intrs);
                break;
        }
 #endif
@@ -1541,102 +1911,17 @@ vmxnet3_free_irqs(struct vmxnet3_adapter *adapter)
 }
 
 
-inline void set_flag_le16(__le16 *data, u16 flag)
-{
-       *data = cpu_to_le16(le16_to_cpu(*data) | flag);
-}
-
-inline void set_flag_le64(__le64 *data, u64 flag)
-{
-       *data = cpu_to_le64(le64_to_cpu(*data) | flag);
-}
-
-inline void reset_flag_le64(__le64 *data, u64 flag)
-{
-       *data = cpu_to_le64(le64_to_cpu(*data) & ~flag);
-}
-
-
 static void
-vmxnet3_vlan_rx_register(struct net_device *netdev, struct vlan_group *grp)
+vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
 {
-       struct vmxnet3_adapter *adapter = netdev_priv(netdev);
-       struct Vmxnet3_DriverShared *shared = adapter->shared;
        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+       u16 vid;
 
-       if (grp) {
-               /* add vlan rx stripping. */
-               if (adapter->netdev->features & NETIF_F_HW_VLAN_RX) {
-                       int i;
-                       struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
-                       adapter->vlan_grp = grp;
-
-                       /* update FEATURES to device */
-                       set_flag_le64(&devRead->misc.uptFeatures,
-                                     UPT1_F_RXVLAN);
-                       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                                              VMXNET3_CMD_UPDATE_FEATURE);
-                       /*
-                        *  Clear entire vfTable; then enable untagged pkts.
-                        *  Note: setting one entry in vfTable to non-zero turns
-                        *  on VLAN rx filtering.
-                        */
-                       for (i = 0; i < VMXNET3_VFT_SIZE; i++)
-                               vfTable[i] = 0;
-
-                       VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
-                       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-               } else {
-                       printk(KERN_ERR "%s: vlan_rx_register when device has "
-                              "no NETIF_F_HW_VLAN_RX\n", netdev->name);
-               }
-       } else {
-               /* remove vlan rx stripping. */
-               struct Vmxnet3_DSDevRead *devRead = &shared->devRead;
-               adapter->vlan_grp = NULL;
-
-               if (le64_to_cpu(devRead->misc.uptFeatures) & UPT1_F_RXVLAN) {
-                       int i;
-
-                       for (i = 0; i < VMXNET3_VFT_SIZE; i++) {
-                               /* clear entire vfTable; this also disables
-                                * VLAN rx filtering
-                                */
-                               vfTable[i] = 0;
-                       }
-                       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                                              VMXNET3_CMD_UPDATE_VLAN_FILTERS);
-
-                       /* update FEATURES to device */
-                       reset_flag_le64(&devRead->misc.uptFeatures,
-                                       UPT1_F_RXVLAN);
-                       VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
-                                              VMXNET3_CMD_UPDATE_FEATURE);
-               }
-       }
-}
-
-
-static void
-vmxnet3_restore_vlan(struct vmxnet3_adapter *adapter)
-{
-       if (adapter->vlan_grp) {
-               u16 vid;
-               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
-               bool activeVlan = false;
+       /* allow untagged pkts */
+       VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
 
-               for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
-                       if (vlan_group_get_device(adapter->vlan_grp, vid)) {
-                               VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
-                               activeVlan = true;
-                       }
-               }
-               if (activeVlan) {
-                       /* continue to allow untagged pkts */
-                       VMXNET3_SET_VFTABLE_ENTRY(vfTable, 0);
-               }
-       }
+       for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID)
+               VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
 }
 
 
@@ -1645,10 +1930,15 @@ vmxnet3_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+       unsigned long flags;
 
        VMXNET3_SET_VFTABLE_ENTRY(vfTable, vid);
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+
+       set_bit(vid, adapter->active_vlans);
 }
 
 
@@ -1657,10 +1947,15 @@ vmxnet3_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
        u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+       unsigned long flags;
 
        VMXNET3_CLEAR_VFTABLE_ENTRY(vfTable, vid);
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_VLAN_FILTERS);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+
+       clear_bit(vid, adapter->active_vlans);
 }
 
 
@@ -1668,22 +1963,19 @@ static u8 *
 vmxnet3_copy_mc(struct net_device *netdev)
 {
        u8 *buf = NULL;
-       u32 sz = netdev->mc_count * ETH_ALEN;
+       u32 sz = netdev_mc_count(netdev) * ETH_ALEN;
 
        /* struct Vmxnet3_RxFilterConf.mfTableLen is u16. */
        if (sz <= 0xffff) {
                /* We may be called with BH disabled */
                buf = kmalloc(sz, GFP_ATOMIC);
                if (buf) {
-                       int i;
-                       struct dev_mc_list *mc = netdev->mc_list;
+                       struct netdev_hw_addr *ha;
+                       int i = 0;
 
-                       for (i = 0; i < netdev->mc_count; i++) {
-                               BUG_ON(!mc);
-                               memcpy(buf + i * ETH_ALEN, mc->dmi_addr,
+                       netdev_for_each_mc_addr(ha, netdev)
+                               memcpy(buf + i++ * ETH_ALEN, ha->addr,
                                       ETH_ALEN);
-                               mc = mc->next;
-                       }
                }
        }
        return buf;
@@ -1694,13 +1986,20 @@ static void
 vmxnet3_set_mc(struct net_device *netdev)
 {
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+       unsigned long flags;
        struct Vmxnet3_RxFilterConf *rxConf =
                                        &adapter->shared->devRead.rxFilterConf;
        u8 *new_table = NULL;
        u32 new_mode = VMXNET3_RXM_UCAST;
 
-       if (netdev->flags & IFF_PROMISC)
+       if (netdev->flags & IFF_PROMISC) {
+               u32 *vfTable = adapter->shared->devRead.rxFilterConf.vfTable;
+               memset(vfTable, 0, VMXNET3_VFT_SIZE * sizeof(*vfTable));
+
                new_mode |= VMXNET3_RXM_PROMISC;
+       } else {
+               vmxnet3_restore_vlan(adapter);
+       }
 
        if (netdev->flags & IFF_BROADCAST)
                new_mode |= VMXNET3_RXM_BCAST;
@@ -1708,12 +2007,12 @@ vmxnet3_set_mc(struct net_device *netdev)
        if (netdev->flags & IFF_ALLMULTI)
                new_mode |= VMXNET3_RXM_ALL_MULTI;
        else
-               if (netdev->mc_count > 0) {
+               if (!netdev_mc_empty(netdev)) {
                        new_table = vmxnet3_copy_mc(netdev);
                        if (new_table) {
                                new_mode |= VMXNET3_RXM_MCAST;
                                rxConf->mfTableLen = cpu_to_le16(
-                                               netdev->mc_count * ETH_ALEN);
+                                       netdev_mc_count(netdev) * ETH_ALEN);
                                rxConf->mfTablePA = cpu_to_le64(virt_to_phys(
                                                    new_table));
                        } else {
@@ -1729,18 +2028,31 @@ vmxnet3_set_mc(struct net_device *netdev)
                rxConf->mfTablePA = 0;
        }
 
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        if (new_mode != rxConf->rxMode) {
                rxConf->rxMode = cpu_to_le32(new_mode);
                VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                                       VMXNET3_CMD_UPDATE_RX_MODE);
+               VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+                                      VMXNET3_CMD_UPDATE_VLAN_FILTERS);
        }
 
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_MAC_FILTERS);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 
        kfree(new_table);
 }
 
+void
+vmxnet3_rq_destroy_all(struct vmxnet3_adapter *adapter)
+{
+       int i;
+
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               vmxnet3_rq_destroy(&adapter->rx_queue[i], adapter);
+}
+
 
 /*
  *   Set up driver_shared based on settings in adapter.
@@ -1773,55 +2085,85 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
        devRead->misc.ddLen = cpu_to_le32(sizeof(struct vmxnet3_adapter));
 
        /* set up feature flags */
-       if (adapter->rxcsum)
-               set_flag_le64(&devRead->misc.uptFeatures, UPT1_F_RXCSUM);
+       if (adapter->netdev->features & NETIF_F_RXCSUM)
+               devRead->misc.uptFeatures |= UPT1_F_RXCSUM;
 
-       if (adapter->lro) {
-               set_flag_le64(&devRead->misc.uptFeatures, UPT1_F_LRO);
+       if (adapter->netdev->features & NETIF_F_LRO) {
+               devRead->misc.uptFeatures |= UPT1_F_LRO;
                devRead->misc.maxNumRxSG = cpu_to_le16(1 + MAX_SKB_FRAGS);
        }
-       if ((adapter->netdev->features & NETIF_F_HW_VLAN_RX)
-                       && adapter->vlan_grp) {
-               set_flag_le64(&devRead->misc.uptFeatures, UPT1_F_RXVLAN);
-       }
+       if (adapter->netdev->features & NETIF_F_HW_VLAN_RX)
+               devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
 
        devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
        devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
        devRead->misc.queueDescLen = cpu_to_le32(
-                                    sizeof(struct Vmxnet3_TxQueueDesc) +
-                                    sizeof(struct Vmxnet3_RxQueueDesc));
+               adapter->num_tx_queues * sizeof(struct Vmxnet3_TxQueueDesc) +
+               adapter->num_rx_queues * sizeof(struct Vmxnet3_RxQueueDesc));
 
        /* tx queue settings */
-       BUG_ON(adapter->tx_queue.tx_ring.base == NULL);
-
-       devRead->misc.numTxQueues = 1;
-       tqc = &adapter->tqd_start->conf;
-       tqc->txRingBasePA   = cpu_to_le64(adapter->tx_queue.tx_ring.basePA);
-       tqc->dataRingBasePA = cpu_to_le64(adapter->tx_queue.data_ring.basePA);
-       tqc->compRingBasePA = cpu_to_le64(adapter->tx_queue.comp_ring.basePA);
-       tqc->ddPA           = cpu_to_le64(virt_to_phys(
-                                               adapter->tx_queue.buf_info));
-       tqc->txRingSize     = cpu_to_le32(adapter->tx_queue.tx_ring.size);
-       tqc->dataRingSize   = cpu_to_le32(adapter->tx_queue.data_ring.size);
-       tqc->compRingSize   = cpu_to_le32(adapter->tx_queue.comp_ring.size);
-       tqc->ddLen          = cpu_to_le32(sizeof(struct vmxnet3_tx_buf_info) *
-                             tqc->txRingSize);
-       tqc->intrIdx        = adapter->tx_queue.comp_ring.intr_idx;
+       devRead->misc.numTxQueues =  adapter->num_tx_queues;
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
+               BUG_ON(adapter->tx_queue[i].tx_ring.base == NULL);
+               tqc = &adapter->tqd_start[i].conf;
+               tqc->txRingBasePA   = cpu_to_le64(tq->tx_ring.basePA);
+               tqc->dataRingBasePA = cpu_to_le64(tq->data_ring.basePA);
+               tqc->compRingBasePA = cpu_to_le64(tq->comp_ring.basePA);
+               tqc->ddPA           = cpu_to_le64(virt_to_phys(tq->buf_info));
+               tqc->txRingSize     = cpu_to_le32(tq->tx_ring.size);
+               tqc->dataRingSize   = cpu_to_le32(tq->data_ring.size);
+               tqc->compRingSize   = cpu_to_le32(tq->comp_ring.size);
+               tqc->ddLen          = cpu_to_le32(
+                                       sizeof(struct vmxnet3_tx_buf_info) *
+                                       tqc->txRingSize);
+               tqc->intrIdx        = tq->comp_ring.intr_idx;
+       }
 
        /* rx queue settings */
-       devRead->misc.numRxQueues = 1;
-       rqc = &adapter->rqd_start->conf;
-       rqc->rxRingBasePA[0] = cpu_to_le64(adapter->rx_queue.rx_ring[0].basePA);
-       rqc->rxRingBasePA[1] = cpu_to_le64(adapter->rx_queue.rx_ring[1].basePA);
-       rqc->compRingBasePA  = cpu_to_le64(adapter->rx_queue.comp_ring.basePA);
-       rqc->ddPA            = cpu_to_le64(virt_to_phys(
-                                               adapter->rx_queue.buf_info));
-       rqc->rxRingSize[0]   = cpu_to_le32(adapter->rx_queue.rx_ring[0].size);
-       rqc->rxRingSize[1]   = cpu_to_le32(adapter->rx_queue.rx_ring[1].size);
-       rqc->compRingSize    = cpu_to_le32(adapter->rx_queue.comp_ring.size);
-       rqc->ddLen           = cpu_to_le32(sizeof(struct vmxnet3_rx_buf_info) *
-                              (rqc->rxRingSize[0] + rqc->rxRingSize[1]));
-       rqc->intrIdx         = adapter->rx_queue.comp_ring.intr_idx;
+       devRead->misc.numRxQueues = adapter->num_rx_queues;
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+               rqc = &adapter->rqd_start[i].conf;
+               rqc->rxRingBasePA[0] = cpu_to_le64(rq->rx_ring[0].basePA);
+               rqc->rxRingBasePA[1] = cpu_to_le64(rq->rx_ring[1].basePA);
+               rqc->compRingBasePA  = cpu_to_le64(rq->comp_ring.basePA);
+               rqc->ddPA            = cpu_to_le64(virt_to_phys(
+                                                       rq->buf_info));
+               rqc->rxRingSize[0]   = cpu_to_le32(rq->rx_ring[0].size);
+               rqc->rxRingSize[1]   = cpu_to_le32(rq->rx_ring[1].size);
+               rqc->compRingSize    = cpu_to_le32(rq->comp_ring.size);
+               rqc->ddLen           = cpu_to_le32(
+                                       sizeof(struct vmxnet3_rx_buf_info) *
+                                       (rqc->rxRingSize[0] +
+                                        rqc->rxRingSize[1]));
+               rqc->intrIdx         = rq->comp_ring.intr_idx;
+       }
+
+#ifdef VMXNET3_RSS
+       memset(adapter->rss_conf, 0, sizeof(*adapter->rss_conf));
+
+       if (adapter->rss) {
+               struct UPT1_RSSConf *rssConf = adapter->rss_conf;
+               devRead->misc.uptFeatures |= UPT1_F_RSS;
+               devRead->misc.numRxQueues = adapter->num_rx_queues;
+               rssConf->hashType = UPT1_RSS_HASH_TYPE_TCP_IPV4 |
+                                   UPT1_RSS_HASH_TYPE_IPV4 |
+                                   UPT1_RSS_HASH_TYPE_TCP_IPV6 |
+                                   UPT1_RSS_HASH_TYPE_IPV6;
+               rssConf->hashFunc = UPT1_RSS_HASH_FUNC_TOEPLITZ;
+               rssConf->hashKeySize = UPT1_RSS_MAX_KEY_SIZE;
+               rssConf->indTableSize = VMXNET3_RSS_IND_TABLE_SIZE;
+               get_random_bytes(&rssConf->hashKey[0], rssConf->hashKeySize);
+               for (i = 0; i < rssConf->indTableSize; i++)
+                       rssConf->indTable[i] = i % adapter->num_rx_queues;
+
+               devRead->rssConfDesc.confVer = 1;
+               devRead->rssConfDesc.confLen = sizeof(*rssConf);
+               devRead->rssConfDesc.confPA  = virt_to_phys(rssConf);
+       }
+
+#endif /* VMXNET3_RSS */
 
        /* intr settings */
        devRead->intrConf.autoMask = adapter->intr.mask_mode ==
@@ -1831,10 +2173,13 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
                devRead->intrConf.modLevels[i] = adapter->intr.mod_levels[i];
 
        devRead->intrConf.eventIntrIdx = adapter->intr.event_intr_idx;
+       devRead->intrConf.intrCtrl |= cpu_to_le32(VMXNET3_IC_DISABLE_ALL);
 
        /* rx filter settings */
        devRead->rxFilterConf.rxMode = 0;
        vmxnet3_restore_vlan(adapter);
+       vmxnet3_write_mac_addr(adapter, adapter->netdev->dev_addr);
+
        /* the rest are already zeroed */
 }
 
@@ -1842,18 +2187,19 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 int
 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 {
-       int err;
+       int err, i;
        u32 ret;
+       unsigned long flags;
 
-       dev_dbg(&adapter->netdev->dev,
-               "%s: skb_buf_size %d, rx_buf_per_pkt %d, ring sizes"
-               " %u %u %u\n", adapter->netdev->name, adapter->skb_buf_size,
-               adapter->rx_buf_per_pkt, adapter->tx_queue.tx_ring.size,
-               adapter->rx_queue.rx_ring[0].size,
-               adapter->rx_queue.rx_ring[1].size);
-
-       vmxnet3_tq_init(&adapter->tx_queue, adapter);
-       err = vmxnet3_rq_init(&adapter->rx_queue, adapter);
+       dev_dbg(&adapter->netdev->dev, "%s: skb_buf_size %d, rx_buf_per_pkt %d,"
+               " ring sizes %u %u %u\n", adapter->netdev->name,
+               adapter->skb_buf_size, adapter->rx_buf_per_pkt,
+               adapter->tx_queue[0].tx_ring.size,
+               adapter->rx_queue[0].rx_ring[0].size,
+               adapter->rx_queue[0].rx_ring[1].size);
+
+       vmxnet3_tq_init_all(adapter);
+       err = vmxnet3_rq_init_all(adapter);
        if (err) {
                printk(KERN_ERR "Failed to init rx queue for %s: error %d\n",
                       adapter->netdev->name, err);
@@ -1873,9 +2219,11 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
                               adapter->shared_pa));
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_DSAH, VMXNET3_GET_ADDR_HI(
                               adapter->shared_pa));
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_ACTIVATE_DEV);
        ret = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 
        if (ret != 0) {
                printk(KERN_ERR "Failed to activate dev %s: error %u\n",
@@ -1883,10 +2231,15 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
                err = -EINVAL;
                goto activate_err;
        }
-       VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD,
-                              adapter->rx_queue.rx_ring[0].next2fill);
-       VMXNET3_WRITE_BAR0_REG(adapter, VMXNET3_REG_RXPROD2,
-                              adapter->rx_queue.rx_ring[1].next2fill);
+
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               VMXNET3_WRITE_BAR0_REG(adapter,
+                               VMXNET3_REG_RXPROD + i * VMXNET3_REG_ALIGN,
+                               adapter->rx_queue[i].rx_ring[0].next2fill);
+               VMXNET3_WRITE_BAR0_REG(adapter, (VMXNET3_REG_RXPROD2 +
+                               (i * VMXNET3_REG_ALIGN)),
+                               adapter->rx_queue[i].rx_ring[1].next2fill);
+       }
 
        /* Apply the rx filter settins last. */
        vmxnet3_set_mc(adapter->netdev);
@@ -1895,9 +2248,9 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
         * Check link state when first activating device. It will start the
         * tx queue if the link is up.
         */
-       vmxnet3_check_link(adapter);
-
-       napi_enable(&adapter->napi);
+       vmxnet3_check_link(adapter, true);
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               napi_enable(&adapter->rx_queue[i].napi);
        vmxnet3_enable_all_intrs(adapter);
        clear_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
        return 0;
@@ -1909,7 +2262,7 @@ activate_err:
 irq_err:
 rq_err:
        /* free up buffers we allocated */
-       vmxnet3_rq_cleanup(&adapter->rx_queue, adapter);
+       vmxnet3_rq_cleanup_all(adapter);
        return err;
 }
 
@@ -1917,28 +2270,36 @@ rq_err:
 void
 vmxnet3_reset_dev(struct vmxnet3_adapter *adapter)
 {
+       unsigned long flags;    /* irq state saved for cmd_lock */
+       spin_lock_irqsave(&adapter->cmd_lock, flags); /* around CMD write */
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD, VMXNET3_CMD_RESET_DEV);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 }
 
 
 int
 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter)
 {
+       int i;                  /* rx queue index */
+       unsigned long flags;    /* irq state saved for cmd_lock */
        if (test_and_set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state))
                return 0;
 
 
+       spin_lock_irqsave(&adapter->cmd_lock, flags); /* around CMD write */
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_QUIESCE_DEV);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
        vmxnet3_disable_all_intrs(adapter);
 
-       napi_disable(&adapter->napi);
+       for (i = 0; i < adapter->num_rx_queues; i++) /* one napi per rx queue */
+               napi_disable(&adapter->rx_queue[i].napi);
        netif_tx_disable(adapter->netdev);
        adapter->link_speed = 0;
        netif_carrier_off(adapter->netdev);
 
-       vmxnet3_tq_cleanup(&adapter->tx_queue, adapter);
-       vmxnet3_rq_cleanup(&adapter->rx_queue, adapter);
+       vmxnet3_tq_cleanup_all(adapter);
+       vmxnet3_rq_cleanup_all(adapter);
        vmxnet3_free_irqs(adapter);
        return 0;
 }
@@ -2060,7 +2421,9 @@ vmxnet3_free_pci_resources(struct vmxnet3_adapter *adapter)
 static void
 vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
 {
-       size_t sz;
+       size_t sz, i, ring0_size, ring1_size, comp_size;
+       struct vmxnet3_rx_queue *rq = &adapter->rx_queue[0];
+
 
        if (adapter->netdev->mtu <= VMXNET3_MAX_SKB_BUF_SIZE -
                                    VMXNET3_MAX_ETH_HDR_SIZE) {
@@ -2082,11 +2445,19 @@ vmxnet3_adjust_rx_ring_size(struct vmxnet3_adapter *adapter)
         * rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN
         */
        sz = adapter->rx_buf_per_pkt * VMXNET3_RING_SIZE_ALIGN;
-       adapter->rx_queue.rx_ring[0].size = (adapter->rx_queue.rx_ring[0].size +
-                                            sz - 1) / sz * sz;
-       adapter->rx_queue.rx_ring[0].size = min_t(u32,
-                                           adapter->rx_queue.rx_ring[0].size,
-                                           VMXNET3_RX_RING_MAX_SIZE / sz * sz);
+       ring0_size = adapter->rx_queue[0].rx_ring[0].size;
+       ring0_size = (ring0_size + sz - 1) / sz * sz;
+       ring0_size = min_t(u32, ring0_size, VMXNET3_RX_RING_MAX_SIZE /
+                          sz * sz);
+       ring1_size = adapter->rx_queue[0].rx_ring[1].size;
+       comp_size = ring0_size + ring1_size;
+
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               rq = &adapter->rx_queue[i];
+               rq->rx_ring[0].size = ring0_size;
+               rq->rx_ring[1].size = ring1_size;
+               rq->comp_ring.size = comp_size;
+       }
 }
 
 
@@ -2094,29 +2465,53 @@ int
 vmxnet3_create_queues(struct vmxnet3_adapter *adapter, u32 tx_ring_size,
                      u32 rx_ring_size, u32 rx_ring2_size)
 {
-       int err;
-
-       adapter->tx_queue.tx_ring.size   = tx_ring_size;
-       adapter->tx_queue.data_ring.size = tx_ring_size;
-       adapter->tx_queue.comp_ring.size = tx_ring_size;
-       adapter->tx_queue.shared = &adapter->tqd_start->ctrl;
-       adapter->tx_queue.stopped = true;
-       err = vmxnet3_tq_create(&adapter->tx_queue, adapter);
-       if (err)
-               return err;
+       int err = 0, i;
+
+       for (i = 0; i < adapter->num_tx_queues; i++) {
+               struct vmxnet3_tx_queue *tq = &adapter->tx_queue[i];
+               tq->tx_ring.size   = tx_ring_size;
+               tq->data_ring.size = tx_ring_size;
+               tq->comp_ring.size = tx_ring_size;
+               tq->shared = &adapter->tqd_start[i].ctrl;
+               tq->stopped = true;
+               tq->adapter = adapter;
+               tq->qid = i;
+               err = vmxnet3_tq_create(tq, adapter);
+               /*
+                * Too late to change num_tx_queues. We cannot do away with
+                * lesser number of queues than what we asked for
+                */
+               if (err)
+                       goto queue_err;
+       }
 
-       adapter->rx_queue.rx_ring[0].size = rx_ring_size;
-       adapter->rx_queue.rx_ring[1].size = rx_ring2_size;
+       adapter->rx_queue[0].rx_ring[0].size = rx_ring_size;
+       adapter->rx_queue[0].rx_ring[1].size = rx_ring2_size;
        vmxnet3_adjust_rx_ring_size(adapter);
-       adapter->rx_queue.comp_ring.size  = adapter->rx_queue.rx_ring[0].size +
-                                           adapter->rx_queue.rx_ring[1].size;
-       adapter->rx_queue.qid  = 0;
-       adapter->rx_queue.qid2 = 1;
-       adapter->rx_queue.shared = &adapter->rqd_start->ctrl;
-       err = vmxnet3_rq_create(&adapter->rx_queue, adapter);
-       if (err)
-               vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
-
+       for (i = 0; i < adapter->num_rx_queues; i++) {
+               struct vmxnet3_rx_queue *rq = &adapter->rx_queue[i];
+               /* qid and qid2 for rx queues will be assigned later when num
+                * of rx queues is finalized after allocating intrs */
+               rq->shared = &adapter->rqd_start[i].ctrl;
+               rq->adapter = adapter;
+               err = vmxnet3_rq_create(rq, adapter);
+               if (err) {
+                       if (i == 0) {
+                               printk(KERN_ERR "Could not allocate any rx"
+                                      "queues. Aborting.\n");
+                               goto queue_err;
+                       } else {
+                               printk(KERN_INFO "Number of rx queues changed "
+                                      "to : %d.\n", i);
+                               adapter->num_rx_queues = i;
+                               err = 0;
+                               break;
+                       }
+               }
+       }
+       return err;
+queue_err:
+       vmxnet3_tq_destroy_all(adapter);
        return err;
 }
 
@@ -2124,11 +2519,12 @@ static int
 vmxnet3_open(struct net_device *netdev)
 {
        struct vmxnet3_adapter *adapter;
-       int err;
+       int err, i;
 
        adapter = netdev_priv(netdev);
 
-       spin_lock_init(&adapter->tx_queue.tx_lock);
+       for (i = 0; i < adapter->num_tx_queues; i++)
+               spin_lock_init(&adapter->tx_queue[i].tx_lock);
 
        err = vmxnet3_create_queues(adapter, VMXNET3_DEF_TX_RING_SIZE,
                                    VMXNET3_DEF_RX_RING_SIZE,
@@ -2143,8 +2539,8 @@ vmxnet3_open(struct net_device *netdev)
        return 0;
 
 activate_err:
-       vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
-       vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
+       vmxnet3_rq_destroy_all(adapter);
+       vmxnet3_tq_destroy_all(adapter);
 queue_err:
        return err;
 }
@@ -2164,8 +2560,8 @@ vmxnet3_close(struct net_device *netdev)
 
        vmxnet3_quiesce_dev(adapter);
 
-       vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
-       vmxnet3_tq_destroy(&adapter->tx_queue, adapter);
+       vmxnet3_rq_destroy_all(adapter);
+       vmxnet3_tq_destroy_all(adapter);
 
        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
 
@@ -2177,6 +2573,8 @@ vmxnet3_close(struct net_device *netdev)
 void
 vmxnet3_force_close(struct vmxnet3_adapter *adapter)
 {
+       int i;
+
        /*
         * we must clear VMXNET3_STATE_BIT_RESETTING, otherwise
         * vmxnet3_close() will deadlock.
@@ -2184,7 +2582,8 @@ vmxnet3_force_close(struct vmxnet3_adapter *adapter)
        BUG_ON(test_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state));
 
        /* we need to enable NAPI, otherwise dev_close will deadlock */
-       napi_enable(&adapter->napi);
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               napi_enable(&adapter->rx_queue[i].napi);
        dev_close(adapter->netdev);
 }
 
@@ -2198,9 +2597,6 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
        if (new_mtu < VMXNET3_MIN_MTU || new_mtu > VMXNET3_MAX_MTU)
                return -EINVAL;
 
-       if (new_mtu > 1500 && !adapter->jumbo_frame)
-               return -EINVAL;
-
        netdev->mtu = new_mtu;
 
        /*
@@ -2215,14 +2611,11 @@ vmxnet3_change_mtu(struct net_device *netdev, int new_mtu)
                vmxnet3_reset_dev(adapter);
 
                /* we need to re-create the rx queue based on the new mtu */
-               vmxnet3_rq_destroy(&adapter->rx_queue, adapter);
+               vmxnet3_rq_destroy_all(adapter);
                vmxnet3_adjust_rx_ring_size(adapter);
-               adapter->rx_queue.comp_ring.size  =
-                                       adapter->rx_queue.rx_ring[0].size +
-                                       adapter->rx_queue.rx_ring[1].size;
-               err = vmxnet3_rq_create(&adapter->rx_queue, adapter);
+               err = vmxnet3_rq_create_all(adapter);
                if (err) {
-                       printk(KERN_ERR "%s: failed to re-create rx queue,"
+                       printk(KERN_ERR "%s: failed to re-create rx queues,"
                                " error %d. Closing it.\n", netdev->name, err);
                        goto out;
                }
@@ -2249,28 +2642,19 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
 {
        struct net_device *netdev = adapter->netdev;
 
-       netdev->features = NETIF_F_SG |
-               NETIF_F_HW_CSUM |
-               NETIF_F_HW_VLAN_TX |
-               NETIF_F_HW_VLAN_RX |
-               NETIF_F_HW_VLAN_FILTER |
-               NETIF_F_TSO |
-               NETIF_F_TSO6 |
+       netdev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM |
+               NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_TX |
+               NETIF_F_HW_VLAN_RX | NETIF_F_TSO | NETIF_F_TSO6 |
                NETIF_F_LRO;
-
-       printk(KERN_INFO "features: sg csum vlan jf tso tsoIPv6 lro");
-
-       adapter->rxcsum = true;
-       adapter->jumbo_frame = true;
-       adapter->lro = true;
-
-       if (dma64) {
+       if (dma64)
                netdev->features |= NETIF_F_HIGHDMA;
-               printk(" highDMA");
-       }
+       netdev->vlan_features = netdev->hw_features &
+                               ~(NETIF_F_HW_VLAN_TX | NETIF_F_HW_VLAN_RX);
+       netdev->features = netdev->hw_features | NETIF_F_HW_VLAN_FILTER;
 
-       netdev->vlan_features = netdev->features;
-       printk("\n");
+       netdev_info(adapter->netdev,
+               "features: sg csum vlan jf tso tsoIPv6 lro%s\n",
+               dma64 ? " highDMA" : "");
 }
 
 
@@ -2287,41 +2671,134 @@ vmxnet3_read_mac_addr(struct vmxnet3_adapter *adapter, u8 *mac)
        mac[5] = (tmp >> 8) & 0xff;
 }
 
+#ifdef CONFIG_PCI_MSI
+
+/*
+ * Enable MSI-X vectors, retrying with the minimum set if needed.
+ * Returns :
+ *     0 when all requested vectors were enabled,
+ *     VMXNET3_LINUX_MIN_MSIX_VECT when the request had to be reduced and only
+ *      the minimum number of vectors was enabled,
+ *     a negative error from pci_enable_msix(), or the number of vectors that
+ *      can be enabled when it is smaller than VMXNET3_LINUX_MIN_MSIX_VECT.
+ */
+
+static int
+vmxnet3_acquire_msix_vectors(struct vmxnet3_adapter *adapter,
+                            int vectors)
+{
+       int err = 0, vector_threshold, requested = vectors;
+       vector_threshold = VMXNET3_LINUX_MIN_MSIX_VECT;
+
+       while (vectors >= vector_threshold) {
+               err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
+                                     vectors);
+               if (!err) {
+                       adapter->intr.num_intrs = vectors;
+                       /* non-zero tells the caller we fell back */
+                       return vectors == requested ? 0 : vector_threshold;
+               } else if (err < 0) {
+                       printk(KERN_ERR "Failed to enable MSI-X for %s, error"
+                              " %d\n", adapter->netdev->name, err);
+                       return err;
+               } else if (err < vector_threshold) {
+                       break;
+               } else {
+                       /* Fewer vectors than requested are available: log the
+                        * failed count first, then retry with the minimum. */
+                       printk(KERN_ERR "Failed to enable %d MSI-X for %s, try"
+                              " %d instead\n", vectors, adapter->netdev->name,
+                              vector_threshold);
+                       vectors = vector_threshold;
+               }
+       }
+
+       printk(KERN_INFO "Number of MSI-X interrupts which can be allocated"
+              " are lower than min threshold required.\n");
+       return err;
+}
+
+
+#endif /* CONFIG_PCI_MSI */
 
 static void
 vmxnet3_alloc_intr_resources(struct vmxnet3_adapter *adapter)
 {
        u32 cfg;
+       unsigned long flags;
 
        /* intr settings */
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_GET_CONF_INTR);
        cfg = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
        adapter->intr.type = cfg & 0x3;
        adapter->intr.mask_mode = (cfg >> 2) & 0x3;
 
        if (adapter->intr.type == VMXNET3_IT_AUTO) {
-               int err;
+               adapter->intr.type = VMXNET3_IT_MSIX;
+       }
 
 #ifdef CONFIG_PCI_MSI
-               adapter->intr.msix_entries[0].entry = 0;
-               err = pci_enable_msix(adapter->pdev, adapter->intr.msix_entries,
-                                     VMXNET3_LINUX_MAX_MSIX_VECT);
-               if (!err) {
-                       adapter->intr.num_intrs = 1;
-                       adapter->intr.type = VMXNET3_IT_MSIX;
+       if (adapter->intr.type == VMXNET3_IT_MSIX) {
+               int vector, err = 0;
+
+               adapter->intr.num_intrs = (adapter->share_intr ==
+                                          VMXNET3_INTR_TXSHARE) ? 1 :
+                                          adapter->num_tx_queues;
+               adapter->intr.num_intrs += (adapter->share_intr ==
+                                          VMXNET3_INTR_BUDDYSHARE) ? 0 :
+                                          adapter->num_rx_queues;
+               adapter->intr.num_intrs += 1;           /* for link event */
+
+               adapter->intr.num_intrs = (adapter->intr.num_intrs >
+                                          VMXNET3_LINUX_MIN_MSIX_VECT
+                                          ? adapter->intr.num_intrs :
+                                          VMXNET3_LINUX_MIN_MSIX_VECT);
+
+               for (vector = 0; vector < adapter->intr.num_intrs; vector++)
+                       adapter->intr.msix_entries[vector].entry = vector;
+
+               err = vmxnet3_acquire_msix_vectors(adapter,
+                                                  adapter->intr.num_intrs);
+               /* If we cannot allocate one MSIx vector per queue
+                * then limit the number of rx queues to 1
+                */
+               if (err == VMXNET3_LINUX_MIN_MSIX_VECT) {
+                       if (adapter->share_intr != VMXNET3_INTR_BUDDYSHARE
+                           || adapter->num_rx_queues != 1) {
+                               adapter->share_intr = VMXNET3_INTR_TXSHARE;
+                               printk(KERN_ERR "Number of rx queues : 1\n");
+                               adapter->num_rx_queues = 1;
+                               adapter->intr.num_intrs =
+                                               VMXNET3_LINUX_MIN_MSIX_VECT;
+                       }
                        return;
                }
-#endif
+               if (!err)
+                       return;
+
+               /* If we cannot allocate MSIx vectors use only one rx queue */
+               printk(KERN_INFO "Failed to enable MSI-X for %s, error %d."
+                      "#rx queues : 1, try MSI\n", adapter->netdev->name, err);
 
+               adapter->intr.type = VMXNET3_IT_MSI;
+       }
+
+       if (adapter->intr.type == VMXNET3_IT_MSI) {
+               int err;
                err = pci_enable_msi(adapter->pdev);
                if (!err) {
+                       adapter->num_rx_queues = 1;
                        adapter->intr.num_intrs = 1;
-                       adapter->intr.type = VMXNET3_IT_MSI;
                        return;
                }
        }
+#endif /* CONFIG_PCI_MSI */
 
+       adapter->num_rx_queues = 1;
+       printk(KERN_INFO "Using INTx interrupt, #Rx queues: 1.\n");
        adapter->intr.type = VMXNET3_IT_INTX;
 
        /* INT-X related setting */
@@ -2349,6 +2826,7 @@ vmxnet3_tx_timeout(struct net_device *netdev)
 
        printk(KERN_ERR "%s: tx hang\n", adapter->netdev->name);
        schedule_work(&adapter->work);
+       netif_wake_queue(adapter->netdev);
 }
 
 
@@ -2364,6 +2842,7 @@ vmxnet3_reset_work(struct work_struct *data)
                return;
 
        /* if the device is closed, we must leave it alone */
+       rtnl_lock();
        if (netif_running(adapter->netdev)) {
                printk(KERN_INFO "%s: resetting\n", adapter->netdev->name);
                vmxnet3_quiesce_dev(adapter);
@@ -2372,6 +2851,7 @@ vmxnet3_reset_work(struct work_struct *data)
        } else {
                printk(KERN_INFO "%s: already closed\n", adapter->netdev->name);
        }
+       rtnl_unlock();
 
        clear_bit(VMXNET3_STATE_BIT_RESETTING, &adapter->state);
 }
@@ -2387,10 +2867,10 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                .ndo_start_xmit = vmxnet3_xmit_frame,
                .ndo_set_mac_address = vmxnet3_set_mac_addr,
                .ndo_change_mtu = vmxnet3_change_mtu,
-               .ndo_get_stats = vmxnet3_get_stats,
+               .ndo_set_features = vmxnet3_set_features,
+               .ndo_get_stats64 = vmxnet3_get_stats64,
                .ndo_tx_timeout = vmxnet3_tx_timeout,
                .ndo_set_multicast_list = vmxnet3_set_mc,
-               .ndo_vlan_rx_register = vmxnet3_vlan_rx_register,
                .ndo_vlan_rx_add_vid = vmxnet3_vlan_rx_add_vid,
                .ndo_vlan_rx_kill_vid = vmxnet3_vlan_rx_kill_vid,
 #ifdef CONFIG_NET_POLL_CONTROLLER
@@ -2403,8 +2883,32 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        struct net_device *netdev;
        struct vmxnet3_adapter *adapter;
        u8 mac[ETH_ALEN];
+       int size;
+       int num_tx_queues;
+       int num_rx_queues;
+
+       if (!pci_msi_enabled())
+               enable_mq = 0;
+
+#ifdef VMXNET3_RSS
+       if (enable_mq)
+               num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+                                   (int)num_online_cpus());
+       else
+#endif
+               num_rx_queues = 1;
+
+       if (enable_mq)
+               num_tx_queues = min(VMXNET3_DEVICE_MAX_TX_QUEUES,
+                                   (int)num_online_cpus());
+       else
+               num_tx_queues = 1;
+
+       netdev = alloc_etherdev_mq(sizeof(struct vmxnet3_adapter),
+                                  max(num_tx_queues, num_rx_queues));
+       printk(KERN_INFO "# of Tx queues : %d, # of Rx queues : %d\n",
+              num_tx_queues, num_rx_queues);
 
-       netdev = alloc_etherdev(sizeof(struct vmxnet3_adapter));
        if (!netdev) {
                printk(KERN_ERR "Failed to alloc ethernet device for adapter "
                        "%s\n", pci_name(pdev));
@@ -2416,6 +2920,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        adapter->netdev = netdev;
        adapter->pdev = pdev;
 
+       spin_lock_init(&adapter->cmd_lock);
        adapter->shared = pci_alloc_consistent(adapter->pdev,
                          sizeof(struct Vmxnet3_DriverShared),
                          &adapter->shared_pa);
@@ -2426,9 +2931,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                goto err_alloc_shared;
        }
 
-       adapter->tqd_start = pci_alloc_consistent(adapter->pdev,
-                            sizeof(struct Vmxnet3_TxQueueDesc) +
-                            sizeof(struct Vmxnet3_RxQueueDesc),
+       adapter->num_rx_queues = num_rx_queues;
+       adapter->num_tx_queues = num_tx_queues;
+
+       size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+       size += sizeof(struct Vmxnet3_RxQueueDesc) * adapter->num_rx_queues;
+       adapter->tqd_start = pci_alloc_consistent(adapter->pdev, size,
                             &adapter->queue_desc_pa);
 
        if (!adapter->tqd_start) {
@@ -2437,8 +2945,8 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                err = -ENOMEM;
                goto err_alloc_queue_desc;
        }
-       adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start
-                                                           + 1);
+       adapter->rqd_start = (struct Vmxnet3_RxQueueDesc *)(adapter->tqd_start +
+                                                       adapter->num_tx_queues);
 
        adapter->pm_conf = kmalloc(sizeof(struct Vmxnet3_PMConf), GFP_KERNEL);
        if (adapter->pm_conf == NULL) {
@@ -2448,6 +2956,17 @@ vmxnet3_probe_device(struct pci_dev *pdev,
                goto err_alloc_pm;
        }
 
+#ifdef VMXNET3_RSS
+
+       adapter->rss_conf = kmalloc(sizeof(struct UPT1_RSSConf), GFP_KERNEL);
+       if (adapter->rss_conf == NULL) {
+               printk(KERN_ERR "Failed to allocate memory for %s\n",
+                      pci_name(pdev));
+               err = -ENOMEM;
+               goto err_alloc_rss;
+       }
+#endif /* VMXNET3_RSS */
+
        err = vmxnet3_alloc_pci_resources(adapter, &dma64);
        if (err < 0)
                goto err_alloc_pci;
@@ -2475,18 +2994,48 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        vmxnet3_declare_features(adapter, dma64);
 
        adapter->dev_number = atomic_read(&devices_found);
+
+        adapter->share_intr = irq_share_mode;
+       if (adapter->share_intr == VMXNET3_INTR_BUDDYSHARE &&
+           adapter->num_tx_queues != adapter->num_rx_queues)
+               adapter->share_intr = VMXNET3_INTR_DONTSHARE;
+
        vmxnet3_alloc_intr_resources(adapter);
 
+#ifdef VMXNET3_RSS
+       if (adapter->num_rx_queues > 1 &&
+           adapter->intr.type == VMXNET3_IT_MSIX) {
+               adapter->rss = true;
+               printk(KERN_INFO "RSS is enabled.\n");
+       } else {
+               adapter->rss = false;
+       }
+#endif
+
        vmxnet3_read_mac_addr(adapter, mac);
        memcpy(netdev->dev_addr,  mac, netdev->addr_len);
 
        netdev->netdev_ops = &vmxnet3_netdev_ops;
-       netdev->watchdog_timeo = 5 * HZ;
        vmxnet3_set_ethtool_ops(netdev);
+       netdev->watchdog_timeo = 5 * HZ;
 
        INIT_WORK(&adapter->work, vmxnet3_reset_work);
 
-       netif_napi_add(netdev, &adapter->napi, vmxnet3_poll, 64);
+       if (adapter->intr.type == VMXNET3_IT_MSIX) {
+               int i;
+               for (i = 0; i < adapter->num_rx_queues; i++) {
+                       netif_napi_add(adapter->netdev,
+                                      &adapter->rx_queue[i].napi,
+                                      vmxnet3_poll_rx_only, 64);
+               }
+       } else {
+               netif_napi_add(adapter->netdev, &adapter->rx_queue[0].napi,
+                              vmxnet3_poll, 64);
+       }
+
+       netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
+       netif_set_real_num_rx_queues(adapter->netdev, adapter->num_rx_queues);
+
        SET_NETDEV_DEV(netdev, &pdev->dev);
        err = register_netdev(netdev);
 
@@ -2497,6 +3046,7 @@ vmxnet3_probe_device(struct pci_dev *pdev,
        }
 
        set_bit(VMXNET3_STATE_BIT_QUIESCED, &adapter->state);
+       vmxnet3_check_link(adapter, false);
        atomic_inc(&devices_found);
        return 0;
 
@@ -2505,11 +3055,14 @@ err_register:
 err_ver:
        vmxnet3_free_pci_resources(adapter);
 err_alloc_pci:
+#ifdef VMXNET3_RSS
+       kfree(adapter->rss_conf);
+err_alloc_rss:
+#endif
        kfree(adapter->pm_conf);
 err_alloc_pm:
-       pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) +
-                           sizeof(struct Vmxnet3_RxQueueDesc),
-                           adapter->tqd_start, adapter->queue_desc_pa);
+       pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
+                           adapter->queue_desc_pa);
 err_alloc_queue_desc:
        pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
                            adapter->shared, adapter->shared_pa);
@@ -2525,17 +3078,32 @@ vmxnet3_remove_device(struct pci_dev *pdev)
 {
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+       int size = 0;
+       int num_rx_queues;
+
+#ifdef VMXNET3_RSS
+       if (enable_mq)
+               num_rx_queues = min(VMXNET3_DEVICE_MAX_RX_QUEUES,
+                                   (int)num_online_cpus());
+       else
+#endif
+               num_rx_queues = 1;
 
-       flush_scheduled_work();
+       cancel_work_sync(&adapter->work);
 
        unregister_netdev(netdev);
 
        vmxnet3_free_intr_resources(adapter);
        vmxnet3_free_pci_resources(adapter);
+#ifdef VMXNET3_RSS
+       kfree(adapter->rss_conf);
+#endif
        kfree(adapter->pm_conf);
-       pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_TxQueueDesc) +
-                           sizeof(struct Vmxnet3_RxQueueDesc),
-                           adapter->tqd_start, adapter->queue_desc_pa);
+
+       size = sizeof(struct Vmxnet3_TxQueueDesc) * adapter->num_tx_queues;
+       size += sizeof(struct Vmxnet3_RxQueueDesc) * num_rx_queues;
+       pci_free_consistent(adapter->pdev, size, adapter->tqd_start,
+                           adapter->queue_desc_pa);
        pci_free_consistent(adapter->pdev, sizeof(struct Vmxnet3_DriverShared),
                            adapter->shared, adapter->shared_pa);
        free_netdev(netdev);
@@ -2556,17 +3124,21 @@ vmxnet3_suspend(struct device *device)
        u8 *arpreq;
        struct in_device *in_dev;
        struct in_ifaddr *ifa;
+       unsigned long flags;
        int i = 0;
 
        if (!netif_running(netdev))
                return 0;
 
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               napi_disable(&adapter->rx_queue[i].napi);
+
        vmxnet3_disable_all_intrs(adapter);
        vmxnet3_free_irqs(adapter);
        vmxnet3_free_intr_resources(adapter);
 
        netif_device_detach(netdev);
-       netif_stop_queue(netdev);
+       netif_tx_stop_all_queues(netdev);
 
        /* Create wake-up filters. */
        pmConf = adapter->pm_conf;
@@ -2578,7 +3150,7 @@ vmxnet3_suspend(struct device *device)
                memcpy(pmConf->filters[i].pattern, netdev->dev_addr, ETH_ALEN);
                pmConf->filters[i].mask[0] = 0x3F; /* LSB ETH_ALEN bits */
 
-               set_flag_le16(&pmConf->wakeUpEvents, VMXNET3_PM_WAKEUP_FILTER);
+               pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
                i++;
        }
 
@@ -2620,13 +3192,13 @@ vmxnet3_suspend(struct device *device)
                pmConf->filters[i].mask[5] = 0x03; /* IPv4 TIP */
                in_dev_put(in_dev);
 
-               set_flag_le16(&pmConf->wakeUpEvents, VMXNET3_PM_WAKEUP_FILTER);
+               pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_FILTER;
                i++;
        }
 
 skip_arp:
        if (adapter->wol & WAKE_MAGIC)
-               set_flag_le16(&pmConf->wakeUpEvents, VMXNET3_PM_WAKEUP_MAGIC);
+               pmConf->wakeUpEvents |= VMXNET3_PM_WAKEUP_MAGIC;
 
        pmConf->numFilters = i;
 
@@ -2636,8 +3208,10 @@ skip_arp:
        adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
                                                                 pmConf));
 
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_PMCFG);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 
        pci_save_state(pdev);
        pci_enable_wake(pdev, pci_choose_state(pdev, PMSG_SUSPEND),
@@ -2652,7 +3226,8 @@ skip_arp:
 static int
 vmxnet3_resume(struct device *device)
 {
-       int err;
+       int err, i = 0;
+       unsigned long flags;
        struct pci_dev *pdev = to_pci_dev(device);
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct vmxnet3_adapter *adapter = netdev_priv(netdev);
@@ -2668,7 +3243,7 @@ vmxnet3_resume(struct device *device)
        adapter->shared->devRead.pmConfDesc.confVer = cpu_to_le32(1);
        adapter->shared->devRead.pmConfDesc.confLen = cpu_to_le32(sizeof(
                                                                  *pmConf));
-       adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le32(virt_to_phys(
+       adapter->shared->devRead.pmConfDesc.confPA = cpu_to_le64(virt_to_phys(
                                                                 pmConf));
 
        netif_device_attach(netdev);
@@ -2680,16 +3255,20 @@ vmxnet3_resume(struct device *device)
 
        pci_enable_wake(pdev, PCI_D0, 0);
 
+       spin_lock_irqsave(&adapter->cmd_lock, flags);
        VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
                               VMXNET3_CMD_UPDATE_PMCFG);
+       spin_unlock_irqrestore(&adapter->cmd_lock, flags);
        vmxnet3_alloc_intr_resources(adapter);
        vmxnet3_request_irqs(adapter);
+       for (i = 0; i < adapter->num_rx_queues; i++)
+               napi_enable(&adapter->rx_queue[i].napi);
        vmxnet3_enable_all_intrs(adapter);
 
        return 0;
 }
 
-static struct dev_pm_ops vmxnet3_pm_ops = {
+static const struct dev_pm_ops vmxnet3_pm_ops = {
        .suspend = vmxnet3_suspend,
        .resume = vmxnet3_resume,
 };