bitops: rename for_each_bit() to for_each_set_bit()
diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
index fa0188e..61a7b43 100644
--- a/drivers/net/gianfar.c
+++ b/drivers/net/gianfar.c
@@ -171,71 +171,89 @@ static int gfar_init_bds(struct net_device *ndev)
        struct gfar_priv_rx_q *rx_queue = NULL;
        struct txbd8 *txbdp;
        struct rxbd8 *rxbdp;
-       int i;
-
-       tx_queue = priv->tx_queue;
-       rx_queue = priv->rx_queue;
+       int i, j;
 
-       /* Initialize some variables in our dev structure */
-       tx_queue->num_txbdfree = tx_queue->tx_ring_size;
-       tx_queue->dirty_tx = tx_queue->cur_tx = tx_queue->tx_bd_base;
-       rx_queue->cur_rx = rx_queue->rx_bd_base;
-       tx_queue->skb_curtx = tx_queue->skb_dirtytx = 0;
-       rx_queue->skb_currx = 0;
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               tx_queue = priv->tx_queue[i];
+               /* Initialize some variables in our dev structure */
+               tx_queue->num_txbdfree = tx_queue->tx_ring_size;
+               tx_queue->dirty_tx = tx_queue->tx_bd_base;
+               tx_queue->cur_tx = tx_queue->tx_bd_base;
+               tx_queue->skb_curtx = 0;
+               tx_queue->skb_dirtytx = 0;
+
+               /* Initialize Transmit Descriptor Ring */
+               txbdp = tx_queue->tx_bd_base;
+               for (j = 0; j < tx_queue->tx_ring_size; j++) {
+                       txbdp->lstatus = 0;
+                       txbdp->bufPtr = 0;
+                       txbdp++;
+               }
 
-       /* Initialize Transmit Descriptor Ring */
-       txbdp = tx_queue->tx_bd_base;
-       for (i = 0; i < tx_queue->tx_ring_size; i++) {
-               txbdp->lstatus = 0;
-               txbdp->bufPtr = 0;
-               txbdp++;
+               /* Set the last descriptor in the ring to indicate wrap */
+               txbdp--;
+               txbdp->status |= TXBD_WRAP;
        }
 
-       /* Set the last descriptor in the ring to indicate wrap */
-       txbdp--;
-       txbdp->status |= TXBD_WRAP;
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               rx_queue = priv->rx_queue[i];
+               rx_queue->cur_rx = rx_queue->rx_bd_base;
+               rx_queue->skb_currx = 0;
+               rxbdp = rx_queue->rx_bd_base;
 
-       rxbdp = rx_queue->rx_bd_base;
-       for (i = 0; i < rx_queue->rx_ring_size; i++) {
-               struct sk_buff *skb = rx_queue->rx_skbuff[i];
+               for (j = 0; j < rx_queue->rx_ring_size; j++) {
+                       struct sk_buff *skb = rx_queue->rx_skbuff[j];
 
-               if (skb) {
-                       gfar_init_rxbdp(rx_queue, rxbdp, rxbdp->bufPtr);
-               } else {
-                       skb = gfar_new_skb(ndev);
-                       if (!skb) {
-                               pr_err("%s: Can't allocate RX buffers\n",
-                                      ndev->name);
-                               return -ENOMEM;
+                       if (skb) {
+                               gfar_init_rxbdp(rx_queue, rxbdp,
+                                               rxbdp->bufPtr);
+                       } else {
+                               skb = gfar_new_skb(ndev);
+                               if (!skb) {
+                                       pr_err("%s: Can't allocate RX buffers\n",
+                                      ndev->name);
+                                       goto err_rxalloc_fail;
+                               }
+                               rx_queue->rx_skbuff[j] = skb;
+
+                               gfar_new_rxbdp(rx_queue, rxbdp, skb);
                        }
-                       rx_queue->rx_skbuff[i] = skb;
 
-                       gfar_new_rxbdp(rx_queue, rxbdp, skb);
+                       rxbdp++;
                }
 
-               rxbdp++;
        }
 
        return 0;
+
+err_rxalloc_fail:
+       free_skb_resources(priv);
+       return -ENOMEM;
 }
 
 static int gfar_alloc_skb_resources(struct net_device *ndev)
 {
        void *vaddr;
-       int i;
+       dma_addr_t addr;
+       int i, j, k;
        struct gfar_private *priv = netdev_priv(ndev);
        struct device *dev = &priv->ofdev->dev;
        struct gfar_priv_tx_q *tx_queue = NULL;
        struct gfar_priv_rx_q *rx_queue = NULL;
 
-       tx_queue = priv->tx_queue;
-       rx_queue = priv->rx_queue;
+       priv->total_tx_ring_size = 0;
+       for (i = 0; i < priv->num_tx_queues; i++)
+               priv->total_tx_ring_size += priv->tx_queue[i]->tx_ring_size;
+
+       priv->total_rx_ring_size = 0;
+       for (i = 0; i < priv->num_rx_queues; i++)
+               priv->total_rx_ring_size += priv->rx_queue[i]->rx_ring_size;
 
        /* Allocate memory for the buffer descriptors */
        vaddr = dma_alloc_coherent(dev,
-                       sizeof(*tx_queue->tx_bd_base) * tx_queue->tx_ring_size +
-                       sizeof(*rx_queue->rx_bd_base) * rx_queue->rx_ring_size,
-                       &tx_queue->tx_bd_dma_base, GFP_KERNEL);
+                       sizeof(struct txbd8) * priv->total_tx_ring_size +
+                       sizeof(struct rxbd8) * priv->total_rx_ring_size,
+                       &addr, GFP_KERNEL);
        if (!vaddr) {
                if (netif_msg_ifup(priv))
                        pr_err("%s: Could not allocate buffer descriptors!\n",
@@ -243,38 +261,57 @@ static int gfar_alloc_skb_resources(struct net_device *ndev)
                return -ENOMEM;
        }
 
-       tx_queue->tx_bd_base = vaddr;
-       tx_queue->dev = ndev;
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               tx_queue = priv->tx_queue[i];
+               tx_queue->tx_bd_base = (struct txbd8 *) vaddr;
+               tx_queue->tx_bd_dma_base = addr;
+               tx_queue->dev = ndev;
+               /* enet DMA only understands physical addresses */
+               addr  += sizeof(struct txbd8) * tx_queue->tx_ring_size;
+               vaddr += sizeof(struct txbd8) * tx_queue->tx_ring_size;
+       }
 
        /* Start the rx descriptor ring where the tx ring leaves off */
-       vaddr = vaddr + sizeof(*tx_queue->tx_bd_base) * tx_queue->tx_ring_size;
-       rx_queue->rx_bd_base = vaddr;
-       rx_queue->dev = ndev;
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               rx_queue = priv->rx_queue[i];
+               rx_queue->rx_bd_base = (struct rxbd8 *) vaddr;
+               rx_queue->rx_bd_dma_base = addr;
+               rx_queue->dev = ndev;
+               addr  += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+               vaddr += sizeof(struct rxbd8) * rx_queue->rx_ring_size;
+       }
 
        /* Setup the skbuff rings */
-       tx_queue->tx_skbuff = kmalloc(sizeof(*tx_queue->tx_skbuff) *
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               tx_queue = priv->tx_queue[i];
+               tx_queue->tx_skbuff = kmalloc(sizeof(*tx_queue->tx_skbuff) *
                                  tx_queue->tx_ring_size, GFP_KERNEL);
-       if (!tx_queue->tx_skbuff) {
-               if (netif_msg_ifup(priv))
-                       pr_err("%s: Could not allocate tx_skbuff\n",
-                              ndev->name);
-               goto cleanup;
-       }
+               if (!tx_queue->tx_skbuff) {
+                       if (netif_msg_ifup(priv))
+                               pr_err("%s: Could not allocate tx_skbuff\n",
+                                      ndev->name);
+                       goto cleanup;
+               }
 
-       for (i = 0; i < tx_queue->tx_ring_size; i++)
-               tx_queue->tx_skbuff[i] = NULL;
+               for (k = 0; k < tx_queue->tx_ring_size; k++)
+                       tx_queue->tx_skbuff[k] = NULL;
+       }
 
-       rx_queue->rx_skbuff = kmalloc(sizeof(*rx_queue->rx_skbuff) *
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               rx_queue = priv->rx_queue[i];
+               rx_queue->rx_skbuff = kmalloc(sizeof(*rx_queue->rx_skbuff) *
                                  rx_queue->rx_ring_size, GFP_KERNEL);
-       if (!rx_queue->rx_skbuff) {
-               if (netif_msg_ifup(priv))
-                       pr_err("%s: Could not allocate rx_skbuff\n",
-                              ndev->name);
-               goto cleanup;
-       }
 
-       for (i = 0; i < rx_queue->rx_ring_size; i++)
-               rx_queue->rx_skbuff[i] = NULL;
+               if (!rx_queue->rx_skbuff) {
+                       if (netif_msg_ifup(priv))
+                               pr_err("%s: Could not allocate rx_skbuff\n",
+                                      ndev->name);
+                       goto cleanup;
+               }
+
+               for (j = 0; j < rx_queue->rx_ring_size; j++)
+                       rx_queue->rx_skbuff[j] = NULL;
+       }
 
        if (gfar_init_bds(ndev))
                goto cleanup;
@@ -286,33 +323,44 @@ cleanup:
        return -ENOMEM;
 }
 
+static void gfar_init_tx_rx_base(struct gfar_private *priv)
+{
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
+       u32 __iomem *baddr;
+       int i;
+
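+       /* Note: the per-queue tbase/rbase registers appear to sit on
+        * 8-byte boundaries in the register map, which is why baddr
+        * advances by two u32 words per queue below. */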
+       baddr = &regs->tbase0;
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               gfar_write(baddr, priv->tx_queue[i]->tx_bd_dma_base);
+               baddr += 2;
+       }
+
+       baddr = &regs->rbase0;
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               gfar_write(baddr, priv->rx_queue[i]->rx_bd_dma_base);
+               baddr += 2;
+       }
+}
+
 static void gfar_init_mac(struct net_device *ndev)
 {
        struct gfar_private *priv = netdev_priv(ndev);
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        u32 rctrl = 0;
        u32 tctrl = 0;
        u32 attrs = 0;
 
-       tx_queue = priv->tx_queue;
-       rx_queue = priv->rx_queue;
-
-       /* enet DMA only understands physical addresses */
-       gfar_write(&regs->tbase0, tx_queue->tx_bd_dma_base);
-       gfar_write(&regs->rbase0, tx_queue->tx_bd_dma_base +
-                                 sizeof(*tx_queue->tx_bd_base) *
-                                 tx_queue->tx_ring_size);
+       /* write the tx/rx base registers */
+       gfar_init_tx_rx_base(priv);
 
        /* Configure the coalescing support */
-       gfar_write(&regs->txic, 0);
-       if (tx_queue->txcoalescing)
-               gfar_write(&regs->txic, tx_queue->txic);
+       gfar_configure_coalescing(priv, 0xFF, 0xFF);
 
-       gfar_write(&regs->rxic, 0);
-       if (rx_queue->rxcoalescing)
-               gfar_write(&regs->rxic, rx_queue->rxic);
+       if (priv->rx_filer_enable) {
+               rctrl |= RCTRL_FILREN;
+               /* Program the RIR0 reg with the required distribution */
+               gfar_write(&regs->rir0, DEFAULT_RIR0);
+       }
 
        if (priv->rx_csum_enable)
                rctrl |= RCTRL_CHECKSUMMING;
@@ -341,6 +389,8 @@ static void gfar_init_mac(struct net_device *ndev)
        if (ndev->features & NETIF_F_IP_CSUM)
                tctrl |= TCTRL_INIT_CSUM;
 
+       tctrl |= TCTRL_TXSCHED_PRIO;
+
        gfar_write(&regs->tctrl, tctrl);
 
        /* Set the extraction length and index */
@@ -366,6 +416,36 @@ static void gfar_init_mac(struct net_device *ndev)
        gfar_write(&regs->fifo_tx_starve_shutoff, priv->fifo_starve_off);
 }
 
+static struct net_device_stats *gfar_get_stats(struct net_device *dev)
+{
+       struct gfar_private *priv = netdev_priv(dev);
+       struct netdev_queue *txq;
+       unsigned long rx_packets = 0, rx_bytes = 0, rx_dropped = 0;
+       unsigned long tx_packets = 0, tx_bytes = 0;
+       int i = 0;
+
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               rx_packets += priv->rx_queue[i]->stats.rx_packets;
+               rx_bytes += priv->rx_queue[i]->stats.rx_bytes;
+               rx_dropped += priv->rx_queue[i]->stats.rx_dropped;
+       }
+
+       dev->stats.rx_packets = rx_packets;
+       dev->stats.rx_bytes = rx_bytes;
+       dev->stats.rx_dropped = rx_dropped;
+
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               txq = netdev_get_tx_queue(dev, i);
+               tx_bytes += txq->tx_bytes;
+               tx_packets += txq->tx_packets;
+       }
+
+       dev->stats.tx_bytes = tx_bytes;
+       dev->stats.tx_packets = tx_packets;
+
+       return &dev->stats;
+}
+
 static const struct net_device_ops gfar_netdev_ops = {
        .ndo_open = gfar_enet_open,
        .ndo_start_xmit = gfar_start_xmit,
@@ -374,6 +454,7 @@ static const struct net_device_ops gfar_netdev_ops = {
        .ndo_set_multicast_list = gfar_set_multi,
        .ndo_tx_timeout = gfar_timeout,
        .ndo_do_ioctl = gfar_ioctl,
+       .ndo_get_stats = gfar_get_stats,
        .ndo_vlan_rx_register = gfar_vlan_rx_register,
        .ndo_set_mac_address = eth_mac_addr,
        .ndo_validate_addr = eth_validate_addr,
@@ -382,54 +463,245 @@ static const struct net_device_ops gfar_netdev_ops = {
 #endif
 };
 
+unsigned int ftp_rqfpr[MAX_FILER_IDX + 1];
+unsigned int ftp_rqfcr[MAX_FILER_IDX + 1];
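+/* The two arrays above are a software shadow of the hardware filer
+ * table; cluster_entry_per_class() and gfar_init_filer_table() below
+ * update them alongside every gfar_write_filer() call, so the active
+ * rule set stays readable without touching the hardware. */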
+
+void lock_rx_qs(struct gfar_private *priv)
+{
+       int i;
+
+       for (i = 0; i < priv->num_rx_queues; i++)
+               spin_lock(&priv->rx_queue[i]->rxlock);
+}
+
+void lock_tx_qs(struct gfar_private *priv)
+{
+       int i;
+
+       for (i = 0; i < priv->num_tx_queues; i++)
+               spin_lock(&priv->tx_queue[i]->txlock);
+}
+
+void unlock_rx_qs(struct gfar_private *priv)
+{
+       int i;
+
+       for (i = 0; i < priv->num_rx_queues; i++)
+               spin_unlock(&priv->rx_queue[i]->rxlock);
+}
+
+void unlock_tx_qs(struct gfar_private *priv)
+{
+       int i;
+
+       for (i = 0; i < priv->num_tx_queues; i++)
+               spin_unlock(&priv->tx_queue[i]->txlock);
+}
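+
+/* Locking convention for the helpers above, as used at their call sites
+ * in this file (gfar_suspend(), gfar_resume(), stop_gfar()): disable
+ * local interrupts first, then take every tx lock followed by every rx
+ * lock, in ascending queue-index order; release in the reverse order
+ * (rx locks, then tx locks, then restore interrupts). */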
+
 /* Returns 1 if incoming frames use an FCB */
 static inline int gfar_uses_fcb(struct gfar_private *priv)
 {
        return priv->vlgrp || priv->rx_csum_enable;
 }
 
-static int gfar_of_init(struct net_device *dev)
+static void free_tx_pointers(struct gfar_private *priv)
+{
+       int i = 0;
+
+       for (i = 0; i < priv->num_tx_queues; i++)
+               kfree(priv->tx_queue[i]);
+}
+
+static void free_rx_pointers(struct gfar_private *priv)
+{
+       int i = 0;
+
+       for (i = 0; i < priv->num_rx_queues; i++)
+               kfree(priv->rx_queue[i]);
+}
+
+static void unmap_group_regs(struct gfar_private *priv)
+{
+       int i = 0;
+
+       for (i = 0; i < MAXGROUPS; i++)
+               if (priv->gfargrp[i].regs)
+                       iounmap(priv->gfargrp[i].regs);
+}
+
+static void disable_napi(struct gfar_private *priv)
+{
+       int i = 0;
+
+       for (i = 0; i < priv->num_grps; i++)
+               napi_disable(&priv->gfargrp[i].napi);
+}
+
+static void enable_napi(struct gfar_private *priv)
+{
+       int i = 0;
+
+       for (i = 0; i < priv->num_grps; i++)
+               napi_enable(&priv->gfargrp[i].napi);
+}
+
+static int gfar_parse_group(struct device_node *np,
+               struct gfar_private *priv, const char *model)
+{
+       u32 *queue_mask;
+       u64 addr, size;
+
+       addr = of_translate_address(np,
+                       of_get_address(np, 0, &size, NULL));
+       priv->gfargrp[priv->num_grps].regs = ioremap(addr, size);
+
+       if (!priv->gfargrp[priv->num_grps].regs)
+               return -ENOMEM;
+
+       priv->gfargrp[priv->num_grps].interruptTransmit =
+                       irq_of_parse_and_map(np, 0);
+
+       /* If we aren't the FEC we have multiple interrupts */
+       if (model && strcasecmp(model, "FEC")) {
+               priv->gfargrp[priv->num_grps].interruptReceive =
+                       irq_of_parse_and_map(np, 1);
+               priv->gfargrp[priv->num_grps].interruptError =
+                       irq_of_parse_and_map(np, 2);
+               if (priv->gfargrp[priv->num_grps].interruptTransmit < 0 ||
+                       priv->gfargrp[priv->num_grps].interruptReceive < 0 ||
+                       priv->gfargrp[priv->num_grps].interruptError < 0) {
+                       return -EINVAL;
+               }
+       }
+
+       priv->gfargrp[priv->num_grps].grp_id = priv->num_grps;
+       priv->gfargrp[priv->num_grps].priv = priv;
+       spin_lock_init(&priv->gfargrp[priv->num_grps].grplock);
+       if (priv->mode == MQ_MG_MODE) {
+               queue_mask = (u32 *)of_get_property(np,
+                                       "fsl,rx-bit-map", NULL);
+               priv->gfargrp[priv->num_grps].rx_bit_map =
+                       queue_mask ? *queue_mask : (DEFAULT_MAPPING >> priv->num_grps);
+               queue_mask = (u32 *)of_get_property(np,
+                                       "fsl,tx-bit-map", NULL);
+               priv->gfargrp[priv->num_grps].tx_bit_map =
+                       queue_mask ? *queue_mask : (DEFAULT_MAPPING >> priv->num_grps);
+       } else {
+               priv->gfargrp[priv->num_grps].rx_bit_map = 0xFF;
+               priv->gfargrp[priv->num_grps].tx_bit_map = 0xFF;
+       }
+       priv->num_grps++;
+
+       return 0;
+}
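+
+/* Illustrative device-tree sketch of the layout gfar_parse_group() and
+ * gfar_of_init() parse. The property names are the ones read in this
+ * file; the node names, unit addresses and cell values are hypothetical:
+ *
+ *     ethernet@24000 {
+ *             compatible = "fsl,etsec2";
+ *             fsl,num_tx_queues = <8>;
+ *             fsl,num_rx_queues = <8>;
+ *
+ *             queue-group@24000 {
+ *                     reg = <0x24000 0x1000>;
+ *                     interrupts = <29 2 30 2 34 2>;
+ *                     fsl,rx-bit-map = <0x0f>;
+ *                     fsl,tx-bit-map = <0x0f>;
+ *             };
+ *     };
+ *
+ * The three interrupt specifiers map to transmit, receive and error, in
+ * that order (irq_of_parse_and_map() indices 0-2 above). */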
+
+static int gfar_of_init(struct of_device *ofdev, struct net_device **pdev)
 {
        const char *model;
        const char *ctype;
        const void *mac_addr;
-       u64 addr, size;
-       int err = 0;
-       struct gfar_private *priv = netdev_priv(dev);
-       struct device_node *np = priv->node;
+       int err = 0, i;
+       struct net_device *dev = NULL;
+       struct gfar_private *priv = NULL;
+       struct device_node *np = ofdev->node;
+       struct device_node *child = NULL;
        const u32 *stash;
        const u32 *stash_len;
        const u32 *stash_idx;
+       unsigned int num_tx_qs, num_rx_qs;
+       u32 *tx_queues, *rx_queues;
 
        if (!np || !of_device_is_available(np))
                return -ENODEV;
 
-       /* get a pointer to the register memory */
-       addr = of_translate_address(np, of_get_address(np, 0, &size, NULL));
-       priv->gfargrp.regs = ioremap(addr, size);
+       /* parse the num of tx and rx queues */
+       tx_queues = (u32 *)of_get_property(np, "fsl,num_tx_queues", NULL);
+       num_tx_qs = tx_queues ? *tx_queues : 1;
+
+       if (num_tx_qs > MAX_TX_QS) {
+               printk(KERN_ERR "num_tx_qs(=%d) greater than MAX_TX_QS(=%d)\n",
+                               num_tx_qs, MAX_TX_QS);
+               printk(KERN_ERR "Cannot do alloc_etherdev, aborting\n");
+               return -EINVAL;
+       }
+
+       rx_queues = (u32 *)of_get_property(np, "fsl,num_rx_queues", NULL);
+       num_rx_qs = rx_queues ? *rx_queues : 1;
 
-       if (priv->gfargrp.regs == NULL)
+       if (num_rx_qs > MAX_RX_QS) {
+               printk(KERN_ERR "num_rx_qs(=%d) greater than MAX_RX_QS(=%d)\n",
+                               num_rx_qs, MAX_RX_QS);
+               printk(KERN_ERR "Cannot do alloc_etherdev, aborting\n");
+               return -EINVAL;
+       }
+
+       *pdev = alloc_etherdev_mq(sizeof(*priv), num_tx_qs);
+       dev = *pdev;
+       if (NULL == dev)
                return -ENOMEM;
 
-       priv->gfargrp.priv = priv; /* back pointer from group to priv */
-       priv->gfargrp.interruptTransmit = irq_of_parse_and_map(np, 0);
+       priv = netdev_priv(dev);
+       priv->node = ofdev->node;
+       priv->ndev = dev;
+
+       dev->num_tx_queues = num_tx_qs;
+       dev->real_num_tx_queues = num_tx_qs;
+       priv->num_tx_queues = num_tx_qs;
+       priv->num_rx_queues = num_rx_qs;
+       priv->num_grps = 0;
 
        model = of_get_property(np, "model", NULL);
 
-       /* If we aren't the FEC we have multiple interrupts */
-       if (model && strcasecmp(model, "FEC")) {
-               priv->gfargrp.interruptReceive = irq_of_parse_and_map(np, 1);
+       for (i = 0; i < MAXGROUPS; i++)
+               priv->gfargrp[i].regs = NULL;
 
-               priv->gfargrp.interruptError = irq_of_parse_and_map(np, 2);
+       /* Parse and initialize group specific information */
+       if (of_device_is_compatible(np, "fsl,etsec2")) {
+               priv->mode = MQ_MG_MODE;
+               for_each_child_of_node(np, child) {
+                       err = gfar_parse_group(child, priv, model);
+                       if (err)
+                               goto err_grp_init;
+               }
+       } else {
+               priv->mode = SQ_SG_MODE;
+               err = gfar_parse_group(np, priv, model);
+               if (err)
+                       goto err_grp_init;
+       }
+
+       for (i = 0; i < priv->num_tx_queues; i++)
+               priv->tx_queue[i] = NULL;
+       for (i = 0; i < priv->num_rx_queues; i++)
+               priv->rx_queue[i] = NULL;
+
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               priv->tx_queue[i] = kmalloc(sizeof(struct gfar_priv_tx_q),
+                               GFP_KERNEL);
+               if (!priv->tx_queue[i]) {
+                       err = -ENOMEM;
+                       goto tx_alloc_failed;
+               }
+               priv->tx_queue[i]->tx_skbuff = NULL;
+               priv->tx_queue[i]->qindex = i;
+               priv->tx_queue[i]->dev = dev;
+               spin_lock_init(&(priv->tx_queue[i]->txlock));
+       }
 
-               if (priv->gfargrp.interruptTransmit < 0 ||
-                               priv->gfargrp.interruptReceive < 0 ||
-                               priv->gfargrp.interruptError < 0) {
-                       err = -EINVAL;
-                       goto err_out;
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               priv->rx_queue[i] = kmalloc(sizeof(struct gfar_priv_rx_q),
+                               GFP_KERNEL);
+               if (!priv->rx_queue[i]) {
+                       err = -ENOMEM;
+                       goto rx_alloc_failed;
                }
+               priv->rx_queue[i]->rx_skbuff = NULL;
+               priv->rx_queue[i]->qindex = i;
+               priv->rx_queue[i]->dev = dev;
+               spin_lock_init(&(priv->rx_queue[i]->rxlock));
        }
 
        stash = of_get_property(np, "bd-stash", NULL);
 
        if (stash) {
@@ -490,8 +762,13 @@ static int gfar_of_init(struct net_device *dev)
 
        return 0;
 
-err_out:
-       iounmap(priv->gfargrp.regs);
+rx_alloc_failed:
+       free_rx_pointers(priv);
+tx_alloc_failed:
+       free_tx_pointers(priv);
+err_grp_init:
+       unmap_group_regs(priv);
+       free_netdev(dev);
        return err;
 }
 
@@ -509,6 +786,85 @@ static int gfar_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
        return phy_mii_ioctl(priv->phydev, if_mii(rq), cmd);
 }
 
+static unsigned int reverse_bitmap(unsigned int bit_map, unsigned int max_qs)
+{
+       unsigned int new_bit_map = 0;
+       int mask = 1 << (max_qs - 1), i;
+
+       for (i = 0; i < max_qs; i++) {
+               if (bit_map & mask)
+                       new_bit_map |= (1 << i);
+               mask >>= 1;
+       }
+       return new_bit_map;
+}
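+
+/* Worked example for reverse_bitmap(): with max_qs = 8, the MSB-first
+ * mask 0xC0 (queues 0 and 1) becomes 0x03, so for_each_set_bit() visits
+ * bits 0 and 1, i.e. the same two queues counted from the LSB end. */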
+
+static u32 cluster_entry_per_class(struct gfar_private *priv, u32 rqfar,
+                                  u32 class)
+{
+       u32 rqfpr = FPR_FILER_MASK;
+       u32 rqfcr = 0x0;
+
+       rqfar--;
+       rqfcr = RQFCR_CLE | RQFCR_PID_MASK | RQFCR_CMP_EXACT;
+       ftp_rqfpr[rqfar] = rqfpr;
+       ftp_rqfcr[rqfar] = rqfcr;
+       gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+       rqfar--;
+       rqfcr = RQFCR_CMP_NOMATCH;
+       ftp_rqfpr[rqfar] = rqfpr;
+       ftp_rqfcr[rqfar] = rqfcr;
+       gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+       rqfar--;
+       rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_PARSE | RQFCR_CLE | RQFCR_AND;
+       rqfpr = class;
+       ftp_rqfcr[rqfar] = rqfcr;
+       ftp_rqfpr[rqfar] = rqfpr;
+       gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+       rqfar--;
+       rqfcr = RQFCR_CMP_EXACT | RQFCR_PID_MASK | RQFCR_AND;
+       rqfpr = class;
+       ftp_rqfcr[rqfar] = rqfcr;
+       ftp_rqfpr[rqfar] = rqfpr;
+       gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+       return rqfar;
+}
+
+static void gfar_init_filer_table(struct gfar_private *priv)
+{
+       int i;
+       u32 rqfar = MAX_FILER_IDX;
+       u32 rqfcr = 0x0;
+       u32 rqfpr = FPR_FILER_MASK;
+
+       /* Default rule */
+       rqfcr = RQFCR_CMP_MATCH;
+       ftp_rqfcr[rqfar] = rqfcr;
+       ftp_rqfpr[rqfar] = rqfpr;
+       gfar_write_filer(priv, rqfar, rqfcr, rqfpr);
+
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6);
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_UDP);
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV6 | RQFPR_TCP);
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4);
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_UDP);
+       rqfar = cluster_entry_per_class(priv, rqfar, RQFPR_IPV4 | RQFPR_TCP);
+
+       /* cur_filer_idx indicates the first non-masked rule */
+       priv->cur_filer_idx = rqfar;
+
+       /* Rest are masked rules */
+       rqfcr = RQFCR_CMP_NOMATCH;
+       for (i = 0; i < rqfar; i++) {
+               ftp_rqfcr[i] = rqfcr;
+               ftp_rqfpr[i] = rqfpr;
+               gfar_write_filer(priv, i, rqfcr, rqfpr);
+       }
+}
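+
+/* Resulting table layout, summarizing the code above: entry
+ * MAX_FILER_IDX holds the default match rule; below it sit six
+ * four-entry clusters (IPv6 and IPv4, each plain, with UDP and with
+ * TCP); every remaining index down to 0 holds a masked no-match rule.
+ * cur_filer_idx is left pointing at the lowest non-masked entry. */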
+
 /* Set up the ethernet device structure, private data,
  * and anything else we need before we start */
 static int gfar_probe(struct of_device *ofdev,
@@ -518,14 +874,16 @@ static int gfar_probe(struct of_device *ofdev,
        struct net_device *dev = NULL;
        struct gfar_private *priv = NULL;
        struct gfar __iomem *regs = NULL;
-       int err = 0;
+       int err = 0, i, grp_idx = 0;
        int len_devname;
+       u32 rstat = 0, tstat = 0, rqueue = 0, tqueue = 0;
+       u32 isrg = 0;
+       u32 __iomem *baddr;
 
-       /* Create an ethernet device instance */
-       dev = alloc_etherdev(sizeof (*priv));
+       err = gfar_of_init(ofdev, &dev);
 
-       if (NULL == dev)
-               return -ENOMEM;
+       if (err)
+               return err;
 
        priv = netdev_priv(dev);
        priv->ndev = dev;
@@ -533,29 +891,11 @@ static int gfar_probe(struct of_device *ofdev,
        priv->node = ofdev->node;
        SET_NETDEV_DEV(dev, &ofdev->dev);
 
-       err = gfar_of_init(dev);
-
-       if (err)
-               goto regs_fail;
-
-       priv->tx_queue = (struct gfar_priv_tx_q *)kmalloc(
-                               sizeof (struct gfar_priv_tx_q), GFP_KERNEL);
-       if (!priv->tx_queue)
-               goto regs_fail;
-
-       priv->rx_queue = (struct gfar_priv_rx_q *)kmalloc(
-                               sizeof (struct gfar_priv_rx_q), GFP_KERNEL);
-       if (!priv->rx_queue)
-               goto rx_queue_fail;
-
-       spin_lock_init(&priv->tx_queue->txlock);
-       spin_lock_init(&priv->rx_queue->rxlock);
-       spin_lock_init(&priv->gfargrp.grplock);
        spin_lock_init(&priv->bflock);
        INIT_WORK(&priv->reset_task, gfar_reset_task);
 
        dev_set_drvdata(&ofdev->dev, priv);
-       regs = priv->gfargrp.regs;
+       regs = priv->gfargrp[0].regs;
 
        /* Stop the DMA engine now, in case it was running before */
        /* (The firmware could have used it, and left it running). */
@@ -587,8 +927,9 @@ static int gfar_probe(struct of_device *ofdev,
        dev->netdev_ops = &gfar_netdev_ops;
        dev->ethtool_ops = &gfar_ethtool_ops;
 
-       /* Register for napi ...NAPI is for each rx_queue */
-       netif_napi_add(dev, &priv->rx_queue->napi, gfar_poll, GFAR_DEV_WEIGHT);
+       /* Register NAPI: one instance per interrupt group */
+       for (i = 0; i < priv->num_grps; i++)
+               netif_napi_add(dev, &priv->gfargrp[i].napi, gfar_poll,
+                              GFAR_DEV_WEIGHT);
 
        if (priv->device_flags & FSL_GIANFAR_DEV_HAS_CSUM) {
                priv->rx_csum_enable = 1;
@@ -644,18 +985,74 @@ static int gfar_probe(struct of_device *ofdev,
        if (dev->features & NETIF_F_IP_CSUM)
                dev->hard_header_len += GMAC_FCB_LEN;
 
+       /* Program the isrg regs only if number of grps > 1 */
+       if (priv->num_grps > 1) {
+               baddr = &regs->isrg0;
+               for (i = 0; i < priv->num_grps; i++) {
+                       isrg |= (priv->gfargrp[i].rx_bit_map << ISRG_SHIFT_RX);
+                       isrg |= (priv->gfargrp[i].tx_bit_map << ISRG_SHIFT_TX);
+                       gfar_write(baddr, isrg);
+                       baddr++;
+                       isrg = 0x0;
+               }
+       }
+
+       /* Reverse the bit maps: the MSB of bit_map refers to queue 0, but
+        * for_each_set_bit() walks the mask from the LSB up, which would
+        * otherwise invert the queue numbering */
+       for (i = 0; i < priv->num_grps; i++) {
+               priv->gfargrp[i].tx_bit_map = reverse_bitmap(
+                               priv->gfargrp[i].tx_bit_map, MAX_TX_QS);
+               priv->gfargrp[i].rx_bit_map = reverse_bitmap(
+                               priv->gfargrp[i].rx_bit_map, MAX_RX_QS);
+       }
+
+       /* Calculate RSTAT, TSTAT, RQUEUE and TQUEUE values,
+        * also assign queues to groups */
+       for (grp_idx = 0; grp_idx < priv->num_grps; grp_idx++) {
+               priv->gfargrp[grp_idx].num_rx_queues = 0;
+               for_each_set_bit(i, &priv->gfargrp[grp_idx].rx_bit_map,
+                               priv->num_rx_queues) {
+                       priv->gfargrp[grp_idx].num_rx_queues++;
+                       priv->rx_queue[i]->grp = &priv->gfargrp[grp_idx];
+                       rstat |= (RSTAT_CLEAR_RHALT >> i);
+                       rqueue |= ((RQUEUE_EN0 | RQUEUE_EX0) >> i);
+               }
+               priv->gfargrp[grp_idx].num_tx_queues = 0;
+               for_each_set_bit(i, &priv->gfargrp[grp_idx].tx_bit_map,
+                               priv->num_tx_queues) {
+                       priv->gfargrp[grp_idx].num_tx_queues++;
+                       priv->tx_queue[i]->grp = &priv->gfargrp[grp_idx];
+                       tstat |= (TSTAT_CLEAR_THALT >> i);
+                       tqueue |= (TQUEUE_EN0 >> i);
+               }
+               priv->gfargrp[grp_idx].rstat = rstat;
+               priv->gfargrp[grp_idx].tstat = tstat;
+               rstat = tstat = 0;
+       }
+
+       gfar_write(&regs->rqueue, rqueue);
+       gfar_write(&regs->tqueue, tqueue);
+
        priv->rx_buffer_size = DEFAULT_RX_BUFFER_SIZE;
 
        /* Initializing some of the rx/tx queue level parameters */
-       priv->tx_queue->tx_ring_size = DEFAULT_TX_RING_SIZE;
-       priv->tx_queue->num_txbdfree = DEFAULT_TX_RING_SIZE;
-       priv->tx_queue->txcoalescing = DEFAULT_TX_COALESCE;
-       priv->tx_queue->txic = DEFAULT_TXIC;
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               priv->tx_queue[i]->tx_ring_size = DEFAULT_TX_RING_SIZE;
+               priv->tx_queue[i]->num_txbdfree = DEFAULT_TX_RING_SIZE;
+               priv->tx_queue[i]->txcoalescing = DEFAULT_TX_COALESCE;
+               priv->tx_queue[i]->txic = DEFAULT_TXIC;
+       }
 
-       priv->rx_queue->rx_ring_size = DEFAULT_RX_RING_SIZE;
-       priv->rx_queue->rxcoalescing = DEFAULT_RX_COALESCE;
-       priv->rx_queue->rxic = DEFAULT_RXIC;
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               priv->rx_queue[i]->rx_ring_size = DEFAULT_RX_RING_SIZE;
+               priv->rx_queue[i]->rxcoalescing = DEFAULT_RX_COALESCE;
+               priv->rx_queue[i]->rxic = DEFAULT_RXIC;
+       }
 
+       /* Enable the filer if using multiple RX queues */
+       if (priv->num_rx_queues > 1)
+               priv->rx_filer_enable = 1;
+
        /* Enable most messages by default */
        priv->msg_enable = (NETIF_MSG_IFUP << 1 ) - 1;
 
@@ -675,20 +1072,43 @@ static int gfar_probe(struct of_device *ofdev,
 
        /* fill out IRQ number and name fields */
        len_devname = strlen(dev->name);
-       strncpy(&priv->gfargrp.int_name_tx[0], dev->name, len_devname);
-       if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-               strncpy(&priv->gfargrp.int_name_tx[len_devname],
-                       "_tx", sizeof("_tx") + 1);
-
-               strncpy(&priv->gfargrp.int_name_rx[0], dev->name, len_devname);
-               strncpy(&priv->gfargrp.int_name_rx[len_devname],
-                       "_rx", sizeof("_rx") + 1);
+       for (i = 0; i < priv->num_grps; i++) {
+               strncpy(&priv->gfargrp[i].int_name_tx[0], dev->name,
+                               len_devname);
+               if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
+                       strncpy(&priv->gfargrp[i].int_name_tx[len_devname],
+                               "_g", sizeof("_g"));
+                       priv->gfargrp[i].int_name_tx[
+                               strlen(priv->gfargrp[i].int_name_tx)] = '0' + i;
+                       strncpy(&priv->gfargrp[i].int_name_tx[strlen(
+                               priv->gfargrp[i].int_name_tx)],
+                               "_tx", sizeof("_tx") + 1);
+
+                       strncpy(&priv->gfargrp[i].int_name_rx[0], dev->name,
+                                       len_devname);
+                       strncpy(&priv->gfargrp[i].int_name_rx[len_devname],
+                                       "_g", sizeof("_g"));
+                       priv->gfargrp[i].int_name_rx[
+                               strlen(priv->gfargrp[i].int_name_rx)] = '0' + i;
+                       strncpy(&priv->gfargrp[i].int_name_rx[strlen(
+                               priv->gfargrp[i].int_name_rx)],
+                               "_rx", sizeof("_rx") + 1);
+
+                       strncpy(&priv->gfargrp[i].int_name_er[0], dev->name,
+                                       len_devname);
+                       strncpy(&priv->gfargrp[i].int_name_er[len_devname],
+                               "_g", sizeof("_g"));
+                       priv->gfargrp[i].int_name_er[strlen(
+                                       priv->gfargrp[i].int_name_er)] = '0' + i;
+                       strncpy(&priv->gfargrp[i].int_name_er[strlen(
+                               priv->gfargrp[i].int_name_er)],
+                               "_er", sizeof("_er") + 1);
+               } else
+                       priv->gfargrp[i].int_name_tx[len_devname] = '\0';
+       }
 
-               strncpy(&priv->gfargrp.int_name_er[0], dev->name, len_devname);
-               strncpy(&priv->gfargrp.int_name_er[len_devname],
-                       "_er", sizeof("_er") + 1);
-       } else
-               priv->gfargrp.int_name_tx[len_devname] = '\0';
+       /* Initialize the filer table */
+       gfar_init_filer_table(priv);
 
        /* Create all the sysfs files */
        gfar_init_sysfs(dev);
@@ -699,17 +1119,19 @@ static int gfar_probe(struct of_device *ofdev,
        /* Even more device info helps when determining which kernel */
        /* provided which set of benchmarks. */
        printk(KERN_INFO "%s: Running with NAPI enabled\n", dev->name);
-       printk(KERN_INFO "%s: %d/%d RX/TX BD ring size\n",
-              dev->name, priv->rx_queue->rx_ring_size, priv->tx_queue->tx_ring_size);
+       for (i = 0; i < priv->num_rx_queues; i++)
+               printk(KERN_INFO "%s: RX BD ring size for Q[%d]: %d\n",
+                       dev->name, i, priv->rx_queue[i]->rx_ring_size);
+       for (i = 0; i < priv->num_tx_queues; i++)
+               printk(KERN_INFO "%s: TX BD ring size for Q[%d]: %d\n",
+                       dev->name, i, priv->tx_queue[i]->tx_ring_size);
 
        return 0;
 
 register_fail:
-       iounmap(priv->gfargrp.regs);
-       kfree(priv->rx_queue);
-rx_queue_fail:
-       kfree(priv->tx_queue);
-regs_fail:
+       unmap_group_regs(priv);
+       free_tx_pointers(priv);
+       free_rx_pointers(priv);
        if (priv->phy_node)
                of_node_put(priv->phy_node);
        if (priv->tbi_node)
@@ -730,7 +1152,7 @@ static int gfar_remove(struct of_device *ofdev)
        dev_set_drvdata(&ofdev->dev, NULL);
 
        unregister_netdev(priv->ndev);
-       iounmap(priv->gfargrp.regs);
+       unmap_group_regs(priv);
        free_netdev(priv->ndev);
 
        return 0;
@@ -742,9 +1164,7 @@ static int gfar_suspend(struct device *dev)
 {
        struct gfar_private *priv = dev_get_drvdata(dev);
        struct net_device *ndev = priv->ndev;
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
-       struct gfar __iomem *regs = NULL;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        unsigned long flags;
        u32 tempval;
 
@@ -752,13 +1172,12 @@ static int gfar_suspend(struct device *dev)
                (priv->device_flags & FSL_GIANFAR_DEV_HAS_MAGIC_PACKET);
 
        netif_device_detach(ndev);
-       tx_queue = priv->tx_queue;
-       rx_queue = priv->rx_queue;
-       regs = priv->gfargrp.regs;
 
        if (netif_running(ndev)) {
-               spin_lock_irqsave(&tx_queue->txlock, flags);
-               spin_lock(&rx_queue->rxlock);
+               local_irq_save(flags);
+               lock_tx_qs(priv);
+               lock_rx_qs(priv);
 
                gfar_halt_nodisable(ndev);
 
@@ -772,10 +1191,11 @@ static int gfar_suspend(struct device *dev)
 
                gfar_write(&regs->maccfg1, tempval);
 
-               spin_unlock(&rx_queue->rxlock);
-               spin_unlock_irqrestore(&tx_queue->txlock, flags);
+               unlock_rx_qs(priv);
+               unlock_tx_qs(priv);
+               local_irq_restore(flags);
 
-               napi_disable(&rx_queue->napi);
+               disable_napi(priv);
 
                if (magic_packet) {
                        /* Enable interrupt on Magic Packet */
@@ -797,9 +1217,7 @@ static int gfar_resume(struct device *dev)
 {
        struct gfar_private *priv = dev_get_drvdata(dev);
        struct net_device *ndev = priv->ndev;
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
-       struct gfar __iomem *regs = NULL;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        unsigned long flags;
        u32 tempval;
        int magic_packet = priv->wol_en &&
@@ -816,12 +1234,9 @@ static int gfar_resume(struct device *dev)
        /* Disable Magic Packet mode, in case something
         * else woke us up.
         */
-       rx_queue = priv->rx_queue;
-       tx_queue = priv->tx_queue;
-       regs = priv->gfargrp.regs;
-
-       spin_lock_irqsave(&tx_queue->txlock, flags);
-       spin_lock(&rx_queue->rxlock);
+       local_irq_save(flags);
+       lock_tx_qs(priv);
+       lock_rx_qs(priv);
 
        tempval = gfar_read(&regs->maccfg2);
        tempval &= ~MACCFG2_MPEN;
@@ -829,12 +1244,13 @@ static int gfar_resume(struct device *dev)
 
        gfar_start(ndev);
 
-       spin_unlock(&rx_queue->rxlock);
-       spin_unlock_irqrestore(&tx_queue->txlock, flags);
+       unlock_rx_qs(priv);
+       unlock_tx_qs(priv);
+       local_irq_restore(flags);
 
        netif_device_attach(ndev);
 
-       napi_enable(&rx_queue->napi);
+       enable_napi(priv);
 
        return 0;
 }
@@ -861,7 +1277,7 @@ static int gfar_restore(struct device *dev)
                phy_start(priv->phydev);
 
        netif_device_attach(ndev);
-       napi_enable(&priv->napi);
+       enable_napi(priv);
 
        return 0;
 }
@@ -900,10 +1316,9 @@ static int gfar_legacy_resume(struct of_device *ofdev)
 static phy_interface_t gfar_get_interface(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = NULL;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        u32 ecntrl;
 
-       regs = priv->gfargrp.regs;
        ecntrl = gfar_read(&regs->ecntrl);
 
        if (ecntrl & ECNTRL_SGMII_MODE)
@@ -1027,14 +1442,18 @@ static void init_registers(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
        struct gfar __iomem *regs = NULL;
+       int i = 0;
 
-       regs = priv->gfargrp.regs;
-       /* Clear IEVENT */
-       gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
+       for (i = 0; i < priv->num_grps; i++) {
+               regs = priv->gfargrp[i].regs;
+               /* Clear IEVENT */
+               gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
 
-       /* Initialize IMASK */
-       gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+               /* Initialize IMASK */
+               gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+       }
 
+       regs = priv->gfargrp[0].regs;
        /* Init hash registers to zero */
        gfar_write(&regs->igaddr0, 0);
        gfar_write(&regs->igaddr1, 0);
@@ -1075,15 +1494,20 @@ static void init_registers(struct net_device *dev)
 static void gfar_halt_nodisable(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = NULL;
        u32 tempval;
+       int i = 0;
 
-       /* Mask all interrupts */
-       gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+       for (i = 0; i < priv->num_grps; i++) {
+               regs = priv->gfargrp[i].regs;
+               /* Mask all interrupts */
+               gfar_write(&regs->imask, IMASK_INIT_CLEAR);
 
-       /* Clear all interrupts */
-       gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
+               /* Clear all interrupts */
+               gfar_write(&regs->ievent, IEVENT_INIT_CLEAR);
+       }
 
+       regs = priv->gfargrp[0].regs;
        /* Stop the DMA, and wait for it to stop */
        tempval = gfar_read(&regs->dmactrl);
        if ((tempval & (DMACTRL_GRS | DMACTRL_GTS))
@@ -1101,7 +1525,7 @@ static void gfar_halt_nodisable(struct net_device *dev)
 void gfar_halt(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        u32 tempval;
 
        gfar_halt_nodisable(dev);
@@ -1112,57 +1536,54 @@ void gfar_halt(struct net_device *dev)
        gfar_write(&regs->maccfg1, tempval);
 }
 
+static void free_grp_irqs(struct gfar_priv_grp *grp)
+{
+       free_irq(grp->interruptError, grp);
+       free_irq(grp->interruptTransmit, grp);
+       free_irq(grp->interruptReceive, grp);
+}
+
 void stop_gfar(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
        unsigned long flags;
+       int i;
 
        phy_stop(priv->phydev);
 
-       tx_queue = priv->tx_queue;
-       rx_queue = priv->rx_queue;
 
        /* Lock it down */
-       spin_lock_irqsave(&tx_queue->txlock, flags);
-       spin_lock(&rx_queue->rxlock);
+       local_irq_save(flags);
+       lock_tx_qs(priv);
+       lock_rx_qs(priv);
 
        gfar_halt(dev);
 
-       spin_unlock(&rx_queue->rxlock);
-       spin_unlock_irqrestore(&tx_queue->txlock, flags);
+       unlock_rx_qs(priv);
+       unlock_tx_qs(priv);
+       local_irq_restore(flags);
 
        /* Free the IRQs */
        if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-               free_irq(priv->gfargrp.interruptError, &priv->gfargrp);
-               free_irq(priv->gfargrp.interruptTransmit, &priv->gfargrp);
-               free_irq(priv->gfargrp.interruptReceive, &priv->gfargrp);
+               for (i = 0; i < priv->num_grps; i++)
+                       free_grp_irqs(&priv->gfargrp[i]);
        } else {
-               free_irq(priv->gfargrp.interruptTransmit, &priv->gfargrp);
+               for (i = 0; i < priv->num_grps; i++)
+                       free_irq(priv->gfargrp[i].interruptTransmit,
+                                       &priv->gfargrp[i]);
        }
 
        free_skb_resources(priv);
 }
 
-/* If there are any tx skbs or rx skbs still around, free them.
- * Then free tx_skbuff and rx_skbuff */
-static void free_skb_resources(struct gfar_private *priv)
+static void free_skb_tx_queue(struct gfar_priv_tx_q *tx_queue)
 {
-       struct device *dev = &priv->ofdev->dev;
-       struct rxbd8 *rxbdp;
        struct txbd8 *txbdp;
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
+       struct gfar_private *priv = netdev_priv(tx_queue->dev);
        int i, j;
 
-       /* Go through all the buffer descriptors and free their data buffers */
-       tx_queue = priv->tx_queue;
        txbdp = tx_queue->tx_bd_base;
 
-       if (!tx_queue->tx_skbuff)
-               goto skip_tx_skbuff;
-
        for (i = 0; i < tx_queue->tx_ring_size; i++) {
                if (!tx_queue->tx_skbuff[i])
                        continue;
@@ -1170,7 +1591,8 @@ static void free_skb_resources(struct gfar_private *priv)
                dma_unmap_single(&priv->ofdev->dev, txbdp->bufPtr,
                                txbdp->length, DMA_TO_DEVICE);
                txbdp->lstatus = 0;
-               for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags; j++) {
+               for (j = 0; j < skb_shinfo(tx_queue->tx_skbuff[i])->nr_frags;
+                               j++) {
                        txbdp++;
                        dma_unmap_page(&priv->ofdev->dev, txbdp->bufPtr,
                                        txbdp->length, DMA_TO_DEVICE);
@@ -1179,43 +1601,66 @@ static void free_skb_resources(struct gfar_private *priv)
                dev_kfree_skb_any(tx_queue->tx_skbuff[i]);
                tx_queue->tx_skbuff[i] = NULL;
        }
-
        kfree(tx_queue->tx_skbuff);
-skip_tx_skbuff:
+}
 
-       rx_queue = priv->rx_queue;
-       rxbdp = rx_queue->rx_bd_base;
+static void free_skb_rx_queue(struct gfar_priv_rx_q *rx_queue)
+{
+       struct rxbd8 *rxbdp;
+       struct gfar_private *priv = netdev_priv(rx_queue->dev);
+       int i;
 
-       if (!rx_queue->rx_skbuff)
-               goto skip_rx_skbuff;
+       rxbdp = rx_queue->rx_bd_base;
 
        for (i = 0; i < rx_queue->rx_ring_size; i++) {
                if (rx_queue->rx_skbuff[i]) {
-                       dma_unmap_single(&priv->ofdev->dev, rxbdp->bufPtr,
-                                        priv->rx_buffer_size,
+                       dma_unmap_single(&priv->ofdev->dev,
+                                       rxbdp->bufPtr, priv->rx_buffer_size,
                                        DMA_FROM_DEVICE);
                        dev_kfree_skb_any(rx_queue->rx_skbuff[i]);
                        rx_queue->rx_skbuff[i] = NULL;
                }
-
                rxbdp->lstatus = 0;
                rxbdp->bufPtr = 0;
                rxbdp++;
        }
-
        kfree(rx_queue->rx_skbuff);
-skip_rx_skbuff:
+}
+
+/* If there are any tx skbs or rx skbs still around, free them.
+ * Then free tx_skbuff and rx_skbuff */
+static void free_skb_resources(struct gfar_private *priv)
+{
+       struct gfar_priv_tx_q *tx_queue = NULL;
+       struct gfar_priv_rx_q *rx_queue = NULL;
+       int i;
 
-       dma_free_coherent(dev, sizeof(*txbdp) * tx_queue->tx_ring_size +
-                              sizeof(*rxbdp) * rx_queue->rx_ring_size,
-                         tx_queue->tx_bd_base, tx_queue->tx_bd_dma_base);
+       /* Go through all the buffer descriptors and free their data buffers */
+       for (i = 0; i < priv->num_tx_queues; i++) {
+               tx_queue = priv->tx_queue[i];
+               if (tx_queue->tx_skbuff)
+                       free_skb_tx_queue(tx_queue);
+       }
+
+       for (i = 0; i < priv->num_rx_queues; i++) {
+               rx_queue = priv->rx_queue[i];
+               if (rx_queue->rx_skbuff)
+                       free_skb_rx_queue(rx_queue);
+       }
+
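+       /* All BD rings live in the single coherent allocation made in
+        * gfar_alloc_skb_resources(), anchored at the first tx queue's
+        * base address, so this one call frees every ring. */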
+       dma_free_coherent(&priv->ofdev->dev,
+                       sizeof(struct txbd8) * priv->total_tx_ring_size +
+                       sizeof(struct rxbd8) * priv->total_rx_ring_size,
+                       priv->tx_queue[0]->tx_bd_base,
+                       priv->tx_queue[0]->tx_bd_dma_base);
 }
 
 void gfar_start(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        u32 tempval;
+       int i = 0;
 
        /* Enable Rx and Tx in MACCFG1 */
        tempval = gfar_read(&regs->maccfg1);
@@ -1232,92 +1677,149 @@ void gfar_start(struct net_device *dev)
        tempval &= ~(DMACTRL_GRS | DMACTRL_GTS);
        gfar_write(&regs->dmactrl, tempval);
 
-       /* Clear THLT/RHLT, so that the DMA starts polling now */
-       gfar_write(&regs->tstat, TSTAT_CLEAR_THALT);
-       gfar_write(&regs->rstat, RSTAT_CLEAR_RHALT);
-
-       /* Unmask the interrupts we look for */
-       gfar_write(&regs->imask, IMASK_DEFAULT);
+       for (i = 0; i < priv->num_grps; i++) {
+               regs = priv->gfargrp[i].regs;
+               /* Clear THLT/RHLT, so that the DMA starts polling now */
+               gfar_write(&regs->tstat, priv->gfargrp[i].tstat);
+               gfar_write(&regs->rstat, priv->gfargrp[i].rstat);
+               /* Unmask the interrupts we look for */
+               gfar_write(&regs->imask, IMASK_DEFAULT);
+       }
 
        dev->trans_start = jiffies;
 }
 
-/* Bring the controller up and running */
-int startup_gfar(struct net_device *ndev)
+void gfar_configure_coalescing(struct gfar_private *priv,
+       unsigned long tx_mask, unsigned long rx_mask)
 {
-       struct gfar_private *priv = netdev_priv(ndev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
-       int err;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
+       u32 __iomem *baddr;
+       int i = 0;
 
-       gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+       /* Backward-compatible case: even with multiple queues enabled,
+        * there is only a single pair of legacy registers to program
+        */
+       gfar_write(&regs->txic, 0);
+       if (likely(priv->tx_queue[0]->txcoalescing))
+               gfar_write(&regs->txic, priv->tx_queue[0]->txic);
 
-       err = gfar_alloc_skb_resources(ndev);
-       if (err)
-               return err;
+       gfar_write(&regs->rxic, 0);
+       if (unlikely(priv->rx_queue[0]->rxcoalescing))
+               gfar_write(&regs->rxic, priv->rx_queue[0]->rxic);
+
+       if (priv->mode == MQ_MG_MODE) {
+               baddr = &regs->txic0;
+               for_each_set_bit(i, &tx_mask, priv->num_tx_queues) {
+                       if (likely(priv->tx_queue[i]->txcoalescing)) {
+                               gfar_write(baddr + i, 0);
+                               gfar_write(baddr + i, priv->tx_queue[i]->txic);
+                       }
+               }
 
-       gfar_init_mac(ndev);
+               baddr = &regs->rxic0;
+               for_each_set_bit(i, &rx_mask, priv->num_rx_queues) {
+                       if (likely(priv->rx_queue[i]->rxcoalescing)) {
+                               gfar_write(baddr + i, 0);
+                               gfar_write(baddr + i, priv->rx_queue[i]->rxic);
+                       }
+               }
+       }
+}
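+
+/* Both callers in this file (gfar_init_mac() and startup_gfar()) pass
+ * 0xFF for the two masks, i.e. "program every queue"; in MQ_MG_MODE the
+ * per-queue txic0/rxic0 register blocks are written in addition to the
+ * legacy txic/rxic pair. */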
+
+static int register_grp_irqs(struct gfar_priv_grp *grp)
+{
+       struct gfar_private *priv = grp->priv;
+       struct net_device *dev = priv->ndev;
+       int err;
 
        /* If the device has multiple interrupts, register for
         * them.  Otherwise, only register for the one */
        if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
                /* Install our interrupt handlers for Error,
                 * Transmit, and Receive */
-               err = request_irq(priv->gfargrp.interruptError, gfar_error, 0,
-                                 priv->gfargrp.int_name_er, &priv->gfargrp);
-               if (err) {
+               if ((err = request_irq(grp->interruptError, gfar_error, 0,
+                               grp->int_name_er, grp)) < 0) {
                        if (netif_msg_intr(priv))
-                               pr_err("%s: Can't get IRQ %d\n", ndev->name,
-                                      priv->gfargrp.interruptError);
-                       goto err_irq_fail;
+                               printk(KERN_ERR "%s: Can't get IRQ %d\n",
+                                       dev->name, grp->interruptError);
+                       goto err_irq_fail;
                }
 
-               err = request_irq(priv->gfargrp.interruptTransmit,
-                                       gfar_transmit, 0,
-                                       priv->gfargrp.int_name_tx,
-                                       &priv->gfargrp);
-               if (err) {
+               if ((err = request_irq(grp->interruptTransmit, gfar_transmit,
+                               0, grp->int_name_tx, grp)) < 0) {
                        if (netif_msg_intr(priv))
-                               pr_err("%s: Can't get IRQ %d\n", ndev->name,
-                                      priv->gfargrp.interruptTransmit);
+                               printk(KERN_ERR "%s: Can't get IRQ %d\n",
+                                       dev->name, grp->interruptTransmit);
                        goto tx_irq_fail;
                }
 
-               err = request_irq(priv->gfargrp.interruptReceive,
-                                       gfar_receive, 0,
-                                       priv->gfargrp.int_name_rx,
-                                       &priv->gfargrp);
-               if (err) {
+               if ((err = request_irq(grp->interruptReceive, gfar_receive, 0,
+                               grp->int_name_rx, grp)) < 0) {
                        if (netif_msg_intr(priv))
-                               pr_err("%s: Can't get IRQ %d (receive0)\n",
-                                       ndev->name,
-                                       priv->gfargrp.interruptReceive);
+                               printk(KERN_ERR "%s: Can't get IRQ %d\n",
+                                       dev->name, grp->interruptReceive);
                        goto rx_irq_fail;
                }
        } else {
-               err = request_irq(priv->gfargrp.interruptTransmit,
-                                       gfar_interrupt, 0,
-                                       priv->gfargrp.int_name_tx,
-                                       &priv->gfargrp);
-               if (err) {
+               if ((err = request_irq(grp->interruptTransmit, gfar_interrupt, 0,
+                               grp->int_name_tx, grp)) < 0) {
                        if (netif_msg_intr(priv))
-                               pr_err("%s: Can't get IRQ %d\n", ndev->name,
-                                      priv->gfargrp.interruptTransmit);
+                               printk(KERN_ERR "%s: Can't get IRQ %d\n",
+                                       dev->name, grp->interruptTransmit);
                        goto err_irq_fail;
                }
        }
 
+       return 0;
+
+rx_irq_fail:
+       free_irq(grp->interruptTransmit, grp);
+tx_irq_fail:
+       free_irq(grp->interruptError, grp);
+err_irq_fail:
+       return err;
+}
+
+/* Bring the controller up and running */
+int startup_gfar(struct net_device *ndev)
+{
+       struct gfar_private *priv = netdev_priv(ndev);
+       struct gfar __iomem *regs = NULL;
+       int err, i, j;
+
+       for (i = 0; i < priv->num_grps; i++) {
+               regs = priv->gfargrp[i].regs;
+               gfar_write(&regs->imask, IMASK_INIT_CLEAR);
+       }
+
+       regs = priv->gfargrp[0].regs;
+       err = gfar_alloc_skb_resources(ndev);
+       if (err)
+               return err;
+
+       gfar_init_mac(ndev);
+
+       for (i = 0; i < priv->num_grps; i++) {
+               err = register_grp_irqs(&priv->gfargrp[i]);
+               if (err) {
+                       for (j = 0; j < i; j++)
+                               free_grp_irqs(&priv->gfargrp[j]);
+                       goto irq_fail;
+               }
+       }
+
        /* Start the controller */
        gfar_start(ndev);
 
        phy_start(priv->phydev);
 
+       gfar_configure_coalescing(priv, 0xFF, 0xFF);
+
        return 0;
 
-rx_irq_fail:
-       free_irq(priv->gfargrp.interruptTransmit, &priv->gfargrp);
-tx_irq_fail:
-       free_irq(priv->gfargrp.interruptError, &priv->gfargrp);
-err_irq_fail:
+irq_fail:
        free_skb_resources(priv);
        return err;
 }
@@ -1329,7 +1831,7 @@ static int gfar_enet_open(struct net_device *dev)
        struct gfar_private *priv = netdev_priv(dev);
        int err;
 
-       napi_enable(&priv->rx_queue->napi);
+       enable_napi(priv);
 
        skb_queue_head_init(&priv->rx_recycle);
 
@@ -1341,17 +1843,17 @@ static int gfar_enet_open(struct net_device *dev)
        err = init_phy(dev);
 
        if (err) {
-               napi_disable(&priv->rx_queue->napi);
+               disable_napi(priv);
                return err;
        }
 
        err = startup_gfar(dev);
        if (err) {
-               napi_disable(&priv->rx_queue->napi);
+               disable_napi(priv);
                return err;
        }
 
-       netif_start_queue(dev);
+       netif_tx_start_all_queues(dev);
 
        device_set_wakeup_enable(&dev->dev, priv->wol_en);
 
@@ -1421,18 +1923,22 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
        struct gfar_priv_tx_q *tx_queue = NULL;
+       struct netdev_queue *txq;
        struct gfar __iomem *regs = NULL;
        struct txfcb *fcb = NULL;
        struct txbd8 *txbdp, *txbdp_start, *base;
        u32 lstatus;
-       int i;
+       int i, rq = 0;
        u32 bufaddr;
        unsigned long flags;
        unsigned int nr_frags, length;
 
-       tx_queue = priv->tx_queue;
+       rq = skb_get_queue_mapping(skb);
+       tx_queue = priv->tx_queue[rq];
+       txq = netdev_get_tx_queue(dev, rq);
        base = tx_queue->tx_bd_base;
-       regs = priv->gfargrp.regs;
+       regs = tx_queue->grp->regs;
 
        /* make space for additional header when fcb is needed */
        if (((skb->ip_summed == CHECKSUM_PARTIAL) ||
@@ -1453,19 +1959,17 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
        /* total number of fragments in the SKB */
        nr_frags = skb_shinfo(skb)->nr_frags;
 
-       spin_lock_irqsave(&tx_queue->txlock, flags);
-
        /* check if there is space to queue this packet */
        if ((nr_frags+1) > tx_queue->num_txbdfree) {
                /* no space, stop the queue */
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(txq);
                dev->stats.tx_fifo_errors++;
-               spin_unlock_irqrestore(&tx_queue->txlock, flags);
                return NETDEV_TX_BUSY;
        }
 
        /* Update transmit stats */
-       dev->stats.tx_bytes += skb->len;
+       txq->tx_bytes += skb->len;
+       txq->tx_packets++;
 
        txbdp = txbdp_start = tx_queue->cur_tx;
 
@@ -1524,6 +2028,20 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
        lstatus |= BD_LFLAG(TXBD_CRC | TXBD_READY) | skb_headlen(skb);
 
        /*
+        * We can work in parallel with gfar_clean_tx_ring(), except
+        * when modifying num_txbdfree. Note that we didn't grab the lock
+        * when we read num_txbdfree to check for available space: outside
+        * of this function it can only grow, so once we have enough space,
+        * it cannot suddenly disappear.
+        *
+        * The lock also protects us from gfar_error(), which can modify
+        * regs->tstat and thus retrigger the transfers, which is why we
+        * also must grab the lock before setting the ready bit for the
+        * first BD to be transmitted.
+        */
+       spin_lock_irqsave(&tx_queue->txlock, flags);
+
+       /*
         * The powerpc-specific eieio() is used, as wmb() has too strong
         * semantics (it requires synchronization between cacheable and
         * uncacheable mappings, which eieio doesn't provide and which we
@@ -1550,13 +2068,13 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev)
        /* If the next BD still needs to be cleaned up, then the bds
           are full.  We need to tell the kernel to stop sending us stuff. */
        if (!tx_queue->num_txbdfree) {
-               netif_stop_queue(dev);
+               netif_tx_stop_queue(txq);
 
                dev->stats.tx_fifo_errors++;
        }
 
        /* Tell the DMA to go go go */
-       gfar_write(&regs->tstat, TSTAT_CLEAR_THALT);
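+       /* Each tx queue has its own THLT bit: THLT0 shifted right by
+        * the queue index */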
+       gfar_write(&regs->tstat, TSTAT_CLEAR_THALT >> tx_queue->qindex);
 
        /* Unlock priv */
        spin_unlock_irqrestore(&tx_queue->txlock, flags);
@@ -1569,7 +2087,7 @@ static int gfar_close(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
 
-       napi_disable(&priv->rx_queue->napi);
+       disable_napi(priv);
 
        skb_queue_purge(&priv->rx_recycle);
        cancel_work_sync(&priv->reset_task);
@@ -1579,7 +2097,7 @@ static int gfar_close(struct net_device *dev)
        phy_disconnect(priv->phydev);
        priv->phydev = NULL;
 
-       netif_stop_queue(dev);
+       netif_tx_stop_all_queues(dev);
 
        return 0;
 }
@@ -1598,14 +2116,13 @@ static void gfar_vlan_rx_register(struct net_device *dev,
                struct vlan_group *grp)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar_priv_rx_q *rx_queue = NULL;
        struct gfar __iomem *regs = NULL;
        unsigned long flags;
        u32 tempval;
 
-       rx_queue = priv->rx_queue;
-       regs = priv->gfargrp.regs;
-       spin_lock_irqsave(&rx_queue->rxlock, flags);
+       regs = priv->gfargrp[0].regs;
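+       /* lock_rx_qs() takes every rx queue's lock, so disable local
+        * interrupts once here rather than with a per-lock irqsave */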
+       local_irq_save(flags);
+       lock_rx_qs(priv);
 
        priv->vlgrp = grp;
 
@@ -1639,14 +2156,15 @@ static void gfar_vlan_rx_register(struct net_device *dev,
 
        gfar_change_mtu(dev, dev->mtu);
 
-       spin_unlock_irqrestore(&rx_queue->rxlock, flags);
+       unlock_rx_qs(priv);
+       local_irq_restore(flags);
 }
 
 static int gfar_change_mtu(struct net_device *dev, int new_mtu)
 {
        int tempsize, tempval;
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        int oldsize = priv->rx_buffer_size;
        int frame_size = new_mtu + ETH_HLEN;
 
@@ -1711,10 +2229,10 @@ static void gfar_reset_task(struct work_struct *work)
        struct net_device *dev = priv->ndev;
 
        if (dev->flags & IFF_UP) {
-               netif_stop_queue(dev);
+               netif_tx_stop_all_queues(dev);
                stop_gfar(dev);
                startup_gfar(dev);
-               netif_start_queue(dev);
+               netif_tx_start_all_queues(dev);
        }
 
        netif_tx_schedule_all(dev);
@@ -1745,11 +2263,13 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
        int howmany = 0;
        u32 lstatus;
 
-       rx_queue = priv->rx_queue;
+       rx_queue = priv->rx_queue[tx_queue->qindex];
        bdp = tx_queue->dirty_tx;
        skb_dirtytx = tx_queue->skb_dirtytx;
 
        while ((skb = tx_queue->tx_skbuff[skb_dirtytx])) {
+               unsigned long flags;
+
                frags = skb_shinfo(skb)->nr_frags;
                lbdp = skip_txbd(bdp, frags, base, tx_ring_size);
 
@@ -1794,37 +2314,30 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue)
                        TX_RING_MOD_MASK(tx_ring_size);
 
                howmany++;
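+               /* num_txbdfree is also updated by gfar_start_xmit(),
+                * see the locking comment there */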
+               spin_lock_irqsave(&tx_queue->txlock, flags);
                tx_queue->num_txbdfree += frags + 1;
+               spin_unlock_irqrestore(&tx_queue->txlock, flags);
        }
 
        /* If we freed a buffer, we can restart transmission, if necessary */
-       if (netif_queue_stopped(dev) && tx_queue->num_txbdfree)
-               netif_wake_queue(dev);
+       if (__netif_subqueue_stopped(dev, tx_queue->qindex) &&
+           tx_queue->num_txbdfree)
+               netif_wake_subqueue(dev, tx_queue->qindex);
 
        /* Update dirty indicators */
        tx_queue->skb_dirtytx = skb_dirtytx;
        tx_queue->dirty_tx = bdp;
 
-       dev->stats.tx_packets += howmany;
-
        return howmany;
 }
 
 static void gfar_schedule_cleanup(struct gfar_priv_grp *gfargrp)
 {
-       struct gfar_private *priv = gfargrp->priv;
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar_priv_rx_q *rx_queue = NULL;
        unsigned long flags;
 
-       rx_queue = priv->rx_queue;
-       tx_queue = priv->tx_queue;
-       spin_lock_irqsave(&tx_queue->txlock, flags);
-       spin_lock(&rx_queue->rxlock);
-
-       if (napi_schedule_prep(&rx_queue->napi)) {
+       spin_lock_irqsave(&gfargrp->grplock, flags);
+       if (napi_schedule_prep(&gfargrp->napi)) {
                gfar_write(&gfargrp->regs->imask, IMASK_RTX_DISABLED);
-               __napi_schedule(&rx_queue->napi);
+               __napi_schedule(&gfargrp->napi);
        } else {
                /*
                 * Clear IEVENT, so interrupts aren't called again
@@ -1832,9 +2345,8 @@ static void gfar_schedule_cleanup(struct gfar_priv_grp *gfargrp)
                 */
                gfar_write(&gfargrp->regs->ievent, IEVENT_RTX_MASK);
        }
+       spin_unlock_irqrestore(&gfargrp->grplock, flags);
 
-       spin_unlock(&rx_queue->rxlock);
-       spin_unlock_irqrestore(&tx_queue->txlock, flags);
 }
 
 /* Interrupt Handler for Transmit complete */
@@ -1953,8 +2465,10 @@ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb,
 
        /* Remove the FCB from the skb */
        /* Remove the padded bytes, if there are any */
-       if (amount_pull)
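+       /* the rx queue index stamped by hardware lives in the FCB */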
+       if (amount_pull) {
+               skb_record_rx_queue(skb, fcb->rq);
                skb_pull(skb, amount_pull);
+       }
 
        if (priv->rx_csum_enable)
                gfar_rx_checksum(skb, fcb);
@@ -2027,24 +2541,22 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
                        }
                } else {
                        /* Increment the number of packets */
-                       dev->stats.rx_packets++;
+                       rx_queue->stats.rx_packets++;
                        howmany++;
 
                        if (likely(skb)) {
                                pkt_len = bdp->length - ETH_FCS_LEN;
                                /* Remove the FCS from the packet length */
                                skb_put(skb, pkt_len);
-                               dev->stats.rx_bytes += pkt_len;
-
-                               if (in_irq() || irqs_disabled())
-                                       printk("Interrupt problem!\n");
+                               rx_queue->stats.rx_bytes += pkt_len;
+                               skb_record_rx_queue(skb, rx_queue->qindex);
                                gfar_process_frame(dev, skb, amount_pull);
 
                        } else {
                                if (netif_msg_rx_err(priv))
                                        printk(KERN_WARNING
                                               "%s: Missing skb!\n", dev->name);
-                               dev->stats.rx_dropped++;
+                               rx_queue->stats.rx_dropped++;
                                priv->extra_stats.rx_skbmissing++;
                        }
 
@@ -2072,29 +2584,48 @@ int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue, int rx_work_limit)
 
 static int gfar_poll(struct napi_struct *napi, int budget)
 {
-       struct gfar_priv_rx_q *rx_queue = container_of(napi,
-                       struct gfar_priv_rx_q, napi);
-       struct net_device *dev = rx_queue->dev;
-       struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar_priv_grp *gfargrp = container_of(napi,
+                       struct gfar_priv_grp, napi);
+       struct gfar_private *priv = gfargrp->priv;
+       struct gfar __iomem *regs = gfargrp->regs;
        struct gfar_priv_tx_q *tx_queue = NULL;
-       int tx_cleaned = 0;
-       int rx_cleaned = 0;
-       unsigned long flags;
+       struct gfar_priv_rx_q *rx_queue = NULL;
+       int rx_cleaned = 0, budget_per_queue = 0, rx_cleaned_per_queue = 0;
+       int tx_cleaned = 0, i, left_over_budget = budget;
+       unsigned long serviced_queues = 0;
+       int num_queues = 0;
+
+       num_queues = gfargrp->num_rx_queues;
 
        /* Clear IEVENT, so interrupts aren't called again
         * because of the packets that have already arrived */
        gfar_write(&regs->ievent, IEVENT_RTX_MASK);
-       tx_queue = priv->tx_queue;
 
-       /* If we fail to get the lock, don't bother with the TX BDs */
-       if (spin_trylock_irqsave(&tx_queue->txlock, flags)) {
-               tx_cleaned = gfar_clean_tx_ring(tx_queue);
-               spin_unlock_irqrestore(&tx_queue->txlock, flags);
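+       /*
+        * Split the budget evenly across the group's rx queues and
+        * keep redistributing whatever a queue leaves unused, until
+        * either the budget is exhausted or every queue has been
+        * fully serviced.
+        */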
+       while (num_queues && left_over_budget) {
+               budget_per_queue = left_over_budget / num_queues;
+               left_over_budget = 0;
+
+               for_each_set_bit(i, &gfargrp->rx_bit_map, priv->num_rx_queues) {
+                       if (test_bit(i, &serviced_queues))
+                               continue;
+                       rx_queue = priv->rx_queue[i];
+                       tx_queue = priv->tx_queue[rx_queue->qindex];
+
+                       tx_cleaned += gfar_clean_tx_ring(tx_queue);
+                       rx_cleaned_per_queue = gfar_clean_rx_ring(rx_queue,
+                                                       budget_per_queue);
+                       rx_cleaned += rx_cleaned_per_queue;
+                       if (rx_cleaned_per_queue < budget_per_queue) {
+                               left_over_budget += budget_per_queue -
+                                       rx_cleaned_per_queue;
+                               set_bit(i, &serviced_queues);
+                               num_queues--;
+                       }
+               }
        }
 
-       rx_cleaned = gfar_clean_rx_ring(rx_queue, budget);
-
        if (tx_cleaned)
                return budget;
 
@@ -2102,20 +2633,14 @@ static int gfar_poll(struct napi_struct *napi, int budget)
                napi_complete(napi);
 
                /* Clear the halt bit in RSTAT */
-               gfar_write(&regs->rstat, RSTAT_CLEAR_RHALT);
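+               /* gfargrp->rstat accumulates the RHALT-clear bits for
+                * each rx queue in this group */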
+               gfar_write(&regs->rstat, gfargrp->rstat);
 
                gfar_write(&regs->imask, IMASK_DEFAULT);
 
                /* If we are coalescing interrupts, update the timer */
                /* Otherwise, clear it */
-               if (likely(rx_queue->rxcoalescing)) {
-                       gfar_write(&regs->rxic, 0);
-                       gfar_write(&regs->rxic, rx_queue->rxic);
-               }
-               if (likely(tx_queue->txcoalescing)) {
-                       gfar_write(&regs->txic, 0);
-                       gfar_write(&regs->txic, tx_queue->txic);
-               }
+               gfar_configure_coalescing(priv,
+                               gfargrp->rx_bit_map, gfargrp->tx_bit_map);
        }
 
        return rx_cleaned;
@@ -2130,20 +2655,27 @@ static int gfar_poll(struct napi_struct *napi, int budget)
 static void gfar_netpoll(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
+       int i = 0;
 
        /* If the device has multiple interrupts, run tx/rx */
        if (priv->device_flags & FSL_GIANFAR_DEV_HAS_MULTI_INTR) {
-               disable_irq(priv->gfargrp.interruptTransmit);
-               disable_irq(priv->gfargrp.interruptReceive);
-               disable_irq(priv->gfargrp.interruptError);
-               gfar_interrupt(priv->gfargrp.interruptTransmit, &priv->gfargrp);
-               enable_irq(priv->gfargrp.interruptError);
-               enable_irq(priv->gfargrp.interruptReceive);
-               enable_irq(priv->gfargrp.interruptTransmit);
+               for (i = 0; i < priv->num_grps; i++) {
+                       disable_irq(priv->gfargrp[i].interruptTransmit);
+                       disable_irq(priv->gfargrp[i].interruptReceive);
+                       disable_irq(priv->gfargrp[i].interruptError);
+                       gfar_interrupt(priv->gfargrp[i].interruptTransmit,
+                                               &priv->gfargrp[i]);
+                       enable_irq(priv->gfargrp[i].interruptError);
+                       enable_irq(priv->gfargrp[i].interruptReceive);
+                       enable_irq(priv->gfargrp[i].interruptTransmit);
+               }
        } else {
-               disable_irq(priv->gfargrp.interruptTransmit);
-               gfar_interrupt(priv->gfargrp.interruptTransmit, &priv->gfargrp);
-               enable_irq(priv->gfargrp.interruptTransmit);
+               for (i = 0; i < priv->num_grps; i++) {
+                       disable_irq(priv->gfargrp[i].interruptTransmit);
+                       gfar_interrupt(priv->gfargrp[i].interruptTransmit,
+                                               &priv->gfargrp[i]);
+                       enable_irq(priv->gfargrp[i].interruptTransmit);
+               }
        }
 }
 #endif
@@ -2180,14 +2712,14 @@ static irqreturn_t gfar_interrupt(int irq, void *grp_id)
 static void adjust_link(struct net_device *dev)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar_priv_tx_q *tx_queue = NULL;
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        unsigned long flags;
        struct phy_device *phydev = priv->phydev;
        int new_state = 0;
 
-       tx_queue = priv->tx_queue;
-       spin_lock_irqsave(&tx_queue->txlock, flags);
+       local_irq_save(flags);
+       lock_tx_qs(priv);
+
        if (phydev->link) {
                u32 tempval = gfar_read(&regs->maccfg2);
                u32 ecntrl = gfar_read(&regs->ecntrl);
@@ -2252,8 +2784,8 @@ static void adjust_link(struct net_device *dev)
 
        if (new_state && netif_msg_link(priv))
                phy_print_status(phydev);
-
-       spin_unlock_irqrestore(&tx_queue->txlock, flags);
+       unlock_tx_qs(priv);
+       local_irq_restore(flags);
 }
 
 /* Update the hash table based on the current list of multicast
@@ -2264,7 +2796,7 @@ static void gfar_set_multi(struct net_device *dev)
 {
        struct dev_mc_list *mc_ptr;
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        u32 tempval;
 
        if (dev->flags & IFF_PROMISC) {
@@ -2331,11 +2863,11 @@ static void gfar_set_multi(struct net_device *dev)
                        em_num = 0;
                }
 
-               if (dev->mc_count == 0)
+               if (netdev_mc_empty(dev))
                        return;
 
                /* Parse the list, and set the appropriate bits */
-               for(mc_ptr = dev->mc_list; mc_ptr; mc_ptr = mc_ptr->next) {
+               netdev_for_each_mc_addr(mc_ptr, dev) {
                        if (idx < em_num) {
                                gfar_set_mac_for_addr(dev, idx,
                                                mc_ptr->dmi_addr);
@@ -2397,7 +2929,7 @@ static void gfar_set_hash_for_addr(struct net_device *dev, u8 *addr)
 static void gfar_set_mac_for_addr(struct net_device *dev, int num, u8 *addr)
 {
        struct gfar_private *priv = netdev_priv(dev);
-       struct gfar __iomem *regs = priv->gfargrp.regs;
+       struct gfar __iomem *regs = priv->gfargrp[0].regs;
        int idx;
        char tmpbuf[MAC_ADDR_LEN];
        u32 tempval;
@@ -2450,14 +2982,22 @@ static irqreturn_t gfar_error(int irq, void *grp_id)
                if (events & IEVENT_CRL)
                        dev->stats.tx_aborted_errors++;
                if (events & IEVENT_XFUN) {
+                       unsigned long flags;
+
                        if (netif_msg_tx_err(priv))
                                printk(KERN_DEBUG "%s: TX FIFO underrun, "
                                       "packet dropped.\n", dev->name);
                        dev->stats.tx_dropped++;
                        priv->extra_stats.tx_underrun++;
 
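+                       /* see the locking comment in gfar_start_xmit() */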
+                       local_irq_save(flags);
+                       lock_tx_qs(priv);
+
                        /* Reactivate the Tx Queues */
-                       gfar_write(&regs->tstat, TSTAT_CLEAR_THALT);
+                       gfar_write(&regs->tstat, gfargrp->tstat);
+
+                       unlock_tx_qs(priv);
+                       local_irq_restore(flags);
                }
                if (netif_msg_tx_err(priv))
                        printk(KERN_DEBUG "%s: Transmit Error\n", dev->name);
@@ -2501,6 +3041,9 @@ static struct of_device_id gfar_match[] =
                .type = "network",
                .compatible = "gianfar",
        },
+       {
+               .compatible = "fsl,etsec2",
+       },
        {},
 };
 MODULE_DEVICE_TABLE(of, gfar_match);