forcedeth: tx locking
diff --git a/drivers/net/forcedeth.c b/drivers/net/forcedeth.c
index 2f48fe9a29a7eb160e69ad908433d485dd383ee7..75dbd67323ad147c673f08dfa8169e0e11c15965 100644
--- a/drivers/net/forcedeth.c
+++ b/drivers/net/forcedeth.c
@@ -234,6 +234,7 @@ enum {
 #define NVREG_XMITCTL_HOST_SEMA_MASK   0x0000f000
 #define NVREG_XMITCTL_HOST_SEMA_ACQ    0x0000f000
 #define NVREG_XMITCTL_HOST_LOADED      0x00004000
+#define NVREG_XMITCTL_TX_PATH_EN       0x01000000
        NvRegTransmitterStatus = 0x088,
 #define NVREG_XMITSTAT_BUSY    0x01
 
@@ -249,6 +250,7 @@ enum {
 #define NVREG_OFFLOAD_NORMAL   RX_NIC_BUFSIZE
        NvRegReceiverControl = 0x094,
 #define NVREG_RCVCTL_START     0x01
+#define NVREG_RCVCTL_RX_PATH_EN        0x01000000
        NvRegReceiverStatus = 0x98,
 #define NVREG_RCVSTAT_BUSY     0x01
 
@@ -302,8 +304,8 @@ enum {
 #define NVREG_TXRXCTL_RESET    0x0010
 #define NVREG_TXRXCTL_RXCHECK  0x0400
 #define NVREG_TXRXCTL_DESC_1   0
-#define NVREG_TXRXCTL_DESC_2   0x02100
-#define NVREG_TXRXCTL_DESC_3   0x02200
+#define NVREG_TXRXCTL_DESC_2   0x002100
+#define NVREG_TXRXCTL_DESC_3   0xc02200
 #define NVREG_TXRXCTL_VLANSTRIP 0x00040
 #define NVREG_TXRXCTL_VLANINS  0x00080
        NvRegTxRingPhysAddrHigh = 0x148,
@@ -689,6 +691,12 @@ static const struct register_test nv_registers_test[] = {
        { 0,0 }
 };
 
+struct nv_skb_map {
+       struct sk_buff *skb;
+       dma_addr_t dma;
+       unsigned int dma_len;
+};
+
 /*
  * SMP locking:
  * All hardware access under dev->priv->lock, except the performance
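
The new struct nv_skb_map bundles, per ring slot, what used to live in parallel arrays: the skb pointer, its DMA handle, and the mapped length. A minimal userspace model of that layout change, for illustration only -- sk_buff is left opaque and dma_addr_t is a stand-in typedef, both assumptions:

#include <stdlib.h>
#include <string.h>

struct sk_buff;                    /* opaque stand-in for the kernel type */
typedef unsigned long dma_addr_t;  /* stand-in; real width is arch-specific */

struct nv_skb_map {
	struct sk_buff *skb;
	dma_addr_t dma;
	unsigned int dma_len;
};

/* One allocation replaces the old rx_skbuff/rx_dma pair (and the
 * tx_skbuff/tx_dma/tx_dma_len triple on the tx side). */
static struct nv_skb_map *alloc_ring_ctx(int ring_size)
{
	struct nv_skb_map *ctx = malloc(sizeof(*ctx) * ring_size);

	if (ctx)
		memset(ctx, 0, sizeof(*ctx) * ring_size);
	return ctx;
}

int main(void)
{
	struct nv_skb_map *ctx = alloc_ring_ctx(256);

	free(ctx);
	return 0;
}
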
@@ -739,10 +747,12 @@ struct fe_priv {
        /* rx specific fields.
         * Locking: Within irq handler or disable_irq+spin_lock(&np->lock);
         */
+       union ring_type get_rx, put_rx, first_rx, last_rx;
+       struct nv_skb_map *get_rx_ctx, *put_rx_ctx;
+       struct nv_skb_map *first_rx_ctx, *last_rx_ctx;
+       struct nv_skb_map *rx_skb;
+
        union ring_type rx_ring;
-       unsigned int cur_rx, refill_rx;
-       struct sk_buff **rx_skbuff;
-       dma_addr_t *rx_dma;
        unsigned int rx_buf_sz;
        unsigned int pkt_limit;
        struct timer_list oom_kick;
@@ -759,11 +769,12 @@ struct fe_priv {
        /*
         * tx specific fields.
         */
+       union ring_type get_tx, put_tx, first_tx, last_tx;
+       struct nv_skb_map *get_tx_ctx, *put_tx_ctx;
+       struct nv_skb_map *first_tx_ctx, *last_tx_ctx;
+       struct nv_skb_map *tx_skb;
+
        union ring_type tx_ring;
-       unsigned int next_tx, nic_tx;
-       struct sk_buff **tx_skbuff;
-       dma_addr_t *tx_dma;
-       unsigned int *tx_dma_len;
        u32 tx_flags;
        int tx_ring_size;
        int tx_limit_start;
@@ -919,16 +930,10 @@ static void free_rings(struct net_device *dev)
                        pci_free_consistent(np->pci_dev, sizeof(struct ring_desc_ex) * (np->rx_ring_size + np->tx_ring_size),
                                            np->rx_ring.ex, np->ring_addr);
        }
-       if (np->rx_skbuff)
-               kfree(np->rx_skbuff);
-       if (np->rx_dma)
-               kfree(np->rx_dma);
-       if (np->tx_skbuff)
-               kfree(np->tx_skbuff);
-       if (np->tx_dma)
-               kfree(np->tx_dma);
-       if (np->tx_dma_len)
-               kfree(np->tx_dma_len);
+       if (np->rx_skb)
+               kfree(np->rx_skb);
+       if (np->tx_skb)
+               kfree(np->tx_skb);
 }
 
 static int using_multi_irqs(struct net_device *dev)
@@ -1169,16 +1174,21 @@ static void nv_start_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 rx_ctrl = readl(base + NvRegReceiverControl);
 
        dprintk(KERN_DEBUG "%s: nv_start_rx\n", dev->name);
        /* Already running? Stop it. */
-       if (readl(base + NvRegReceiverControl) & NVREG_RCVCTL_START) {
-               writel(0, base + NvRegReceiverControl);
+       if ((readl(base + NvRegReceiverControl) & NVREG_RCVCTL_START) && !np->mac_in_use) {
+               rx_ctrl &= ~NVREG_RCVCTL_START;
+               writel(rx_ctrl, base + NvRegReceiverControl);
                pci_push(base);
        }
        writel(np->linkspeed, base + NvRegLinkSpeed);
        pci_push(base);
-       writel(NVREG_RCVCTL_START, base + NvRegReceiverControl);
+       rx_ctrl |= NVREG_RCVCTL_START;
+       if (np->mac_in_use)
+               rx_ctrl &= ~NVREG_RCVCTL_RX_PATH_EN;
+       writel(rx_ctrl, base + NvRegReceiverControl);
        dprintk(KERN_DEBUG "%s: nv_start_rx to duplex %d, speed 0x%08x.\n",
                                dev->name, np->duplex, np->linkspeed);
        pci_push(base);
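
With a management unit sharing the MAC (np->mac_in_use), the driver no longer owns NVREG_RCVCTL_START outright; it only gates its own receive path via the new RX_PATH_EN bit, which this hunk clears on start and nv_stop_rx() below sets on stop. A small standalone model of just that register logic -- hedged, since the real functions also program NvRegLinkSpeed, flush posted writes, and poll NVREG_RCVSTAT_BUSY:

#include <assert.h>
#include <stdint.h>

#define RCVCTL_START   0x01u        /* NVREG_RCVCTL_START */
#define RCVCTL_PATH_EN 0x01000000u  /* NVREG_RCVCTL_RX_PATH_EN */

static uint32_t model_start_rx(uint32_t rx_ctrl, int mac_in_use)
{
	rx_ctrl |= RCVCTL_START;
	if (mac_in_use)
		rx_ctrl &= ~RCVCTL_PATH_EN;  /* un-gate our path only */
	return rx_ctrl;
}

static uint32_t model_stop_rx(uint32_t rx_ctrl, int mac_in_use)
{
	if (!mac_in_use)
		rx_ctrl &= ~RCVCTL_START;    /* sole owner: stop the receiver */
	else
		rx_ctrl |= RCVCTL_PATH_EN;   /* shared: gate our path, keep rx on */
	return rx_ctrl;
}

int main(void)
{
	/* shared MAC: START stays set across our stop */
	assert(model_stop_rx(model_start_rx(0, 1), 1) & RCVCTL_START);
	/* exclusive MAC: stop really clears START */
	assert(!(model_stop_rx(model_start_rx(0, 0), 0) & RCVCTL_START));
	return 0;
}
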
@@ -1186,39 +1196,59 @@ static void nv_start_rx(struct net_device *dev)
 
 static void nv_stop_rx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 rx_ctrl = readl(base + NvRegReceiverControl);
 
        dprintk(KERN_DEBUG "%s: nv_stop_rx\n", dev->name);
-       writel(0, base + NvRegReceiverControl);
+       if (!np->mac_in_use)
+               rx_ctrl &= ~NVREG_RCVCTL_START;
+       else
+               rx_ctrl |= NVREG_RCVCTL_RX_PATH_EN;
+       writel(rx_ctrl, base + NvRegReceiverControl);
        reg_delay(dev, NvRegReceiverStatus, NVREG_RCVSTAT_BUSY, 0,
                        NV_RXSTOP_DELAY1, NV_RXSTOP_DELAY1MAX,
                        KERN_INFO "nv_stop_rx: ReceiverStatus remained busy");
 
        udelay(NV_RXSTOP_DELAY2);
-       writel(0, base + NvRegLinkSpeed);
+       if (!np->mac_in_use)
+               writel(0, base + NvRegLinkSpeed);
 }
 
 static void nv_start_tx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 tx_ctrl = readl(base + NvRegTransmitterControl);
 
        dprintk(KERN_DEBUG "%s: nv_start_tx\n", dev->name);
-       writel(NVREG_XMITCTL_START, base + NvRegTransmitterControl);
+       tx_ctrl |= NVREG_XMITCTL_START;
+       if (np->mac_in_use)
+               tx_ctrl &= ~NVREG_XMITCTL_TX_PATH_EN;
+       writel(tx_ctrl, base + NvRegTransmitterControl);
        pci_push(base);
 }
 
 static void nv_stop_tx(struct net_device *dev)
 {
+       struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
+       u32 tx_ctrl = readl(base + NvRegTransmitterControl);
 
        dprintk(KERN_DEBUG "%s: nv_stop_tx\n", dev->name);
-       writel(0, base + NvRegTransmitterControl);
+       if (!np->mac_in_use)
+               tx_ctrl &= ~NVREG_XMITCTL_START;
+       else
+               tx_ctrl |= NVREG_XMITCTL_TX_PATH_EN;
+       writel(tx_ctrl, base + NvRegTransmitterControl);
        reg_delay(dev, NvRegTransmitterStatus, NVREG_XMITSTAT_BUSY, 0,
                        NV_TXSTOP_DELAY1, NV_TXSTOP_DELAY1MAX,
                        KERN_INFO "nv_stop_tx: TransmitterStatus remained busy");
 
        udelay(NV_TXSTOP_DELAY2);
-       writel(readl(base + NvRegTransmitPoll) & NVREG_TRANSMITPOLL_MAC_ADDR_REV, base + NvRegTransmitPoll);
+       if (!np->mac_in_use)
+               writel(readl(base + NvRegTransmitPoll) & NVREG_TRANSMITPOLL_MAC_ADDR_REV,
+                      base + NvRegTransmitPoll);
 }
 
 static void nv_txrx_reset(struct net_device *dev)
@@ -1277,43 +1307,60 @@ static struct net_device_stats *nv_get_stats(struct net_device *dev)
 static int nv_alloc_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
-       unsigned int refill_rx = np->refill_rx;
-       int nr;
+       union ring_type less_rx;
 
-       while (np->cur_rx != refill_rx) {
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+               less_rx.orig = np->get_rx.orig;
+               if (less_rx.orig-- == np->first_rx.orig)
+                       less_rx.orig = np->last_rx.orig;
+       } else {
+               less_rx.ex = np->get_rx.ex;
+               if (less_rx.ex-- == np->first_rx.ex)
+                       less_rx.ex = np->last_rx.ex;
+       }
+
+       while (1) {
                struct sk_buff *skb;
 
-               nr = refill_rx % np->rx_ring_size;
-               if (np->rx_skbuff[nr] == NULL) {
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (np->put_rx.orig == less_rx.orig)
+                               break;
+               } else {
+                       if (np->put_rx.ex == less_rx.ex)
+                               break;
+               }
+
+               if (np->put_rx_ctx->skb == NULL) {
 
                        skb = dev_alloc_skb(np->rx_buf_sz + NV_RX_ALLOC_PAD);
                        if (!skb)
-                               break;
+                               return 1;
 
                        skb->dev = dev;
-                       np->rx_skbuff[nr] = skb;
+                       np->put_rx_ctx->skb = skb;
                } else {
-                       skb = np->rx_skbuff[nr];
+                       skb = np->put_rx_ctx->skb;
                }
-               np->rx_dma[nr] = pci_map_single(np->pci_dev, skb->data,
+               np->put_rx_ctx->dma = pci_map_single(np->pci_dev, skb->data,
                                        skb->end-skb->data, PCI_DMA_FROMDEVICE);
+               np->put_rx_ctx->dma_len = skb->end-skb->data;
                if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       np->rx_ring.orig[nr].buf = cpu_to_le32(np->rx_dma[nr]);
+                       np->put_rx.orig->buf = cpu_to_le32(np->put_rx_ctx->dma);
                        wmb();
-                       np->rx_ring.orig[nr].flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
+                       np->put_rx.orig->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX_AVAIL);
+                       if (np->put_rx.orig++ == np->last_rx.orig)
+                               np->put_rx.orig = np->first_rx.orig;
                } else {
-                       np->rx_ring.ex[nr].bufhigh = cpu_to_le64(np->rx_dma[nr]) >> 32;
-                       np->rx_ring.ex[nr].buflow = cpu_to_le64(np->rx_dma[nr]) & 0x0FFFFFFFF;
+                       np->put_rx.ex->bufhigh = cpu_to_le64(np->put_rx_ctx->dma) >> 32;
+                       np->put_rx.ex->buflow = cpu_to_le64(np->put_rx_ctx->dma) & 0x0FFFFFFFF;
                        wmb();
-                       np->rx_ring.ex[nr].flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
+                       np->put_rx.ex->flaglen = cpu_to_le32(np->rx_buf_sz | NV_RX2_AVAIL);
+                       if (np->put_rx.ex++ == np->last_rx.ex)
+                               np->put_rx.ex = np->first_rx.ex;
                }
-               dprintk(KERN_DEBUG "%s: nv_alloc_rx: Packet %d marked as Available\n",
-                                       dev->name, refill_rx);
-               refill_rx++;
+               if (np->put_rx_ctx++ == np->last_rx_ctx)
+                       np->put_rx_ctx = np->first_rx_ctx;
        }
-       np->refill_rx = refill_rx;
-       if (np->cur_rx - refill_rx == np->rx_ring_size)
-               return 1;
        return 0;
 }
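
nv_alloc_rx keeps forcedeth's descriptor publish order even with the new pointer-based walk: the buffer address is written first, then wmb(), then the flaglen word whose AVAIL bit hands the slot to the NIC. A userspace sketch of that ordering, using __sync_synchronize() as a stand-in for wmb() and an assumed (1 << 31) AVAIL bit:

#include <stdint.h>

#define RX_AVAIL (1u << 31)	/* assumed value of NV_RX_AVAIL */

struct ring_desc {
	uint32_t buf;		/* DMA address of the receive buffer */
	uint32_t flaglen;	/* length + ownership flag */
};

static void publish_rx_desc(struct ring_desc *d, uint32_t dma, uint32_t len)
{
	d->buf = dma;
	__sync_synchronize();		/* stand-in for wmb(): address lands first */
	d->flaglen = len | RX_AVAIL;	/* only now does the NIC own the slot */
}

int main(void)
{
	struct ring_desc d = { 0, 0 };

	publish_rx_desc(&d, 0x1000, 1536);
	return (d.flaglen & RX_AVAIL) ? 0 : 1;
}
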
 
@@ -1361,29 +1408,53 @@ static void nv_init_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        int i;
+       np->get_rx = np->put_rx = np->first_rx = np->rx_ring;
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               np->last_rx.orig = &np->rx_ring.orig[np->rx_ring_size-1];
+       else
+               np->last_rx.ex = &np->rx_ring.ex[np->rx_ring_size-1];
+       np->get_rx_ctx = np->put_rx_ctx = np->first_rx_ctx = np->rx_skb;
+       np->last_rx_ctx = &np->rx_skb[np->rx_ring_size-1];
 
-       np->cur_rx = np->rx_ring_size;
-       np->refill_rx = 0;
-       for (i = 0; i < np->rx_ring_size; i++)
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+       for (i = 0; i < np->rx_ring_size; i++) {
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                        np->rx_ring.orig[i].flaglen = 0;
-               else
+                       np->rx_ring.orig[i].buf = 0;
+               } else {
                        np->rx_ring.ex[i].flaglen = 0;
+                       np->rx_ring.ex[i].txvlan = 0;
+                       np->rx_ring.ex[i].bufhigh = 0;
+                       np->rx_ring.ex[i].buflow = 0;
+               }
+               np->rx_skb[i].skb = NULL;
+               np->rx_skb[i].dma = 0;
+       }
 }
 
 static void nv_init_tx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        int i;
+       np->get_tx = np->put_tx = np->first_tx = np->tx_ring;
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               np->last_tx.orig = &np->tx_ring.orig[np->tx_ring_size-1];
+       else
+               np->last_tx.ex = &np->tx_ring.ex[np->tx_ring_size-1];
+       np->get_tx_ctx = np->put_tx_ctx = np->first_tx_ctx = np->tx_skb;
+       np->last_tx_ctx = &np->tx_skb[np->tx_ring_size-1];
 
-       np->next_tx = np->nic_tx = 0;
        for (i = 0; i < np->tx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                        np->tx_ring.orig[i].flaglen = 0;
-               else
+                       np->tx_ring.orig[i].buf = 0;
+               } else {
                        np->tx_ring.ex[i].flaglen = 0;
-               np->tx_skbuff[i] = NULL;
-               np->tx_dma[i] = 0;
+                       np->tx_ring.ex[i].txvlan = 0;
+                       np->tx_ring.ex[i].bufhigh = 0;
+                       np->tx_ring.ex[i].buflow = 0;
+               }
+               np->tx_skb[i].skb = NULL;
+               np->tx_skb[i].dma = 0;
        }
 }
 
@@ -1394,23 +1465,19 @@ static int nv_init_ring(struct net_device *dev)
        return nv_alloc_rx(dev);
 }
 
-static int nv_release_txskb(struct net_device *dev, unsigned int skbnr)
+static int nv_release_txskb(struct net_device *dev, struct nv_skb_map* tx_skb)
 {
        struct fe_priv *np = netdev_priv(dev);
 
-       dprintk(KERN_INFO "%s: nv_release_txskb for skbnr %d\n",
-               dev->name, skbnr);
-
-       if (np->tx_dma[skbnr]) {
-               pci_unmap_page(np->pci_dev, np->tx_dma[skbnr],
-                              np->tx_dma_len[skbnr],
+       if (tx_skb->dma) {
+               pci_unmap_page(np->pci_dev, tx_skb->dma,
+                              tx_skb->dma_len,
                               PCI_DMA_TODEVICE);
-               np->tx_dma[skbnr] = 0;
+               tx_skb->dma = 0;
        }
-
-       if (np->tx_skbuff[skbnr]) {
-               dev_kfree_skb_any(np->tx_skbuff[skbnr]);
-               np->tx_skbuff[skbnr] = NULL;
+       if (tx_skb->skb) {
+               dev_kfree_skb_any(tx_skb->skb);
+               tx_skb->skb = NULL;
                return 1;
        } else {
                return 0;
@@ -1423,11 +1490,16 @@ static void nv_drain_tx(struct net_device *dev)
        unsigned int i;
 
        for (i = 0; i < np->tx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                        np->tx_ring.orig[i].flaglen = 0;
-               else
+                       np->tx_ring.orig[i].buf = 0;
+               } else {
                        np->tx_ring.ex[i].flaglen = 0;
-               if (nv_release_txskb(dev, i))
+                       np->tx_ring.ex[i].txvlan = 0;
+                       np->tx_ring.ex[i].bufhigh = 0;
+                       np->tx_ring.ex[i].buflow = 0;
+               }
+               if (nv_release_txskb(dev, &np->tx_skb[i]))
                        np->stats.tx_dropped++;
        }
 }
@@ -1436,18 +1508,24 @@ static void nv_drain_rx(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        int i;
+
        for (i = 0; i < np->rx_ring_size; i++) {
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                        np->rx_ring.orig[i].flaglen = 0;
-               else
+                       np->rx_ring.orig[i].buf = 0;
+               } else {
                        np->rx_ring.ex[i].flaglen = 0;
+                       np->rx_ring.ex[i].txvlan = 0;
+                       np->rx_ring.ex[i].bufhigh = 0;
+                       np->rx_ring.ex[i].buflow = 0;
+               }
                wmb();
-               if (np->rx_skbuff[i]) {
-                       pci_unmap_single(np->pci_dev, np->rx_dma[i],
-                                               np->rx_skbuff[i]->end-np->rx_skbuff[i]->data,
+               if (np->rx_skb[i].skb) {
+                       pci_unmap_single(np->pci_dev, np->rx_skb[i].dma,
+                                               np->rx_skb[i].skb->end-np->rx_skb[i].skb->data,
                                                PCI_DMA_FROMDEVICE);
-                       dev_kfree_skb(np->rx_skbuff[i]);
-                       np->rx_skbuff[i] = NULL;
+                       dev_kfree_skb(np->rx_skb[i].skb);
+                       np->rx_skb[i].skb = NULL;
                }
        }
 }
@@ -1458,6 +1536,11 @@ static void drain_ring(struct net_device *dev)
        nv_drain_rx(dev);
 }
 
+static inline u32 nv_get_empty_tx_slots(struct fe_priv *np)
+{
+       return (u32)(np->tx_ring_size - ((np->tx_ring_size + (np->put_tx_ctx - np->get_tx_ctx)) % np->tx_ring_size));
+}
+
 /*
  * nv_start_xmit: dev->hard_start_xmit function
  * Called with netif_tx_lock held.
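
nv_get_empty_tx_slots() above derives the free-slot count from the two context pointers alone: (put_tx_ctx - get_tx_ctx) counts in-flight descriptors, except that it goes negative once put has wrapped, so ring_size is added before the modulo. A standalone check of the arithmetic on a hypothetical 16-entry ring:

#include <assert.h>
#include <stdint.h>

#define TX_RING_SIZE 16

struct slot { void *skb; };
static struct slot ring[TX_RING_SIZE];

static uint32_t empty_slots(struct slot *put, struct slot *get)
{
	/* Same formula as nv_get_empty_tx_slots(): normalize the pointer
	 * distance into [0, ring_size) and subtract it from the ring size. */
	return (uint32_t)(TX_RING_SIZE -
			  ((TX_RING_SIZE + (put - get)) % TX_RING_SIZE));
}

int main(void)
{
	assert(empty_slots(&ring[0], &ring[0]) == TX_RING_SIZE);     /* idle */
	assert(empty_slots(&ring[5], &ring[2]) == TX_RING_SIZE - 3); /* 3 queued */
	assert(empty_slots(&ring[2], &ring[5]) == 3);                /* wrapped */
	return 0;
}
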
@@ -1468,14 +1551,17 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
        u32 tx_flags = 0;
        u32 tx_flags_extra = (np->desc_ver == DESC_VER_1 ? NV_TX_LASTPACKET : NV_TX2_LASTPACKET);
        unsigned int fragments = skb_shinfo(skb)->nr_frags;
-       unsigned int nr = (np->next_tx - 1) % np->tx_ring_size;
-       unsigned int start_nr = np->next_tx % np->tx_ring_size;
        unsigned int i;
        u32 offset = 0;
        u32 bcnt;
        u32 size = skb->len-skb->data_len;
        u32 entries = (size >> NV_TX2_TSO_MAX_SHIFT) + ((size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
+       u32 empty_slots;
        u32 tx_flags_vlan = 0;
+       union ring_type put_tx;
+       union ring_type start_tx;
+       union ring_type prev_tx;
+       struct nv_skb_map* prev_tx_ctx;
 
        /* add fragments to entries count */
        for (i = 0; i < fragments; i++) {
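
The descriptor budget entries computed above is ceil(size / NV_TX2_TSO_MAX_SIZE) written as a shift plus a remainder test, and the fragment loop adds the same per fragment. A quick standalone check that the shift form matches plain ceiling division -- the shift value 14 is an assumption mirroring the driver's NV_TX2_TSO_MAX_SHIFT:

#include <assert.h>
#include <stdint.h>

#define TSO_MAX_SHIFT 14			/* assumed NV_TX2_TSO_MAX_SHIFT */
#define TSO_MAX_SIZE  (1u << TSO_MAX_SHIFT)	/* NV_TX2_TSO_MAX_SIZE */

static uint32_t entries_for(uint32_t size)
{
	/* quotient, plus one more descriptor if a partial chunk remains */
	return (size >> TSO_MAX_SHIFT) +
	       ((size & (TSO_MAX_SIZE - 1)) ? 1 : 0);
}

int main(void)
{
	uint32_t size;

	for (size = 1; size < 5 * TSO_MAX_SIZE; size += 777)
		assert(entries_for(size) ==
		       (size + TSO_MAX_SIZE - 1) / TSO_MAX_SIZE);
	return 0;
}
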
@@ -1483,34 +1569,47 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                           ((skb_shinfo(skb)->frags[i].size & (NV_TX2_TSO_MAX_SIZE-1)) ? 1 : 0);
        }
 
-       spin_lock_irq(&np->lock);
-
-       if ((np->next_tx - np->nic_tx + entries - 1) > np->tx_limit_stop) {
-               spin_unlock_irq(&np->lock);
+       empty_slots = nv_get_empty_tx_slots(np);
+       if ((empty_slots - np->tx_limit_stop) <= entries) {
+               spin_lock_irq(&np->lock);
                netif_stop_queue(dev);
+               spin_unlock_irq(&np->lock);
                return NETDEV_TX_BUSY;
        }
 
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               start_tx.orig = put_tx.orig = np->put_tx.orig;
+       else
+               start_tx.ex = put_tx.ex = np->put_tx.ex;
+
        /* setup the header buffer */
        do {
+               prev_tx = put_tx;
+               prev_tx_ctx = np->put_tx_ctx;
                bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-               nr = (nr + 1) % np->tx_ring_size;
-
-               np->tx_dma[nr] = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
+               np->put_tx_ctx->dma = pci_map_single(np->pci_dev, skb->data + offset, bcnt,
                                                PCI_DMA_TODEVICE);
-               np->tx_dma_len[nr] = bcnt;
-
+               np->put_tx_ctx->dma_len = bcnt;
                if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       np->tx_ring.orig[nr].buf = cpu_to_le32(np->tx_dma[nr]);
-                       np->tx_ring.orig[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+                       put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
+                       put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
                } else {
-                       np->tx_ring.ex[nr].bufhigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
-                       np->tx_ring.ex[nr].buflow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-                       np->tx_ring.ex[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+                       put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
+                       put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
+                       put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
                }
                tx_flags = np->tx_flags;
                offset += bcnt;
                size -= bcnt;
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (put_tx.orig++ == np->last_tx.orig)
+                               put_tx.orig = np->first_tx.orig;
+               } else {
+                       if (put_tx.ex++ == np->last_tx.ex)
+                               put_tx.ex = np->first_tx.ex;
+               }
+               if (np->put_tx_ctx++ == np->last_tx_ctx)
+                       np->put_tx_ctx = np->first_tx_ctx;
        } while (size);
 
        /* setup the fragments */
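
The descriptor and context cursors above advance with a compare-then-wrap idiom, if (p++ == last) p = first, instead of the old nr = (nr + 1) % ring_size indexing, dropping the modulo from the hot path. A tiny standalone demonstration that the idiom cycles cleanly:

#include <assert.h>

#define RING_SIZE 8

int main(void)
{
	int ring[RING_SIZE];
	int *first = &ring[0];
	int *last = &ring[RING_SIZE - 1];
	int *p = first;
	int steps;

	for (steps = 0; steps < 3 * RING_SIZE; steps++)
		if (p++ == last)	/* post-increment, then wrap */
			p = first;

	assert(p == first);	/* three full laps land back at the start */
	return 0;
}
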
@@ -1520,41 +1619,48 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                offset = 0;
 
                do {
+                       prev_tx = put_tx;
+                       prev_tx_ctx = np->put_tx_ctx;
                        bcnt = (size > NV_TX2_TSO_MAX_SIZE) ? NV_TX2_TSO_MAX_SIZE : size;
-                       nr = (nr + 1) % np->tx_ring_size;
-
-                       np->tx_dma[nr] = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
-                                                     PCI_DMA_TODEVICE);
-                       np->tx_dma_len[nr] = bcnt;
+                       np->put_tx_ctx->dma = pci_map_page(np->pci_dev, frag->page, frag->page_offset+offset, bcnt,
+                                                          PCI_DMA_TODEVICE);
+                       np->put_tx_ctx->dma_len = bcnt;
 
                        if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                               np->tx_ring.orig[nr].buf = cpu_to_le32(np->tx_dma[nr]);
-                               np->tx_ring.orig[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+                               put_tx.orig->buf = cpu_to_le32(np->put_tx_ctx->dma);
+                               put_tx.orig->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
                        } else {
-                               np->tx_ring.ex[nr].bufhigh = cpu_to_le64(np->tx_dma[nr]) >> 32;
-                               np->tx_ring.ex[nr].buflow = cpu_to_le64(np->tx_dma[nr]) & 0x0FFFFFFFF;
-                               np->tx_ring.ex[nr].flaglen = cpu_to_le32((bcnt-1) | tx_flags);
+                               put_tx.ex->bufhigh = cpu_to_le64(np->put_tx_ctx->dma) >> 32;
+                               put_tx.ex->buflow = cpu_to_le64(np->put_tx_ctx->dma) & 0x0FFFFFFFF;
+                               put_tx.ex->flaglen = cpu_to_le32((bcnt-1) | tx_flags);
                        }
                        offset += bcnt;
                        size -= bcnt;
+                       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                               if (put_tx.orig++ == np->last_tx.orig)
+                                       put_tx.orig = np->first_tx.orig;
+                       } else {
+                               if (put_tx.ex++ == np->last_tx.ex)
+                                       put_tx.ex = np->first_tx.ex;
+                       }
+                       if (np->put_tx_ctx++ == np->last_tx_ctx)
+                               np->put_tx_ctx = np->first_tx_ctx;
                } while (size);
        }
 
        /* set last fragment flag  */
-       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               np->tx_ring.orig[nr].flaglen |= cpu_to_le32(tx_flags_extra);
-       } else {
-               np->tx_ring.ex[nr].flaglen |= cpu_to_le32(tx_flags_extra);
-       }
+       if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
+               prev_tx.orig->flaglen |= cpu_to_le32(tx_flags_extra);
+       else
+               prev_tx.ex->flaglen |= cpu_to_le32(tx_flags_extra);
 
-       np->tx_skbuff[nr] = skb;
+       /* save skb in this slot's context area */
+       prev_tx_ctx->skb = skb;
 
-#ifdef NETIF_F_TSO
        if (skb_is_gso(skb))
                tx_flags_extra = NV_TX2_TSO | (skb_shinfo(skb)->gso_size << NV_TX2_TSO_SHIFT);
        else
-#endif
-       tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
+               tx_flags_extra = skb->ip_summed == CHECKSUM_PARTIAL ?
                         NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0;
 
        /* vlan tag */
@@ -1562,16 +1668,22 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                tx_flags_vlan = NV_TX3_VLAN_TAG_PRESENT | vlan_tx_tag_get(skb);
        }
 
+       spin_lock_irq(&np->lock);
+
        /* set tx flags */
        if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-               np->tx_ring.orig[start_nr].flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+               start_tx.orig->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+               np->put_tx.orig = put_tx.orig;
        } else {
-               np->tx_ring.ex[start_nr].txvlan = cpu_to_le32(tx_flags_vlan);
-               np->tx_ring.ex[start_nr].flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+               start_tx.ex->txvlan = cpu_to_le32(tx_flags_vlan);
+               start_tx.ex->flaglen |= cpu_to_le32(tx_flags | tx_flags_extra);
+               np->put_tx.ex = put_tx.ex;
        }
 
-       dprintk(KERN_DEBUG "%s: nv_start_xmit: packet %d (entries %d) queued for transmission. tx_flags_extra: %x\n",
-               dev->name, np->next_tx, entries, tx_flags_extra);
+       spin_unlock_irq(&np->lock);
+
+       dprintk(KERN_DEBUG "%s: nv_start_xmit: entries %d queued for transmission. tx_flags_extra: %x\n",
+               dev->name, entries, tx_flags_extra);
        {
                int j;
                for (j=0; j<64; j++) {
@@ -1582,10 +1694,7 @@ static int nv_start_xmit(struct sk_buff *skb, struct net_device *dev)
                dprintk("\n");
        }
 
-       np->next_tx += entries;
-
        dev->trans_start = jiffies;
-       spin_unlock_irq(&np->lock);
        writel(NVREG_TXRXCTL_KICK|np->txrxctl_bits, get_hwbase(dev) + NvRegTxRxControl);
        pci_push(get_hwbase(dev));
        return NETDEV_TX_OK;
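
This is the tx-locking change the patch is named for: descriptor setup and DMA mapping now run under netif_tx_lock only, and np->lock is taken just long enough to publish the ownership flags and advance np->put_tx. A generic pthread sketch of that stage-then-publish shape -- not driver code, all names illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t ring_lock = PTHREAD_MUTEX_INITIALIZER;
static int put_index;	/* consumer-visible producer cursor */

static void xmit(int entries)
{
	int staged = entries;	/* 1) build descriptors: no ring_lock held */

	pthread_mutex_lock(&ring_lock);	/* 2) publish: short critical section */
	put_index += staged;
	pthread_mutex_unlock(&ring_lock);
}

int main(void)
{
	xmit(3);
	printf("put_index=%d\n", put_index);
	return 0;
}
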
@@ -1600,24 +1709,26 @@ static void nv_tx_done(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
        u32 flags;
-       unsigned int i;
        struct sk_buff *skb;
 
-       while (np->nic_tx != np->next_tx) {
-               i = np->nic_tx % np->tx_ring_size;
-
-               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2)
-                       flags = le32_to_cpu(np->tx_ring.orig[i].flaglen);
-               else
-                       flags = le32_to_cpu(np->tx_ring.ex[i].flaglen);
+       while (1) {
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (np->get_tx.orig == np->put_tx.orig)
+                               break;
+                       flags = le32_to_cpu(np->get_tx.orig->flaglen);
+               } else {
+                       if (np->get_tx.ex == np->put_tx.ex)
+                               break;
+                       flags = le32_to_cpu(np->get_tx.ex->flaglen);
+               }
 
-               dprintk(KERN_DEBUG "%s: nv_tx_done: looking at packet %d, flags 0x%x.\n",
-                                       dev->name, np->nic_tx, flags);
+               dprintk(KERN_DEBUG "%s: nv_tx_done: flags 0x%x.\n",
+                                       dev->name, flags);
                if (flags & NV_TX_VALID)
                        break;
                if (np->desc_ver == DESC_VER_1) {
                        if (flags & NV_TX_LASTPACKET) {
-                               skb = np->tx_skbuff[i];
+                               skb = np->get_tx_ctx->skb;
                                if (flags & (NV_TX_RETRYERROR|NV_TX_CARRIERLOST|NV_TX_LATECOLLISION|
                                             NV_TX_UNDERFLOW|NV_TX_ERROR)) {
                                        if (flags & NV_TX_UNDERFLOW)
@@ -1632,7 +1743,7 @@ static void nv_tx_done(struct net_device *dev)
                        }
                } else {
                        if (flags & NV_TX2_LASTPACKET) {
-                               skb = np->tx_skbuff[i];
+                               skb = np->get_tx_ctx->skb;
                                if (flags & (NV_TX2_RETRYERROR|NV_TX2_CARRIERLOST|NV_TX2_LATECOLLISION|
                                             NV_TX2_UNDERFLOW|NV_TX2_ERROR)) {
                                        if (flags & NV_TX2_UNDERFLOW)
@@ -1646,10 +1757,18 @@ static void nv_tx_done(struct net_device *dev)
                                }
                        }
                }
-               nv_release_txskb(dev, i);
-               np->nic_tx++;
+               nv_release_txskb(dev, np->get_tx_ctx);
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (np->get_tx.orig++ == np->last_tx.orig)
+                               np->get_tx.orig = np->first_tx.orig;
+               } else {
+                       if (np->get_tx.ex++ == np->last_tx.ex)
+                               np->get_tx.ex = np->first_tx.ex;
+               }
+               if (np->get_tx_ctx++ == np->last_tx_ctx)
+                       np->get_tx_ctx = np->first_tx_ctx;
        }
-       if (np->next_tx - np->nic_tx < np->tx_limit_start)
+       if (nv_get_empty_tx_slots(np) > np->tx_limit_start)
                netif_wake_queue(dev);
 }
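
nv_tx_done's walk is now bounded by two conditions: it stops when get_tx catches put_tx (nothing outstanding) or as soon as a descriptor still carries NV_TX_VALID (hardware still owns it). A compact index-based model of that loop, with (1 << 31) assumed for the VALID bit:

#include <stdint.h>
#include <stdio.h>

#define RING  8
#define VALID (1u << 31)	/* assumed NV_TX_VALID */

int main(void)
{
	/* slots 0 and 1 completed; slot 2 still owned by hardware */
	uint32_t flags[RING] = { 0, 0, VALID, VALID, VALID, 0, 0, 0 };
	int get = 0, put = 5;	/* illustrative cursor positions */
	int completed = 0;

	while (get != put) {
		if (flags[get] & VALID)
			break;			/* hardware still owns it */
		completed++;			/* release skb + DMA mapping */
		get = (get + 1) % RING;
	}
	printf("reclaimed %d descriptors\n", completed);	/* prints 2 */
	return 0;
}
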
 
@@ -1673,9 +1792,8 @@ static void nv_tx_timeout(struct net_device *dev)
        {
                int i;
 
-               printk(KERN_INFO "%s: Ring at %lx: next %d nic %d\n",
-                               dev->name, (unsigned long)np->ring_addr,
-                               np->next_tx, np->nic_tx);
+               printk(KERN_INFO "%s: Ring at %lx\n",
+                      dev->name, (unsigned long)np->ring_addr);
                printk(KERN_INFO "%s: Dumping tx registers\n", dev->name);
                for (i=0;i<=np->register_size;i+= 32) {
                        printk(KERN_INFO "%3x: %08x %08x %08x %08x %08x %08x %08x %08x\n",
@@ -1726,10 +1844,10 @@ static void nv_tx_timeout(struct net_device *dev)
        nv_tx_done(dev);
 
        /* 3) if there are dead entries: clear everything */
-       if (np->next_tx != np->nic_tx) {
+       if (np->get_tx_ctx != np->put_tx_ctx) {
                printk(KERN_DEBUG "%s: tx_timeout: dead entries!\n", dev->name);
                nv_drain_tx(dev);
-               np->next_tx = np->nic_tx = 0;
+               nv_init_tx(dev);
                setup_hw_rings(dev, NV_SETUP_TX_RING);
                netif_wake_queue(dev);
        }
@@ -1802,22 +1920,22 @@ static int nv_rx_process(struct net_device *dev, int limit)
        for (count = 0; count < limit; ++count) {
                struct sk_buff *skb;
                int len;
-               int i;
-               if (np->cur_rx - np->refill_rx >= np->rx_ring_size)
-                       break;  /* we scanned the whole ring - do not continue */
 
-               i = np->cur_rx % np->rx_ring_size;
                if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
-                       flags = le32_to_cpu(np->rx_ring.orig[i].flaglen);
-                       len = nv_descr_getlength(&np->rx_ring.orig[i], np->desc_ver);
+                       if (np->get_rx.orig == np->put_rx.orig)
+                               break;  /* we scanned the whole ring - do not continue */
+                       flags = le32_to_cpu(np->get_rx.orig->flaglen);
+                       len = nv_descr_getlength(np->get_rx.orig, np->desc_ver);
                } else {
-                       flags = le32_to_cpu(np->rx_ring.ex[i].flaglen);
-                       len = nv_descr_getlength_ex(&np->rx_ring.ex[i], np->desc_ver);
-                       vlanflags = le32_to_cpu(np->rx_ring.ex[i].buflow);
+                       if (np->get_rx.ex == np->put_rx.ex)
+                               break;  /* we scanned the whole ring - do not continue */
+                       flags = le32_to_cpu(np->get_rx.ex->flaglen);
+                       len = nv_descr_getlength_ex(np->get_rx.ex, np->desc_ver);
+                       vlanflags = le32_to_cpu(np->get_rx.ex->buflow);
                }
 
-               dprintk(KERN_DEBUG "%s: nv_rx_process: looking at packet %d, flags 0x%x.\n",
-                                       dev->name, np->cur_rx, flags);
+               dprintk(KERN_DEBUG "%s: nv_rx_process: flags 0x%x.\n",
+                                       dev->name, flags);
 
                if (flags & NV_RX_AVAIL)
                        break;  /* still owned by hardware, */
@@ -1827,8 +1945,8 @@ static int nv_rx_process(struct net_device *dev, int limit)
                 * TODO: check if a prefetch of the first cacheline improves
                 * the performance.
                 */
-               pci_unmap_single(np->pci_dev, np->rx_dma[i],
-                               np->rx_skbuff[i]->end-np->rx_skbuff[i]->data,
+               pci_unmap_single(np->pci_dev, np->get_rx_ctx->dma,
+                               np->get_rx_ctx->dma_len,
                                PCI_DMA_FROMDEVICE);
 
                {
@@ -1837,7 +1955,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
                        for (j=0; j<64; j++) {
                                if ((j%16) == 0)
                                        dprintk("\n%03x:", j);
-                               dprintk(" %02x", ((unsigned char*)np->rx_skbuff[i]->data)[j]);
+                               dprintk(" %02x", ((unsigned char*)np->get_rx_ctx->skb->data)[j]);
                        }
                        dprintk("\n");
                }
@@ -1867,7 +1985,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
                                        goto next_pkt;
                                }
                                if (flags & NV_RX_ERROR4) {
-                                       len = nv_getlen(dev, np->rx_skbuff[i]->data, len);
+                                       len = nv_getlen(dev, np->get_rx_ctx->skb->data, len);
                                        if (len < 0) {
                                                np->stats.rx_errors++;
                                                goto next_pkt;
@@ -1900,7 +2018,7 @@ static int nv_rx_process(struct net_device *dev, int limit)
                                        goto next_pkt;
                                }
                                if (flags & NV_RX2_ERROR4) {
-                                       len = nv_getlen(dev, np->rx_skbuff[i]->data, len);
+                                       len = nv_getlen(dev, np->get_rx_ctx->skb->data, len);
                                        if (len < 0) {
                                                np->stats.rx_errors++;
                                                goto next_pkt;
@@ -1919,20 +2037,20 @@ static int nv_rx_process(struct net_device *dev, int limit)
                                    flags == NV_RX2_CHECKSUMOK2 ||
                                    flags == NV_RX2_CHECKSUMOK3) {
                                        dprintk(KERN_DEBUG "%s: hw checksum hit!.\n", dev->name);
-                                       np->rx_skbuff[i]->ip_summed = CHECKSUM_UNNECESSARY;
+                                       np->get_rx_ctx->skb->ip_summed = CHECKSUM_UNNECESSARY;
                                } else {
                                        dprintk(KERN_DEBUG "%s: hwchecksum miss!.\n", dev->name);
                                }
                        }
                }
                /* got a valid packet - forward it to the network core */
-               skb = np->rx_skbuff[i];
-               np->rx_skbuff[i] = NULL;
+               skb = np->get_rx_ctx->skb;
+               np->get_rx_ctx->skb = NULL;
 
                skb_put(skb, len);
                skb->protocol = eth_type_trans(skb, dev);
-               dprintk(KERN_DEBUG "%s: nv_rx_process: packet %d with %d bytes, proto %d accepted.\n",
-                                       dev->name, np->cur_rx, len, skb->protocol);
+               dprintk(KERN_DEBUG "%s: nv_rx_process: %d bytes, proto %d accepted.\n",
+                                       dev->name, len, skb->protocol);
 #ifdef CONFIG_FORCEDETH_NAPI
                if (np->vlangrp && (vlanflags & NV_RX3_VLAN_TAG_PRESENT))
                        vlan_hwaccel_receive_skb(skb, np->vlangrp,
@@ -1950,7 +2068,15 @@ static int nv_rx_process(struct net_device *dev, int limit)
                np->stats.rx_packets++;
                np->stats.rx_bytes += len;
 next_pkt:
-               np->cur_rx++;
+               if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
+                       if (np->get_rx.orig++ == np->last_rx.orig)
+                               np->get_rx.orig = np->first_rx.orig;
+               } else {
+                       if (np->get_rx.ex++ == np->last_rx.ex)
+                               np->get_rx.ex = np->first_rx.ex;
+               }
+               if (np->get_rx_ctx++ == np->last_rx_ctx)
+                       np->get_rx_ctx = np->first_rx_ctx;
        }
 
        return count;
@@ -3438,7 +3564,7 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
 {
        struct fe_priv *np = netdev_priv(dev);
        u8 __iomem *base = get_hwbase(dev);
-       u8 *rxtx_ring, *rx_skbuff, *tx_skbuff, *rx_dma, *tx_dma, *tx_dma_len;
+       u8 *rxtx_ring, *rx_skbuff, *tx_skbuff;
        dma_addr_t ring_addr;
 
        if (ring->rx_pending < RX_RING_MIN ||
@@ -3464,12 +3590,9 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                                            sizeof(struct ring_desc_ex) * (ring->rx_pending + ring->tx_pending),
                                            &ring_addr);
        }
-       rx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->rx_pending, GFP_KERNEL);
-       rx_dma = kmalloc(sizeof(dma_addr_t) * ring->rx_pending, GFP_KERNEL);
-       tx_skbuff = kmalloc(sizeof(struct sk_buff*) * ring->tx_pending, GFP_KERNEL);
-       tx_dma = kmalloc(sizeof(dma_addr_t) * ring->tx_pending, GFP_KERNEL);
-       tx_dma_len = kmalloc(sizeof(unsigned int) * ring->tx_pending, GFP_KERNEL);
-       if (!rxtx_ring || !rx_skbuff || !rx_dma || !tx_skbuff || !tx_dma || !tx_dma_len) {
+       rx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->rx_pending, GFP_KERNEL);
+       tx_skbuff = kmalloc(sizeof(struct nv_skb_map) * ring->tx_pending, GFP_KERNEL);
+       if (!rxtx_ring || !rx_skbuff || !tx_skbuff) {
                /* fall back to old rings */
                if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                        if (rxtx_ring)
@@ -3482,14 +3605,8 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                }
                if (rx_skbuff)
                        kfree(rx_skbuff);
-               if (rx_dma)
-                       kfree(rx_dma);
                if (tx_skbuff)
                        kfree(tx_skbuff);
-               if (tx_dma)
-                       kfree(tx_dma);
-               if (tx_dma_len)
-                       kfree(tx_dma_len);
                goto exit;
        }
 
@@ -3511,8 +3628,8 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
        /* set new values */
        np->rx_ring_size = ring->rx_pending;
        np->tx_ring_size = ring->tx_pending;
-       np->tx_limit_stop = ring->tx_pending - TX_LIMIT_DIFFERENCE;
-       np->tx_limit_start = ring->tx_pending - TX_LIMIT_DIFFERENCE - 1;
+       np->tx_limit_stop = TX_LIMIT_DIFFERENCE;
+       np->tx_limit_start = TX_LIMIT_DIFFERENCE;
        if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                np->rx_ring.orig = (struct ring_desc*)rxtx_ring;
                np->tx_ring.orig = &np->rx_ring.orig[np->rx_ring_size];
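
Here and in nv_probe() below, tx_limit_stop/tx_limit_start flip meaning: they used to be thresholds on the in-flight count (ring size minus TX_LIMIT_DIFFERENCE) and are now a count of free slots held in reserve, to pair with nv_get_empty_tx_slots(). Keeping a reserve also matters because a completely full ring would look empty to that helper. A standalone check that the new admission test always leaves slots free, with DIFF assumed to be the driver's TX_LIMIT_DIFFERENCE:

#include <assert.h>

#define RING 64
#define DIFF 1	/* assumed TX_LIMIT_DIFFERENCE */

int main(void)
{
	int empty, entries;

	/* nv_start_xmit() queues only when (empty - DIFF) > entries; any
	 * frame it admits therefore leaves at least DIFF + 1 slots free,
	 * so put_tx can never wrap onto get_tx. */
	for (empty = 1; empty <= RING; empty++)
		for (entries = 1; entries <= RING; entries++)
			if ((empty - DIFF) > entries)
				assert(empty - entries >= DIFF + 1);
	return 0;
}
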
@@ -3520,18 +3637,12 @@ static int nv_set_ringparam(struct net_device *dev, struct ethtool_ringparam* ri
                np->rx_ring.ex = (struct ring_desc_ex*)rxtx_ring;
                np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
        }
-       np->rx_skbuff = (struct sk_buff**)rx_skbuff;
-       np->rx_dma = (dma_addr_t*)rx_dma;
-       np->tx_skbuff = (struct sk_buff**)tx_skbuff;
-       np->tx_dma = (dma_addr_t*)tx_dma;
-       np->tx_dma_len = (unsigned int*)tx_dma_len;
+       np->rx_skb = (struct nv_skb_map*)rx_skbuff;
+       np->tx_skb = (struct nv_skb_map*)tx_skbuff;
        np->ring_addr = ring_addr;
 
-       memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size);
-       memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size);
-       memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
-       memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
-       memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
+       memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size);
+       memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size);
 
        if (netif_running(dev)) {
                /* reinit driver view of the queues */
@@ -3928,7 +4039,7 @@ static int nv_loopback_test(struct net_device *dev)
                        dprintk(KERN_DEBUG "%s: loopback len mismatch %d vs %d\n",
                                dev->name, len, pkt_len);
                } else {
-                       rx_skb = np->rx_skbuff[0];
+                       rx_skb = np->rx_skb[0].skb;
                        for (i = 0; i < pkt_len; i++) {
                                if (rx_skb->data[i] != (u8)(i & 0xff)) {
                                        ret = 0;
@@ -4148,20 +4259,6 @@ static int nv_mgmt_acquire_sema(struct net_device *dev)
        return 0;
 }
 
-/* Indicate to mgmt unit whether driver is loaded or not */
-static void nv_mgmt_driver_loaded(struct net_device *dev, int loaded)
-{
-       u8 __iomem *base = get_hwbase(dev);
-       u32 tx_ctrl;
-
-       tx_ctrl = readl(base + NvRegTransmitterControl);
-       if (loaded)
-               tx_ctrl |= NVREG_XMITCTL_HOST_LOADED;
-       else
-               tx_ctrl &= ~NVREG_XMITCTL_HOST_LOADED;
-       writel(tx_ctrl, base + NvRegTransmitterControl);
-}
-
 static int nv_open(struct net_device *dev)
 {
        struct fe_priv *np = netdev_priv(dev);
@@ -4462,9 +4559,7 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                np->rx_csum = 1;
                np->txrxctl_bits |= NVREG_TXRXCTL_RXCHECK;
                dev->features |= NETIF_F_HW_CSUM | NETIF_F_SG;
-#ifdef NETIF_F_TSO
                dev->features |= NETIF_F_TSO;
-#endif
        }
 
        np->vlanctl_bits = 0;
@@ -4499,8 +4594,8 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 
        np->rx_ring_size = RX_RING_DEFAULT;
        np->tx_ring_size = TX_RING_DEFAULT;
-       np->tx_limit_stop = np->tx_ring_size - TX_LIMIT_DIFFERENCE;
-       np->tx_limit_start = np->tx_ring_size - TX_LIMIT_DIFFERENCE - 1;
+       np->tx_limit_stop = TX_LIMIT_DIFFERENCE;
+       np->tx_limit_start = TX_LIMIT_DIFFERENCE;
 
        if (np->desc_ver == DESC_VER_1 || np->desc_ver == DESC_VER_2) {
                np->rx_ring.orig = pci_alloc_consistent(pci_dev,
@@ -4517,18 +4612,12 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
                        goto out_unmap;
                np->tx_ring.ex = &np->rx_ring.ex[np->rx_ring_size];
        }
-       np->rx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->rx_ring_size, GFP_KERNEL);
-       np->rx_dma = kmalloc(sizeof(dma_addr_t) * np->rx_ring_size, GFP_KERNEL);
-       np->tx_skbuff = kmalloc(sizeof(struct sk_buff*) * np->tx_ring_size, GFP_KERNEL);
-       np->tx_dma = kmalloc(sizeof(dma_addr_t) * np->tx_ring_size, GFP_KERNEL);
-       np->tx_dma_len = kmalloc(sizeof(unsigned int) * np->tx_ring_size, GFP_KERNEL);
-       if (!np->rx_skbuff || !np->rx_dma || !np->tx_skbuff || !np->tx_dma || !np->tx_dma_len)
+       np->rx_skb = kmalloc(sizeof(struct nv_skb_map) * np->rx_ring_size, GFP_KERNEL);
+       np->tx_skb = kmalloc(sizeof(struct nv_skb_map) * np->tx_ring_size, GFP_KERNEL);
+       if (!np->rx_skb || !np->tx_skb)
                goto out_freering;
-       memset(np->rx_skbuff, 0, sizeof(struct sk_buff*) * np->rx_ring_size);
-       memset(np->rx_dma, 0, sizeof(dma_addr_t) * np->rx_ring_size);
-       memset(np->tx_skbuff, 0, sizeof(struct sk_buff*) * np->tx_ring_size);
-       memset(np->tx_dma, 0, sizeof(dma_addr_t) * np->tx_ring_size);
-       memset(np->tx_dma_len, 0, sizeof(unsigned int) * np->tx_ring_size);
+       memset(np->rx_skb, 0, sizeof(struct nv_skb_map) * np->rx_ring_size);
+       memset(np->tx_skb, 0, sizeof(struct nv_skb_map) * np->tx_ring_size);
 
        dev->open = nv_open;
        dev->stop = nv_close;
@@ -4659,33 +4748,24 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        writel(NVREG_MIISTAT_MASK, base + NvRegMIIStatus);
 
        if (id->driver_data & DEV_HAS_MGMT_UNIT) {
-               writel(0x1, base + 0x204); pci_push(base);
-               msleep(500);
                /* management unit running on the mac? */
-               np->mac_in_use = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_MGMT_ST;
-               if (np->mac_in_use) {
-                       u32 mgmt_sync;
-                       /* management unit setup the phy already? */
-                       mgmt_sync = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK;
-                       if (mgmt_sync == NVREG_XMITCTL_SYNC_NOT_READY) {
-                               if (!nv_mgmt_acquire_sema(dev)) {
-                                       for (i = 0; i < 5000; i++) {
-                                               msleep(1);
-                                               mgmt_sync = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK;
-                                               if (mgmt_sync == NVREG_XMITCTL_SYNC_NOT_READY)
-                                                       continue;
-                                               if (mgmt_sync == NVREG_XMITCTL_SYNC_PHY_INIT)
-                                                       phyinitialized = 1;
-                                               break;
+               if (readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_PHY_INIT) {
+                       np->mac_in_use = readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_MGMT_ST;
+                       dprintk(KERN_INFO "%s: mgmt unit is running. mac in use %x.\n", pci_name(pci_dev), np->mac_in_use);
+                       for (i = 0; i < 5000; i++) {
+                               msleep(1);
+                               if (nv_mgmt_acquire_sema(dev)) {
+                                       /* management unit setup the phy already? */
+                                       if ((readl(base + NvRegTransmitterControl) & NVREG_XMITCTL_SYNC_MASK) ==
+                                           NVREG_XMITCTL_SYNC_PHY_INIT) {
+                                               /* phy is inited by mgmt unit */
+                                               phyinitialized = 1;
+                                               dprintk(KERN_INFO "%s: Phy already initialized by mgmt unit.\n", pci_name(pci_dev));
+                                       } else {
+                                               /* we need to init the phy */
                                        }
-                               } else {
-                                       /* we need to init the phy */
+                                       break;
                                }
-                       } else if (mgmt_sync == NVREG_XMITCTL_SYNC_PHY_INIT) {
-                               /* phy is inited by SMU */
-                               phyinitialized = 1;
-                       } else {
-                               /* we need to init the phy */
                        }
                }
        }
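
The probe path now retries nv_mgmt_acquire_sema() once per millisecond for up to five seconds before reading the SYNC state to decide whether the management unit already brought up the PHY. The general shape of that bounded retry, with try_acquire() as a purely hypothetical stand-in:

#include <stdbool.h>
#include <stdio.h>

/* hypothetical stand-in for nv_mgmt_acquire_sema() */
static bool try_acquire(void)
{
	static int calls;

	return ++calls >= 3;	/* pretend the third poll wins the semaphore */
}

int main(void)
{
	bool acquired = false;
	int i;

	for (i = 0; i < 5000; i++) {	/* the driver msleep(1)s per pass */
		if (try_acquire()) {
			acquired = true;
			break;	/* safe to check NVREG_XMITCTL_SYNC_PHY_INIT */
		}
	}
	printf("acquired=%d after %d polls\n", acquired, i + 1);
	return 0;
}
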
@@ -4724,10 +4804,12 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
        if (!phyinitialized) {
                /* reset it */
                phy_init(dev);
-       }
-
-       if (id->driver_data & DEV_HAS_MGMT_UNIT) {
-               nv_mgmt_driver_loaded(dev, 1);
+       } else {
+               /* see if it is a gigabit phy */
+               u32 mii_status = mii_rw(dev, np->phyaddr, MII_BMSR, MII_READ);
+               if (mii_status & PHY_GIGABIT) {
+                       np->gigabit = PHY_GIGABIT;
+               }
        }
 
        /* set default link speed settings */
@@ -4749,8 +4831,6 @@ static int __devinit nv_probe(struct pci_dev *pci_dev, const struct pci_device_i
 out_error:
        if (phystate_orig)
                writel(phystate|NVREG_ADAPTCTL_RUNNING, base + NvRegAdapterControl);
-       if (np->mac_in_use)
-               nv_mgmt_driver_loaded(dev, 0);
        pci_set_drvdata(pci_dev, NULL);
 out_freering:
        free_rings(dev);
@@ -4780,9 +4860,6 @@ static void __devexit nv_remove(struct pci_dev *pci_dev)
        writel(np->orig_mac[0], base + NvRegMacAddrA);
        writel(np->orig_mac[1], base + NvRegMacAddrB);
 
-       if (np->mac_in_use)
-               nv_mgmt_driver_loaded(dev, 0);
-
        /* free all structures */
        free_rings(dev);
        iounmap(get_hwbase(dev));