nv-tegra.nvidia Code Review - linux-3.10.git/blobdiff - drivers/net/chelsio/sge.c
chelsio: NAPI speed improvement
[linux-3.10.git] / drivers / net / chelsio / sge.c
index 6b1e857ee07eb63c84d470c075849bc6bf83df7a..8e287e79e4e3988c89ece043969fcb4e6258cb07 100644 (file)
  */
 #define TX_RECLAIM_PERIOD (HZ / 4)
 
-#ifndef NET_IP_ALIGN
-# define NET_IP_ALIGN 2
-#endif
-
 #define M_CMD_LEN       0x7fffffff
 #define V_CMD_LEN(v)    (v)
 #define G_CMD_LEN(v)    ((v) & M_CMD_LEN)
@@ -575,11 +571,10 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
                q->size = p->freelQ_size[i];
                q->dma_offset = sge->rx_pkt_pad ? 0 : NET_IP_ALIGN;
                size = sizeof(struct freelQ_e) * q->size;
-               q->entries = (struct freelQ_e *)
-                             pci_alloc_consistent(pdev, size, &q->dma_addr);
+               q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
                if (!q->entries)
                        goto err_no_mem;
-               memset(q->entries, 0, size);
+
                size = sizeof(struct freelQ_ce) * q->size;
                q->centries = kzalloc(size, GFP_KERNEL);
                if (!q->centries)
@@ -613,11 +608,10 @@ static int alloc_rx_resources(struct sge *sge, struct sge_params *p)
        sge->respQ.size = SGE_RESPQ_E_N;
        sge->respQ.credits = 0;
        size = sizeof(struct respQ_e) * sge->respQ.size;
-       sge->respQ.entries = (struct respQ_e *)
+       sge->respQ.entries =
                pci_alloc_consistent(pdev, size, &sge->respQ.dma_addr);
        if (!sge->respQ.entries)
                goto err_no_mem;
-       memset(sge->respQ.entries, 0, size);
        return 0;
 
 err_no_mem:
@@ -637,20 +631,12 @@ static void free_cmdQ_buffers(struct sge *sge, struct cmdQ *q, unsigned int n)
        q->in_use -= n;
        ce = &q->centries[cidx];
        while (n--) {
-               if (q->sop) {
-                       if (likely(pci_unmap_len(ce, dma_len))) {
-                               pci_unmap_single(pdev,
-                                                pci_unmap_addr(ce, dma_addr),
-                                                pci_unmap_len(ce, dma_len),
-                                                PCI_DMA_TODEVICE);
+               if (likely(pci_unmap_len(ce, dma_len))) {
+                       pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
+                                        pci_unmap_len(ce, dma_len),
+                                        PCI_DMA_TODEVICE);
+                       if (q->sop)
                                q->sop = 0;
-                       }
-               } else {
-                       if (likely(pci_unmap_len(ce, dma_len))) {
-                               pci_unmap_page(pdev, pci_unmap_addr(ce, dma_addr),
-                                              pci_unmap_len(ce, dma_len),
-                                              PCI_DMA_TODEVICE);
-                       }
                }
                if (ce->skb) {
                        dev_kfree_skb_any(ce->skb);
@@ -711,11 +697,10 @@ static int alloc_tx_resources(struct sge *sge, struct sge_params *p)
                q->stop_thres = 0;
                spin_lock_init(&q->lock);
                size = sizeof(struct cmdQ_e) * q->size;
-               q->entries = (struct cmdQ_e *)
-                             pci_alloc_consistent(pdev, size, &q->dma_addr);
+               q->entries = pci_alloc_consistent(pdev, size, &q->dma_addr);
                if (!q->entries)
                        goto err_no_mem;
-               memset(q->entries, 0, size);
+
                size = sizeof(struct cmdQ_ce) * q->size;
                q->centries = kzalloc(size, GFP_KERNEL);
                if (!q->centries)
@@ -1447,19 +1432,18 @@ static inline int enough_free_Tx_descs(const struct cmdQ *q)
 static void restart_tx_queues(struct sge *sge)
 {
        struct adapter *adap = sge->adapter;
+       int i;
 
-       if (enough_free_Tx_descs(&sge->cmdQ[0])) {
-               int i;
+       if (!enough_free_Tx_descs(&sge->cmdQ[0]))
+               return;
 
-               for_each_port(adap, i) {
-                       struct net_device *nd = adap->port[i].dev;
+       for_each_port(adap, i) {
+               struct net_device *nd = adap->port[i].dev;
 
-                       if (test_and_clear_bit(nd->if_port,
-                                              &sge->stopped_tx_queues) &&
-                           netif_running(nd)) {
-                               sge->stats.cmdQ_restarted[2]++;
-                               netif_wake_queue(nd);
-                       }
+               if (test_and_clear_bit(nd->if_port, &sge->stopped_tx_queues) &&
+                   netif_running(nd)) {
+                       sge->stats.cmdQ_restarted[2]++;
+                       netif_wake_queue(nd);
                }
        }
 }
@@ -1575,6 +1559,14 @@ static int process_responses(struct adapter *adapter, int budget)
        return budget;
 }
 
+static inline int responses_pending(const struct adapter *adapter)
+{
+       const struct respQ *Q = &adapter->sge->respQ;
+       const struct respQ_e *e = &Q->entries[Q->cidx];
+
+       return (e->GenerationBit == Q->genbit);
+}
+
 #ifdef CONFIG_CHELSIO_T1_NAPI
 /*
  * A simpler version of process_responses() that handles only pure (i.e.,
@@ -1584,13 +1576,16 @@ static int process_responses(struct adapter *adapter, int budget)
  * which the caller must ensure is a valid pure response.  Returns 1 if it
  * encounters a valid data-carrying response, 0 otherwise.
  */
-static int process_pure_responses(struct adapter *adapter, struct respQ_e *e)
+static int process_pure_responses(struct adapter *adapter)
 {
        struct sge *sge = adapter->sge;
        struct respQ *q = &sge->respQ;
+       struct respQ_e *e = &q->entries[q->cidx];
        unsigned int flags = 0;
        unsigned int cmdq_processed[SGE_CMDQ_N] = {0, 0};
 
+       if (e->DataValid)
+               return 1;
        do {
                flags |= e->Qsleeping;
 
@@ -1626,23 +1621,20 @@ static int process_pure_responses(struct adapter *adapter, struct respQ_e *e)
 int t1_poll(struct net_device *dev, int *budget)
 {
        struct adapter *adapter = dev->priv;
-       int effective_budget = min(*budget, dev->quota);
-       int work_done = process_responses(adapter, effective_budget);
+       int work_done;
 
+       work_done = process_responses(adapter, min(*budget, dev->quota));
        *budget -= work_done;
        dev->quota -= work_done;
 
-       if (work_done >= effective_budget)
+       if (unlikely(responses_pending(adapter)))
                return 1;
 
-       spin_lock_irq(&adapter->async_lock);
-       __netif_rx_complete(dev);
+       netif_rx_complete(dev);
        writel(adapter->sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
-       writel(adapter->slow_intr_mask | F_PL_INTR_SGE_DATA,
-              adapter->regs + A_PL_ENABLE);
-       spin_unlock_irq(&adapter->async_lock);
 
        return 0;
+
 }
 
 /*
@@ -1651,44 +1643,33 @@ int t1_poll(struct net_device *dev, int *budget)
 irqreturn_t t1_interrupt(int irq, void *data)
 {
        struct adapter *adapter = data;
-       struct net_device *dev = adapter->sge->netdev;
        struct sge *sge = adapter->sge;
-       u32 cause;
-       int handled = 0;
+       int handled;
 
-       cause = readl(adapter->regs + A_PL_CAUSE);
-       if (cause == 0 || cause == ~0)
-               return IRQ_NONE;
-
-       spin_lock(&adapter->async_lock);
-       if (cause & F_PL_INTR_SGE_DATA) {
-               struct respQ *q = &adapter->sge->respQ;
-               struct respQ_e *e = &q->entries[q->cidx];
+       if (likely(responses_pending(adapter))) {
+               struct net_device *dev = sge->netdev;
 
-               handled = 1;
                writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
 
-               if (e->GenerationBit == q->genbit &&
-                   __netif_rx_schedule_prep(dev)) {
-                       if (e->DataValid || process_pure_responses(adapter, e)) {
-                               /* mask off data IRQ */
-                               writel(adapter->slow_intr_mask,
-                                      adapter->regs + A_PL_ENABLE);
-                               __netif_rx_schedule(sge->netdev);
-                               goto unlock;
+               if (__netif_rx_schedule_prep(dev)) {
+                       if (process_pure_responses(adapter))
+                               __netif_rx_schedule(dev);
+                       else {
+                               /* no data, no NAPI needed */
+                               writel(sge->respQ.cidx, adapter->regs + A_SG_SLEEPING);
+                               netif_poll_enable(dev); /* undo schedule_prep */
                        }
-                       /* no data, no NAPI needed */
-                       netif_poll_enable(dev);
-
                }
-               writel(q->cidx, adapter->regs + A_SG_SLEEPING);
-       } else
-               handled = t1_slow_intr_handler(adapter);
+               return IRQ_HANDLED;
+       }
+
+       spin_lock(&adapter->async_lock);
+       handled = t1_slow_intr_handler(adapter);
+       spin_unlock(&adapter->async_lock);
 
        if (!handled)
                sge->stats.unhandled_irqs++;
-unlock:
-       spin_unlock(&adapter->async_lock);
+
        return IRQ_RETVAL(handled != 0);
 }
 
@@ -1711,17 +1692,13 @@ unlock:
 irqreturn_t t1_interrupt(int irq, void *cookie)
 {
        int work_done;
-       struct respQ_e *e;
        struct adapter *adapter = cookie;
-       struct respQ *Q = &adapter->sge->respQ;
 
        spin_lock(&adapter->async_lock);
-       e = &Q->entries[Q->cidx];
-       prefetch(e);
 
        writel(F_PL_INTR_SGE_DATA, adapter->regs + A_PL_CAUSE);
 
-       if (likely(e->GenerationBit == Q->genbit))
+       if (likely(responses_pending(adapter)))
                work_done = process_responses(adapter, -1);
        else
                work_done = t1_slow_intr_handler(adapter);
@@ -2195,9 +2172,8 @@ struct sge * __devinit t1_sge_create(struct adapter *adapter,
                if (adapter->params.nports > 1) {
                        tx_sched_init(sge);
                        sge->espibug_timer.function = espibug_workaround_t204;
-               } else {
+               } else
                        sge->espibug_timer.function = espibug_workaround;
-               }
                sge->espibug_timer.data = (unsigned long)sge->adapter;
 
                sge->espibug_timeout = 1;