net/mlx4_en: Saving mem access on data path
Yevgeny Petrilin [Tue, 6 Mar 2012 04:03:34 +0000 (04:03 +0000)]
Localized the pdev->dev, and using dma_map instead of pci_map
There are multiple map/unmap operations on data path,
optimizing those by saving redundant pointer access.
Those places were identified as hot-spots when running kernel profiling
during some benchmarks.
The fixes had most impact when testing packet rate with small packets,
reducing several % from CPU load, and in some case being the difference
between reaching wire speed or being CPU bound.

Signed-off-by: Yevgeny Petrilin <yevgenyp@mellanox.co.il>
Signed-off-by: David S. Miller <davem@davemloft.net>

drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/en_rx.c
drivers/net/ethernet/mellanox/mlx4/en_tx.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h

index 9fe4f94..31b455a 100644 (file)
@@ -1062,6 +1062,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
        memset(priv, 0, sizeof(struct mlx4_en_priv));
        priv->dev = dev;
        priv->mdev = mdev;
+       priv->ddev = &mdev->pdev->dev;
        priv->prof = prof;
        priv->port = port;
        priv->port_up = false;
index d703ef2..c881712 100644 (file)
@@ -48,7 +48,6 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
                              struct mlx4_en_rx_alloc *ring_alloc,
                              int i)
 {
-       struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_frag_info *frag_info = &priv->frag_info[i];
        struct mlx4_en_rx_alloc *page_alloc = &ring_alloc[i];
        struct page *page;
@@ -72,7 +71,7 @@ static int mlx4_en_alloc_frag(struct mlx4_en_priv *priv,
                skb_frags[i].offset = page_alloc->offset;
                page_alloc->offset += frag_info->frag_stride;
        }
-       dma = pci_map_single(mdev->pdev, page_address(skb_frags[i].page) +
+       dma = dma_map_single(priv->ddev, page_address(skb_frags[i].page) +
                             skb_frags[i].offset, frag_info->frag_size,
                             PCI_DMA_FROMDEVICE);
        rx_desc->data[i].addr = cpu_to_be64(dma);
@@ -186,7 +185,6 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
                                 struct mlx4_en_rx_ring *ring,
                                 int index)
 {
-       struct mlx4_en_dev *mdev = priv->mdev;
        struct page_frag *skb_frags;
        struct mlx4_en_rx_desc *rx_desc = ring->buf + (index << ring->log_stride);
        dma_addr_t dma;
@@ -198,7 +196,7 @@ static void mlx4_en_free_rx_desc(struct mlx4_en_priv *priv,
                dma = be64_to_cpu(rx_desc->data[nr].addr);
 
                en_dbg(DRV, priv, "Unmapping buffer at dma:0x%llx\n", (u64) dma);
-               pci_unmap_single(mdev->pdev, dma, skb_frags[nr].size,
+               dma_unmap_single(priv->ddev, dma, skb_frags[nr].size,
                                 PCI_DMA_FROMDEVICE);
                put_page(skb_frags[nr].page);
        }
@@ -412,7 +410,6 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
                                    int length)
 {
        struct skb_frag_struct *skb_frags_rx = skb_shinfo(skb)->frags;
-       struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_frag_info *frag_info;
        int nr;
        dma_addr_t dma;
@@ -435,7 +432,7 @@ static int mlx4_en_complete_rx_desc(struct mlx4_en_priv *priv,
                        goto fail;
 
                /* Unmap buffer */
-               pci_unmap_single(mdev->pdev, dma, skb_frag_size(&skb_frags_rx[nr]),
+               dma_unmap_single(priv->ddev, dma, skb_frag_size(&skb_frags_rx[nr]),
                                 PCI_DMA_FROMDEVICE);
        }
        /* Adjust size of last fragment to match actual length */
@@ -461,7 +458,6 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
                                      struct mlx4_en_rx_alloc *page_alloc,
                                      unsigned int length)
 {
-       struct mlx4_en_dev *mdev = priv->mdev;
        struct sk_buff *skb;
        void *va;
        int used_frags;
@@ -483,10 +479,10 @@ static struct sk_buff *mlx4_en_rx_skb(struct mlx4_en_priv *priv,
                /* We are copying all relevant data to the skb - temporarily
                 * synch buffers for the copy */
                dma = be64_to_cpu(rx_desc->data[0].addr);
-               dma_sync_single_for_cpu(&mdev->pdev->dev, dma, length,
+               dma_sync_single_for_cpu(priv->ddev, dma, length,
                                        DMA_FROM_DEVICE);
                skb_copy_to_linear_data(skb, va, length);
-               dma_sync_single_for_device(&mdev->pdev->dev, dma, length,
+               dma_sync_single_for_device(priv->ddev, dma, length,
                                           DMA_FROM_DEVICE);
                skb->tail += length;
        } else {
index 50b3fa5..008f0af 100644 (file)
@@ -198,7 +198,6 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                struct mlx4_en_tx_ring *ring,
                                int index, u8 owner)
 {
-       struct mlx4_en_dev *mdev = priv->mdev;
        struct mlx4_en_tx_info *tx_info = &ring->tx_info[index];
        struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE;
        struct mlx4_wqe_data_seg *data = (void *) tx_desc + tx_info->data_offset;
@@ -214,7 +213,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
        if (likely((void *) tx_desc + tx_info->nr_txbb * TXBB_SIZE <= end)) {
                if (!tx_info->inl) {
                        if (tx_info->linear) {
-                               pci_unmap_single(mdev->pdev,
+                               dma_unmap_single(priv->ddev,
                                        (dma_addr_t) be64_to_cpu(data->addr),
                                         be32_to_cpu(data->byte_count),
                                         PCI_DMA_TODEVICE);
@@ -223,7 +222,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 
                        for (i = 0; i < frags; i++) {
                                frag = &skb_shinfo(skb)->frags[i];
-                               pci_unmap_page(mdev->pdev,
+                               dma_unmap_page(priv->ddev,
                                        (dma_addr_t) be64_to_cpu(data[i].addr),
                                        skb_frag_size(frag), PCI_DMA_TODEVICE);
                        }
@@ -241,7 +240,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                        }
 
                        if (tx_info->linear) {
-                               pci_unmap_single(mdev->pdev,
+                               dma_unmap_single(priv->ddev,
                                        (dma_addr_t) be64_to_cpu(data->addr),
                                         be32_to_cpu(data->byte_count),
                                         PCI_DMA_TODEVICE);
@@ -253,7 +252,7 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
                                if ((void *) data >= end)
                                        data = ring->buf;
                                frag = &skb_shinfo(skb)->frags[i];
-                               pci_unmap_page(mdev->pdev,
+                               dma_unmap_page(priv->ddev,
                                        (dma_addr_t) be64_to_cpu(data->addr),
                                         skb_frag_size(frag), PCI_DMA_TODEVICE);
                                ++data;
@@ -733,7 +732,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
                /* Map fragments */
                for (i = skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) {
                        frag = &skb_shinfo(skb)->frags[i];
-                       dma = skb_frag_dma_map(&mdev->dev->pdev->dev, frag,
+                       dma = skb_frag_dma_map(priv->ddev, frag,
                                               0, skb_frag_size(frag),
                                               DMA_TO_DEVICE);
                        data->addr = cpu_to_be64(dma);
@@ -745,7 +744,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev)
 
                /* Map linear part */
                if (tx_info->linear) {
-                       dma = pci_map_single(mdev->dev->pdev, skb->data + lso_header_size,
+                       dma = dma_map_single(priv->ddev, skb->data + lso_header_size,
                                             skb_headlen(skb) - lso_header_size, PCI_DMA_TODEVICE);
                        data->addr = cpu_to_be64(dma);
                        data->lkey = cpu_to_be32(mdev->mr.key);
index d60335f..a5bb0b7 100644 (file)
@@ -482,6 +482,7 @@ struct mlx4_en_priv {
        struct mlx4_en_stat_out_mbox hw_stats;
        int vids[128];
        bool wol;
+       struct device *ddev;
 };
 
 enum mlx4_en_wol {