drivers/net/ethernet/intel/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2012 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
29
30 #include <linux/module.h>
31 #include <linux/types.h>
32 #include <linux/init.h>
33 #include <linux/bitops.h>
34 #include <linux/vmalloc.h>
35 #include <linux/pagemap.h>
36 #include <linux/netdevice.h>
37 #include <linux/ipv6.h>
38 #include <linux/slab.h>
39 #include <net/checksum.h>
40 #include <net/ip6_checksum.h>
41 #include <linux/net_tstamp.h>
42 #include <linux/mii.h>
43 #include <linux/ethtool.h>
44 #include <linux/if.h>
45 #include <linux/if_vlan.h>
46 #include <linux/pci.h>
47 #include <linux/pci-aspm.h>
48 #include <linux/delay.h>
49 #include <linux/interrupt.h>
50 #include <linux/ip.h>
51 #include <linux/tcp.h>
52 #include <linux/sctp.h>
53 #include <linux/if_ether.h>
54 #include <linux/aer.h>
55 #include <linux/prefetch.h>
56 #include <linux/pm_runtime.h>
57 #ifdef CONFIG_IGB_DCA
58 #include <linux/dca.h>
59 #endif
60 #include "igb.h"
61
62 #define MAJ 3
63 #define MIN 2
64 #define BUILD 10
65 #define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \
66 __stringify(BUILD) "-k"
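/* with the MAJ/MIN/BUILD values above, DRV_VERSION expands to "3.2.10-k" */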
67 char igb_driver_name[] = "igb";
68 char igb_driver_version[] = DRV_VERSION;
69 static const char igb_driver_string[] =
70                                 "Intel(R) Gigabit Ethernet Network Driver";
71 static const char igb_copyright[] = "Copyright (c) 2007-2012 Intel Corporation.";
72
73 static const struct e1000_info *igb_info_tbl[] = {
74         [board_82575] = &e1000_82575_info,
75 };
76
77 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
87         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
88         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
89         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
90         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE), board_82575 },
91         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP), board_82575 },
92         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
93         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
94         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
95         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
96         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
97         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
98         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
99         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
100         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
101         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
102         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
103         /* required last entry */
104         {0, }
105 };
106
107 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
108
109 void igb_reset(struct igb_adapter *);
110 static int igb_setup_all_tx_resources(struct igb_adapter *);
111 static int igb_setup_all_rx_resources(struct igb_adapter *);
112 static void igb_free_all_tx_resources(struct igb_adapter *);
113 static void igb_free_all_rx_resources(struct igb_adapter *);
114 static void igb_setup_mrqc(struct igb_adapter *);
115 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
116 static void __devexit igb_remove(struct pci_dev *pdev);
117 static void igb_init_hw_timer(struct igb_adapter *adapter);
118 static int igb_sw_init(struct igb_adapter *);
119 static int igb_open(struct net_device *);
120 static int igb_close(struct net_device *);
121 static void igb_configure_tx(struct igb_adapter *);
122 static void igb_configure_rx(struct igb_adapter *);
123 static void igb_clean_all_tx_rings(struct igb_adapter *);
124 static void igb_clean_all_rx_rings(struct igb_adapter *);
125 static void igb_clean_tx_ring(struct igb_ring *);
126 static void igb_clean_rx_ring(struct igb_ring *);
127 static void igb_set_rx_mode(struct net_device *);
128 static void igb_update_phy_info(unsigned long);
129 static void igb_watchdog(unsigned long);
130 static void igb_watchdog_task(struct work_struct *);
131 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *);
132 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev,
133                                                  struct rtnl_link_stats64 *stats);
134 static int igb_change_mtu(struct net_device *, int);
135 static int igb_set_mac(struct net_device *, void *);
136 static void igb_set_uta(struct igb_adapter *adapter);
137 static irqreturn_t igb_intr(int irq, void *);
138 static irqreturn_t igb_intr_msi(int irq, void *);
139 static irqreturn_t igb_msix_other(int irq, void *);
140 static irqreturn_t igb_msix_ring(int irq, void *);
141 #ifdef CONFIG_IGB_DCA
142 static void igb_update_dca(struct igb_q_vector *);
143 static void igb_setup_dca(struct igb_adapter *);
144 #endif /* CONFIG_IGB_DCA */
145 static int igb_poll(struct napi_struct *, int);
146 static bool igb_clean_tx_irq(struct igb_q_vector *);
147 static bool igb_clean_rx_irq(struct igb_q_vector *, int);
148 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
149 static void igb_tx_timeout(struct net_device *);
150 static void igb_reset_task(struct work_struct *);
151 static void igb_vlan_mode(struct net_device *netdev, netdev_features_t features);
152 static int igb_vlan_rx_add_vid(struct net_device *, u16);
153 static int igb_vlan_rx_kill_vid(struct net_device *, u16);
154 static void igb_restore_vlan(struct igb_adapter *);
155 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
156 static void igb_ping_all_vfs(struct igb_adapter *);
157 static void igb_msg_task(struct igb_adapter *);
158 static void igb_vmm_control(struct igb_adapter *);
159 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
160 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
161 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
162 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
163                                int vf, u16 vlan, u8 qos);
164 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
165 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
166                                  struct ifla_vf_info *ivi);
167 static void igb_check_vf_rate_limit(struct igb_adapter *);
168
169 #ifdef CONFIG_PCI_IOV
170 static int igb_vf_configure(struct igb_adapter *adapter, int vf);
171 static int igb_find_enabled_vfs(struct igb_adapter *adapter);
172 static int igb_check_vf_assignment(struct igb_adapter *adapter);
173 #endif
174
175 #ifdef CONFIG_PM
176 #ifdef CONFIG_PM_SLEEP
177 static int igb_suspend(struct device *);
178 #endif
179 static int igb_resume(struct device *);
180 #ifdef CONFIG_PM_RUNTIME
181 static int igb_runtime_suspend(struct device *dev);
182 static int igb_runtime_resume(struct device *dev);
183 static int igb_runtime_idle(struct device *dev);
184 #endif
185 static const struct dev_pm_ops igb_pm_ops = {
186         SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume)
187         SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume,
188                         igb_runtime_idle)
189 };
190 #endif
191 static void igb_shutdown(struct pci_dev *);
192 #ifdef CONFIG_IGB_DCA
193 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
194 static struct notifier_block dca_notifier = {
195         .notifier_call  = igb_notify_dca,
196         .next           = NULL,
197         .priority       = 0
198 };
199 #endif
200 #ifdef CONFIG_NET_POLL_CONTROLLER
201 /* for netdump / net console */
202 static void igb_netpoll(struct net_device *);
203 #endif
204 #ifdef CONFIG_PCI_IOV
205 static unsigned int max_vfs = 0;
206 module_param(max_vfs, uint, 0);
207 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
208                  "per physical function");
209 #endif /* CONFIG_PCI_IOV */
210
211 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
212                      pci_channel_state_t);
213 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
214 static void igb_io_resume(struct pci_dev *);
215
216 static struct pci_error_handlers igb_err_handler = {
217         .error_detected = igb_io_error_detected,
218         .slot_reset = igb_io_slot_reset,
219         .resume = igb_io_resume,
220 };
221
222 static void igb_init_dmac(struct igb_adapter *adapter, u32 pba);
223
224 static struct pci_driver igb_driver = {
225         .name     = igb_driver_name,
226         .id_table = igb_pci_tbl,
227         .probe    = igb_probe,
228         .remove   = __devexit_p(igb_remove),
229 #ifdef CONFIG_PM
230         .driver.pm = &igb_pm_ops,
231 #endif
232         .shutdown = igb_shutdown,
233         .err_handler = &igb_err_handler
234 };
235
236 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
237 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
238 MODULE_LICENSE("GPL");
239 MODULE_VERSION(DRV_VERSION);
240
241 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK)
242 static int debug = -1;
243 module_param(debug, int, 0);
244 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
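/* a negative debug value makes netif_msg_init() fall back to DEFAULT_MSG_ENABLE */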
245
246 struct igb_reg_info {
247         u32 ofs;
248         char *name;
249 };
250
251 static const struct igb_reg_info igb_reg_info_tbl[] = {
252
253         /* General Registers */
254         {E1000_CTRL, "CTRL"},
255         {E1000_STATUS, "STATUS"},
256         {E1000_CTRL_EXT, "CTRL_EXT"},
257
258         /* Interrupt Registers */
259         {E1000_ICR, "ICR"},
260
261         /* RX Registers */
262         {E1000_RCTL, "RCTL"},
263         {E1000_RDLEN(0), "RDLEN"},
264         {E1000_RDH(0), "RDH"},
265         {E1000_RDT(0), "RDT"},
266         {E1000_RXDCTL(0), "RXDCTL"},
267         {E1000_RDBAL(0), "RDBAL"},
268         {E1000_RDBAH(0), "RDBAH"},
269
270         /* TX Registers */
271         {E1000_TCTL, "TCTL"},
272         {E1000_TDBAL(0), "TDBAL"},
273         {E1000_TDBAH(0), "TDBAH"},
274         {E1000_TDLEN(0), "TDLEN"},
275         {E1000_TDH(0), "TDH"},
276         {E1000_TDT(0), "TDT"},
277         {E1000_TXDCTL(0), "TXDCTL"},
278         {E1000_TDFH, "TDFH"},
279         {E1000_TDFT, "TDFT"},
280         {E1000_TDFHS, "TDFHS"},
281         {E1000_TDFPC, "TDFPC"},
282
283         /* List Terminator */
284         {}
285 };
286
287 /*
288  * igb_regdump - register printout routine
289  */
290 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
291 {
292         int n = 0;
293         char rname[16];
294         u32 regs[8];
295
296         switch (reginfo->ofs) {
297         case E1000_RDLEN(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_RDLEN(n));
300                 break;
301         case E1000_RDH(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_RDH(n));
304                 break;
305         case E1000_RDT(0):
306                 for (n = 0; n < 4; n++)
307                         regs[n] = rd32(E1000_RDT(n));
308                 break;
309         case E1000_RXDCTL(0):
310                 for (n = 0; n < 4; n++)
311                         regs[n] = rd32(E1000_RXDCTL(n));
312                 break;
313         case E1000_RDBAL(0):
314                 for (n = 0; n < 4; n++)
315                         regs[n] = rd32(E1000_RDBAL(n));
316                 break;
317         case E1000_RDBAH(0):
318                 for (n = 0; n < 4; n++)
319                         regs[n] = rd32(E1000_RDBAH(n));
320                 break;
321         case E1000_TDBAL(0):
322                 for (n = 0; n < 4; n++)
323                         regs[n] = rd32(E1000_TDBAL(n));
324                 break;
325         case E1000_TDBAH(0):
326                 for (n = 0; n < 4; n++)
327                         regs[n] = rd32(E1000_TDBAH(n));
328                 break;
329         case E1000_TDLEN(0):
330                 for (n = 0; n < 4; n++)
331                         regs[n] = rd32(E1000_TDLEN(n));
332                 break;
333         case E1000_TDH(0):
334                 for (n = 0; n < 4; n++)
335                         regs[n] = rd32(E1000_TDH(n));
336                 break;
337         case E1000_TDT(0):
338                 for (n = 0; n < 4; n++)
339                         regs[n] = rd32(E1000_TDT(n));
340                 break;
341         case E1000_TXDCTL(0):
342                 for (n = 0; n < 4; n++)
343                         regs[n] = rd32(E1000_TXDCTL(n));
344                 break;
345         default:
346                 pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
347                 return;
348         }
349
350         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
351         pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
352                 regs[2], regs[3]);
353 }
354
355 /*
356  * igb_dump - Print registers, tx-rings and rx-rings
357  */
358 static void igb_dump(struct igb_adapter *adapter)
359 {
360         struct net_device *netdev = adapter->netdev;
361         struct e1000_hw *hw = &adapter->hw;
362         struct igb_reg_info *reginfo;
363         struct igb_ring *tx_ring;
364         union e1000_adv_tx_desc *tx_desc;
365         struct my_u0 { u64 a; u64 b; } *u0;
366         struct igb_ring *rx_ring;
367         union e1000_adv_rx_desc *rx_desc;
368         u32 staterr;
369         u16 i, n;
370
371         if (!netif_msg_hw(adapter))
372                 return;
373
374         /* Print netdevice Info */
375         if (netdev) {
376                 dev_info(&adapter->pdev->dev, "Net device Info\n");
377                 pr_info("Device Name     state            trans_start      "
378                         "last_rx\n");
379                 pr_info("%-15s %016lX %016lX %016lX\n", netdev->name,
380                         netdev->state, netdev->trans_start, netdev->last_rx);
381         }
382
383         /* Print Registers */
384         dev_info(&adapter->pdev->dev, "Register Dump\n");
385         pr_info(" Register Name   Value\n");
386         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
387              reginfo->name; reginfo++) {
388                 igb_regdump(hw, reginfo);
389         }
390
391         /* Print TX Ring Summary */
392         if (!netdev || !netif_running(netdev))
393                 goto exit;
394
395         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
396         pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
397         for (n = 0; n < adapter->num_tx_queues; n++) {
398                 struct igb_tx_buffer *buffer_info;
399                 tx_ring = adapter->tx_ring[n];
400                 buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
401                 pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
402                         n, tx_ring->next_to_use, tx_ring->next_to_clean,
403                         (u64)buffer_info->dma,
404                         buffer_info->length,
405                         buffer_info->next_to_watch,
406                         (u64)buffer_info->time_stamp);
407         }
408
409         /* Print TX Rings */
410         if (!netif_msg_tx_done(adapter))
411                 goto rx_ring_summary;
412
413         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
414
415         /* Transmit Descriptor Formats
416          *
417          * Advanced Transmit Descriptor
418          *   +--------------------------------------------------------------+
419          * 0 |         Buffer Address [63:0]                                |
420          *   +--------------------------------------------------------------+
421          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
422          *   +--------------------------------------------------------------+
423          *   63      46 45    40 39 38 36 35 32 31   24             15       0
424          */
425
426         for (n = 0; n < adapter->num_tx_queues; n++) {
427                 tx_ring = adapter->tx_ring[n];
428                 pr_info("------------------------------------\n");
429                 pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
430                 pr_info("------------------------------------\n");
431                 pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] "
432                         "[bi->dma       ] leng  ntw timestamp        "
433                         "bi->skb\n");
434
435                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
436                         const char *next_desc;
437                         struct igb_tx_buffer *buffer_info;
438                         tx_desc = IGB_TX_DESC(tx_ring, i);
439                         buffer_info = &tx_ring->tx_buffer_info[i];
440                         u0 = (struct my_u0 *)tx_desc;
441                         if (i == tx_ring->next_to_use &&
442                             i == tx_ring->next_to_clean)
443                                 next_desc = " NTC/U";
444                         else if (i == tx_ring->next_to_use)
445                                 next_desc = " NTU";
446                         else if (i == tx_ring->next_to_clean)
447                                 next_desc = " NTC";
448                         else
449                                 next_desc = "";
450
451                         pr_info("T [0x%03X]    %016llX %016llX %016llX"
452                                 " %04X  %p %016llX %p%s\n", i,
453                                 le64_to_cpu(u0->a),
454                                 le64_to_cpu(u0->b),
455                                 (u64)buffer_info->dma,
456                                 buffer_info->length,
457                                 buffer_info->next_to_watch,
458                                 (u64)buffer_info->time_stamp,
459                                 buffer_info->skb, next_desc);
460
461                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
462                                 print_hex_dump(KERN_INFO, "",
463                                         DUMP_PREFIX_ADDRESS,
464                                         16, 1, phys_to_virt(buffer_info->dma),
465                                         buffer_info->length, true);
466                 }
467         }
468
469         /* Print RX Rings Summary */
470 rx_ring_summary:
471         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
472         pr_info("Queue [NTU] [NTC]\n");
473         for (n = 0; n < adapter->num_rx_queues; n++) {
474                 rx_ring = adapter->rx_ring[n];
475                 pr_info(" %5d %5X %5X\n",
476                         n, rx_ring->next_to_use, rx_ring->next_to_clean);
477         }
478
479         /* Print RX Rings */
480         if (!netif_msg_rx_status(adapter))
481                 goto exit;
482
483         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
484
485         /* Advanced Receive Descriptor (Read) Format
486          *    63                                           1        0
487          *    +-----------------------------------------------------+
488          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
489          *    +----------------------------------------------+------+
490          *  8 |       Header Buffer Address [63:1]           |  DD  |
491          *    +-----------------------------------------------------+
492          *
493          *
494          * Advanced Receive Descriptor (Write-Back) Format
495          *
496          *   63       48 47    32 31  30      21 20 17 16   4 3     0
497          *   +------------------------------------------------------+
498          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
499          *   | Checksum   Ident  |   |           |    | Type | Type |
500          *   +------------------------------------------------------+
501          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
502          *   +------------------------------------------------------+
503          *   63       48 47    32 31            20 19               0
504          */
505
506         for (n = 0; n < adapter->num_rx_queues; n++) {
507                 rx_ring = adapter->rx_ring[n];
508                 pr_info("------------------------------------\n");
509                 pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
510                 pr_info("------------------------------------\n");
511                 pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] "
512                         "[bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
513                 pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] -----"
514                         "----------- [bi->skb] <-- Adv Rx Write-Back format\n");
515
516                 for (i = 0; i < rx_ring->count; i++) {
517                         const char *next_desc;
518                         struct igb_rx_buffer *buffer_info;
519                         buffer_info = &rx_ring->rx_buffer_info[i];
520                         rx_desc = IGB_RX_DESC(rx_ring, i);
521                         u0 = (struct my_u0 *)rx_desc;
522                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
523
524                         if (i == rx_ring->next_to_use)
525                                 next_desc = " NTU";
526                         else if (i == rx_ring->next_to_clean)
527                                 next_desc = " NTC";
528                         else
529                                 next_desc = "";
530
531                         if (staterr & E1000_RXD_STAT_DD) {
532                                 /* Descriptor Done */
533                                 pr_info("%s[0x%03X]     %016llX %016llX -------"
534                                         "--------- %p%s\n", "RWB", i,
535                                         le64_to_cpu(u0->a),
536                                         le64_to_cpu(u0->b),
537                                         buffer_info->skb, next_desc);
538                         } else {
539                                 pr_info("%s[0x%03X]     %016llX %016llX %016llX"
540                                         " %p%s\n", "R  ", i,
541                                         le64_to_cpu(u0->a),
542                                         le64_to_cpu(u0->b),
543                                         (u64)buffer_info->dma,
544                                         buffer_info->skb, next_desc);
545
546                                 if (netif_msg_pktdata(adapter)) {
547                                         print_hex_dump(KERN_INFO, "",
548                                                 DUMP_PREFIX_ADDRESS,
549                                                 16, 1,
550                                                 phys_to_virt(buffer_info->dma),
551                                                 IGB_RX_HDR_LEN, true);
552                                         print_hex_dump(KERN_INFO, "",
553                                           DUMP_PREFIX_ADDRESS,
554                                           16, 1,
555                                           phys_to_virt(
556                                             buffer_info->page_dma +
557                                             buffer_info->page_offset),
558                                           PAGE_SIZE/2, true);
559                                 }
560                         }
561                 }
562         }
563
564 exit:
565         return;
566 }
567
568
569 /**
570  * igb_read_clock - read raw cycle counter (to be used by time counter)
571  */
572 static cycle_t igb_read_clock(const struct cyclecounter *tc)
573 {
574         struct igb_adapter *adapter =
575                 container_of(tc, struct igb_adapter, cycles);
576         struct e1000_hw *hw = &adapter->hw;
577         u64 stamp = 0;
578         int shift = 0;
579
580         /*
581          * The timestamp latches on lowest register read. For the 82580
582          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
583          * adjusted TIMINCA, so SYSTIMR will just read as all 0s; ignore it.
584          */
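        /* SYSTIML/SYSTIMH then supply bits [shift..shift+63] of the returned
         * cycle count; the SYSTIMR read above only serves to latch the value.
         */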
585         if (hw->mac.type >= e1000_82580) {
586                 stamp = rd32(E1000_SYSTIMR) >> 8;
587                 shift = IGB_82580_TSYNC_SHIFT;
588         }
589
590         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
591         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
592         return stamp;
593 }
594
595 /**
596  * igb_get_hw_dev - return device
597  * used by hardware layer to print debugging information
598  **/
599 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
600 {
601         struct igb_adapter *adapter = hw->back;
602         return adapter->netdev;
603 }
604
605 /**
606  * igb_init_module - Driver Registration Routine
607  *
608  * igb_init_module is the first routine called when the driver is
609  * loaded. All it does is register with the PCI subsystem.
610  **/
611 static int __init igb_init_module(void)
612 {
613         int ret;
614         pr_info("%s - version %s\n",
615                igb_driver_string, igb_driver_version);
616
617         pr_info("%s\n", igb_copyright);
618
619 #ifdef CONFIG_IGB_DCA
620         dca_register_notify(&dca_notifier);
621 #endif
622         ret = pci_register_driver(&igb_driver);
623         return ret;
624 }
625
626 module_init(igb_init_module);
627
628 /**
629  * igb_exit_module - Driver Exit Cleanup Routine
630  *
631  * igb_exit_module is called just before the driver is removed
632  * from memory.
633  **/
634 static void __exit igb_exit_module(void)
635 {
636 #ifdef CONFIG_IGB_DCA
637         dca_unregister_notify(&dca_notifier);
638 #endif
639         pci_unregister_driver(&igb_driver);
640 }
641
642 module_exit(igb_exit_module);
643
644 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
645 /**
646  * igb_cache_ring_register - Descriptor ring to register mapping
647  * @adapter: board private structure to initialize
648  *
649  * Once we know the feature-set enabled for the device, we'll cache
650  * the register offset the descriptor ring is assigned to.
651  **/
652 static void igb_cache_ring_register(struct igb_adapter *adapter)
653 {
654         int i = 0, j = 0;
655         u32 rbase_offset = adapter->vfs_allocated_count;
656
657         switch (adapter->hw.mac.type) {
658         case e1000_82576:
659                 /* The queues are allocated for virtualization such that VF 0
660                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
661                  * In order to avoid collision we start at the first free queue
662                  * and continue consuming queues in the same sequence
663                  */
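                /* Q_IDX_82576(i) interleaves the two halves of the queue
                 * space: 0->0, 1->8, 2->1, 3->9, ...
                 */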
664                 if (adapter->vfs_allocated_count) {
665                         for (; i < adapter->rss_queues; i++)
666                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
667                                                                Q_IDX_82576(i);
668                 }
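                /* Fall through: remaining Rx/Tx queues get consecutive
                 * register indices below.
                 */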
669         case e1000_82575:
670         case e1000_82580:
671         case e1000_i350:
672         default:
673                 for (; i < adapter->num_rx_queues; i++)
674                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
675                 for (; j < adapter->num_tx_queues; j++)
676                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
677                 break;
678         }
679 }
680
681 static void igb_free_queues(struct igb_adapter *adapter)
682 {
683         int i;
684
685         for (i = 0; i < adapter->num_tx_queues; i++) {
686                 kfree(adapter->tx_ring[i]);
687                 adapter->tx_ring[i] = NULL;
688         }
689         for (i = 0; i < adapter->num_rx_queues; i++) {
690                 kfree(adapter->rx_ring[i]);
691                 adapter->rx_ring[i] = NULL;
692         }
693         adapter->num_rx_queues = 0;
694         adapter->num_tx_queues = 0;
695 }
696
697 /**
698  * igb_alloc_queues - Allocate memory for all rings
699  * @adapter: board private structure to initialize
700  *
701  * We allocate one ring per queue at run-time since we don't know the
702  * number of queues at compile-time.
703  **/
704 static int igb_alloc_queues(struct igb_adapter *adapter)
705 {
706         struct igb_ring *ring;
707         int i;
708         int orig_node = adapter->node;
709
710         for (i = 0; i < adapter->num_tx_queues; i++) {
711                 if (orig_node == -1) {
712                         int cur_node = next_online_node(adapter->node);
713                         if (cur_node == MAX_NUMNODES)
714                                 cur_node = first_online_node;
715                         adapter->node = cur_node;
716                 }
717                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
718                                     adapter->node);
719                 if (!ring)
720                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
721                 if (!ring)
722                         goto err;
723                 ring->count = adapter->tx_ring_count;
724                 ring->queue_index = i;
725                 ring->dev = &adapter->pdev->dev;
726                 ring->netdev = adapter->netdev;
727                 ring->numa_node = adapter->node;
728                 /* For 82575, context index must be unique per ring. */
729                 if (adapter->hw.mac.type == e1000_82575)
730                         set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags);
731                 adapter->tx_ring[i] = ring;
732         }
733         /* Restore the adapter's original node */
734         adapter->node = orig_node;
735
736         for (i = 0; i < adapter->num_rx_queues; i++) {
737                 if (orig_node == -1) {
738                         int cur_node = next_online_node(adapter->node);
739                         if (cur_node == MAX_NUMNODES)
740                                 cur_node = first_online_node;
741                         adapter->node = cur_node;
742                 }
743                 ring = kzalloc_node(sizeof(struct igb_ring), GFP_KERNEL,
744                                     adapter->node);
745                 if (!ring)
746                         ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
747                 if (!ring)
748                         goto err;
749                 ring->count = adapter->rx_ring_count;
750                 ring->queue_index = i;
751                 ring->dev = &adapter->pdev->dev;
752                 ring->netdev = adapter->netdev;
753                 ring->numa_node = adapter->node;
754                 /* set flag indicating ring supports SCTP checksum offload */
755                 if (adapter->hw.mac.type >= e1000_82576)
756                         set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags);
757
758                 /* On i350, loopback VLAN packets have the tag byte-swapped. */
759                 if (adapter->hw.mac.type == e1000_i350)
760                         set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags);
761
762                 adapter->rx_ring[i] = ring;
763         }
764         /* Restore the adapter's original node */
765         adapter->node = orig_node;
766
767         igb_cache_ring_register(adapter);
768
769         return 0;
770
771 err:
772         /* Restore the adapter's original node */
773         adapter->node = orig_node;
774         igb_free_queues(adapter);
775
776         return -ENOMEM;
777 }
778
779 /**
780  *  igb_write_ivar - configure ivar for given MSI-X vector
781  *  @hw: pointer to the HW structure
782  *  @msix_vector: vector number we are allocating to a given ring
783  *  @index: row index of IVAR register to write within IVAR table
784  *  @offset: column offset in IVAR, should be a multiple of 8
785  *
786  *  This function is intended to handle the writing of the IVAR register
787  *  for adapters 82576 and newer.  The IVAR table consists of 2 columns,
788  *  each containing a cause allocation for an Rx and a Tx ring, and a
789  *  variable number of rows depending on the number of queues supported.
790  **/
791 static void igb_write_ivar(struct e1000_hw *hw, int msix_vector,
792                            int index, int offset)
793 {
794         u32 ivar = array_rd32(E1000_IVAR0, index);
795
796         /* clear any bits that are currently set */
797         ivar &= ~((u32)0xFF << offset);
798
799         /* write vector and valid bit */
800         ivar |= (msix_vector | E1000_IVAR_VALID) << offset;
801
802         array_wr32(E1000_IVAR0, index, ivar);
803 }
804
805 #define IGB_N0_QUEUE -1
806 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
807 {
808         struct igb_adapter *adapter = q_vector->adapter;
809         struct e1000_hw *hw = &adapter->hw;
810         int rx_queue = IGB_N0_QUEUE;
811         int tx_queue = IGB_N0_QUEUE;
812         u32 msixbm = 0;
813
814         if (q_vector->rx.ring)
815                 rx_queue = q_vector->rx.ring->reg_idx;
816         if (q_vector->tx.ring)
817                 tx_queue = q_vector->tx.ring->reg_idx;
818
819         switch (hw->mac.type) {
820         case e1000_82575:
821                 /* The 82575 assigns vectors using a bitmask, which matches the
822                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
823                    or more queues to a vector, we write the appropriate bits
824                    into the MSIXBM register for that vector. */
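                /* e.g. rx_queue 2 sets bit (E1000_EICR_RX_QUEUE0 << 2) in this
                 * vector's MSIXBM entry.
                 */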
825                 if (rx_queue > IGB_N0_QUEUE)
826                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
827                 if (tx_queue > IGB_N0_QUEUE)
828                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
829                 if (!adapter->msix_entries && msix_vector == 0)
830                         msixbm |= E1000_EIMS_OTHER;
831                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
832                 q_vector->eims_value = msixbm;
833                 break;
834         case e1000_82576:
835                 /*
836                  * 82576 uses a table that essentially consists of 2 columns
837                  * with 8 rows.  The ordering is column-major so we use the
838                  * lower 3 bits as the row index, and the 4th bit as the
839                  * column offset.
840                  */
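                /* e.g. rx_queue 10: IVAR row 10 & 0x7 = 2, column offset
                 * (10 & 0x8) << 1 = 16; the matching Tx entry uses the same
                 * row at offset + 8.
                 */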
841                 if (rx_queue > IGB_N0_QUEUE)
842                         igb_write_ivar(hw, msix_vector,
843                                        rx_queue & 0x7,
844                                        (rx_queue & 0x8) << 1);
845                 if (tx_queue > IGB_N0_QUEUE)
846                         igb_write_ivar(hw, msix_vector,
847                                        tx_queue & 0x7,
848                                        ((tx_queue & 0x8) << 1) + 8);
849                 q_vector->eims_value = 1 << msix_vector;
850                 break;
851         case e1000_82580:
852         case e1000_i350:
853                 /*
854                  * On 82580 and newer adapters the scheme is similar to 82576
855                  * however instead of ordering column-major we have things
856                  * ordered row-major.  So we traverse the table by using
857                  * bit 0 as the column offset, and the remaining bits as the
858                  * row index.
859                  */
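                /* e.g. rx_queue 5: IVAR row 5 >> 1 = 2, column offset
                 * (5 & 0x1) << 4 = 16; the matching Tx entry again sits at
                 * offset + 8.
                 */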
860                 if (rx_queue > IGB_N0_QUEUE)
861                         igb_write_ivar(hw, msix_vector,
862                                        rx_queue >> 1,
863                                        (rx_queue & 0x1) << 4);
864                 if (tx_queue > IGB_N0_QUEUE)
865                         igb_write_ivar(hw, msix_vector,
866                                        tx_queue >> 1,
867                                        ((tx_queue & 0x1) << 4) + 8);
868                 q_vector->eims_value = 1 << msix_vector;
869                 break;
870         default:
871                 BUG();
872                 break;
873         }
874
875         /* add q_vector eims value to global eims_enable_mask */
876         adapter->eims_enable_mask |= q_vector->eims_value;
877
878         /* configure q_vector to set itr on first interrupt */
879         q_vector->set_itr = 1;
880 }
881
882 /**
883  * igb_configure_msix - Configure MSI-X hardware
884  *
885  * igb_configure_msix sets up the hardware to properly
886  * generate MSI-X interrupts.
887  **/
888 static void igb_configure_msix(struct igb_adapter *adapter)
889 {
890         u32 tmp;
891         int i, vector = 0;
892         struct e1000_hw *hw = &adapter->hw;
893
894         adapter->eims_enable_mask = 0;
895
896         /* set vector for other causes, i.e. link changes */
897         switch (hw->mac.type) {
898         case e1000_82575:
899                 tmp = rd32(E1000_CTRL_EXT);
900                 /* enable MSI-X PBA support */
901                 tmp |= E1000_CTRL_EXT_PBA_CLR;
902
903                 /* Auto-Mask interrupts upon ICR read. */
904                 tmp |= E1000_CTRL_EXT_EIAME;
905                 tmp |= E1000_CTRL_EXT_IRCA;
906
907                 wr32(E1000_CTRL_EXT, tmp);
908
909                 /* enable msix_other interrupt */
910                 array_wr32(E1000_MSIXBM(0), vector++,
911                                       E1000_EIMS_OTHER);
912                 adapter->eims_other = E1000_EIMS_OTHER;
913
914                 break;
915
916         case e1000_82576:
917         case e1000_82580:
918         case e1000_i350:
919                 /* Turn on MSI-X capability first, or our settings
920                  * won't stick.  And it will take days to debug. */
921                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
922                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
923                                 E1000_GPIE_NSICR);
924
925                 /* enable msix_other interrupt */
926                 adapter->eims_other = 1 << vector;
927                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
928
929                 wr32(E1000_IVAR_MISC, tmp);
930                 break;
931         default:
932                 /* do nothing, since nothing else supports MSI-X */
933                 break;
934         } /* switch (hw->mac.type) */
935
936         adapter->eims_enable_mask |= adapter->eims_other;
937
938         for (i = 0; i < adapter->num_q_vectors; i++)
939                 igb_assign_vector(adapter->q_vector[i], vector++);
940
941         wrfl();
942 }
943
944 /**
945  * igb_request_msix - Initialize MSI-X interrupts
946  *
947  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
948  * kernel.
949  **/
950 static int igb_request_msix(struct igb_adapter *adapter)
951 {
952         struct net_device *netdev = adapter->netdev;
953         struct e1000_hw *hw = &adapter->hw;
954         int i, err = 0, vector = 0, free_vector = 0;
955
956         err = request_irq(adapter->msix_entries[vector].vector,
957                           igb_msix_other, 0, netdev->name, adapter);
958         if (err)
959                 goto err_out;
960
961         for (i = 0; i < adapter->num_q_vectors; i++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[i];
963
964                 vector++;
965
966                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
967
968                 if (q_vector->rx.ring && q_vector->tx.ring)
969                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
970                                 q_vector->rx.ring->queue_index);
971                 else if (q_vector->tx.ring)
972                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
973                                 q_vector->tx.ring->queue_index);
974                 else if (q_vector->rx.ring)
975                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
976                                 q_vector->rx.ring->queue_index);
977                 else
978                         sprintf(q_vector->name, "%s-unused", netdev->name);
979
980                 err = request_irq(adapter->msix_entries[vector].vector,
981                                   igb_msix_ring, 0, q_vector->name,
982                                   q_vector);
983                 if (err)
984                         goto err_free;
985         }
986
987         igb_configure_msix(adapter);
988         return 0;
989
990 err_free:
991         /* free already assigned IRQs */
992         free_irq(adapter->msix_entries[free_vector++].vector, adapter);
993
994         vector--;
995         for (i = 0; i < vector; i++) {
996                 free_irq(adapter->msix_entries[free_vector++].vector,
997                          adapter->q_vector[i]);
998         }
999 err_out:
1000         return err;
1001 }
1002
1003 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
1004 {
1005         if (adapter->msix_entries) {
1006                 pci_disable_msix(adapter->pdev);
1007                 kfree(adapter->msix_entries);
1008                 adapter->msix_entries = NULL;
1009         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
1010                 pci_disable_msi(adapter->pdev);
1011         }
1012 }
1013
1014 /**
1015  * igb_free_q_vectors - Free memory allocated for interrupt vectors
1016  * @adapter: board private structure to initialize
1017  *
1018  * This function frees the memory allocated to the q_vectors.  In addition if
1019  * NAPI is enabled it will delete any references to the NAPI struct prior
1020  * to freeing the q_vector.
1021  **/
1022 static void igb_free_q_vectors(struct igb_adapter *adapter)
1023 {
1024         int v_idx;
1025
1026         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1027                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1028                 adapter->q_vector[v_idx] = NULL;
1029                 if (!q_vector)
1030                         continue;
1031                 netif_napi_del(&q_vector->napi);
1032                 kfree(q_vector);
1033         }
1034         adapter->num_q_vectors = 0;
1035 }
1036
1037 /**
1038  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
1039  *
1040  * This function resets the device so that it has 0 rx queues, tx queues, and
1041  * MSI-X interrupts allocated.
1042  */
1043 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
1044 {
1045         igb_free_queues(adapter);
1046         igb_free_q_vectors(adapter);
1047         igb_reset_interrupt_capability(adapter);
1048 }
1049
1050 /**
1051  * igb_set_interrupt_capability - set MSI or MSI-X if supported
1052  *
1053  * Attempt to configure interrupts using the best available
1054  * capabilities of the hardware and kernel.
1055  **/
1056 static int igb_set_interrupt_capability(struct igb_adapter *adapter)
1057 {
1058         int err;
1059         int numvecs, i;
1060
1061         /* Number of supported queues. */
1062         adapter->num_rx_queues = adapter->rss_queues;
1063         if (adapter->vfs_allocated_count)
1064                 adapter->num_tx_queues = 1;
1065         else
1066                 adapter->num_tx_queues = adapter->rss_queues;
1067
1068         /* start with one vector for every rx queue */
1069         numvecs = adapter->num_rx_queues;
1070
1071         /* if tx handler is separate add 1 for every tx queue */
1072         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1073                 numvecs += adapter->num_tx_queues;
1074
1075         /* store the number of vectors reserved for queues */
1076         adapter->num_q_vectors = numvecs;
1077
1078         /* add 1 vector for link status interrupts */
1079         numvecs++;
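        /* e.g. 4 RSS queues with queue pairing: 4 queue vectors + 1 link
         * vector = 5 MSI-X entries; without pairing this would be 8 + 1 = 9.
         */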
1080         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1081                                         GFP_KERNEL);
1082         if (!adapter->msix_entries)
1083                 goto msi_only;
1084
1085         for (i = 0; i < numvecs; i++)
1086                 adapter->msix_entries[i].entry = i;
1087
1088         err = pci_enable_msix(adapter->pdev,
1089                               adapter->msix_entries,
1090                               numvecs);
1091         if (err == 0)
1092                 goto out;
1093
1094         igb_reset_interrupt_capability(adapter);
1095
1096         /* If we can't do MSI-X, try MSI */
1097 msi_only:
1098 #ifdef CONFIG_PCI_IOV
1099         /* disable SR-IOV for non MSI-X configurations */
1100         if (adapter->vf_data) {
1101                 struct e1000_hw *hw = &adapter->hw;
1102                 /* disable iov and allow time for transactions to clear */
1103                 pci_disable_sriov(adapter->pdev);
1104                 msleep(500);
1105
1106                 kfree(adapter->vf_data);
1107                 adapter->vf_data = NULL;
1108                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1109                 wrfl();
1110                 msleep(100);
1111                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1112         }
1113 #endif
1114         adapter->vfs_allocated_count = 0;
1115         adapter->rss_queues = 1;
1116         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1117         adapter->num_rx_queues = 1;
1118         adapter->num_tx_queues = 1;
1119         adapter->num_q_vectors = 1;
1120         if (!pci_enable_msi(adapter->pdev))
1121                 adapter->flags |= IGB_FLAG_HAS_MSI;
1122 out:
1123         /* Notify the stack of the (possibly) reduced queue counts. */
1124         rtnl_lock();
1125         netif_set_real_num_tx_queues(adapter->netdev, adapter->num_tx_queues);
1126         err = netif_set_real_num_rx_queues(adapter->netdev,
1127                 adapter->num_rx_queues);
1128         rtnl_unlock();
1129         return err;
1130 }
1131
1132 /**
1133  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1134  * @adapter: board private structure to initialize
1135  *
1136  * We allocate one q_vector per queue interrupt.  If allocation fails we
1137  * return -ENOMEM.
1138  **/
1139 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1140 {
1141         struct igb_q_vector *q_vector;
1142         struct e1000_hw *hw = &adapter->hw;
1143         int v_idx;
1144         int orig_node = adapter->node;
1145
1146         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1147                 if ((adapter->num_q_vectors == (adapter->num_rx_queues +
1148                                                 adapter->num_tx_queues)) &&
1149                     (adapter->num_rx_queues == v_idx))
1150                         adapter->node = orig_node;
1151                 if (orig_node == -1) {
1152                         int cur_node = next_online_node(adapter->node);
1153                         if (cur_node == MAX_NUMNODES)
1154                                 cur_node = first_online_node;
1155                         adapter->node = cur_node;
1156                 }
1157                 q_vector = kzalloc_node(sizeof(struct igb_q_vector), GFP_KERNEL,
1158                                         adapter->node);
1159                 if (!q_vector)
1160                         q_vector = kzalloc(sizeof(struct igb_q_vector),
1161                                            GFP_KERNEL);
1162                 if (!q_vector)
1163                         goto err_out;
1164                 q_vector->adapter = adapter;
1165                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1166                 q_vector->itr_val = IGB_START_ITR;
1167                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1168                 adapter->q_vector[v_idx] = q_vector;
1169         }
1170         /* Restore the adapter's original node */
1171         adapter->node = orig_node;
1172
1173         return 0;
1174
1175 err_out:
1176         /* Restore the adapter's original node */
1177         adapter->node = orig_node;
1178         igb_free_q_vectors(adapter);
1179         return -ENOMEM;
1180 }
1181
1182 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1183                                       int ring_idx, int v_idx)
1184 {
1185         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1186
1187         q_vector->rx.ring = adapter->rx_ring[ring_idx];
1188         q_vector->rx.ring->q_vector = q_vector;
1189         q_vector->rx.count++;
1190         q_vector->itr_val = adapter->rx_itr_setting;
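        /* settings of 1-3 denote adaptive ITR modes rather than literal EITR
         * intervals, so seed the vector with the default starting value.
         */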
1191         if (q_vector->itr_val && q_vector->itr_val <= 3)
1192                 q_vector->itr_val = IGB_START_ITR;
1193 }
1194
1195 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1196                                       int ring_idx, int v_idx)
1197 {
1198         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1199
1200         q_vector->tx.ring = adapter->tx_ring[ring_idx];
1201         q_vector->tx.ring->q_vector = q_vector;
1202         q_vector->tx.count++;
1203         q_vector->itr_val = adapter->tx_itr_setting;
1204         q_vector->tx.work_limit = adapter->tx_work_limit;
1205         if (q_vector->itr_val && q_vector->itr_val <= 3)
1206                 q_vector->itr_val = IGB_START_ITR;
1207 }
1208
1209 /**
1210  * igb_map_ring_to_vector - maps allocated queues to vectors
1211  *
1212  * This function maps the recently allocated queues to vectors.
1213  **/
1214 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1215 {
1216         int i;
1217         int v_idx = 0;
1218
1219         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1220             (adapter->num_q_vectors < adapter->num_tx_queues))
1221                 return -ENOMEM;
1222
1223         if (adapter->num_q_vectors >=
1224             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1225                 for (i = 0; i < adapter->num_rx_queues; i++)
1226                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1227                 for (i = 0; i < adapter->num_tx_queues; i++)
1228                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1229         } else {
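                /* Fewer vectors than rings: pair Tx ring i with Rx ring i on
                 * vector i, then give any leftover Tx rings their own vectors.
                 */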
1230                 for (i = 0; i < adapter->num_rx_queues; i++) {
1231                         if (i < adapter->num_tx_queues)
1232                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1233                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1234                 }
1235                 for (; i < adapter->num_tx_queues; i++)
1236                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1237         }
1238         return 0;
1239 }
1240
1241 /**
1242  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1243  *
1244  * This function initializes the interrupts and allocates all of the queues.
1245  **/
1246 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1247 {
1248         struct pci_dev *pdev = adapter->pdev;
1249         int err;
1250
1251         err = igb_set_interrupt_capability(adapter);
1252         if (err)
1253                 return err;
1254
1255         err = igb_alloc_q_vectors(adapter);
1256         if (err) {
1257                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1258                 goto err_alloc_q_vectors;
1259         }
1260
1261         err = igb_alloc_queues(adapter);
1262         if (err) {
1263                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1264                 goto err_alloc_queues;
1265         }
1266
1267         err = igb_map_ring_to_vector(adapter);
1268         if (err) {
1269                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1270                 goto err_map_queues;
1271         }
1272
1273
1274         return 0;
1275 err_map_queues:
1276         igb_free_queues(adapter);
1277 err_alloc_queues:
1278         igb_free_q_vectors(adapter);
1279 err_alloc_q_vectors:
1280         igb_reset_interrupt_capability(adapter);
1281         return err;
1282 }
1283
1284 /**
1285  * igb_request_irq - initialize interrupts
1286  *
1287  * Attempts to configure interrupts using the best available
1288  * capabilities of the hardware and kernel.
1289  **/
1290 static int igb_request_irq(struct igb_adapter *adapter)
1291 {
1292         struct net_device *netdev = adapter->netdev;
1293         struct pci_dev *pdev = adapter->pdev;
1294         int err = 0;
1295
1296         if (adapter->msix_entries) {
1297                 err = igb_request_msix(adapter);
1298                 if (!err)
1299                         goto request_done;
1300                 /* fall back to MSI */
1301                 igb_clear_interrupt_scheme(adapter);
1302                 if (!pci_enable_msi(pdev))
1303                         adapter->flags |= IGB_FLAG_HAS_MSI;
1304                 igb_free_all_tx_resources(adapter);
1305                 igb_free_all_rx_resources(adapter);
1306                 adapter->num_tx_queues = 1;
1307                 adapter->num_rx_queues = 1;
1308                 adapter->num_q_vectors = 1;
1309                 err = igb_alloc_q_vectors(adapter);
1310                 if (err) {
1311                         dev_err(&pdev->dev,
1312                                 "Unable to allocate memory for vectors\n");
1313                         goto request_done;
1314                 }
1315                 err = igb_alloc_queues(adapter);
1316                 if (err) {
1317                         dev_err(&pdev->dev,
1318                                 "Unable to allocate memory for queues\n");
1319                         igb_free_q_vectors(adapter);
1320                         goto request_done;
1321                 }
1322                 igb_setup_all_tx_resources(adapter);
1323                 igb_setup_all_rx_resources(adapter);
1324         }
1325
1326         igb_assign_vector(adapter->q_vector[0], 0);
1327
1328         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1329                 err = request_irq(pdev->irq, igb_intr_msi, 0,
1330                                   netdev->name, adapter);
1331                 if (!err)
1332                         goto request_done;
1333
1334                 /* fall back to legacy interrupts */
1335                 igb_reset_interrupt_capability(adapter);
1336                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1337         }
1338
1339         err = request_irq(pdev->irq, igb_intr, IRQF_SHARED,
1340                           netdev->name, adapter);
1341
1342         if (err)
1343                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
1344                         err);
1345
1346 request_done:
1347         return err;
1348 }
1349
1350 static void igb_free_irq(struct igb_adapter *adapter)
1351 {
1352         if (adapter->msix_entries) {
1353                 int vector = 0, i;
1354
1355                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1356
1357                 for (i = 0; i < adapter->num_q_vectors; i++)
1358                         free_irq(adapter->msix_entries[vector++].vector,
1359                                  adapter->q_vector[i]);
1360         } else {
1361                 free_irq(adapter->pdev->irq, adapter);
1362         }
1363 }
1364
1365 /**
1366  * igb_irq_disable - Mask off interrupt generation on the NIC
1367  * @adapter: board private structure
1368  **/
1369 static void igb_irq_disable(struct igb_adapter *adapter)
1370 {
1371         struct e1000_hw *hw = &adapter->hw;
1372
1373         /*
1374          * We need to be careful when disabling interrupts.  The VFs are also
1375          * mapped into these registers, so clearing bits indiscriminately can
1376          * break the VF drivers; only clear the bits that this PF driver set.
1377          */
1378         if (adapter->msix_entries) {
1379                 u32 regval = rd32(E1000_EIAM);
1380                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1381                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1382                 regval = rd32(E1000_EIAC);
1383                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1384         }
1385
1386         wr32(E1000_IAM, 0);
1387         wr32(E1000_IMC, ~0);
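        /* writing all ones to IMC masks every interrupt cause; the flush
         * below makes sure the write has posted before we synchronize */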
1388         wrfl();
1389         if (adapter->msix_entries) {
1390                 int i;
1391                 for (i = 0; i < adapter->num_q_vectors; i++)
1392                         synchronize_irq(adapter->msix_entries[i].vector);
1393         } else {
1394                 synchronize_irq(adapter->pdev->irq);
1395         }
1396 }
1397
1398 /**
1399  * igb_irq_enable - Enable default interrupt generation settings
1400  * @adapter: board private structure
1401  **/
1402 static void igb_irq_enable(struct igb_adapter *adapter)
1403 {
1404         struct e1000_hw *hw = &adapter->hw;
1405
1406         if (adapter->msix_entries) {
1407                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA;
1408                 u32 regval = rd32(E1000_EIAC);
1409                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1410                 regval = rd32(E1000_EIAM);
1411                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1412                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1413                 if (adapter->vfs_allocated_count) {
1414                         wr32(E1000_MBVFIMR, 0xFF);
1415                         ims |= E1000_IMS_VMMB;
1416                 }
1417                 wr32(E1000_IMS, ims);
1418         } else {
1419                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1420                                 E1000_IMS_DRSTA);
1421                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1422                                 E1000_IMS_DRSTA);
1423         }
1424 }
1425
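/**
 * igb_update_mng_vlan - sync the manageability VLAN id into the VLAN filter
 * @adapter: board private structure
 *
 * Brief summary of the code below: if the firmware manageability cookie
 * carries a VLAN id, add it to the VLAN filter table; the previously
 * programmed id is removed when it has changed and is no longer in use.
 **/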
1426 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1427 {
1428         struct e1000_hw *hw = &adapter->hw;
1429         u16 vid = adapter->hw.mng_cookie.vlan_id;
1430         u16 old_vid = adapter->mng_vlan_id;
1431
1432         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1433                 /* add VID to filter table */
1434                 igb_vfta_set(hw, vid, true);
1435                 adapter->mng_vlan_id = vid;
1436         } else {
1437                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1438         }
1439
1440         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1441             (vid != old_vid) &&
1442             !test_bit(old_vid, adapter->active_vlans)) {
1443                 /* remove VID from filter table */
1444                 igb_vfta_set(hw, old_vid, false);
1445         }
1446 }
1447
1448 /**
1449  * igb_release_hw_control - release control of the h/w to f/w
1450  * @adapter: address of board private structure
1451  *
1452  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1453  * For ASF and Pass Through versions of f/w this means that the
1454  * driver is no longer loaded.
1455  *
1456  **/
1457 static void igb_release_hw_control(struct igb_adapter *adapter)
1458 {
1459         struct e1000_hw *hw = &adapter->hw;
1460         u32 ctrl_ext;
1461
1462         /* Let firmware take over control of h/w */
1463         ctrl_ext = rd32(E1000_CTRL_EXT);
1464         wr32(E1000_CTRL_EXT,
1465                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1466 }
1467
1468 /**
1469  * igb_get_hw_control - get control of the h/w from f/w
1470  * @adapter: address of board private structure
1471  *
1472  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1473  * For ASF and Pass Through versions of f/w this means that
1474  * the driver is loaded.
1475  *
1476  **/
1477 static void igb_get_hw_control(struct igb_adapter *adapter)
1478 {
1479         struct e1000_hw *hw = &adapter->hw;
1480         u32 ctrl_ext;
1481
1482         /* Let firmware know the driver has taken over */
1483         ctrl_ext = rd32(E1000_CTRL_EXT);
1484         wr32(E1000_CTRL_EXT,
1485                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1486 }
1487
1488 /**
1489  * igb_configure - configure the hardware for RX and TX
1490  * @adapter: private board structure
1491  **/
1492 static void igb_configure(struct igb_adapter *adapter)
1493 {
1494         struct net_device *netdev = adapter->netdev;
1495         int i;
1496
1497         igb_get_hw_control(adapter);
1498         igb_set_rx_mode(netdev);
1499
1500         igb_restore_vlan(adapter);
1501
1502         igb_setup_tctl(adapter);
1503         igb_setup_mrqc(adapter);
1504         igb_setup_rctl(adapter);
1505
1506         igb_configure_tx(adapter);
1507         igb_configure_rx(adapter);
1508
1509         igb_rx_fifo_flush_82575(&adapter->hw);
1510
1511         /* call igb_desc_unused which always leaves
1512          * at least 1 descriptor unused to make sure
1513          * next_to_use != next_to_clean */
1514         for (i = 0; i < adapter->num_rx_queues; i++) {
1515                 struct igb_ring *ring = adapter->rx_ring[i];
1516                 igb_alloc_rx_buffers(ring, igb_desc_unused(ring));
1517         }
1518 }
1519
1520 /**
1521  * igb_power_up_link - Power up the phy/serdes link
1522  * @adapter: address of board private structure
1523  **/
1524 void igb_power_up_link(struct igb_adapter *adapter)
1525 {
1526         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1527                 igb_power_up_phy_copper(&adapter->hw);
1528         else
1529                 igb_power_up_serdes_link_82575(&adapter->hw);
1530         igb_reset_phy(&adapter->hw);
1531 }
1532
1533 /**
1534  * igb_power_down_link - Power down the phy/serdes link
1535  * @adapter: address of board private structure
1536  */
1537 static void igb_power_down_link(struct igb_adapter *adapter)
1538 {
1539         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1540                 igb_power_down_phy_copper_82575(&adapter->hw);
1541         else
1542                 igb_shutdown_serdes_link_82575(&adapter->hw);
1543 }
1544
1545 /**
1546  * igb_up - Open the interface and prepare it to handle traffic
1547  * @adapter: board private structure
1548  **/
1549 int igb_up(struct igb_adapter *adapter)
1550 {
1551         struct e1000_hw *hw = &adapter->hw;
1552         int i;
1553
1554         /* hardware has been reset, we need to reload some things */
1555         igb_configure(adapter);
1556
1557         clear_bit(__IGB_DOWN, &adapter->state);
1558
1559         for (i = 0; i < adapter->num_q_vectors; i++)
1560                 napi_enable(&(adapter->q_vector[i]->napi));
1561
1562         if (adapter->msix_entries)
1563                 igb_configure_msix(adapter);
1564         else
1565                 igb_assign_vector(adapter->q_vector[0], 0);
1566
1567         /* Clear any pending interrupts. */
1568         rd32(E1000_ICR);
1569         igb_irq_enable(adapter);
1570
1571         /* notify VFs that reset has been completed */
1572         if (adapter->vfs_allocated_count) {
1573                 u32 reg_data = rd32(E1000_CTRL_EXT);
1574                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1575                 wr32(E1000_CTRL_EXT, reg_data);
1576         }
1577
1578         netif_tx_start_all_queues(adapter->netdev);
1579
1580         /* start the watchdog. */
1581         hw->mac.get_link_status = 1;
1582         schedule_work(&adapter->watchdog_task);
1583
1584         return 0;
1585 }
1586
1587 void igb_down(struct igb_adapter *adapter)
1588 {
1589         struct net_device *netdev = adapter->netdev;
1590         struct e1000_hw *hw = &adapter->hw;
1591         u32 tctl, rctl;
1592         int i;
1593
1594         /* signal that we're down so the interrupt handler does not
1595          * reschedule our watchdog timer */
1596         set_bit(__IGB_DOWN, &adapter->state);
1597
1598         /* disable receives in the hardware */
1599         rctl = rd32(E1000_RCTL);
1600         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1601         /* flush and sleep below */
1602
1603         netif_tx_stop_all_queues(netdev);
1604
1605         /* disable transmits in the hardware */
1606         tctl = rd32(E1000_TCTL);
1607         tctl &= ~E1000_TCTL_EN;
1608         wr32(E1000_TCTL, tctl);
1609         /* flush both disables and wait for them to finish */
1610         wrfl();
1611         msleep(10);
1612
1613         for (i = 0; i < adapter->num_q_vectors; i++)
1614                 napi_disable(&(adapter->q_vector[i]->napi));
1615
1616         igb_irq_disable(adapter);
1617
1618         del_timer_sync(&adapter->watchdog_timer);
1619         del_timer_sync(&adapter->phy_info_timer);
1620
1621         netif_carrier_off(netdev);
1622
1623         /* record the stats before reset */
1624         spin_lock(&adapter->stats64_lock);
1625         igb_update_stats(adapter, &adapter->stats64);
1626         spin_unlock(&adapter->stats64_lock);
1627
1628         adapter->link_speed = 0;
1629         adapter->link_duplex = 0;
1630
1631         if (!pci_channel_offline(adapter->pdev))
1632                 igb_reset(adapter);
1633         igb_clean_all_tx_rings(adapter);
1634         igb_clean_all_rx_rings(adapter);
1635 #ifdef CONFIG_IGB_DCA
1636
1637         /* since we reset the hardware, DCA settings were cleared */
1638         igb_setup_dca(adapter);
1639 #endif
1640 }
1641
1642 void igb_reinit_locked(struct igb_adapter *adapter)
1643 {
1644         WARN_ON(in_interrupt());
1645         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1646                 msleep(1);
1647         igb_down(adapter);
1648         igb_up(adapter);
1649         clear_bit(__IGB_RESETTING, &adapter->state);
1650 }
1651
1652 void igb_reset(struct igb_adapter *adapter)
1653 {
1654         struct pci_dev *pdev = adapter->pdev;
1655         struct e1000_hw *hw = &adapter->hw;
1656         struct e1000_mac_info *mac = &hw->mac;
1657         struct e1000_fc_info *fc = &hw->fc;
1658         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1659         u16 hwm;
1660
1661         /* Repartition the PBA for MTUs greater than 9k.
1662          * CTRL.RST must be asserted for the change to take effect.
1663          */
1664         switch (mac->type) {
1665         case e1000_i350:
1666         case e1000_82580:
1667                 pba = rd32(E1000_RXPBS);
1668                 pba = igb_rxpbs_adjust_82580(pba);
1669                 break;
1670         case e1000_82576:
1671                 pba = rd32(E1000_RXPBS);
1672                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1673                 break;
1674         case e1000_82575:
1675         default:
1676                 pba = E1000_PBA_34K;
1677                 break;
1678         }
1679
1680         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1681             (mac->type < e1000_82576)) {
1682                 /* adjust PBA for jumbo frames */
1683                 wr32(E1000_PBA, pba);
1684
1685                 /* To maintain wire speed transmits, the Tx FIFO should be
1686                  * large enough to accommodate two full transmit packets,
1687                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1688                  * the Rx FIFO should be large enough to accommodate at least
1689                  * one full receive packet and is similarly rounded up and
1690                  * expressed in KB. */
1691                 pba = rd32(E1000_PBA);
1692                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1693                 tx_space = pba >> 16;
1694                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1695                 pba &= 0xffff;
1696                 /* the Tx FIFO also stores 16 bytes of descriptor information per
1697                  * packet; don't count the Ethernet FCS because hardware appends it */
1698                 min_tx_space = (adapter->max_frame_size +
1699                                 sizeof(union e1000_adv_tx_desc) -
1700                                 ETH_FCS_LEN) * 2;
1701                 min_tx_space = ALIGN(min_tx_space, 1024);
1702                 min_tx_space >>= 10;
1703                 /* software strips receive CRC, so leave room for it */
1704                 min_rx_space = adapter->max_frame_size;
1705                 min_rx_space = ALIGN(min_rx_space, 1024);
1706                 min_rx_space >>= 10;
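                /*
                 * Worked example (assuming a hypothetical 9000-byte MTU, i.e.
                 * a 9022-byte max_frame_size and 16-byte Tx descriptors):
                 * min_tx_space rounds up to 18 KB and min_rx_space to 9 KB.
                 */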
1707
1708                 /* If current Tx allocation is less than the min Tx FIFO size,
1709                  * and the min Tx FIFO size is less than the current Rx FIFO
1710                  * allocation, take space away from current Rx allocation */
1711                 if (tx_space < min_tx_space &&
1712                     ((min_tx_space - tx_space) < pba)) {
1713                         pba = pba - (min_tx_space - tx_space);
1714
1715                         /* if short on rx space, rx wins and must trump tx
1716                          * adjustment */
1717                         if (pba < min_rx_space)
1718                                 pba = min_rx_space;
1719                 }
1720                 wr32(E1000_PBA, pba);
1721         }
1722
1723         /* flow control settings */
1724         /* The high water mark must be low enough to fit one full frame
1725          * (or the size used for early receive) above it in the Rx FIFO.
1726          * Set it to the lower of:
1727          * - 90% of the Rx FIFO size, or
1728          * - the full Rx FIFO size minus one full frame */
1729         hwm = min(((pba << 10) * 9 / 10),
1730                         ((pba << 10) - 2 * adapter->max_frame_size));
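        /*
         * For example, with the default 34 KB PBA and a 1522-byte standard
         * frame this evaluates to min(31334, 31772) = 31334 bytes, which the
         * 0xFFF0 mask below rounds down to 31328 (16-byte granularity).
         */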
1731
1732         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1733         fc->low_water = fc->high_water - 16;
1734         fc->pause_time = 0xFFFF;
1735         fc->send_xon = 1;
1736         fc->current_mode = fc->requested_mode;
1737
1738         /* disable receive for all VFs and wait one second */
1739         if (adapter->vfs_allocated_count) {
1740                 int i;
1741                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1742                         adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC;
1743
1744                 /* ping all the active vfs to let them know we are going down */
1745                 igb_ping_all_vfs(adapter);
1746
1747                 /* disable transmits and receives */
1748                 wr32(E1000_VFRE, 0);
1749                 wr32(E1000_VFTE, 0);
1750         }
1751
1752         /* Allow time for pending master requests to run */
1753         hw->mac.ops.reset_hw(hw);
1754         wr32(E1000_WUC, 0);
1755
1756         if (hw->mac.ops.init_hw(hw))
1757                 dev_err(&pdev->dev, "Hardware Error\n");
1758
1759         igb_init_dmac(adapter, pba);
1760         if (!netif_running(adapter->netdev))
1761                 igb_power_down_link(adapter);
1762
1763         igb_update_mng_vlan(adapter);
1764
1765         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1766         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1767
1768         igb_get_phy_info(hw);
1769 }
1770
1771 static netdev_features_t igb_fix_features(struct net_device *netdev,
1772         netdev_features_t features)
1773 {
1774         /*
1775          * Since there is no support for separate rx/tx vlan accel
1776          * enable/disable make sure tx flag is always in same state as rx.
1777          */
1778         if (features & NETIF_F_HW_VLAN_RX)
1779                 features |= NETIF_F_HW_VLAN_TX;
1780         else
1781                 features &= ~NETIF_F_HW_VLAN_TX;
1782
1783         return features;
1784 }
1785
1786 static int igb_set_features(struct net_device *netdev,
1787         netdev_features_t features)
1788 {
1789         netdev_features_t changed = netdev->features ^ features;
1790         struct igb_adapter *adapter = netdev_priv(netdev);
1791
1792         if (changed & NETIF_F_HW_VLAN_RX)
1793                 igb_vlan_mode(netdev, features);
1794
1795         if (!(changed & NETIF_F_RXALL))
1796                 return 0;
1797
1798         netdev->features = features;
1799
1800         if (netif_running(netdev))
1801                 igb_reinit_locked(adapter);
1802         else
1803                 igb_reset(adapter);
1804
1805         return 0;
1806 }
1807
1808 static const struct net_device_ops igb_netdev_ops = {
1809         .ndo_open               = igb_open,
1810         .ndo_stop               = igb_close,
1811         .ndo_start_xmit         = igb_xmit_frame,
1812         .ndo_get_stats64        = igb_get_stats64,
1813         .ndo_set_rx_mode        = igb_set_rx_mode,
1814         .ndo_set_mac_address    = igb_set_mac,
1815         .ndo_change_mtu         = igb_change_mtu,
1816         .ndo_do_ioctl           = igb_ioctl,
1817         .ndo_tx_timeout         = igb_tx_timeout,
1818         .ndo_validate_addr      = eth_validate_addr,
1819         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1820         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1821         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1822         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1823         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1824         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1825 #ifdef CONFIG_NET_POLL_CONTROLLER
1826         .ndo_poll_controller    = igb_netpoll,
1827 #endif
1828         .ndo_fix_features       = igb_fix_features,
1829         .ndo_set_features       = igb_set_features,
1830 };
1831
1832 /**
1833  * igb_probe - Device Initialization Routine
1834  * @pdev: PCI device information struct
1835  * @ent: entry in igb_pci_tbl
1836  *
1837  * Returns 0 on success, negative on failure
1838  *
1839  * igb_probe initializes an adapter identified by a pci_dev structure.
1840  * The OS initialization, configuring of the adapter private structure,
1841  * and a hardware reset occur.
1842  **/
1843 static int __devinit igb_probe(struct pci_dev *pdev,
1844                                const struct pci_device_id *ent)
1845 {
1846         struct net_device *netdev;
1847         struct igb_adapter *adapter;
1848         struct e1000_hw *hw;
1849         u16 eeprom_data = 0;
1850         s32 ret_val;
1851         static int global_quad_port_a; /* global quad port a indication */
1852         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1853         unsigned long mmio_start, mmio_len;
1854         int err, pci_using_dac;
1855         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1856         u8 part_str[E1000_PBANUM_LENGTH];
1857
1858         /* Catch broken hardware that put the wrong VF device ID in
1859          * the PCIe SR-IOV capability.
1860          */
1861         if (pdev->is_virtfn) {
1862                 WARN(1, "%s (%hx:%hx) should not be a VF!\n",
1863                      pci_name(pdev), pdev->vendor, pdev->device);
1864                 return -EINVAL;
1865         }
1866
1867         err = pci_enable_device_mem(pdev);
1868         if (err)
1869                 return err;
1870
1871         pci_using_dac = 0;
1872         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1873         if (!err) {
1874                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1875                 if (!err)
1876                         pci_using_dac = 1;
1877         } else {
1878                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1879                 if (err) {
1880                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1881                         if (err) {
1882                                 dev_err(&pdev->dev, "No usable DMA "
1883                                         "configuration, aborting\n");
1884                                 goto err_dma;
1885                         }
1886                 }
1887         }
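        /*
         * pci_using_dac records whether the 64-bit DMA mask was accepted;
         * it is checked later to decide whether NETIF_F_HIGHDMA can be set.
         */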
1888
1889         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1890                                            IORESOURCE_MEM),
1891                                            igb_driver_name);
1892         if (err)
1893                 goto err_pci_reg;
1894
1895         pci_enable_pcie_error_reporting(pdev);
1896
1897         pci_set_master(pdev);
1898         pci_save_state(pdev);
1899
1900         err = -ENOMEM;
1901         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1902                                    IGB_MAX_TX_QUEUES);
1903         if (!netdev)
1904                 goto err_alloc_etherdev;
1905
1906         SET_NETDEV_DEV(netdev, &pdev->dev);
1907
1908         pci_set_drvdata(pdev, netdev);
1909         adapter = netdev_priv(netdev);
1910         adapter->netdev = netdev;
1911         adapter->pdev = pdev;
1912         hw = &adapter->hw;
1913         hw->back = adapter;
1914         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
1915
1916         mmio_start = pci_resource_start(pdev, 0);
1917         mmio_len = pci_resource_len(pdev, 0);
1918
1919         err = -EIO;
1920         hw->hw_addr = ioremap(mmio_start, mmio_len);
1921         if (!hw->hw_addr)
1922                 goto err_ioremap;
1923
1924         netdev->netdev_ops = &igb_netdev_ops;
1925         igb_set_ethtool_ops(netdev);
1926         netdev->watchdog_timeo = 5 * HZ;
1927
1928         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1929
1930         netdev->mem_start = mmio_start;
1931         netdev->mem_end = mmio_start + mmio_len;
1932
1933         /* PCI config space info */
1934         hw->vendor_id = pdev->vendor;
1935         hw->device_id = pdev->device;
1936         hw->revision_id = pdev->revision;
1937         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1938         hw->subsystem_device_id = pdev->subsystem_device;
1939
1940         /* Copy the default MAC, PHY and NVM function pointers */
1941         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1942         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1943         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1944         /* Initialize skew-specific constants */
1945         err = ei->get_invariants(hw);
1946         if (err)
1947                 goto err_sw_init;
1948
1949         /* setup the private structure */
1950         err = igb_sw_init(adapter);
1951         if (err)
1952                 goto err_sw_init;
1953
1954         igb_get_bus_info_pcie(hw);
1955
1956         hw->phy.autoneg_wait_to_complete = false;
1957
1958         /* Copper options */
1959         if (hw->phy.media_type == e1000_media_type_copper) {
1960                 hw->phy.mdix = AUTO_ALL_MODES;
1961                 hw->phy.disable_polarity_correction = false;
1962                 hw->phy.ms_type = e1000_ms_hw_default;
1963         }
1964
1965         if (igb_check_reset_block(hw))
1966                 dev_info(&pdev->dev,
1967                         "PHY reset is blocked due to SOL/IDER session.\n");
1968
1969         /*
1970          * features was initialized to 0 at allocation, but it may already
1971          * have bits set by igb_sw_init, so use an OR instead of an
1972          * assignment.
1973          */
1974         netdev->features |= NETIF_F_SG |
1975                             NETIF_F_IP_CSUM |
1976                             NETIF_F_IPV6_CSUM |
1977                             NETIF_F_TSO |
1978                             NETIF_F_TSO6 |
1979                             NETIF_F_RXHASH |
1980                             NETIF_F_RXCSUM |
1981                             NETIF_F_HW_VLAN_RX |
1982                             NETIF_F_HW_VLAN_TX;
1983
1984         /* copy netdev features into list of user selectable features */
1985         netdev->hw_features |= netdev->features;
1986         netdev->hw_features |= NETIF_F_RXALL;
1987
1988         /* set this bit last since it cannot be part of hw_features */
1989         netdev->features |= NETIF_F_HW_VLAN_FILTER;
1990
1991         netdev->vlan_features |= NETIF_F_TSO |
1992                                  NETIF_F_TSO6 |
1993                                  NETIF_F_IP_CSUM |
1994                                  NETIF_F_IPV6_CSUM |
1995                                  NETIF_F_SG;
1996
1997         netdev->priv_flags |= IFF_SUPP_NOFCS;
1998
1999         if (pci_using_dac) {
2000                 netdev->features |= NETIF_F_HIGHDMA;
2001                 netdev->vlan_features |= NETIF_F_HIGHDMA;
2002         }
2003
2004         if (hw->mac.type >= e1000_82576) {
2005                 netdev->hw_features |= NETIF_F_SCTP_CSUM;
2006                 netdev->features |= NETIF_F_SCTP_CSUM;
2007         }
2008
2009         netdev->priv_flags |= IFF_UNICAST_FLT;
2010
2011         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
2012
2013         /* before reading the NVM, reset the controller to put the device in a
2014          * known good starting state */
2015         hw->mac.ops.reset_hw(hw);
2016
2017         /* make sure the NVM is good */
2018         if (hw->nvm.ops.validate(hw) < 0) {
2019                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
2020                 err = -EIO;
2021                 goto err_eeprom;
2022         }
2023
2024         /* copy the MAC address out of the NVM */
2025         if (hw->mac.ops.read_mac_addr(hw))
2026                 dev_err(&pdev->dev, "NVM Read Error\n");
2027
2028         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
2029         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
2030
2031         if (!is_valid_ether_addr(netdev->perm_addr)) {
2032                 dev_err(&pdev->dev, "Invalid MAC Address\n");
2033                 err = -EIO;
2034                 goto err_eeprom;
2035         }
2036
2037         setup_timer(&adapter->watchdog_timer, igb_watchdog,
2038                     (unsigned long) adapter);
2039         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
2040                     (unsigned long) adapter);
2041
2042         INIT_WORK(&adapter->reset_task, igb_reset_task);
2043         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
2044
2045         /* Initialize link properties that are user-changeable */
2046         adapter->fc_autoneg = true;
2047         hw->mac.autoneg = true;
2048         hw->phy.autoneg_advertised = 0x2f;
2049
2050         hw->fc.requested_mode = e1000_fc_default;
2051         hw->fc.current_mode = e1000_fc_default;
2052
2053         igb_validate_mdi_setting(hw);
2054
2055         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
2056          * enable the ACPI Magic Packet filter.
2057          */
2058
2059         if (hw->bus.func == 0)
2060                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
2061         else if (hw->mac.type >= e1000_82580)
2062                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
2063                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
2064                                  &eeprom_data);
2065         else if (hw->bus.func == 1)
2066                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
2067
2068         if (eeprom_data & eeprom_apme_mask)
2069                 adapter->eeprom_wol |= E1000_WUFC_MAG;
2070
2071         /* now that we have the eeprom settings, apply the special cases where
2072          * the eeprom may be wrong or the board simply won't support wake on
2073          * lan on a particular port */
2074         switch (pdev->device) {
2075         case E1000_DEV_ID_82575GB_QUAD_COPPER:
2076                 adapter->eeprom_wol = 0;
2077                 break;
2078         case E1000_DEV_ID_82575EB_FIBER_SERDES:
2079         case E1000_DEV_ID_82576_FIBER:
2080         case E1000_DEV_ID_82576_SERDES:
2081                 /* Wake events only supported on port A for dual fiber
2082                  * regardless of eeprom setting */
2083                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
2084                         adapter->eeprom_wol = 0;
2085                 break;
2086         case E1000_DEV_ID_82576_QUAD_COPPER:
2087         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
2088                 /* if quad port adapter, disable WoL on all but port A */
2089                 if (global_quad_port_a != 0)
2090                         adapter->eeprom_wol = 0;
2091                 else
2092                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
2093                 /* Reset for multiple quad port adapters */
2094                 if (++global_quad_port_a == 4)
2095                         global_quad_port_a = 0;
2096                 break;
2097         }
2098
2099         /* initialize the wol settings based on the eeprom settings */
2100         adapter->wol = adapter->eeprom_wol;
2101         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
2102
2103         /* reset the hardware with the new settings */
2104         igb_reset(adapter);
2105
2106         /* let the f/w know that the h/w is now under the control of the
2107          * driver. */
2108         igb_get_hw_control(adapter);
2109
2110         strcpy(netdev->name, "eth%d");
2111         err = register_netdev(netdev);
2112         if (err)
2113                 goto err_register;
2114
2115         /* carrier off reporting is important to ethtool even BEFORE open */
2116         netif_carrier_off(netdev);
2117
2118 #ifdef CONFIG_IGB_DCA
2119         if (dca_add_requester(&pdev->dev) == 0) {
2120                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
2121                 dev_info(&pdev->dev, "DCA enabled\n");
2122                 igb_setup_dca(adapter);
2123         }
2124
2125 #endif
2126         /* do hw tstamp init after resetting */
2127         igb_init_hw_timer(adapter);
2128
2129         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
2130         /* print bus type/speed/width info */
2131         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
2132                  netdev->name,
2133                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
2134                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
2135                                                             "unknown"),
2136                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
2137                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
2138                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
2139                    "unknown"),
2140                  netdev->dev_addr);
2141
2142         ret_val = igb_read_part_string(hw, part_str, E1000_PBANUM_LENGTH);
2143         if (ret_val)
2144                 strcpy(part_str, "Unknown");
2145         dev_info(&pdev->dev, "%s: PBA No: %s\n", netdev->name, part_str);
2146         dev_info(&pdev->dev,
2147                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2148                 adapter->msix_entries ? "MSI-X" :
2149                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2150                 adapter->num_rx_queues, adapter->num_tx_queues);
2151         switch (hw->mac.type) {
2152         case e1000_i350:
2153                 igb_set_eee_i350(hw);
2154                 break;
2155         default:
2156                 break;
2157         }
2158
2159         pm_runtime_put_noidle(&pdev->dev);
2160         return 0;
2161
2162 err_register:
2163         igb_release_hw_control(adapter);
2164 err_eeprom:
2165         if (!igb_check_reset_block(hw))
2166                 igb_reset_phy(hw);
2167
2168         if (hw->flash_address)
2169                 iounmap(hw->flash_address);
2170 err_sw_init:
2171         igb_clear_interrupt_scheme(adapter);
2172         iounmap(hw->hw_addr);
2173 err_ioremap:
2174         free_netdev(netdev);
2175 err_alloc_etherdev:
2176         pci_release_selected_regions(pdev,
2177                                      pci_select_bars(pdev, IORESOURCE_MEM));
2178 err_pci_reg:
2179 err_dma:
2180         pci_disable_device(pdev);
2181         return err;
2182 }
2183
2184 /**
2185  * igb_remove - Device Removal Routine
2186  * @pdev: PCI device information struct
2187  *
2188  * igb_remove is called by the PCI subsystem to alert the driver
2189  * that it should release a PCI device.  This could be caused by a
2190  * Hot-Plug event, or because the driver is going to be removed from
2191  * memory.
2192  **/
2193 static void __devexit igb_remove(struct pci_dev *pdev)
2194 {
2195         struct net_device *netdev = pci_get_drvdata(pdev);
2196         struct igb_adapter *adapter = netdev_priv(netdev);
2197         struct e1000_hw *hw = &adapter->hw;
2198
2199         pm_runtime_get_noresume(&pdev->dev);
2200
2201         /*
2202          * The watchdog timer may be rescheduled, so explicitly
2203          * prevent it from being rescheduled while the device is torn down.
2204          */
2205         set_bit(__IGB_DOWN, &adapter->state);
2206         del_timer_sync(&adapter->watchdog_timer);
2207         del_timer_sync(&adapter->phy_info_timer);
2208
2209         cancel_work_sync(&adapter->reset_task);
2210         cancel_work_sync(&adapter->watchdog_task);
2211
2212 #ifdef CONFIG_IGB_DCA
2213         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2214                 dev_info(&pdev->dev, "DCA disabled\n");
2215                 dca_remove_requester(&pdev->dev);
2216                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2217                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2218         }
2219 #endif
2220
2221         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2222          * would have already happened in close and is redundant. */
2223         igb_release_hw_control(adapter);
2224
2225         unregister_netdev(netdev);
2226
2227         igb_clear_interrupt_scheme(adapter);
2228
2229 #ifdef CONFIG_PCI_IOV
2230         /* reclaim resources allocated to VFs */
2231         if (adapter->vf_data) {
2232                 /* disable iov and allow time for transactions to clear */
2233                 if (!igb_check_vf_assignment(adapter)) {
2234                         pci_disable_sriov(pdev);
2235                         msleep(500);
2236                 } else {
2237                         dev_info(&pdev->dev, "VF(s) assigned to guests!\n");
2238                 }
2239
2240                 kfree(adapter->vf_data);
2241                 adapter->vf_data = NULL;
2242                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2243                 wrfl();
2244                 msleep(100);
2245                 dev_info(&pdev->dev, "IOV Disabled\n");
2246         }
2247 #endif
2248
2249         iounmap(hw->hw_addr);
2250         if (hw->flash_address)
2251                 iounmap(hw->flash_address);
2252         pci_release_selected_regions(pdev,
2253                                      pci_select_bars(pdev, IORESOURCE_MEM));
2254
2255         kfree(adapter->shadow_vfta);
2256         free_netdev(netdev);
2257
2258         pci_disable_pcie_error_reporting(pdev);
2259
2260         pci_disable_device(pdev);
2261 }
2262
2263 /**
2264  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2265  * @adapter: board private structure to initialize
2266  *
2267  * This function initializes the vf specific data storage and then attempts to
2268  * This function initializes the VF-specific data storage and then attempts to
2269  * allocate the VFs.  The reason for this ordering is that it is much
2270  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2271  **/
2272 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2273 {
2274 #ifdef CONFIG_PCI_IOV
2275         struct pci_dev *pdev = adapter->pdev;
2276         int old_vfs = igb_find_enabled_vfs(adapter);
2277         int i;
2278
2279         if (old_vfs) {
2280                 dev_info(&pdev->dev, "%d pre-allocated VFs found - override "
2281                          "max_vfs setting of %d\n", old_vfs, max_vfs);
2282                 adapter->vfs_allocated_count = old_vfs;
2283         }
2284
2285         if (!adapter->vfs_allocated_count)
2286                 return;
2287
2288         adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2289                                 sizeof(struct vf_data_storage), GFP_KERNEL);
2290         /* if allocation failed then we do not support SR-IOV */
2291         if (!adapter->vf_data) {
2292                 adapter->vfs_allocated_count = 0;
2293                 dev_err(&pdev->dev, "Unable to allocate memory for VF "
2294                         "Data Storage\n");
2295                 goto out;
2296         }
2297
2298         if (!old_vfs) {
2299                 if (pci_enable_sriov(pdev, adapter->vfs_allocated_count))
2300                         goto err_out;
2301         }
2302         dev_info(&pdev->dev, "%d VFs allocated\n",
2303                  adapter->vfs_allocated_count);
2304         for (i = 0; i < adapter->vfs_allocated_count; i++)
2305                 igb_vf_configure(adapter, i);
2306
2307         /* DMA Coalescing is not supported in IOV mode. */
2308         adapter->flags &= ~IGB_FLAG_DMAC;
2309         goto out;
2310 err_out:
2311         kfree(adapter->vf_data);
2312         adapter->vf_data = NULL;
2313         adapter->vfs_allocated_count = 0;
2314 out:
2315         return;
2316 #endif /* CONFIG_PCI_IOV */
2317 }
2318
2319 /**
2320  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2321  * @adapter: board private structure to initialize
2322  *
2323  * igb_init_hw_timer initializes the function pointer and values for the hw
2324  * timer found in hardware.
2325  **/
2326 static void igb_init_hw_timer(struct igb_adapter *adapter)
2327 {
2328         struct e1000_hw *hw = &adapter->hw;
2329
2330         switch (hw->mac.type) {
2331         case e1000_i350:
2332         case e1000_82580:
2333                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2334                 adapter->cycles.read = igb_read_clock;
2335                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2336                 adapter->cycles.mult = 1;
2337                 /*
2338                  * The 82580 timesync advances the system timer by 8ns every 8ns,
2339                  * and the value cannot be shifted.  Instead we need to shift
2340                  * the registers to generate a 64bit timer value.  As a result
2341                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2342                  * 24 in order to generate a larger value for synchronization.
2343                  */
2344                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2345                 /* disable system timer temporarily by setting bit 31 */
2346                 wr32(E1000_TSAUXC, 0x80000000);
2347                 wrfl();
2348
2349                 /* Set registers so that rollover occurs soon to test this. */
2350                 wr32(E1000_SYSTIMR, 0x00000000);
2351                 wr32(E1000_SYSTIML, 0x80000000);
2352                 wr32(E1000_SYSTIMH, 0x000000FF);
2353                 wrfl();
2354
2355                 /* enable system timer by clearing bit 31 */
2356                 wr32(E1000_TSAUXC, 0x0);
2357                 wrfl();
2358
2359                 timecounter_init(&adapter->clock,
2360                                  &adapter->cycles,
2361                                  ktime_to_ns(ktime_get_real()));
2362                 /*
2363                  * Synchronize our NIC clock against system wall clock. NIC
2364                  * time stamp reading requires ~3us per sample, each sample
2365                  * was pretty stable even under load => only require 10
2366                  * samples for each offset comparison.
2367                  */
2368                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2369                 adapter->compare.source = &adapter->clock;
2370                 adapter->compare.target = ktime_get_real;
2371                 adapter->compare.num_samples = 10;
2372                 timecompare_update(&adapter->compare, 0);
2373                 break;
2374         case e1000_82576:
2375                 /*
2376                  * Initialize hardware timer: we keep it running just in case
2377                  * some program needs it later on.
2378                  */
2379                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2380                 adapter->cycles.read = igb_read_clock;
2381                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2382                 adapter->cycles.mult = 1;
2383                 /*
2384                  * Scale the NIC clock cycle by a large factor so that
2385                  * relatively small clock corrections can be added or
2386                  * subtracted at each clock tick. The drawbacks of a large
2387                  * factor are a) that the clock register overflows more quickly
2388                  * (not such a big deal) and b) that the increment per tick has
2389                  * to fit into 24 bits.  As a result we need to use a shift of
2390                  * 19 so we can fit a value of 16 into the TIMINCA register.
2391                  */
2392                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2393                 wr32(E1000_TIMINCA,
2394                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2395                                 (16 << IGB_82576_TSYNC_SHIFT));
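                /*
                 * Arithmetic note: 16 << 19 == 0x800000, which just fits in
                 * the 24-bit increment field described in the comment above.
                 */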
2396
2397                 /* Set registers so that rollover occurs soon to test this. */
2398                 wr32(E1000_SYSTIML, 0x00000000);
2399                 wr32(E1000_SYSTIMH, 0xFF800000);
2400                 wrfl();
2401
2402                 timecounter_init(&adapter->clock,
2403                                  &adapter->cycles,
2404                                  ktime_to_ns(ktime_get_real()));
2405                 /*
2406                  * Synchronize our NIC clock against system wall clock. NIC
2407                  * time stamp reading requires ~3us per sample, each sample
2408                  * was pretty stable even under load => only require 10
2409                  * samples for each offset comparison.
2410                  */
2411                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2412                 adapter->compare.source = &adapter->clock;
2413                 adapter->compare.target = ktime_get_real;
2414                 adapter->compare.num_samples = 10;
2415                 timecompare_update(&adapter->compare, 0);
2416                 break;
2417         case e1000_82575:
2418                 /* 82575 does not support timesync */
2419         default:
2420                 break;
2421         }
2422
2423 }
2424
2425 /**
2426  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2427  * @adapter: board private structure to initialize
2428  *
2429  * igb_sw_init initializes the Adapter private data structure.
2430  * Fields are initialized based on PCI device information and
2431  * OS network device settings (MTU size).
2432  **/
2433 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2434 {
2435         struct e1000_hw *hw = &adapter->hw;
2436         struct net_device *netdev = adapter->netdev;
2437         struct pci_dev *pdev = adapter->pdev;
2438
2439         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2440
2441         /* set default ring sizes */
2442         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2443         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2444
2445         /* set default ITR values */
2446         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2447         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2448
2449         /* set default work limits */
2450         adapter->tx_work_limit = IGB_DEFAULT_TX_WORK;
2451
2452         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
2453                                   VLAN_HLEN;
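        /*
         * For example, the default 1500-byte MTU gives a max_frame_size of
         * 1522 bytes (14-byte Ethernet header + 4-byte FCS + 4-byte VLAN tag).
         */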
2454         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2455
2456         adapter->node = -1;
2457
2458         spin_lock_init(&adapter->stats64_lock);
2459 #ifdef CONFIG_PCI_IOV
2460         switch (hw->mac.type) {
2461         case e1000_82576:
2462         case e1000_i350:
2463                 if (max_vfs > 7) {
2464                         dev_warn(&pdev->dev,
2465                                  "Maximum of 7 VFs per PF, using max\n");
2466                         adapter->vfs_allocated_count = 7;
2467                 } else
2468                         adapter->vfs_allocated_count = max_vfs;
2469                 break;
2470         default:
2471                 break;
2472         }
2473 #endif /* CONFIG_PCI_IOV */
2474         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2475         /* i350 cannot do RSS and SR-IOV at the same time */
2476         if (hw->mac.type == e1000_i350 && adapter->vfs_allocated_count)
2477                 adapter->rss_queues = 1;
2478
2479         /*
2480          * If rss_queues > 4, or more than six VFs are allocated while more
2481          * than one rss queue is in use, combine the Tx/Rx queues into queue
2482          * pairs in order to conserve the limited supply of interrupt vectors.
2483          */
2484         if ((adapter->rss_queues > 4) ||
2485             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2486                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2487
2488         /* Setup and initialize a copy of the hw vlan table array */
2489         adapter->shadow_vfta = kzalloc(sizeof(u32) *
2490                                 E1000_VLAN_FILTER_TBL_SIZE,
2491                                 GFP_ATOMIC);
2492
2493         /* This call may decrease the number of queues */
2494         if (igb_init_interrupt_scheme(adapter)) {
2495                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2496                 return -ENOMEM;
2497         }
2498
2499         igb_probe_vfs(adapter);
2500
2501         /* Explicitly disable IRQ since the NIC can be in any state. */
2502         igb_irq_disable(adapter);
2503
2504         if (hw->mac.type == e1000_i350)
2505                 adapter->flags &= ~IGB_FLAG_DMAC;
2506
2507         set_bit(__IGB_DOWN, &adapter->state);
2508         return 0;
2509 }
2510
2511 /**
2512  * igb_open - Called when a network interface is made active
2513  * @netdev: network interface device structure
2514  *
2515  * Returns 0 on success, negative value on failure
2516  *
2517  * The open entry point is called when a network interface is made
2518  * active by the system (IFF_UP).  At this point all resources needed
2519  * for transmit and receive operations are allocated, the interrupt
2520  * handler is registered with the OS, the watchdog timer is started,
2521  * and the stack is notified that the interface is ready.
2522  **/
2523 static int __igb_open(struct net_device *netdev, bool resuming)
2524 {
2525         struct igb_adapter *adapter = netdev_priv(netdev);
2526         struct e1000_hw *hw = &adapter->hw;
2527         struct pci_dev *pdev = adapter->pdev;
2528         int err;
2529         int i;
2530
2531         /* disallow open during test */
2532         if (test_bit(__IGB_TESTING, &adapter->state)) {
2533                 WARN_ON(resuming);
2534                 return -EBUSY;
2535         }
2536
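        /* take a runtime-PM reference only for a normal open; the resume
         * path manages the usage count separately */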
2537         if (!resuming)
2538                 pm_runtime_get_sync(&pdev->dev);
2539
2540         netif_carrier_off(netdev);
2541
2542         /* allocate transmit descriptors */
2543         err = igb_setup_all_tx_resources(adapter);
2544         if (err)
2545                 goto err_setup_tx;
2546
2547         /* allocate receive descriptors */
2548         err = igb_setup_all_rx_resources(adapter);
2549         if (err)
2550                 goto err_setup_rx;
2551
2552         igb_power_up_link(adapter);
2553
2554         /* before we allocate an interrupt, we must be ready to handle it.
2555          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2556          * as soon as we call request_irq, so we have to set up our
2557          * clean_rx handler before we do so.  */
2558         igb_configure(adapter);
2559
2560         err = igb_request_irq(adapter);
2561         if (err)
2562                 goto err_req_irq;
2563
2564         /* From here on the code is the same as igb_up() */
2565         clear_bit(__IGB_DOWN, &adapter->state);
2566
2567         for (i = 0; i < adapter->num_q_vectors; i++)
2568                 napi_enable(&(adapter->q_vector[i]->napi));
2569
2570         /* Clear any pending interrupts. */
2571         rd32(E1000_ICR);
2572
2573         igb_irq_enable(adapter);
2574
2575         /* notify VFs that reset has been completed */
2576         if (adapter->vfs_allocated_count) {
2577                 u32 reg_data = rd32(E1000_CTRL_EXT);
2578                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2579                 wr32(E1000_CTRL_EXT, reg_data);
2580         }
2581
2582         netif_tx_start_all_queues(netdev);
2583
2584         if (!resuming)
2585                 pm_runtime_put(&pdev->dev);
2586
2587         /* start the watchdog. */
2588         hw->mac.get_link_status = 1;
2589         schedule_work(&adapter->watchdog_task);
2590
2591         return 0;
2592
2593 err_req_irq:
2594         igb_release_hw_control(adapter);
2595         igb_power_down_link(adapter);
2596         igb_free_all_rx_resources(adapter);
2597 err_setup_rx:
2598         igb_free_all_tx_resources(adapter);
2599 err_setup_tx:
2600         igb_reset(adapter);
2601         if (!resuming)
2602                 pm_runtime_put(&pdev->dev);
2603
2604         return err;
2605 }
2606
2607 static int igb_open(struct net_device *netdev)
2608 {
2609         return __igb_open(netdev, false);
2610 }
2611
2612 /**
2613  * igb_close - Disables a network interface
2614  * @netdev: network interface device structure
2615  *
2616  * Returns 0, this is not allowed to fail
2617  *
2618  * The close entry point is called when an interface is de-activated
2619  * by the OS.  The hardware is still under the driver's control, but
2620  * needs to be disabled.  A global MAC reset is issued to stop the
2621  * hardware, and all transmit and receive resources are freed.
2622  **/
2623 static int __igb_close(struct net_device *netdev, bool suspending)
2624 {
2625         struct igb_adapter *adapter = netdev_priv(netdev);
2626         struct pci_dev *pdev = adapter->pdev;
2627
2628         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2629
2630         if (!suspending)
2631                 pm_runtime_get_sync(&pdev->dev);
2632
2633         igb_down(adapter);
2634         igb_free_irq(adapter);
2635
2636         igb_free_all_tx_resources(adapter);
2637         igb_free_all_rx_resources(adapter);
2638
2639         if (!suspending)
2640                 pm_runtime_put_sync(&pdev->dev);
2641         return 0;
2642 }
2643
2644 static int igb_close(struct net_device *netdev)
2645 {
2646         return __igb_close(netdev, false);
2647 }
2648
2649 /**
2650  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2651  * @tx_ring: tx descriptor ring (for a specific queue) to set up
2652  *
2653  * Return 0 on success, negative on failure
2654  **/
2655 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2656 {
2657         struct device *dev = tx_ring->dev;
2658         int orig_node = dev_to_node(dev);
2659         int size;
2660
2661         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
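        /* prefer a node-local allocation; the plain vzalloc() below is the fallback */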
2662         tx_ring->tx_buffer_info = vzalloc_node(size, tx_ring->numa_node);
2663         if (!tx_ring->tx_buffer_info)
2664                 tx_ring->tx_buffer_info = vzalloc(size);
2665         if (!tx_ring->tx_buffer_info)
2666                 goto err;
2667
2668         /* round up to nearest 4K */
2669         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2670         tx_ring->size = ALIGN(tx_ring->size, 4096);
2671
2672         set_dev_node(dev, tx_ring->numa_node);
2673         tx_ring->desc = dma_alloc_coherent(dev,
2674                                            tx_ring->size,
2675                                            &tx_ring->dma,
2676                                            GFP_KERNEL);
2677         set_dev_node(dev, orig_node);
2678         if (!tx_ring->desc)
2679                 tx_ring->desc = dma_alloc_coherent(dev,
2680                                                    tx_ring->size,
2681                                                    &tx_ring->dma,
2682                                                    GFP_KERNEL);
2683
2684         if (!tx_ring->desc)
2685                 goto err;
2686
2687         tx_ring->next_to_use = 0;
2688         tx_ring->next_to_clean = 0;
2689
2690         return 0;
2691
2692 err:
2693         vfree(tx_ring->tx_buffer_info);
2694         dev_err(dev,
2695                 "Unable to allocate memory for the transmit descriptor ring\n");
2696         return -ENOMEM;
2697 }
2698
2699 /**
2700  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2701  *                                (Descriptors) for all queues
2702  * @adapter: board private structure
2703  *
2704  * Return 0 on success, negative on failure
2705  **/
2706 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2707 {
2708         struct pci_dev *pdev = adapter->pdev;
2709         int i, err = 0;
2710
2711         for (i = 0; i < adapter->num_tx_queues; i++) {
2712                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2713                 if (err) {
2714                         dev_err(&pdev->dev,
2715                                 "Allocation for Tx Queue %u failed\n", i);
2716                         for (i--; i >= 0; i--)
2717                                 igb_free_tx_resources(adapter->tx_ring[i]);
2718                         break;
2719                 }
2720         }
2721
2722         return err;
2723 }
2724
2725 /**
2726  * igb_setup_tctl - configure the transmit control registers
2727  * @adapter: Board private structure
2728  **/
2729 void igb_setup_tctl(struct igb_adapter *adapter)
2730 {
2731         struct e1000_hw *hw = &adapter->hw;
2732         u32 tctl;
2733
2734         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2735         wr32(E1000_TXDCTL(0), 0);
2736
2737         /* Program the Transmit Control Register */
2738         tctl = rd32(E1000_TCTL);
2739         tctl &= ~E1000_TCTL_CT;
2740         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2741                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2742
2743         igb_config_collision_dist(hw);
2744
2745         /* Enable transmits */
2746         tctl |= E1000_TCTL_EN;
2747
2748         wr32(E1000_TCTL, tctl);
2749 }
2750
2751 /**
2752  * igb_configure_tx_ring - Configure transmit ring after Reset
2753  * @adapter: board private structure
2754  * @ring: tx ring to configure
2755  *
2756  * Configure a transmit ring after a reset.
2757  **/
2758 void igb_configure_tx_ring(struct igb_adapter *adapter,
2759                            struct igb_ring *ring)
2760 {
2761         struct e1000_hw *hw = &adapter->hw;
2762         u32 txdctl = 0;
2763         u64 tdba = ring->dma;
2764         int reg_idx = ring->reg_idx;
2765
2766         /* disable the queue */
2767         wr32(E1000_TXDCTL(reg_idx), 0);
2768         wrfl();
2769         mdelay(10);
2770
2771         wr32(E1000_TDLEN(reg_idx),
2772                         ring->count * sizeof(union e1000_adv_tx_desc));
2773         wr32(E1000_TDBAL(reg_idx),
2774                         tdba & 0x00000000ffffffffULL);
2775         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2776
2777         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2778         wr32(E1000_TDH(reg_idx), 0);
2779         writel(0, ring->tail);
2780
2781         txdctl |= IGB_TX_PTHRESH;
2782         txdctl |= IGB_TX_HTHRESH << 8;
2783         txdctl |= IGB_TX_WTHRESH << 16;
2784
2785         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2786         wr32(E1000_TXDCTL(reg_idx), txdctl);
2787 }
2788
2789 /**
2790  * igb_configure_tx - Configure transmit Unit after Reset
2791  * @adapter: board private structure
2792  *
2793  * Configure the Tx unit of the MAC after a reset.
2794  **/
2795 static void igb_configure_tx(struct igb_adapter *adapter)
2796 {
2797         int i;
2798
2799         for (i = 0; i < adapter->num_tx_queues; i++)
2800                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2801 }
2802
2803 /**
2804  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2805  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2806  *
2807  * Returns 0 on success, negative on failure
2808  **/
2809 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2810 {
2811         struct device *dev = rx_ring->dev;
2812         int orig_node = dev_to_node(dev);
2813         int size, desc_len;
2814
2815         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
2816         rx_ring->rx_buffer_info = vzalloc_node(size, rx_ring->numa_node);
2817         if (!rx_ring->rx_buffer_info)
2818                 rx_ring->rx_buffer_info = vzalloc(size);
2819         if (!rx_ring->rx_buffer_info)
2820                 goto err;
2821
2822         desc_len = sizeof(union e1000_adv_rx_desc);
2823
2824         /* Round up to nearest 4K */
2825         rx_ring->size = rx_ring->count * desc_len;
2826         rx_ring->size = ALIGN(rx_ring->size, 4096);
2827
2828         set_dev_node(dev, rx_ring->numa_node);
2829         rx_ring->desc = dma_alloc_coherent(dev,
2830                                            rx_ring->size,
2831                                            &rx_ring->dma,
2832                                            GFP_KERNEL);
2833         set_dev_node(dev, orig_node);
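             /* fall back to an allocation without a NUMA node preference */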
2834         if (!rx_ring->desc)
2835                 rx_ring->desc = dma_alloc_coherent(dev,
2836                                                    rx_ring->size,
2837                                                    &rx_ring->dma,
2838                                                    GFP_KERNEL);
2839
2840         if (!rx_ring->desc)
2841                 goto err;
2842
2843         rx_ring->next_to_clean = 0;
2844         rx_ring->next_to_use = 0;
2845
2846         return 0;
2847
2848 err:
2849         vfree(rx_ring->rx_buffer_info);
2850         rx_ring->rx_buffer_info = NULL;
2851         dev_err(dev,
2852                 "Unable to allocate memory for the receive descriptor ring\n");
2853         return -ENOMEM;
2854 }
2855
2856 /**
2857  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2858  *                                (Descriptors) for all queues
2859  * @adapter: board private structure
2860  *
2861  * Return 0 on success, negative on failure
2862  **/
2863 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2864 {
2865         struct pci_dev *pdev = adapter->pdev;
2866         int i, err = 0;
2867
2868         for (i = 0; i < adapter->num_rx_queues; i++) {
2869                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2870                 if (err) {
2871                         dev_err(&pdev->dev,
2872                                 "Allocation for Rx Queue %u failed\n", i);
2873                         for (i--; i >= 0; i--)
2874                                 igb_free_rx_resources(adapter->rx_ring[i]);
2875                         break;
2876                 }
2877         }
2878
2879         return err;
2880 }
2881
2882 /**
2883  * igb_setup_mrqc - configure the multiple receive queue control registers
2884  * @adapter: Board private structure
2885  **/
2886 static void igb_setup_mrqc(struct igb_adapter *adapter)
2887 {
2888         struct e1000_hw *hw = &adapter->hw;
2889         u32 mrqc, rxcsum;
2890         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2891         union e1000_reta {
2892                 u32 dword;
2893                 u8  bytes[4];
2894         } reta;
2895         static const u8 rsshash[40] = {
2896                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2897                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2898                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2899                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2900
2901         /* Fill out hash function seeds: the 40-byte RSS key is packed
              * into the ten RSSRK registers, four bytes at a time
              */
2902         for (j = 0; j < 10; j++) {
2903                 u32 rsskey = rsshash[(j * 4)];
2904                 rsskey |= rsshash[(j * 4) + 1] << 8;
2905                 rsskey |= rsshash[(j * 4) + 2] << 16;
2906                 rsskey |= rsshash[(j * 4) + 3] << 24;
2907                 array_wr32(E1000_RSSRK(0), j, rsskey);
2908         }
2909
2910         num_rx_queues = adapter->rss_queues;
2911
2912         if (adapter->vfs_allocated_count) {
2913                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2914                 switch (hw->mac.type) {
2915                 case e1000_i350:
2916                 case e1000_82580:
2917                         num_rx_queues = 1;
2918                         shift = 0;
2919                         break;
2920                 case e1000_82576:
2921                         shift = 3;
2922                         num_rx_queues = 2;
2923                         break;
2924                 case e1000_82575:
2925                         shift = 2;
2926                         shift2 = 6;
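                             /* fall through */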
2927                 default:
2928                         break;
2929                 }
2930         } else {
2931                 if (hw->mac.type == e1000_82575)
2932                         shift = 6;
2933         }
2934
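             /* Populate the 128-entry RSS redirection table (32 registers,
              * one byte per entry).  Each byte selects the Rx queue for a
              * hash bucket, so the entries simply cycle through the enabled
              * queues, e.g. 0, 1, 2, 3, 0, 1, ... with four RSS queues.
              * The shift values computed above place the queue number in
              * the bit positions expected when queues are grouped into
              * VMDq pools.
              */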
2935         for (j = 0; j < (32 * 4); j++) {
2936                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2937                 if (shift2)
2938                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2939                 if ((j & 3) == 3)
2940                         wr32(E1000_RETA(j >> 2), reta.dword);
2941         }
2942
2943         /*
2944          * Disable raw packet checksumming so that RSS hash is placed in
2945          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2946          * offloads as they are enabled by default
2947          */
2948         rxcsum = rd32(E1000_RXCSUM);
2949         rxcsum |= E1000_RXCSUM_PCSD;
2950
2951         if (adapter->hw.mac.type >= e1000_82576)
2952                 /* Enable Receive Checksum Offload for SCTP */
2953                 rxcsum |= E1000_RXCSUM_CRCOFL;
2954
2955         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2956         wr32(E1000_RXCSUM, rxcsum);
2957
2958         /* If VMDq is enabled then we set the appropriate mode for that, else
2959          * we default to RSS so that an RSS hash is calculated per packet even
2960          * if we are only using one queue */
2961         if (adapter->vfs_allocated_count) {
2962                 if (hw->mac.type > e1000_82575) {
2963                         /* Set the default pool for the PF's first queue */
2964                         u32 vtctl = rd32(E1000_VT_CTL);
2965                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2966                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2967                         vtctl |= adapter->vfs_allocated_count <<
2968                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2969                         wr32(E1000_VT_CTL, vtctl);
2970                 }
2971                 if (adapter->rss_queues > 1)
2972                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2973                 else
2974                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2975         } else {
2976                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2977         }
2978         igb_vmm_control(adapter);
2979
2980         /*
2981          * Generate RSS hash based on TCP port numbers and/or
2982          * IPv4/v6 src and dst addresses since UDP cannot be
2983          * hashed reliably due to IP fragmentation
2984          */
2985         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2986                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2987                 E1000_MRQC_RSS_FIELD_IPV6 |
2988                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2989                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2990
2991         wr32(E1000_MRQC, mrqc);
2992 }
2993
2994 /**
2995  * igb_setup_rctl - configure the receive control registers
2996  * @adapter: Board private structure
2997  **/
2998 void igb_setup_rctl(struct igb_adapter *adapter)
2999 {
3000         struct e1000_hw *hw = &adapter->hw;
3001         u32 rctl;
3002
3003         rctl = rd32(E1000_RCTL);
3004
3005         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
3006         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
3007
3008         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
3009                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
3010
3011         /*
3012          * enable stripping of CRC. It's unlikely this will break BMC
3013          * redirection as it did with e1000. Newer features require
3014          * that the HW strips the CRC.
3015          */
3016         rctl |= E1000_RCTL_SECRC;
3017
3018         /* disable store bad packets and clear size bits. */
3019         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
3020
3021         /* enable LPE to prevent packets larger than max_frame_size */
3022         rctl |= E1000_RCTL_LPE;
3023
3024         /* disable queue 0 to prevent tail write w/o re-config */
3025         wr32(E1000_RXDCTL(0), 0);
3026
3027         /* Attention!!!  For SR-IOV PF driver operations you must enable
3028          * queue drop for all VF and PF queues to prevent head of line blocking
3029          * if an un-trusted VF does not provide descriptors to hardware.
3030          */
3031         if (adapter->vfs_allocated_count) {
3032                 /* set all queue drop enable bits */
3033                 wr32(E1000_QDE, ALL_QUEUES);
3034         }
3035
3036         /* This is useful for sniffing bad packets. */
3037         if (adapter->netdev->features & NETIF_F_RXALL) {
3038                 /* UPE and MPE will be handled by normal PROMISC logic
3039                  * in igb_set_rx_mode */
3040                 rctl |= (E1000_RCTL_SBP | /* Receive bad packets */
3041                          E1000_RCTL_BAM | /* RX All Bcast Pkts */
3042                          E1000_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
3043
3044                 rctl &= ~(E1000_RCTL_VFE | /* Disable VLAN filter */
3045                           E1000_RCTL_DPF | /* Allow filtered pause */
3046                           E1000_RCTL_CFIEN); /* Dis VLAN CFIEN Filter */
3047                 /* Do not mess with E1000_CTRL_VME, it affects transmit as well,
3048                  * and that breaks VLANs.
3049                  */
3050         }
3051
3052         wr32(E1000_RCTL, rctl);
3053 }
3054
3055 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
3056                                    int vfn)
3057 {
3058         struct e1000_hw *hw = &adapter->hw;
3059         u32 vmolr;
3060
3061         /* if it isn't the PF, check to see if VFs are enabled and
3062          * increase the size to support vlan tags */
3063         if (vfn < adapter->vfs_allocated_count &&
3064             adapter->vf_data[vfn].vlans_enabled)
3065                 size += VLAN_TAG_SIZE;
3066
3067         vmolr = rd32(E1000_VMOLR(vfn));
3068         vmolr &= ~E1000_VMOLR_RLPML_MASK;
3069         vmolr |= size | E1000_VMOLR_LPE;
3070         wr32(E1000_VMOLR(vfn), vmolr);
3071
3072         return 0;
3073 }
3074
3075 /**
3076  * igb_rlpml_set - set maximum receive packet size
3077  * @adapter: board private structure
3078  *
3079  * Configure maximum receivable packet size.
3080  **/
3081 static void igb_rlpml_set(struct igb_adapter *adapter)
3082 {
3083         u32 max_frame_size = adapter->max_frame_size;
3084         struct e1000_hw *hw = &adapter->hw;
3085         u16 pf_id = adapter->vfs_allocated_count;
3086
3087         if (pf_id) {
3088                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
3089                 /*
3090                  * If we're in VMDQ or SR-IOV mode, then set global RLPML
3091                  * to our max jumbo frame size, in case we need to enable
3092                  * jumbo frames on one of the rings later.
3093                  * This will not pass over-length frames into the default
3094                  * queue because it's gated by the VMOLR.RLPML.
3095                  */
3096                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
3097         }
3098
3099         wr32(E1000_RLPML, max_frame_size);
3100 }
3101
3102 static inline void igb_set_vmolr(struct igb_adapter *adapter,
3103                                  int vfn, bool aupe)
3104 {
3105         struct e1000_hw *hw = &adapter->hw;
3106         u32 vmolr;
3107
3108         /*
3109          * This register exists only on 82576 and newer, so on older
3110          * hardware just exit and do nothing
3111          */
3112         if (hw->mac.type < e1000_82576)
3113                 return;
3114
3115         vmolr = rd32(E1000_VMOLR(vfn));
3116         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
3117         if (aupe)
3118                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
3119         else
3120                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
3121
3122         /* clear bits that are only conditionally set below */
3123         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
3124
3125         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
3126                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
3127         /*
3128          * for VMDq only allow the VFs and pool 0 to accept broadcast and
3129          * multicast packets
3130          */
3131         if (vfn <= adapter->vfs_allocated_count)
3132                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
3133
3134         wr32(E1000_VMOLR(vfn), vmolr);
3135 }
3136
3137 /**
3138  * igb_configure_rx_ring - Configure a receive ring after Reset
3139  * @adapter: board private structure
3140  * @ring: receive ring to be configured
3141  *
3142  * Configure the Rx unit of the MAC after a reset.
3143  **/
3144 void igb_configure_rx_ring(struct igb_adapter *adapter,
3145                            struct igb_ring *ring)
3146 {
3147         struct e1000_hw *hw = &adapter->hw;
3148         u64 rdba = ring->dma;
3149         int reg_idx = ring->reg_idx;
3150         u32 srrctl = 0, rxdctl = 0;
3151
3152         /* disable the queue */
3153         wr32(E1000_RXDCTL(reg_idx), 0);
3154
3155         /* Set DMA base address registers */
3156         wr32(E1000_RDBAL(reg_idx),
3157              rdba & 0x00000000ffffffffULL);
3158         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
3159         wr32(E1000_RDLEN(reg_idx),
3160                        ring->count * sizeof(union e1000_adv_rx_desc));
3161
3162         /* initialize head and tail */
3163         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
3164         wr32(E1000_RDH(reg_idx), 0);
3165         writel(0, ring->tail);
3166
3167         /* set descriptor configuration: split received headers from
              * packet data (half-page packet buffer, capped at 16KB) */
3168         srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
3169 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
3170         srrctl |= IGB_RXBUFFER_16384 >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3171 #else
3172         srrctl |= (PAGE_SIZE / 2) >> E1000_SRRCTL_BSIZEPKT_SHIFT;
3173 #endif
3174         srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
3175         if (hw->mac.type >= e1000_82580)
3176                 srrctl |= E1000_SRRCTL_TIMESTAMP;
3177         /* Only set Drop Enable if we are supporting multiple queues */
3178         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
3179                 srrctl |= E1000_SRRCTL_DROP_EN;
3180
3181         wr32(E1000_SRRCTL(reg_idx), srrctl);
3182
3183         /* set filtering for the VMDq pool backing this queue */
3184         igb_set_vmolr(adapter, reg_idx & 0x7, true);
3185
3186         rxdctl |= IGB_RX_PTHRESH;
3187         rxdctl |= IGB_RX_HTHRESH << 8;
3188         rxdctl |= IGB_RX_WTHRESH << 16;
3189
3190         /* enable receive descriptor fetching */
3191         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
3192         wr32(E1000_RXDCTL(reg_idx), rxdctl);
3193 }
3194
3195 /**
3196  * igb_configure_rx - Configure receive Unit after Reset
3197  * @adapter: board private structure
3198  *
3199  * Configure the Rx unit of the MAC after a reset.
3200  **/
3201 static void igb_configure_rx(struct igb_adapter *adapter)
3202 {
3203         int i;
3204
3205         /* set UTA to appropriate mode */
3206         igb_set_uta(adapter);
3207
3208         /* set the correct pool for the PF default MAC address in entry 0 */
3209         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
3210                          adapter->vfs_allocated_count);
3211
3212         /* Setup the HW Rx Head and Tail Descriptor Pointers and
3213          * the Base and Length of the Rx Descriptor Ring */
3214         for (i = 0; i < adapter->num_rx_queues; i++)
3215                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
3216 }
3217
3218 /**
3219  * igb_free_tx_resources - Free Tx Resources per Queue
3220  * @tx_ring: Tx descriptor ring for a specific queue
3221  *
3222  * Free all transmit software resources
3223  **/
3224 void igb_free_tx_resources(struct igb_ring *tx_ring)
3225 {
3226         igb_clean_tx_ring(tx_ring);
3227
3228         vfree(tx_ring->tx_buffer_info);
3229         tx_ring->tx_buffer_info = NULL;
3230
3231         /* if not set, then don't free */
3232         if (!tx_ring->desc)
3233                 return;
3234
3235         dma_free_coherent(tx_ring->dev, tx_ring->size,
3236                           tx_ring->desc, tx_ring->dma);
3237
3238         tx_ring->desc = NULL;
3239 }
3240
3241 /**
3242  * igb_free_all_tx_resources - Free Tx Resources for All Queues
3243  * @adapter: board private structure
3244  *
3245  * Free all transmit software resources
3246  **/
3247 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3248 {
3249         int i;
3250
3251         for (i = 0; i < adapter->num_tx_queues; i++)
3252                 igb_free_tx_resources(adapter->tx_ring[i]);
3253 }
3254
3255 void igb_unmap_and_free_tx_resource(struct igb_ring *ring,
3256                                     struct igb_tx_buffer *tx_buffer)
3257 {
3258         if (tx_buffer->skb) {
3259                 dev_kfree_skb_any(tx_buffer->skb);
3260                 if (tx_buffer->dma)
3261                         dma_unmap_single(ring->dev,
3262                                          tx_buffer->dma,
3263                                          tx_buffer->length,
3264                                          DMA_TO_DEVICE);
3265         } else if (tx_buffer->dma) {
3266                 dma_unmap_page(ring->dev,
3267                                tx_buffer->dma,
3268                                tx_buffer->length,
3269                                DMA_TO_DEVICE);
3270         }
3271         tx_buffer->next_to_watch = NULL;
3272         tx_buffer->skb = NULL;
3273         tx_buffer->dma = 0;
3274         /* buffer_info must be completely set up in the transmit path */
3275 }
3276
3277 /**
3278  * igb_clean_tx_ring - Free Tx Buffers
3279  * @tx_ring: ring to be cleaned
3280  **/
3281 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3282 {
3283         struct igb_tx_buffer *buffer_info;
3284         unsigned long size;
3285         u16 i;
3286
3287         if (!tx_ring->tx_buffer_info)
3288                 return;
3289
3290         /* Free all the Tx ring sk_buffs */
3291         for (i = 0; i < tx_ring->count; i++) {
3292                 buffer_info = &tx_ring->tx_buffer_info[i];
3293                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3294         }
3295
3296         netdev_tx_reset_queue(txring_txq(tx_ring));
3297
3298         size = sizeof(struct igb_tx_buffer) * tx_ring->count;
3299         memset(tx_ring->tx_buffer_info, 0, size);
3300
3301         /* Zero out the descriptor ring */
3302         memset(tx_ring->desc, 0, tx_ring->size);
3303
3304         tx_ring->next_to_use = 0;
3305         tx_ring->next_to_clean = 0;
3306 }
3307
3308 /**
3309  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3310  * @adapter: board private structure
3311  **/
3312 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3313 {
3314         int i;
3315
3316         for (i = 0; i < adapter->num_tx_queues; i++)
3317                 igb_clean_tx_ring(adapter->tx_ring[i]);
3318 }
3319
3320 /**
3321  * igb_free_rx_resources - Free Rx Resources
3322  * @rx_ring: ring to clean the resources from
3323  *
3324  * Free all receive software resources
3325  **/
3326 void igb_free_rx_resources(struct igb_ring *rx_ring)
3327 {
3328         igb_clean_rx_ring(rx_ring);
3329
3330         vfree(rx_ring->rx_buffer_info);
3331         rx_ring->rx_buffer_info = NULL;
3332
3333         /* if not set, then don't free */
3334         if (!rx_ring->desc)
3335                 return;
3336
3337         dma_free_coherent(rx_ring->dev, rx_ring->size,
3338                           rx_ring->desc, rx_ring->dma);
3339
3340         rx_ring->desc = NULL;
3341 }
3342
3343 /**
3344  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3345  * @adapter: board private structure
3346  *
3347  * Free all receive software resources
3348  **/
3349 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3350 {
3351         int i;
3352
3353         for (i = 0; i < adapter->num_rx_queues; i++)
3354                 igb_free_rx_resources(adapter->rx_ring[i]);
3355 }
3356
3357 /**
3358  * igb_clean_rx_ring - Free Rx Buffers per Queue
3359  * @rx_ring: ring to free buffers from
3360  **/
3361 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3362 {
3363         unsigned long size;
3364         u16 i;
3365
3366         if (!rx_ring->rx_buffer_info)
3367                 return;
3368
3369         /* Free all the Rx ring sk_buffs */
3370         for (i = 0; i < rx_ring->count; i++) {
3371                 struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
3372                 if (buffer_info->dma) {
3373                         dma_unmap_single(rx_ring->dev,
3374                                          buffer_info->dma,
3375                                          IGB_RX_HDR_LEN,
3376                                          DMA_FROM_DEVICE);
3377                         buffer_info->dma = 0;
3378                 }
3379
3380                 if (buffer_info->skb) {
3381                         dev_kfree_skb(buffer_info->skb);
3382                         buffer_info->skb = NULL;
3383                 }
3384                 if (buffer_info->page_dma) {
3385                         dma_unmap_page(rx_ring->dev,
3386                                        buffer_info->page_dma,
3387                                        PAGE_SIZE / 2,
3388                                        DMA_FROM_DEVICE);
3389                         buffer_info->page_dma = 0;
3390                 }
3391                 if (buffer_info->page) {
3392                         put_page(buffer_info->page);
3393                         buffer_info->page = NULL;
3394                         buffer_info->page_offset = 0;
3395                 }
3396         }
3397
3398         size = sizeof(struct igb_rx_buffer) * rx_ring->count;
3399         memset(rx_ring->rx_buffer_info, 0, size);
3400
3401         /* Zero out the descriptor ring */
3402         memset(rx_ring->desc, 0, rx_ring->size);
3403
3404         rx_ring->next_to_clean = 0;
3405         rx_ring->next_to_use = 0;
3406 }
3407
3408 /**
3409  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3410  * @adapter: board private structure
3411  **/
3412 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3413 {
3414         int i;
3415
3416         for (i = 0; i < adapter->num_rx_queues; i++)
3417                 igb_clean_rx_ring(adapter->rx_ring[i]);
3418 }
3419
3420 /**
3421  * igb_set_mac - Change the Ethernet Address of the NIC
3422  * @netdev: network interface device structure
3423  * @p: pointer to an address structure
3424  *
3425  * Returns 0 on success, negative on failure
3426  **/
3427 static int igb_set_mac(struct net_device *netdev, void *p)
3428 {
3429         struct igb_adapter *adapter = netdev_priv(netdev);
3430         struct e1000_hw *hw = &adapter->hw;
3431         struct sockaddr *addr = p;
3432
3433         if (!is_valid_ether_addr(addr->sa_data))
3434                 return -EADDRNOTAVAIL;
3435
3436         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3437         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3438
3439         /* set the correct pool for the new PF MAC address in entry 0 */
3440         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3441                          adapter->vfs_allocated_count);
3442
3443         return 0;
3444 }
3445
3446 /**
3447  * igb_write_mc_addr_list - write multicast addresses to MTA
3448  * @netdev: network interface device structure
3449  *
3450  * Writes multicast address list to the MTA hash table.
3451  * Returns: -ENOMEM on failure
3452  *                0 on no addresses written
3453  *                X on writing X addresses to MTA
3454  **/
3455 static int igb_write_mc_addr_list(struct net_device *netdev)
3456 {
3457         struct igb_adapter *adapter = netdev_priv(netdev);
3458         struct e1000_hw *hw = &adapter->hw;
3459         struct netdev_hw_addr *ha;
3460         u8  *mta_list;
3461         int i;
3462
3463         if (netdev_mc_empty(netdev)) {
3464                 /* nothing to program, so clear mc list */
3465                 igb_update_mc_addr_list(hw, NULL, 0);
3466                 igb_restore_vf_multicasts(adapter);
3467                 return 0;
3468         }
3469
3470         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3471         if (!mta_list)
3472                 return -ENOMEM;
3473
3474         /* The shared function expects a packed array of only addresses. */
3475         i = 0;
3476         netdev_for_each_mc_addr(ha, netdev)
3477                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3478
3479         igb_update_mc_addr_list(hw, mta_list, i);
3480         kfree(mta_list);
3481
3482         return netdev_mc_count(netdev);
3483 }
3484
3485 /**
3486  * igb_write_uc_addr_list - write unicast addresses to RAR table
3487  * @netdev: network interface device structure
3488  *
3489  * Writes unicast address list to the RAR table.
3490  * Returns: -ENOMEM on failure/insufficient address space
3491  *                0 on no addresses written
3492  *                X on writing X addresses to the RAR table
3493  **/
3494 static int igb_write_uc_addr_list(struct net_device *netdev)
3495 {
3496         struct igb_adapter *adapter = netdev_priv(netdev);
3497         struct e1000_hw *hw = &adapter->hw;
3498         unsigned int vfn = adapter->vfs_allocated_count;
3499         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3500         int count = 0;
3501
3502         /* return ENOMEM indicating insufficient memory for addresses */
3503         if (netdev_uc_count(netdev) > rar_entries)
3504                 return -ENOMEM;
3505
3506         if (!netdev_uc_empty(netdev) && rar_entries) {
3507                 struct netdev_hw_addr *ha;
3508
3509                 netdev_for_each_uc_addr(ha, netdev) {
3510                         if (!rar_entries)
3511                                 break;
3512                         igb_rar_set_qsel(adapter, ha->addr,
3513                                          rar_entries--,
3514                                          vfn);
3515                         count++;
3516                 }
3517         }
3518         /* write the addresses in reverse order to avoid write combining */
3519         for (; rar_entries > 0 ; rar_entries--) {
3520                 wr32(E1000_RAH(rar_entries), 0);
3521                 wr32(E1000_RAL(rar_entries), 0);
3522         }
3523         wrfl();
3524
3525         return count;
3526 }
3527
3528 /**
3529  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3530  * @netdev: network interface device structure
3531  *
3532  * The set_rx_mode entry point is called whenever the unicast or multicast
3533  * address lists or the network interface flags are updated.  This routine is
3534  * responsible for configuring the hardware for proper unicast, multicast,
3535  * promiscuous mode, and all-multi behavior.
3536  **/
3537 static void igb_set_rx_mode(struct net_device *netdev)
3538 {
3539         struct igb_adapter *adapter = netdev_priv(netdev);
3540         struct e1000_hw *hw = &adapter->hw;
3541         unsigned int vfn = adapter->vfs_allocated_count;
3542         u32 rctl, vmolr = 0;
3543         int count;
3544
3545         /* Check for Promiscuous and All Multicast modes */
3546         rctl = rd32(E1000_RCTL);
3547
3548         /* clear the affected bits */
3549         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3550
3551         if (netdev->flags & IFF_PROMISC) {
3552                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3553                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3554         } else {
3555                 if (netdev->flags & IFF_ALLMULTI) {
3556                         rctl |= E1000_RCTL_MPE;
3557                         vmolr |= E1000_VMOLR_MPME;
3558                 } else {
3559                         /*
3560                          * Write addresses to the MTA, if the attempt fails
3561                          * then we should just turn on promiscuous mode so
3562                          * that we can at least receive multicast traffic
3563                          */
3564                         count = igb_write_mc_addr_list(netdev);
3565                         if (count < 0) {
3566                                 rctl |= E1000_RCTL_MPE;
3567                                 vmolr |= E1000_VMOLR_MPME;
3568                         } else if (count) {
3569                                 vmolr |= E1000_VMOLR_ROMPE;
3570                         }
3571                 }
3572                 /*
3573                  * Write addresses to available RAR registers, if there is not
3574                  * sufficient space to store all the addresses then enable
3575                  * unicast promiscuous mode
3576                  */
3577                 count = igb_write_uc_addr_list(netdev);
3578                 if (count < 0) {
3579                         rctl |= E1000_RCTL_UPE;
3580                         vmolr |= E1000_VMOLR_ROPE;
3581                 }
3582                 rctl |= E1000_RCTL_VFE;
3583         }
3584         wr32(E1000_RCTL, rctl);
3585
3586         /*
3587          * In order to support SR-IOV and eventually VMDq it is necessary to set
3588          * the VMOLR to enable the appropriate modes.  Without this workaround
3589          * we will have issues with VLAN tag stripping not being done for frames
3590          * that are only arriving because we are the default pool
3591          */
3592         if (hw->mac.type < e1000_82576)
3593                 return;
3594
3595         vmolr |= rd32(E1000_VMOLR(vfn)) &
3596                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3597         wr32(E1000_VMOLR(vfn), vmolr);
3598         igb_restore_vf_multicasts(adapter);
3599 }
3600
3601 static void igb_check_wvbr(struct igb_adapter *adapter)
3602 {
3603         struct e1000_hw *hw = &adapter->hw;
3604         u32 wvbr = 0;
3605
3606         switch (hw->mac.type) {
3607         case e1000_82576:
3608         case e1000_i350:
3609                 if (!(wvbr = rd32(E1000_WVBR)))
3610                         return;
3611                 break;
3612         default:
3613                 break;
3614         }
3615
3616         adapter->wvbr |= wvbr;
3617 }
3618
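     /* WVBR records spoofed-packet events per VF transmit queue; with the
      * staggered queue layout the second queue of VF n is reported at bit
      * (n + IGB_STAGGERED_QUEUE_OFFSET), which is what igb_spoof_check()
      * tests below.
      */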
3619 #define IGB_STAGGERED_QUEUE_OFFSET 8
3620
3621 static void igb_spoof_check(struct igb_adapter *adapter)
3622 {
3623         int j;
3624
3625         if (!adapter->wvbr)
3626                 return;
3627
3628         for (j = 0; j < adapter->vfs_allocated_count; j++) {
3629                 if (adapter->wvbr & (1 << j) ||
3630                     adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) {
3631                         dev_warn(&adapter->pdev->dev,
3632                                 "Spoof event(s) detected on VF %d\n", j);
3633                         adapter->wvbr &=
3634                                 ~((1 << j) |
3635                                   (1 << (j + IGB_STAGGERED_QUEUE_OFFSET)));
3636                 }
3637         }
3638 }
3639
3640 /* Need to wait a few seconds after link up to get diagnostic information from
3641  * the phy */
3642 static void igb_update_phy_info(unsigned long data)
3643 {
3644         struct igb_adapter *adapter = (struct igb_adapter *) data;
3645         igb_get_phy_info(&adapter->hw);
3646 }
3647
3648 /**
3649  * igb_has_link - check shared code for link and determine up/down
3650  * @adapter: pointer to driver private info
3651  **/
3652 bool igb_has_link(struct igb_adapter *adapter)
3653 {
3654         struct e1000_hw *hw = &adapter->hw;
3655         bool link_active = false;
3656         s32 ret_val = 0;
3657
3658         /* get_link_status is set on LSC (link status) interrupt or
3659          * rx sequence error interrupt, and remains set until
3660          * check_for_link establishes link; link is detected this way
3661          * for copper adapters ONLY
3662          */
3663         switch (hw->phy.media_type) {
3664         case e1000_media_type_copper:
3665                 if (hw->mac.get_link_status) {
3666                         ret_val = hw->mac.ops.check_for_link(hw);
3667                         link_active = !hw->mac.get_link_status;
3668                 } else {
3669                         link_active = true;
3670                 }
3671                 break;
3672         case e1000_media_type_internal_serdes:
3673                 ret_val = hw->mac.ops.check_for_link(hw);
3674                 link_active = hw->mac.serdes_has_link;
3675                 break;
3676         default:
3677         case e1000_media_type_unknown:
3678                 break;
3679         }
3680
3681         return link_active;
3682 }
3683
3684 static bool igb_thermal_sensor_event(struct e1000_hw *hw, u32 event)
3685 {
3686         bool ret = false;
3687         u32 ctrl_ext, thstat;
3688
3689         /* check for thermal sensor event on i350, copper only */
3690         if (hw->mac.type == e1000_i350) {
3691                 thstat = rd32(E1000_THSTAT);
3692                 ctrl_ext = rd32(E1000_CTRL_EXT);
3693
3694                 if ((hw->phy.media_type == e1000_media_type_copper) &&
3695                     !(ctrl_ext & E1000_CTRL_EXT_LINK_MODE_SGMII)) {
3696                         ret = !!(thstat & event);
3697                 }
3698         }
3699
3700         return ret;
3701 }
3702
3703 /**
3704  * igb_watchdog - Timer Call-back
3705  * @data: pointer to adapter cast into an unsigned long
3706  **/
3707 static void igb_watchdog(unsigned long data)
3708 {
3709         struct igb_adapter *adapter = (struct igb_adapter *)data;
3710         /* Do the rest outside of interrupt context */
3711         schedule_work(&adapter->watchdog_task);
3712 }
3713
3714 static void igb_watchdog_task(struct work_struct *work)
3715 {
3716         struct igb_adapter *adapter = container_of(work,
3717                                                    struct igb_adapter,
3718                                                    watchdog_task);
3719         struct e1000_hw *hw = &adapter->hw;
3720         struct net_device *netdev = adapter->netdev;
3721         u32 link;
3722         int i;
3723
3724         link = igb_has_link(adapter);
3725         if (link) {
3726                 /* Cancel scheduled suspend requests. */
3727                 pm_runtime_resume(netdev->dev.parent);
3728
3729                 if (!netif_carrier_ok(netdev)) {
3730                         u32 ctrl;
3731                         hw->mac.ops.get_speed_and_duplex(hw,
3732                                                          &adapter->link_speed,
3733                                                          &adapter->link_duplex);
3734
3735                         ctrl = rd32(E1000_CTRL);
3736                         /* Link status message must follow this format */
3737                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s "
3738                                "Duplex, Flow Control: %s\n",
3739                                netdev->name,
3740                                adapter->link_speed,
3741                                adapter->link_duplex == FULL_DUPLEX ?
3742                                "Full" : "Half",
3743                                (ctrl & E1000_CTRL_TFCE) &&
3744                                (ctrl & E1000_CTRL_RFCE) ? "RX/TX" :
3745                                (ctrl & E1000_CTRL_RFCE) ?  "RX" :
3746                                (ctrl & E1000_CTRL_TFCE) ?  "TX" : "None");
3747
3748                         /* check for thermal sensor event */
3749                         if (igb_thermal_sensor_event(hw,
3750                             E1000_THSTAT_LINK_THROTTLE)) {
3751                                 netdev_info(netdev, "The network adapter link "
3752                                             "speed was downshifted because it "
3753                                             "overheated\n");
3754                         }
3755
3756                         /* adjust timeout factor according to speed/duplex */
3757                         adapter->tx_timeout_factor = 1;
3758                         switch (adapter->link_speed) {
3759                         case SPEED_10:
3760                                 adapter->tx_timeout_factor = 14;
3761                                 break;
3762                         case SPEED_100:
3763                                 /* maybe add some timeout factor ? */
3764                                 break;
3765                         }
3766
3767                         netif_carrier_on(netdev);
3768
3769                         igb_ping_all_vfs(adapter);
3770                         igb_check_vf_rate_limit(adapter);
3771
3772                         /* link state has changed, schedule phy info update */
3773                         if (!test_bit(__IGB_DOWN, &adapter->state))
3774                                 mod_timer(&adapter->phy_info_timer,
3775                                           round_jiffies(jiffies + 2 * HZ));
3776                 }
3777         } else {
3778                 if (netif_carrier_ok(netdev)) {
3779                         adapter->link_speed = 0;
3780                         adapter->link_duplex = 0;
3781
3782                         /* check for thermal sensor event */
3783                         if (igb_thermal_sensor_event(hw,
3784                             E1000_THSTAT_PWR_DOWN)) {
3785                                 netdev_err(netdev, "The network adapter was "
3786                                            "stopped because it overheated\n");
3787                         }
3788
3789                         /* Link status message must follow this format */
3790                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3791                                netdev->name);
3792                         netif_carrier_off(netdev);
3793
3794                         igb_ping_all_vfs(adapter);
3795
3796                         /* link state has changed, schedule phy info update */
3797                         if (!test_bit(__IGB_DOWN, &adapter->state))
3798                                 mod_timer(&adapter->phy_info_timer,
3799                                           round_jiffies(jiffies + 2 * HZ));
3800
3801                         pm_schedule_suspend(netdev->dev.parent,
3802                                             MSEC_PER_SEC * 5);
3803                 }
3804         }
3805
3806         spin_lock(&adapter->stats64_lock);
3807         igb_update_stats(adapter, &adapter->stats64);
3808         spin_unlock(&adapter->stats64_lock);
3809
3810         for (i = 0; i < adapter->num_tx_queues; i++) {
3811                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3812                 if (!netif_carrier_ok(netdev)) {
3813                         /* We've lost link, so the controller stops DMA,
3814                          * but we've got queued Tx work that's never going
3815                          * to get done, so reset controller to flush Tx.
3816                          * (Do the reset outside of interrupt context). */
3817                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3818                                 adapter->tx_timeout_count++;
3819                                 schedule_work(&adapter->reset_task);
3820                                 /* return immediately since reset is imminent */
3821                                 return;
3822                         }
3823                 }
3824
3825                 /* Force detection of hung controller every watchdog period */
3826                 set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3827         }
3828
3829         /* Cause software interrupt to ensure rx ring is cleaned */
3830         if (adapter->msix_entries) {
3831                 u32 eics = 0;
3832                 for (i = 0; i < adapter->num_q_vectors; i++)
3833                         eics |= adapter->q_vector[i]->eims_value;
3834                 wr32(E1000_EICS, eics);
3835         } else {
3836                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3837         }
3838
3839         igb_spoof_check(adapter);
3840
3841         /* Reset the timer */
3842         if (!test_bit(__IGB_DOWN, &adapter->state))
3843                 mod_timer(&adapter->watchdog_timer,
3844                           round_jiffies(jiffies + 2 * HZ));
3845 }
3846
3847 enum latency_range {
3848         lowest_latency = 0,
3849         low_latency = 1,
3850         bulk_latency = 2,
3851         latency_invalid = 255
3852 };
3853
3854 /**
3855  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3856  *
3857  *      Stores a new ITR value based strictly on packet size.  This
3858  *      algorithm is less sophisticated than that used in igb_update_itr,
3859  *      due to the difficulty of synchronizing statistics across multiple
3860  *      receive rings.  The divisors and thresholds used by this function
3861  *      were determined based on theoretical maximum wire speed and testing
3862  *      data, in order to minimize response time while increasing bulk
3863  *      throughput.
3864  *      This functionality is controlled by the InterruptThrottleRate module
3865  *      parameter (see igb_param.c)
3866  *      NOTE:  This function is called only when operating in a multiqueue
3867  *             receive environment.
3868  * @q_vector: pointer to q_vector
3869  **/
3870 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3871 {
3872         int new_val = q_vector->itr_val;
3873         int avg_wire_size = 0;
3874         struct igb_adapter *adapter = q_vector->adapter;
3875         unsigned int packets;
3876
3877         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3878          * ints/sec - ITR timer value of 120 ticks.
3879          */
3880         if (adapter->link_speed != SPEED_1000) {
3881                 new_val = IGB_4K_ITR;
3882                 goto set_itr_val;
3883         }
3884
3885         packets = q_vector->rx.total_packets;
3886         if (packets)
3887                 avg_wire_size = q_vector->rx.total_bytes / packets;
3888
3889         packets = q_vector->tx.total_packets;
3890         if (packets)
3891                 avg_wire_size = max_t(u32, avg_wire_size,
3892                                       q_vector->tx.total_bytes / packets);
3893
3894         /* if avg_wire_size isn't set no work was done */
3895         if (!avg_wire_size)
3896                 goto clear_counts;
3897
3898         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3899         avg_wire_size += 24;
3900
3901         /* Don't starve jumbo frames */
3902         avg_wire_size = min(avg_wire_size, 3000);
3903
3904         /* Give a little boost to mid-size frames */
3905         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3906                 new_val = avg_wire_size / 3;
3907         else
3908                 new_val = avg_wire_size / 2;
3909
3910         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3911         if (new_val < IGB_20K_ITR &&
3912             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
3913              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
3914                 new_val = IGB_20K_ITR;
3915
3916 set_itr_val:
3917         if (new_val != q_vector->itr_val) {
3918                 q_vector->itr_val = new_val;
3919                 q_vector->set_itr = 1;
3920         }
3921 clear_counts:
3922         q_vector->rx.total_bytes = 0;
3923         q_vector->rx.total_packets = 0;
3924         q_vector->tx.total_bytes = 0;
3925         q_vector->tx.total_packets = 0;
3926 }
3927
3928 /**
3929  * igb_update_itr - update the dynamic ITR value based on statistics
3930  *      Stores a new ITR value based on packet and byte
3931  *      counts during the last interrupt.  The advantage of per interrupt
3932  *      computation is faster updates and more accurate ITR for the current
3933  *      traffic pattern.  Constants in this function were computed
3934  *      based on theoretical maximum wire speed and thresholds were set based
3935  *      on testing data as well as attempting to minimize response time
3936  *      while increasing bulk throughput.
3937  *      This functionality is controlled by the InterruptThrottleRate module
3938  *      parameter (see igb_param.c)
3939  *      NOTE:  These calculations are only valid when operating in a single-
3940  *             queue environment.
3941  * @q_vector: pointer to q_vector
3942  * @ring_container: ring info to update the itr for
3943  **/
3944 static void igb_update_itr(struct igb_q_vector *q_vector,
3945                            struct igb_ring_container *ring_container)
3946 {
3947         unsigned int packets = ring_container->total_packets;
3948         unsigned int bytes = ring_container->total_bytes;
3949         u8 itrval = ring_container->itr;
3950
3951         /* no packets, exit with status unchanged */
3952         if (packets == 0)
3953                 return;
3954
3955         switch (itrval) {
3956         case lowest_latency:
3957                 /* handle TSO and jumbo frames */
3958                 if (bytes/packets > 8000)
3959                         itrval = bulk_latency;
3960                 else if ((packets < 5) && (bytes > 512))
3961                         itrval = low_latency;
3962                 break;
3963         case low_latency:  /* 50 usec aka 20000 ints/s */
3964                 if (bytes > 10000) {
3965                         /* this if handles the TSO accounting */
3966                         if (bytes/packets > 8000) {
3967                                 itrval = bulk_latency;
3968                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3969                                 itrval = bulk_latency;
3970                         } else if (packets > 35) {
3971                                 itrval = lowest_latency;
3972                         }
3973                 } else if (bytes/packets > 2000) {
3974                         itrval = bulk_latency;
3975                 } else if (packets <= 2 && bytes < 512) {
3976                         itrval = lowest_latency;
3977                 }
3978                 break;
3979         case bulk_latency: /* 250 usec aka 4000 ints/s */
3980                 if (bytes > 25000) {
3981                         if (packets > 35)
3982                                 itrval = low_latency;
3983                 } else if (bytes < 1500) {
3984                         itrval = low_latency;
3985                 }
3986                 break;
3987         }
3988
3989         /* clear work counters since we have the values we need */
3990         ring_container->total_bytes = 0;
3991         ring_container->total_packets = 0;
3992
3993         /* write updated itr to ring container */
3994         ring_container->itr = itrval;
3995 }
3996
3997 static void igb_set_itr(struct igb_q_vector *q_vector)
3998 {
3999         struct igb_adapter *adapter = q_vector->adapter;
4000         u32 new_itr = q_vector->itr_val;
4001         u8 current_itr = 0;
4002
4003         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4004         if (adapter->link_speed != SPEED_1000) {
4005                 current_itr = 0;
4006                 new_itr = IGB_4K_ITR;
4007                 goto set_itr_now;
4008         }
4009
4010         igb_update_itr(q_vector, &q_vector->tx);
4011         igb_update_itr(q_vector, &q_vector->rx);
4012
4013         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4014
4015         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4016         if (current_itr == lowest_latency &&
4017             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4018              (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4019                 current_itr = low_latency;
4020
4021         switch (current_itr) {
4022         /* counts and packets in update_itr are dependent on these numbers */
4023         case lowest_latency:
4024                 new_itr = IGB_70K_ITR; /* 70,000 ints/sec */
4025                 break;
4026         case low_latency:
4027                 new_itr = IGB_20K_ITR; /* 20,000 ints/sec */
4028                 break;
4029         case bulk_latency:
4030                 new_itr = IGB_4K_ITR;  /* 4,000 ints/sec */
4031                 break;
4032         default:
4033                 break;
4034         }
4035
4036 set_itr_now:
4037         if (new_itr != q_vector->itr_val) {
4038                 /* this attempts to bias the interrupt rate towards Bulk
4039                  * by adding intermediate steps when interrupt rate is
4040                  * increasing */
4041                 new_itr = new_itr > q_vector->itr_val ?
4042                              max((new_itr * q_vector->itr_val) /
4043                                  (new_itr + (q_vector->itr_val >> 2)),
4044                                  new_itr) :
4045                              new_itr;
4046                 /* Don't write the value here; it resets the adapter's
4047                  * internal timer, and causes us to delay far longer than
4048                  * we should between interrupts.  Instead, we write the ITR
4049                  * value at the beginning of the next interrupt so the timing
4050                  * ends up being correct.
4051                  */
4052                 q_vector->itr_val = new_itr;
4053                 q_vector->set_itr = 1;
4054         }
4055 }
4056
4057 static void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens,
4058                             u32 type_tucmd, u32 mss_l4len_idx)
4059 {
4060         struct e1000_adv_tx_context_desc *context_desc;
4061         u16 i = tx_ring->next_to_use;
4062
4063         context_desc = IGB_TX_CTXTDESC(tx_ring, i);
4064
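             /* a context descriptor consumes one entry in the descriptor
              * ring, so advance next_to_use and wrap at the end of the ring
              */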
4065         i++;
4066         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
4067
4068         /* set bits to identify this as an advanced context descriptor */
4069         type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT;
4070
4071         /* For 82575, context index must be unique per ring. */
4072         if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4073                 mss_l4len_idx |= tx_ring->reg_idx << 4;
4074
4075         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
4076         context_desc->seqnum_seed       = 0;
4077         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
4078         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
4079 }
4080
4081 static int igb_tso(struct igb_ring *tx_ring,
4082                    struct igb_tx_buffer *first,
4083                    u8 *hdr_len)
4084 {
4085         struct sk_buff *skb = first->skb;
4086         u32 vlan_macip_lens, type_tucmd;
4087         u32 mss_l4len_idx, l4len;
4088
4089         if (!skb_is_gso(skb))
4090                 return 0;
4091
4092         if (skb_header_cloned(skb)) {
4093                 int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
4094                 if (err)
4095                         return err;
4096         }
4097
4098         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
4099         type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP;
4100
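             /* The hardware regenerates the IP and TCP checksums for each
              * segment, so clear the IP length/checksum fields and seed the
              * TCP checksum with a pseudo-header checksum computed over a
              * zero payload length.
              */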
4101         if (first->protocol == __constant_htons(ETH_P_IP)) {
4102                 struct iphdr *iph = ip_hdr(skb);
4103                 iph->tot_len = 0;
4104                 iph->check = 0;
4105                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
4106                                                          iph->daddr, 0,
4107                                                          IPPROTO_TCP,
4108                                                          0);
4109                 type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4110                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4111                                    IGB_TX_FLAGS_CSUM |
4112                                    IGB_TX_FLAGS_IPV4;
4113         } else if (skb_is_gso_v6(skb)) {
4114                 ipv6_hdr(skb)->payload_len = 0;
4115                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
4116                                                        &ipv6_hdr(skb)->daddr,
4117                                                        0, IPPROTO_TCP, 0);
4118                 first->tx_flags |= IGB_TX_FLAGS_TSO |
4119                                    IGB_TX_FLAGS_CSUM;
4120         }
4121
4122         /* compute header lengths */
4123         l4len = tcp_hdrlen(skb);
4124         *hdr_len = skb_transport_offset(skb) + l4len;
4125
4126         /* update gso size and bytecount with header size */
4127         first->gso_segs = skb_shinfo(skb)->gso_segs;
4128         first->bytecount += (first->gso_segs - 1) * *hdr_len;
4129
4130         /* MSS L4LEN IDX */
4131         mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT;
4132         mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT;
4133
4134         /* VLAN MACLEN IPLEN */
4135         vlan_macip_lens = skb_network_header_len(skb);
4136         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4137         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4138
4139         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4140
4141         return 1;
4142 }
4143
4144 static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first)
4145 {
4146         struct sk_buff *skb = first->skb;
4147         u32 vlan_macip_lens = 0;
4148         u32 mss_l4len_idx = 0;
4149         u32 type_tucmd = 0;
4150
4151         if (skb->ip_summed != CHECKSUM_PARTIAL) {
4152                 if (!(first->tx_flags & IGB_TX_FLAGS_VLAN))
4153                         return;
4154         } else {
4155                 u8 l4_hdr = 0;
4156                 switch (first->protocol) {
4157                 case __constant_htons(ETH_P_IP):
4158                         vlan_macip_lens |= skb_network_header_len(skb);
4159                         type_tucmd |= E1000_ADVTXD_TUCMD_IPV4;
4160                         l4_hdr = ip_hdr(skb)->protocol;
4161                         break;
4162                 case __constant_htons(ETH_P_IPV6):
4163                         vlan_macip_lens |= skb_network_header_len(skb);
4164                         l4_hdr = ipv6_hdr(skb)->nexthdr;
4165                         break;
4166                 default:
4167                         if (unlikely(net_ratelimit())) {
4168                                 dev_warn(tx_ring->dev,
4169                                  "partial checksum but proto=%x!\n",
4170                                  first->protocol);
4171                         }
4172                         break;
4173                 }
4174
4175                 switch (l4_hdr) {
4176                 case IPPROTO_TCP:
4177                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
4178                         mss_l4len_idx = tcp_hdrlen(skb) <<
4179                                         E1000_ADVTXD_L4LEN_SHIFT;
4180                         break;
4181                 case IPPROTO_SCTP:
4182                         type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
4183                         mss_l4len_idx = sizeof(struct sctphdr) <<
4184                                         E1000_ADVTXD_L4LEN_SHIFT;
4185                         break;
4186                 case IPPROTO_UDP:
4187                         mss_l4len_idx = sizeof(struct udphdr) <<
4188                                         E1000_ADVTXD_L4LEN_SHIFT;
4189                         break;
4190                 default:
4191                         if (unlikely(net_ratelimit())) {
4192                                 dev_warn(tx_ring->dev,
4193                                  "partial checksum but l4 proto=%x!\n",
4194                                  l4_hdr);
4195                         }
4196                         break;
4197                 }
4198
4199                 /* update TX checksum flag */
4200                 first->tx_flags |= IGB_TX_FLAGS_CSUM;
4201         }
4202
4203         vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT;
4204         vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK;
4205
4206         igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx);
4207 }
4208
4209 static __le32 igb_tx_cmd_type(u32 tx_flags)
4210 {
4211         /* set type for advanced descriptor with frame checksum insertion */
4212         __le32 cmd_type = cpu_to_le32(E1000_ADVTXD_DTYP_DATA |
4213                                       E1000_ADVTXD_DCMD_IFCS |
4214                                       E1000_ADVTXD_DCMD_DEXT);
4215
4216         /* set HW vlan bit if vlan is present */
4217         if (tx_flags & IGB_TX_FLAGS_VLAN)
4218                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_VLE);
4219
4220         /* set timestamp bit if present */
4221         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4222                 cmd_type |= cpu_to_le32(E1000_ADVTXD_MAC_TSTAMP);
4223
4224         /* set segmentation bits for TSO */
4225         if (tx_flags & IGB_TX_FLAGS_TSO)
4226                 cmd_type |= cpu_to_le32(E1000_ADVTXD_DCMD_TSE);
4227
4228         return cmd_type;
4229 }
4230
4231 static void igb_tx_olinfo_status(struct igb_ring *tx_ring,
4232                                  union e1000_adv_tx_desc *tx_desc,
4233                                  u32 tx_flags, unsigned int paylen)
4234 {
4235         u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT;
4236
4237         /* 82575 requires a unique index per ring if any offload is enabled */
4238         if ((tx_flags & (IGB_TX_FLAGS_CSUM | IGB_TX_FLAGS_VLAN)) &&
4239             test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
4240                 olinfo_status |= tx_ring->reg_idx << 4;
4241
4242         /* insert L4 checksum */
4243         if (tx_flags & IGB_TX_FLAGS_CSUM) {
4244                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4245
4246                 /* insert IPv4 checksum */
4247                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4248                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4249         }
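        /*
         * The POPTS_TXSM/POPTS_IXSM bits shifted into olinfo_status ask the
         * hardware to insert the L4 and IPv4 header checksums, using the
         * header offsets programmed by the earlier context descriptor.
         */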
4250
4251         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4252 }
4253
4254 /*
4255  * The largest size we can write to the descriptor is 65535.  In order to
4256  * maintain a power of two alignment we have to limit ourselves to 32K.
4257  */
4258 #define IGB_MAX_TXD_PWR 15
4259 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
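/*
 * Example: a 40K chunk of data is emitted as a 32K descriptor followed by an
 * 8K descriptor by the splitting loop in igb_tx_map() below.
 */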
4260
4261 static void igb_tx_map(struct igb_ring *tx_ring,
4262                        struct igb_tx_buffer *first,
4263                        const u8 hdr_len)
4264 {
4265         struct sk_buff *skb = first->skb;
4266         struct igb_tx_buffer *tx_buffer_info;
4267         union e1000_adv_tx_desc *tx_desc;
4268         dma_addr_t dma;
4269         struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0];
4270         unsigned int data_len = skb->data_len;
4271         unsigned int size = skb_headlen(skb);
4272         unsigned int paylen = skb->len - hdr_len;
4273         __le32 cmd_type;
4274         u32 tx_flags = first->tx_flags;
4275         u16 i = tx_ring->next_to_use;
4276
4277         tx_desc = IGB_TX_DESC(tx_ring, i);
4278
4279         igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, paylen);
4280         cmd_type = igb_tx_cmd_type(tx_flags);
4281
4282         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
4283         if (dma_mapping_error(tx_ring->dev, dma))
4284                 goto dma_error;
4285
4286         /* record length, and DMA address */
4287         first->length = size;
4288         first->dma = dma;
4289         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4290
4291         for (;;) {
4292                 while (unlikely(size > IGB_MAX_DATA_PER_TXD)) {
4293                         tx_desc->read.cmd_type_len =
4294                                 cmd_type | cpu_to_le32(IGB_MAX_DATA_PER_TXD);
4295
4296                         i++;
4297                         tx_desc++;
4298                         if (i == tx_ring->count) {
4299                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
4300                                 i = 0;
4301                         }
4302
4303                         dma += IGB_MAX_DATA_PER_TXD;
4304                         size -= IGB_MAX_DATA_PER_TXD;
4305
4306                         tx_desc->read.olinfo_status = 0;
4307                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
4308                 }
4309
4310                 if (likely(!data_len))
4311                         break;
4312
4313                 tx_desc->read.cmd_type_len = cmd_type | cpu_to_le32(size);
4314
4315                 i++;
4316                 tx_desc++;
4317                 if (i == tx_ring->count) {
4318                         tx_desc = IGB_TX_DESC(tx_ring, 0);
4319                         i = 0;
4320                 }
4321
4322                 size = skb_frag_size(frag);
4323                 data_len -= size;
4324
4325                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
4326                                    size, DMA_TO_DEVICE);
4327                 if (dma_mapping_error(tx_ring->dev, dma))
4328                         goto dma_error;
4329
4330                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4331                 tx_buffer_info->length = size;
4332                 tx_buffer_info->dma = dma;
4333
4334                 tx_desc->read.olinfo_status = 0;
4335                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
4336
4337                 frag++;
4338         }
4339
4340         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
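        /*
         * Byte queue limits: report how many bytes were just queued on this
         * ring so the stack can bound queue depth; the matching completion
         * accounting happens in the Tx cleanup path when these descriptors
         * are reclaimed.
         */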
4341
4342         /* write last descriptor with RS and EOP bits */
4343         cmd_type |= cpu_to_le32(size) | cpu_to_le32(IGB_TXD_DCMD);
4344         if (unlikely(skb->no_fcs))
4345                 cmd_type &= ~(cpu_to_le32(E1000_ADVTXD_DCMD_IFCS));
4346         tx_desc->read.cmd_type_len = cmd_type;
4347
4348         /* set the timestamp */
4349         first->time_stamp = jiffies;
4350
4351         /*
4352          * Force memory writes to complete before letting h/w know there
4353          * are new descriptors to fetch.  (Only applicable for weak-ordered
4354          * memory model archs, such as IA-64).
4355          *
4356          * We also need this memory barrier to make certain all of the
4357          * status bits have been updated before next_to_watch is written.
4358          */
4359         wmb();
4360
4361         /* set next_to_watch value indicating a packet is present */
4362         first->next_to_watch = tx_desc;
4363
4364         i++;
4365         if (i == tx_ring->count)
4366                 i = 0;
4367
4368         tx_ring->next_to_use = i;
4369
4370         writel(i, tx_ring->tail);
4371
4372         /* we need this if more than one processor can write to our tail
4373          * at a time; it synchronizes IO on IA64/Altix systems */
4374         mmiowb();
4375
4376         return;
4377
4378 dma_error:
4379         dev_err(tx_ring->dev, "TX DMA map failed\n");
4380
4381         /* clear dma mappings for failed tx_buffer_info map */
4382         for (;;) {
4383                 tx_buffer_info = &tx_ring->tx_buffer_info[i];
4384                 igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info);
4385                 if (tx_buffer_info == first)
4386                         break;
4387                 if (i == 0)
4388                         i = tx_ring->count;
4389                 i--;
4390         }
4391
4392         tx_ring->next_to_use = i;
4393 }
4394
4395 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4396 {
4397         struct net_device *netdev = tx_ring->netdev;
4398
4399         netif_stop_subqueue(netdev, tx_ring->queue_index);
4400
4401         /* Herbert's original patch had:
4402          *  smp_mb__after_netif_stop_queue();
4403          * but since that doesn't exist yet, just open code it. */
4404         smp_mb();
4405
4406         /* We need to check again in case another CPU has just
4407          * made room available. */
4408         if (igb_desc_unused(tx_ring) < size)
4409                 return -EBUSY;
4410
4411         /* A reprieve! */
4412         netif_wake_subqueue(netdev, tx_ring->queue_index);
4413
4414         u64_stats_update_begin(&tx_ring->tx_syncp2);
4415         tx_ring->tx_stats.restart_queue2++;
4416         u64_stats_update_end(&tx_ring->tx_syncp2);
4417
4418         return 0;
4419 }
4420
4421 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size)
4422 {
4423         if (igb_desc_unused(tx_ring) >= size)
4424                 return 0;
4425         return __igb_maybe_stop_tx(tx_ring, size);
4426 }
4427
4428 netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb,
4429                                 struct igb_ring *tx_ring)
4430 {
4431         struct igb_tx_buffer *first;
4432         int tso;
4433         u32 tx_flags = 0;
4434         __be16 protocol = vlan_get_protocol(skb);
4435         u8 hdr_len = 0;
4436
4437         /* need: 1 descriptor per page,
4438          *       + 2 desc gap to keep tail from touching head,
4439          *       + 1 desc for skb->data,
4440          *       + 1 desc for context descriptor,
4441          * otherwise try next time */
4442         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4443                 /* this is a hard error */
4444                 return NETDEV_TX_BUSY;
4445         }
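        /*
         * The "+ 4" is one descriptor for skb->data, one for a possible
         * context descriptor and two of slack so the tail never catches the
         * head; e.g. an skb with 3 page frags reserves 3 + 4 = 7 descriptors.
         */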
4446
4447         /* record the location of the first descriptor for this packet */
4448         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
4449         first->skb = skb;
4450         first->bytecount = skb->len;
4451         first->gso_segs = 1;
4452
4453         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4454                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4455                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4456         }
4457
4458         if (vlan_tx_tag_present(skb)) {
4459                 tx_flags |= IGB_TX_FLAGS_VLAN;
4460                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4461         }
4462
4463         /* record initial flags and protocol */
4464         first->tx_flags = tx_flags;
4465         first->protocol = protocol;
4466
4467         tso = igb_tso(tx_ring, first, &hdr_len);
4468         if (tso < 0)
4469                 goto out_drop;
4470         else if (!tso)
4471                 igb_tx_csum(tx_ring, first);
4472
4473         igb_tx_map(tx_ring, first, hdr_len);
4474
4475         /* Make sure there is space in the ring for the next send. */
4476         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4477
4478         return NETDEV_TX_OK;
4479
4480 out_drop:
4481         igb_unmap_and_free_tx_resource(tx_ring, first);
4482
4483         return NETDEV_TX_OK;
4484 }
4485
4486 static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter,
4487                                                     struct sk_buff *skb)
4488 {
4489         unsigned int r_idx = skb->queue_mapping;
4490
4491         if (r_idx >= adapter->num_tx_queues)
4492                 r_idx = r_idx % adapter->num_tx_queues;
4493
4494         return adapter->tx_ring[r_idx];
4495 }
4496
4497 static netdev_tx_t igb_xmit_frame(struct sk_buff *skb,
4498                                   struct net_device *netdev)
4499 {
4500         struct igb_adapter *adapter = netdev_priv(netdev);
4501
4502         if (test_bit(__IGB_DOWN, &adapter->state)) {
4503                 dev_kfree_skb_any(skb);
4504                 return NETDEV_TX_OK;
4505         }
4506
4507         if (skb->len <= 0) {
4508                 dev_kfree_skb_any(skb);
4509                 return NETDEV_TX_OK;
4510         }
4511
4512         /*
4513          * The minimum packet size with TCTL.PSP set is 17 so pad the skb
4514          * in order to meet this minimum size requirement.
4515          */
4516         if (skb->len < 17) {
4517                 if (skb_padto(skb, 17))
4518                         return NETDEV_TX_OK;
4519                 skb->len = 17;
4520         }
4521
4522         return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb));
4523 }
4524
4525 /**
4526  * igb_tx_timeout - Respond to a Tx Hang
4527  * @netdev: network interface device structure
4528  **/
4529 static void igb_tx_timeout(struct net_device *netdev)
4530 {
4531         struct igb_adapter *adapter = netdev_priv(netdev);
4532         struct e1000_hw *hw = &adapter->hw;
4533
4534         /* Do the reset outside of interrupt context */
4535         adapter->tx_timeout_count++;
4536
4537         if (hw->mac.type >= e1000_82580)
4538                 hw->dev_spec._82575.global_device_reset = true;
4539
4540         schedule_work(&adapter->reset_task);
4541         wr32(E1000_EICS,
4542              (adapter->eims_enable_mask & ~adapter->eims_other));
4543 }
4544
4545 static void igb_reset_task(struct work_struct *work)
4546 {
4547         struct igb_adapter *adapter;
4548         adapter = container_of(work, struct igb_adapter, reset_task);
4549
4550         igb_dump(adapter);
4551         netdev_err(adapter->netdev, "Reset adapter\n");
4552         igb_reinit_locked(adapter);
4553 }
4554
4555 /**
4556  * igb_get_stats64 - Get System Network Statistics
4557  * @netdev: network interface device structure
4558  * @stats: rtnl_link_stats64 pointer
4559  *
4560  **/
4561 static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *netdev,
4562                                                  struct rtnl_link_stats64 *stats)
4563 {
4564         struct igb_adapter *adapter = netdev_priv(netdev);
4565
4566         spin_lock(&adapter->stats64_lock);
4567         igb_update_stats(adapter, &adapter->stats64);
4568         memcpy(stats, &adapter->stats64, sizeof(*stats));
4569         spin_unlock(&adapter->stats64_lock);
4570
4571         return stats;
4572 }
4573
4574 /**
4575  * igb_change_mtu - Change the Maximum Transfer Unit
4576  * @netdev: network interface device structure
4577  * @new_mtu: new value for maximum frame size
4578  *
4579  * Returns 0 on success, negative on failure
4580  **/
4581 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4582 {
4583         struct igb_adapter *adapter = netdev_priv(netdev);
4584         struct pci_dev *pdev = adapter->pdev;
4585         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
4586
4587         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4588                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4589                 return -EINVAL;
4590         }
4591
4592 #define MAX_STD_JUMBO_FRAME_SIZE 9238
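        /*
         * 9238 bytes is a 9216-byte MTU plus 14 bytes of Ethernet header,
         * 4 bytes of FCS and 4 bytes of VLAN tag, which is why the message
         * below is phrased in terms of a 9216-byte MTU.
         */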
4593         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4594                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4595                 return -EINVAL;
4596         }
4597
4598         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4599                 msleep(1);
4600
4601         /* igb_down has a dependency on max_frame_size */
4602         adapter->max_frame_size = max_frame;
4603
4604         if (netif_running(netdev))
4605                 igb_down(adapter);
4606
4607         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4608                  netdev->mtu, new_mtu);
4609         netdev->mtu = new_mtu;
4610
4611         if (netif_running(netdev))
4612                 igb_up(adapter);
4613         else
4614                 igb_reset(adapter);
4615
4616         clear_bit(__IGB_RESETTING, &adapter->state);
4617
4618         return 0;
4619 }
4620
4621 /**
4622  * igb_update_stats - Update the board statistics counters
4623  * @adapter: board private structure
4624  **/
4625
4626 void igb_update_stats(struct igb_adapter *adapter,
4627                       struct rtnl_link_stats64 *net_stats)
4628 {
4629         struct e1000_hw *hw = &adapter->hw;
4630         struct pci_dev *pdev = adapter->pdev;
4631         u32 reg, mpc;
4632         u16 phy_tmp;
4633         int i;
4634         u64 bytes, packets;
4635         unsigned int start;
4636         u64 _bytes, _packets;
4637
4638 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4639
4640         /*
4641          * Prevent stats update while adapter is being reset, or if the pci
4642          * connection is down.
4643          */
4644         if (adapter->link_speed == 0)
4645                 return;
4646         if (pci_channel_offline(pdev))
4647                 return;
4648
4649         bytes = 0;
4650         packets = 0;
4651         for (i = 0; i < adapter->num_rx_queues; i++) {
4652                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4653                 struct igb_ring *ring = adapter->rx_ring[i];
4654
4655                 ring->rx_stats.drops += rqdpc_tmp;
4656                 net_stats->rx_fifo_errors += rqdpc_tmp;
4657
4658                 do {
4659                         start = u64_stats_fetch_begin_bh(&ring->rx_syncp);
4660                         _bytes = ring->rx_stats.bytes;
4661                         _packets = ring->rx_stats.packets;
4662                 } while (u64_stats_fetch_retry_bh(&ring->rx_syncp, start));
4663                 bytes += _bytes;
4664                 packets += _packets;
4665         }
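        /*
         * The u64_stats_fetch_begin/retry pairs above and below provide a
         * consistent snapshot of the 64-bit ring counters on 32-bit builds,
         * where the two halves cannot be read atomically; on 64-bit builds
         * they compile down to almost nothing.
         */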
4666
4667         net_stats->rx_bytes = bytes;
4668         net_stats->rx_packets = packets;
4669
4670         bytes = 0;
4671         packets = 0;
4672         for (i = 0; i < adapter->num_tx_queues; i++) {
4673                 struct igb_ring *ring = adapter->tx_ring[i];
4674                 do {
4675                         start = u64_stats_fetch_begin_bh(&ring->tx_syncp);
4676                         _bytes = ring->tx_stats.bytes;
4677                         _packets = ring->tx_stats.packets;
4678                 } while (u64_stats_fetch_retry_bh(&ring->tx_syncp, start));
4679                 bytes += _bytes;
4680                 packets += _packets;
4681         }
4682         net_stats->tx_bytes = bytes;
4683         net_stats->tx_packets = packets;
4684
4685         /* read stats registers */
4686         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4687         adapter->stats.gprc += rd32(E1000_GPRC);
4688         adapter->stats.gorc += rd32(E1000_GORCL);
4689         rd32(E1000_GORCH); /* clear GORCL */
4690         adapter->stats.bprc += rd32(E1000_BPRC);
4691         adapter->stats.mprc += rd32(E1000_MPRC);
4692         adapter->stats.roc += rd32(E1000_ROC);
4693
4694         adapter->stats.prc64 += rd32(E1000_PRC64);
4695         adapter->stats.prc127 += rd32(E1000_PRC127);
4696         adapter->stats.prc255 += rd32(E1000_PRC255);
4697         adapter->stats.prc511 += rd32(E1000_PRC511);
4698         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4699         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4700         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4701         adapter->stats.sec += rd32(E1000_SEC);
4702
4703         mpc = rd32(E1000_MPC);
4704         adapter->stats.mpc += mpc;
4705         net_stats->rx_fifo_errors += mpc;
4706         adapter->stats.scc += rd32(E1000_SCC);
4707         adapter->stats.ecol += rd32(E1000_ECOL);
4708         adapter->stats.mcc += rd32(E1000_MCC);
4709         adapter->stats.latecol += rd32(E1000_LATECOL);
4710         adapter->stats.dc += rd32(E1000_DC);
4711         adapter->stats.rlec += rd32(E1000_RLEC);
4712         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4713         adapter->stats.xontxc += rd32(E1000_XONTXC);
4714         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4715         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4716         adapter->stats.fcruc += rd32(E1000_FCRUC);
4717         adapter->stats.gptc += rd32(E1000_GPTC);
4718         adapter->stats.gotc += rd32(E1000_GOTCL);
4719         rd32(E1000_GOTCH); /* clear GOTCL */
4720         adapter->stats.rnbc += rd32(E1000_RNBC);
4721         adapter->stats.ruc += rd32(E1000_RUC);
4722         adapter->stats.rfc += rd32(E1000_RFC);
4723         adapter->stats.rjc += rd32(E1000_RJC);
4724         adapter->stats.tor += rd32(E1000_TORH);
4725         adapter->stats.tot += rd32(E1000_TOTH);
4726         adapter->stats.tpr += rd32(E1000_TPR);
4727
4728         adapter->stats.ptc64 += rd32(E1000_PTC64);
4729         adapter->stats.ptc127 += rd32(E1000_PTC127);
4730         adapter->stats.ptc255 += rd32(E1000_PTC255);
4731         adapter->stats.ptc511 += rd32(E1000_PTC511);
4732         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4733         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4734
4735         adapter->stats.mptc += rd32(E1000_MPTC);
4736         adapter->stats.bptc += rd32(E1000_BPTC);
4737
4738         adapter->stats.tpt += rd32(E1000_TPT);
4739         adapter->stats.colc += rd32(E1000_COLC);
4740
4741         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4742         /* read internal phy specific stats */
4743         reg = rd32(E1000_CTRL_EXT);
4744         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4745                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4746                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4747         }
4748
4749         adapter->stats.tsctc += rd32(E1000_TSCTC);
4750         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4751
4752         adapter->stats.iac += rd32(E1000_IAC);
4753         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4754         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4755         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4756         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4757         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4758         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4759         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4760         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4761
4762         /* Fill out the OS statistics structure */
4763         net_stats->multicast = adapter->stats.mprc;
4764         net_stats->collisions = adapter->stats.colc;
4765
4766         /* Rx Errors */
4767
4768         /* RLEC on some newer hardware can be incorrect so build
4769          * our own version based on RUC and ROC */
4770         net_stats->rx_errors = adapter->stats.rxerrc +
4771                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4772                 adapter->stats.ruc + adapter->stats.roc +
4773                 adapter->stats.cexterr;
4774         net_stats->rx_length_errors = adapter->stats.ruc +
4775                                       adapter->stats.roc;
4776         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4777         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4778         net_stats->rx_missed_errors = adapter->stats.mpc;
4779
4780         /* Tx Errors */
4781         net_stats->tx_errors = adapter->stats.ecol +
4782                                adapter->stats.latecol;
4783         net_stats->tx_aborted_errors = adapter->stats.ecol;
4784         net_stats->tx_window_errors = adapter->stats.latecol;
4785         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4786
4787         /* Tx Dropped needs to be maintained elsewhere */
4788
4789         /* Phy Stats */
4790         if (hw->phy.media_type == e1000_media_type_copper) {
4791                 if ((adapter->link_speed == SPEED_1000) &&
4792                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4793                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4794                         adapter->phy_stats.idle_errors += phy_tmp;
4795                 }
4796         }
4797
4798         /* Management Stats */
4799         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4800         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4801         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4802
4803         /* OS2BMC Stats */
4804         reg = rd32(E1000_MANC);
4805         if (reg & E1000_MANC_EN_BMC2OS) {
4806                 adapter->stats.o2bgptc += rd32(E1000_O2BGPTC);
4807                 adapter->stats.o2bspc += rd32(E1000_O2BSPC);
4808                 adapter->stats.b2ospc += rd32(E1000_B2OSPC);
4809                 adapter->stats.b2ogprc += rd32(E1000_B2OGPRC);
4810         }
4811 }
4812
4813 static irqreturn_t igb_msix_other(int irq, void *data)
4814 {
4815         struct igb_adapter *adapter = data;
4816         struct e1000_hw *hw = &adapter->hw;
4817         u32 icr = rd32(E1000_ICR);
4818         /* reading ICR causes bit 31 of EICR to be cleared */
4819
4820         if (icr & E1000_ICR_DRSTA)
4821                 schedule_work(&adapter->reset_task);
4822
4823         if (icr & E1000_ICR_DOUTSYNC) {
4824                 /* HW is reporting DMA is out of sync */
4825                 adapter->stats.doosync++;
4826                 /* The DMA Out of Sync is also an indication of a spoof event
4827                  * in IOV mode. Check the Wrong VM Behavior register to
4828                  * see if it is really a spoof event. */
4829                 igb_check_wvbr(adapter);
4830         }
4831
4832         /* Check for a mailbox event */
4833         if (icr & E1000_ICR_VMMB)
4834                 igb_msg_task(adapter);
4835
4836         if (icr & E1000_ICR_LSC) {
4837                 hw->mac.get_link_status = 1;
4838                 /* guard against interrupt when we're going down */
4839                 if (!test_bit(__IGB_DOWN, &adapter->state))
4840                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4841         }
4842
4843         wr32(E1000_EIMS, adapter->eims_other);
4844
4845         return IRQ_HANDLED;
4846 }
4847
4848 static void igb_write_itr(struct igb_q_vector *q_vector)
4849 {
4850         struct igb_adapter *adapter = q_vector->adapter;
4851         u32 itr_val = q_vector->itr_val & 0x7FFC;
4852
4853         if (!q_vector->set_itr)
4854                 return;
4855
4856         if (!itr_val)
4857                 itr_val = 0x4;
4858
4859         if (adapter->hw.mac.type == e1000_82575)
4860                 itr_val |= itr_val << 16;
4861         else
4862                 itr_val |= E1000_EITR_CNT_IGNR;
4863
4864         writel(itr_val, q_vector->itr_register);
4865         q_vector->set_itr = 0;
4866 }
4867
4868 static irqreturn_t igb_msix_ring(int irq, void *data)
4869 {
4870         struct igb_q_vector *q_vector = data;
4871
4872         /* Write the ITR value calculated from the previous interrupt. */
4873         igb_write_itr(q_vector);
4874
4875         napi_schedule(&q_vector->napi);
4876
4877         return IRQ_HANDLED;
4878 }
4879
4880 #ifdef CONFIG_IGB_DCA
4881 static void igb_update_dca(struct igb_q_vector *q_vector)
4882 {
4883         struct igb_adapter *adapter = q_vector->adapter;
4884         struct e1000_hw *hw = &adapter->hw;
4885         int cpu = get_cpu();
4886
4887         if (q_vector->cpu == cpu)
4888                 goto out_no_update;
4889
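        /*
         * Direct Cache Access: program the DCA tag of the CPU currently
         * running this vector into the per-queue TXCTRL/RXCTRL registers so
         * the chipset can steer descriptor (and, for Rx, header and payload)
         * DMA writes toward that CPU's cache.
         */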
4890         if (q_vector->tx.ring) {
4891                 int q = q_vector->tx.ring->reg_idx;
4892                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4893                 if (hw->mac.type == e1000_82575) {
4894                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4895                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4896                 } else {
4897                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4898                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4899                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4900                 }
4901                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4902                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4903         }
4904         if (q_vector->rx.ring) {
4905                 int q = q_vector->rx.ring->reg_idx;
4906                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4907                 if (hw->mac.type == e1000_82575) {
4908                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4909                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4910                 } else {
4911                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4912                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4913                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4914                 }
4915                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4916                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4917                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4918                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4919         }
4920         q_vector->cpu = cpu;
4921 out_no_update:
4922         put_cpu();
4923 }
4924
4925 static void igb_setup_dca(struct igb_adapter *adapter)
4926 {
4927         struct e1000_hw *hw = &adapter->hw;
4928         int i;
4929
4930         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4931                 return;
4932
4933         /* Always use CB2 mode, difference is masked in the CB driver. */
4934         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4935
4936         for (i = 0; i < adapter->num_q_vectors; i++) {
4937                 adapter->q_vector[i]->cpu = -1;
4938                 igb_update_dca(adapter->q_vector[i]);
4939         }
4940 }
4941
4942 static int __igb_notify_dca(struct device *dev, void *data)
4943 {
4944         struct net_device *netdev = dev_get_drvdata(dev);
4945         struct igb_adapter *adapter = netdev_priv(netdev);
4946         struct pci_dev *pdev = adapter->pdev;
4947         struct e1000_hw *hw = &adapter->hw;
4948         unsigned long event = *(unsigned long *)data;
4949
4950         switch (event) {
4951         case DCA_PROVIDER_ADD:
4952                 /* if already enabled, don't do it again */
4953                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4954                         break;
4955                 if (dca_add_requester(dev) == 0) {
4956                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4957                         dev_info(&pdev->dev, "DCA enabled\n");
4958                         igb_setup_dca(adapter);
4959                         break;
4960                 }
4961                 /* Fall Through since DCA is disabled. */
4962         case DCA_PROVIDER_REMOVE:
4963                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4964                         /* without this a class_device is left
4965                          * hanging around in the sysfs model */
4966                         dca_remove_requester(dev);
4967                         dev_info(&pdev->dev, "DCA disabled\n");
4968                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4969                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4970                 }
4971                 break;
4972         }
4973
4974         return 0;
4975 }
4976
4977 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4978                           void *p)
4979 {
4980         int ret_val;
4981
4982         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4983                                          __igb_notify_dca);
4984
4985         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4986 }
4987 #endif /* CONFIG_IGB_DCA */
4988
4989 #ifdef CONFIG_PCI_IOV
4990 static int igb_vf_configure(struct igb_adapter *adapter, int vf)
4991 {
4992         unsigned char mac_addr[ETH_ALEN];
4993         struct pci_dev *pdev = adapter->pdev;
4994         struct e1000_hw *hw = &adapter->hw;
4995         struct pci_dev *pvfdev;
4996         unsigned int device_id;
4997         u16 thisvf_devfn;
4998
4999         random_ether_addr(mac_addr);
5000         igb_set_vf_mac(adapter, vf, mac_addr);
5001
5002         switch (adapter->hw.mac.type) {
5003         case e1000_82576:
5004                 device_id = IGB_82576_VF_DEV_ID;
5005                 /* VF Stride for 82576 is 2 */
5006                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 1)) |
5007                         (pdev->devfn & 1);
5008                 break;
5009         case e1000_i350:
5010                 device_id = IGB_I350_VF_DEV_ID;
5011                 /* VF Stride for I350 is 4 */
5012                 thisvf_devfn = (pdev->devfn + 0x80 + (vf << 2)) |
5013                                 (pdev->devfn & 3);
5014                 break;
5015         default:
5016                 device_id = 0;
5017                 thisvf_devfn = 0;
5018                 break;
5019         }
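        /*
         * Example: with the PF at devfn 0x00 on an 82576, VF 3 is expected
         * at devfn 0x80 + (3 << 1) = 0x86; the loop below walks every device
         * with the VF device ID until it finds that function.
         */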
5020
5021         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5022         while (pvfdev) {
5023                 if (pvfdev->devfn == thisvf_devfn)
5024                         break;
5025                 pvfdev = pci_get_device(hw->vendor_id,
5026                                         device_id, pvfdev);
5027         }
5028
5029         if (pvfdev)
5030                 adapter->vf_data[vf].vfdev = pvfdev;
5031         else
5032                 dev_err(&pdev->dev,
5033                         "Couldn't find pci dev ptr for VF %4.4x\n",
5034                         thisvf_devfn);
5035         return pvfdev != NULL;
5036 }
5037
5038 static int igb_find_enabled_vfs(struct igb_adapter *adapter)
5039 {
5040         struct e1000_hw *hw = &adapter->hw;
5041         struct pci_dev *pdev = adapter->pdev;
5042         struct pci_dev *pvfdev;
5043         u16 vf_devfn = 0;
5044         u16 vf_stride;
5045         unsigned int device_id;
5046         int vfs_found = 0;
5047
5048         switch (adapter->hw.mac.type) {
5049         case e1000_82576:
5050                 device_id = IGB_82576_VF_DEV_ID;
5051                 /* VF Stride for 82576 is 2 */
5052                 vf_stride = 2;
5053                 break;
5054         case e1000_i350:
5055                 device_id = IGB_I350_VF_DEV_ID;
5056                 /* VF Stride for I350 is 4 */
5057                 vf_stride = 4;
5058                 break;
5059         default:
5060                 device_id = 0;
5061                 vf_stride = 0;
5062                 break;
5063         }
5064
5065         vf_devfn = pdev->devfn + 0x80;
5066         pvfdev = pci_get_device(hw->vendor_id, device_id, NULL);
5067         while (pvfdev) {
5068                 if (pvfdev->devfn == vf_devfn &&
5069                     (pvfdev->bus->number >= pdev->bus->number))
5070                         vfs_found++;
5071                 vf_devfn += vf_stride;
5072                 pvfdev = pci_get_device(hw->vendor_id,
5073                                         device_id, pvfdev);
5074         }
5075
5076         return vfs_found;
5077 }
5078
5079 static int igb_check_vf_assignment(struct igb_adapter *adapter)
5080 {
5081         int i;
5082         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5083                 if (adapter->vf_data[i].vfdev) {
5084                         if (adapter->vf_data[i].vfdev->dev_flags &
5085                             PCI_DEV_FLAGS_ASSIGNED)
5086                                 return true;
5087                 }
5088         }
5089         return false;
5090 }
5091
5092 #endif
5093 static void igb_ping_all_vfs(struct igb_adapter *adapter)
5094 {
5095         struct e1000_hw *hw = &adapter->hw;
5096         u32 ping;
5097         int i;
5098
5099         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
5100                 ping = E1000_PF_CONTROL_MSG;
5101                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
5102                         ping |= E1000_VT_MSGTYPE_CTS;
5103                 igb_write_mbx(hw, &ping, 1, i);
5104         }
5105 }
5106
5107 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5108 {
5109         struct e1000_hw *hw = &adapter->hw;
5110         u32 vmolr = rd32(E1000_VMOLR(vf));
5111         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5112
5113         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
5114                             IGB_VF_FLAG_MULTI_PROMISC);
5115         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
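        /*
         * Start from a clean slate: ROPE (accept unicast hash/UTA matches),
         * ROMPE (accept multicast hash/MTA matches) and MPME (multicast
         * promiscuous) are cleared here and only re-enabled below as far as
         * the VF's request allows.
         */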
5116
5117         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
5118                 vmolr |= E1000_VMOLR_MPME;
5119                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
5120                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
5121         } else {
5122                 /*
5123                  * if we have hashes and we are clearing a multicast promisc
5124                  * flag, we need to write the hashes to the MTA as this step
5125                  * was previously skipped
5126                  */
5127                 if (vf_data->num_vf_mc_hashes > 30) {
5128                         vmolr |= E1000_VMOLR_MPME;
5129                 } else if (vf_data->num_vf_mc_hashes) {
5130                         int j;
5131                         vmolr |= E1000_VMOLR_ROMPE;
5132                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5133                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5134                 }
5135         }
5136
5137         wr32(E1000_VMOLR(vf), vmolr);
5138
5139         /* there are flags left unprocessed, likely not supported */
5140         if (*msgbuf & E1000_VT_MSGINFO_MASK)
5141                 return -EINVAL;
5142
5143         return 0;
5144
5145 }
5146
5147 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
5148                                   u32 *msgbuf, u32 vf)
5149 {
5150         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5151         u16 *hash_list = (u16 *)&msgbuf[1];
5152         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5153         int i;
5154
5155         /* salt away the number of multicast addresses assigned
5156          * to this VF for later use, to restore when the PF multicast
5157          * list changes
5158          */
5159         vf_data->num_vf_mc_hashes = n;
5160
5161         /* only up to 30 hash values supported */
5162         if (n > 30)
5163                 n = 30;
5164
5165         /* store the hashes for later use */
5166         for (i = 0; i < n; i++)
5167                 vf_data->vf_mc_hashes[i] = hash_list[i];
5168
5169         /* Flush and reset the mta with the new values */
5170         igb_set_rx_mode(adapter->netdev);
5171
5172         return 0;
5173 }
5174
5175 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
5176 {
5177         struct e1000_hw *hw = &adapter->hw;
5178         struct vf_data_storage *vf_data;
5179         int i, j;
5180
5181         for (i = 0; i < adapter->vfs_allocated_count; i++) {
5182                 u32 vmolr = rd32(E1000_VMOLR(i));
5183                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
5184
5185                 vf_data = &adapter->vf_data[i];
5186
5187                 if ((vf_data->num_vf_mc_hashes > 30) ||
5188                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
5189                         vmolr |= E1000_VMOLR_MPME;
5190                 } else if (vf_data->num_vf_mc_hashes) {
5191                         vmolr |= E1000_VMOLR_ROMPE;
5192                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
5193                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
5194                 }
5195                 wr32(E1000_VMOLR(i), vmolr);
5196         }
5197 }
5198
5199 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
5200 {
5201         struct e1000_hw *hw = &adapter->hw;
5202         u32 pool_mask, reg, vid;
5203         int i;
5204
5205         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5206
5207         /* Find the vlan filter for this id */
5208         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5209                 reg = rd32(E1000_VLVF(i));
5210
5211                 /* remove the vf from the pool */
5212                 reg &= ~pool_mask;
5213
5214                 /* if pool is empty then remove entry from vfta */
5215                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
5216                     (reg & E1000_VLVF_VLANID_ENABLE)) {
5217                         vid = reg & E1000_VLVF_VLANID_MASK;
5218                         reg = 0;
5219                         igb_vfta_set(hw, vid, false);
5220                 }
5221
5222                 wr32(E1000_VLVF(i), reg);
5223         }
5224
5225         adapter->vf_data[vf].vlans_enabled = 0;
5226 }
5227
5228 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
5229 {
5230         struct e1000_hw *hw = &adapter->hw;
5231         u32 reg, i;
5232
5233         /* The vlvf table only exists on 82576 hardware and newer */
5234         if (hw->mac.type < e1000_82576)
5235                 return -1;
5236
5237         /* we only need to do this if VMDq is enabled */
5238         if (!adapter->vfs_allocated_count)
5239                 return -1;
5240
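        /*
         * Each VLVF entry pairs a VLAN ID with a pool-select bitmap naming
         * the VFs (and the PF) that belong to that VLAN; look for an enabled
         * entry that already carries this VID.
         */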
5241         /* Find the vlan filter for this id */
5242         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5243                 reg = rd32(E1000_VLVF(i));
5244                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
5245                     vid == (reg & E1000_VLVF_VLANID_MASK))
5246                         break;
5247         }
5248
5249         if (add) {
5250                 if (i == E1000_VLVF_ARRAY_SIZE) {
5251                         /* Did not find a matching VLAN ID entry that was
5252                          * enabled.  Search for a free filter entry, i.e.
5253                          * one without the enable bit set
5254                          */
5255                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
5256                                 reg = rd32(E1000_VLVF(i));
5257                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
5258                                         break;
5259                         }
5260                 }
5261                 if (i < E1000_VLVF_ARRAY_SIZE) {
5262                         /* Found an enabled/available entry */
5263                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
5264
5265                         /* if !enabled we need to set this up in vfta */
5266                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
5267                                 /* add VID to filter table */
5268                                 igb_vfta_set(hw, vid, true);
5269                                 reg |= E1000_VLVF_VLANID_ENABLE;
5270                         }
5271                         reg &= ~E1000_VLVF_VLANID_MASK;
5272                         reg |= vid;
5273                         wr32(E1000_VLVF(i), reg);
5274
5275                         /* do not modify RLPML for PF devices */
5276                         if (vf >= adapter->vfs_allocated_count)
5277                                 return 0;
5278
5279                         if (!adapter->vf_data[vf].vlans_enabled) {
5280                                 u32 size;
5281                                 reg = rd32(E1000_VMOLR(vf));
5282                                 size = reg & E1000_VMOLR_RLPML_MASK;
5283                                 size += 4;
5284                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5285                                 reg |= size;
5286                                 wr32(E1000_VMOLR(vf), reg);
5287                         }
5288
5289                         adapter->vf_data[vf].vlans_enabled++;
5290                 }
5291         } else {
5292                 if (i < E1000_VLVF_ARRAY_SIZE) {
5293                         /* remove vf from the pool */
5294                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
5295                         /* if pool is empty then remove entry from vfta */
5296                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
5297                                 reg = 0;
5298                                 igb_vfta_set(hw, vid, false);
5299                         }
5300                         wr32(E1000_VLVF(i), reg);
5301
5302                         /* do not modify RLPML for PF devices */
5303                         if (vf >= adapter->vfs_allocated_count)
5304                                 return 0;
5305
5306                         adapter->vf_data[vf].vlans_enabled--;
5307                         if (!adapter->vf_data[vf].vlans_enabled) {
5308                                 u32 size;
5309                                 reg = rd32(E1000_VMOLR(vf));
5310                                 size = reg & E1000_VMOLR_RLPML_MASK;
5311                                 size -= 4;
5312                                 reg &= ~E1000_VMOLR_RLPML_MASK;
5313                                 reg |= size;
5314                                 wr32(E1000_VMOLR(vf), reg);
5315                         }
5316                 }
5317         }
5318         return 0;
5319 }
5320
5321 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
5322 {
5323         struct e1000_hw *hw = &adapter->hw;
5324
5325         if (vid)
5326                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
5327         else
5328                 wr32(E1000_VMVIR(vf), 0);
5329 }
5330
5331 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
5332                                int vf, u16 vlan, u8 qos)
5333 {
5334         int err = 0;
5335         struct igb_adapter *adapter = netdev_priv(netdev);
5336
5337         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
5338                 return -EINVAL;
5339         if (vlan || qos) {
5340                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
5341                 if (err)
5342                         goto out;
5343                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
5344                 igb_set_vmolr(adapter, vf, !vlan);
5345                 adapter->vf_data[vf].pf_vlan = vlan;
5346                 adapter->vf_data[vf].pf_qos = qos;
5347                 dev_info(&adapter->pdev->dev,
5348                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
5349                 if (test_bit(__IGB_DOWN, &adapter->state)) {
5350                         dev_warn(&adapter->pdev->dev,
5351                                  "The VF VLAN has been set,"
5352                                  " but the PF device is not up.\n");
5353                         dev_warn(&adapter->pdev->dev,
5354                                  "Bring the PF device up before"
5355                                  " attempting to use the VF device.\n");
5356                 }
5357         } else {
5358                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
5359                                    false, vf);
5360                 igb_set_vmvir(adapter, vlan, vf);
5361                 igb_set_vmolr(adapter, vf, true);
5362                 adapter->vf_data[vf].pf_vlan = 0;
5363                 adapter->vf_data[vf].pf_qos = 0;
5364         }
5365 out:
5366         return err;
5367 }
5368
5369 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
5370 {
5371         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
5372         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
5373
5374         return igb_vlvf_set(adapter, vid, add, vf);
5375 }
5376
5377 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
5378 {
5379         /* clear flags - except flag that indicates PF has set the MAC */
5380         adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC;
5381         adapter->vf_data[vf].last_nack = jiffies;
5382
5383         /* reset offloads to defaults */
5384         igb_set_vmolr(adapter, vf, true);
5385
5386         /* reset vlans for device */
5387         igb_clear_vf_vfta(adapter, vf);
5388         if (adapter->vf_data[vf].pf_vlan)
5389                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
5390                                     adapter->vf_data[vf].pf_vlan,
5391                                     adapter->vf_data[vf].pf_qos);
5392         else
5393                 igb_clear_vf_vfta(adapter, vf);
5394
5395         /* reset multicast table array for vf */
5396         adapter->vf_data[vf].num_vf_mc_hashes = 0;
5397
5398         /* Flush and reset the mta with the new values */
5399         igb_set_rx_mode(adapter->netdev);
5400 }
5401
5402 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
5403 {
5404         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5405
5406         /* generate a new mac address as we were hotplug removed/added */
5407         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
5408                 random_ether_addr(vf_mac);
5409
5410         /* process remaining reset events */
5411         igb_vf_reset(adapter, vf);
5412 }
5413
5414 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
5415 {
5416         struct e1000_hw *hw = &adapter->hw;
5417         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
5418         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5419         u32 reg, msgbuf[3];
5420         u8 *addr = (u8 *)(&msgbuf[1]);
5421
5422         /* process all the same items cleared in a function level reset */
5423         igb_vf_reset(adapter, vf);
5424
5425         /* set vf mac address */
5426         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
5427
5428         /* enable transmit and receive for vf */
5429         reg = rd32(E1000_VFTE);
5430         wr32(E1000_VFTE, reg | (1 << vf));
5431         reg = rd32(E1000_VFRE);
5432         wr32(E1000_VFRE, reg | (1 << vf));
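        /*
         * VFTE and VFRE hold one enable bit per VF; setting this VF's bit
         * re-enables its transmit and receive queues after the reset.
         */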
5433
5434         adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS;
5435
5436         /* reply to reset with ack and vf mac address */
5437         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
5438         memcpy(addr, vf_mac, 6);
5439         igb_write_mbx(hw, msgbuf, 3, vf);
5440 }
5441
5442 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
5443 {
5444         /*
5445          * The VF MAC Address is stored in a packed array of bytes
5446          * starting at the second 32 bit word of the msg array
5447          */
5448         unsigned char *addr = (unsigned char *)&msg[1];
5449         int err = -1;
5450
5451         if (is_valid_ether_addr(addr))
5452                 err = igb_set_vf_mac(adapter, vf, addr);
5453
5454         return err;
5455 }
5456
5457 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5458 {
5459         struct e1000_hw *hw = &adapter->hw;
5460         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5461         u32 msg = E1000_VT_MSGTYPE_NACK;
5462
5463         /* if device isn't clear to send it shouldn't be reading either */
5464         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5465             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5466                 igb_write_mbx(hw, &msg, 1, vf);
5467                 vf_data->last_nack = jiffies;
5468         }
5469 }
5470
5471 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5472 {
5473         struct pci_dev *pdev = adapter->pdev;
5474         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5475         struct e1000_hw *hw = &adapter->hw;
5476         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5477         s32 retval;
5478
5479         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5480
5481         if (retval) {
5482                 /* if receive failed revoke VF CTS stats and restart init */
5483                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5484                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5485                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5486                         return;
5487                 goto out;
5488         }
5489
5490         /* this is a message we already processed, do nothing */
5491         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5492                 return;
5493
5494         /*
5495          * until the vf completes a reset it should not be
5496          * allowed to start any configuration.
5497          */
5498
5499         if (msgbuf[0] == E1000_VF_RESET) {
5500                 igb_vf_reset_msg(adapter, vf);
5501                 return;
5502         }
5503
5504         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5505                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5506                         return;
5507                 retval = -1;
5508                 goto out;
5509         }
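        /*
         * From here on the VF has completed the E1000_VF_RESET handshake and
         * is considered clear-to-send; requests arriving before that are
         * NACKed above, at most once every two seconds.
         */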
5510
5511         switch ((msgbuf[0] & 0xFFFF)) {
5512         case E1000_VF_SET_MAC_ADDR:
5513                 retval = -EINVAL;
5514                 if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC))
5515                         retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5516                 else
5517                         dev_warn(&pdev->dev,
5518                                  "VF %d attempted to override administratively "
5519                                  "set MAC address\nReload the VF driver to "
5520                                  "resume operations\n", vf);
5521                 break;
5522         case E1000_VF_SET_PROMISC:
5523                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5524                 break;
5525         case E1000_VF_SET_MULTICAST:
5526                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5527                 break;
5528         case E1000_VF_SET_LPE:
5529                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5530                 break;
5531         case E1000_VF_SET_VLAN:
5532                 retval = -1;
5533                 if (vf_data->pf_vlan)
5534                         dev_warn(&pdev->dev,
5535                                  "VF %d attempted to override administratively "
5536                                  "set VLAN tag\nReload the VF driver to "
5537                                  "resume operations\n", vf);
5538                 else
5539                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5540                 break;
5541         default:
5542                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5543                 retval = -1;
5544                 break;
5545         }
5546
5547         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5548 out:
5549         /* notify the VF of the results of what it sent us */
5550         if (retval)
5551                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5552         else
5553                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5554
5555         igb_write_mbx(hw, msgbuf, 1, vf);
5556 }
5557
5558 static void igb_msg_task(struct igb_adapter *adapter)
5559 {
5560         struct e1000_hw *hw = &adapter->hw;
5561         u32 vf;
5562
5563         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5564                 /* process any reset requests */
5565                 if (!igb_check_for_rst(hw, vf))
5566                         igb_vf_reset_event(adapter, vf);
5567
5568                 /* process any messages pending */
5569                 if (!igb_check_for_msg(hw, vf))
5570                         igb_rcv_msg_from_vf(adapter, vf);
5571
5572                 /* process any acks */
5573                 if (!igb_check_for_ack(hw, vf))
5574                         igb_rcv_ack_from_vf(adapter, vf);
5575         }
5576 }
5577
5578 /**
5579  *  igb_set_uta - Set unicast filter table address
5580  *  @adapter: board private structure
5581  *
5582  *  The unicast table address is a register array of 32-bit registers.
5583  *  The table is meant to be used in a way similar to how the MTA is used;
5584  *  however, due to certain limitations in the hardware it is necessary to
5585  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5586  *  enable bit to allow VLAN tag stripping when promiscuous mode is enabled.
5587  **/
5588 static void igb_set_uta(struct igb_adapter *adapter)
5589 {
5590         struct e1000_hw *hw = &adapter->hw;
5591         int i;
5592
5593         /* The UTA table only exists on 82576 hardware and newer */
5594         if (hw->mac.type < e1000_82576)
5595                 return;
5596
5597         /* we only need to do this if VMDq is enabled */
5598         if (!adapter->vfs_allocated_count)
5599                 return;
5600
5601         for (i = 0; i < hw->mac.uta_reg_count; i++)
5602                 array_wr32(E1000_UTA, i, ~0);
5603 }
5604
5605 /**
5606  * igb_intr_msi - Interrupt Handler
5607  * @irq: interrupt number
5608  * @data: pointer to a network interface device structure
5609  **/
5610 static irqreturn_t igb_intr_msi(int irq, void *data)
5611 {
5612         struct igb_adapter *adapter = data;
5613         struct igb_q_vector *q_vector = adapter->q_vector[0];
5614         struct e1000_hw *hw = &adapter->hw;
5615         /* reading ICR disables interrupts using IAM */
5616         u32 icr = rd32(E1000_ICR);
5617
5618         igb_write_itr(q_vector);
5619
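        /* DRSTA indicates a device reset has been asserted; recover by
         * re-initializing the adapter from the reset task
         */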
5620         if (icr & E1000_ICR_DRSTA)
5621                 schedule_work(&adapter->reset_task);
5622
5623         if (icr & E1000_ICR_DOUTSYNC) {
5624                 /* HW is reporting DMA is out of sync */
5625                 adapter->stats.doosync++;
5626         }
5627
5628         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5629                 hw->mac.get_link_status = 1;
5630                 if (!test_bit(__IGB_DOWN, &adapter->state))
5631                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5632         }
5633
5634         napi_schedule(&q_vector->napi);
5635
5636         return IRQ_HANDLED;
5637 }
5638
5639 /**
5640  * igb_intr - Legacy Interrupt Handler
5641  * @irq: interrupt number
5642  * @data: pointer to a network interface device structure
5643  **/
5644 static irqreturn_t igb_intr(int irq, void *data)
5645 {
5646         struct igb_adapter *adapter = data;
5647         struct igb_q_vector *q_vector = adapter->q_vector[0];
5648         struct e1000_hw *hw = &adapter->hw;
5649         /* Interrupt Auto-Mask: upon reading ICR, interrupts are masked, so there
5650          * is no need for the IMC write */
5651         u32 icr = rd32(E1000_ICR);
5652
5653         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5654          * not set, then the adapter didn't send an interrupt */
5655         if (!(icr & E1000_ICR_INT_ASSERTED))
5656                 return IRQ_NONE;
5657
5658         igb_write_itr(q_vector);
5659
5660         if (icr & E1000_ICR_DRSTA)
5661                 schedule_work(&adapter->reset_task);
5662
5663         if (icr & E1000_ICR_DOUTSYNC) {
5664                 /* HW is reporting DMA is out of sync */
5665                 adapter->stats.doosync++;
5666         }
5667
5668         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5669                 hw->mac.get_link_status = 1;
5670                 /* guard against interrupt when we're going down */
5671                 if (!test_bit(__IGB_DOWN, &adapter->state))
5672                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5673         }
5674
5675         napi_schedule(&q_vector->napi);
5676
5677         return IRQ_HANDLED;
5678 }
5679
5680 static void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5681 {
5682         struct igb_adapter *adapter = q_vector->adapter;
5683         struct e1000_hw *hw = &adapter->hw;
5684
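        /* (itr_setting & 3) is only non-zero for the adaptive ITR modes
         * (1 and 3); fixed microsecond values are stored shifted left by
         * two bits and therefore skip the recalculation below
         */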
5685         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
5686             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
5687                 if ((adapter->num_q_vectors == 1) && !adapter->vf_data)
5688                         igb_set_itr(q_vector);
5689                 else
5690                         igb_update_ring_itr(q_vector);
5691         }
5692
5693         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5694                 if (adapter->msix_entries)
5695                         wr32(E1000_EIMS, q_vector->eims_value);
5696                 else
5697                         igb_irq_enable(adapter);
5698         }
5699 }
5700
5701 /**
5702  * igb_poll - NAPI Rx polling callback
5703  * @napi: napi polling structure
5704  * @budget: count of how many packets we should handle
5705  **/
5706 static int igb_poll(struct napi_struct *napi, int budget)
5707 {
5708         struct igb_q_vector *q_vector = container_of(napi,
5709                                                      struct igb_q_vector,
5710                                                      napi);
5711         bool clean_complete = true;
5712
5713 #ifdef CONFIG_IGB_DCA
5714         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5715                 igb_update_dca(q_vector);
5716 #endif
5717         if (q_vector->tx.ring)
5718                 clean_complete = igb_clean_tx_irq(q_vector);
5719
5720         if (q_vector->rx.ring)
5721                 clean_complete &= igb_clean_rx_irq(q_vector, budget);
5722
5723         /* If all work not completed, return budget and keep polling */
5724         if (!clean_complete)
5725                 return budget;
5726
5727         /* If not enough Rx work done, exit the polling mode */
5728         napi_complete(napi);
5729         igb_ring_irq_enable(q_vector);
5730
5731         return 0;
5732 }
5733
5734 /**
5735  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5736  * @adapter: board private structure
5737  * @shhwtstamps: timestamp structure to update
5738  * @regval: unsigned 64bit system time value.
5739  *
5740  * We need to convert the system time value stored in the RX/TXSTMP registers
5741  * into a hwtstamp which can be used by the upper level timestamping functions
5742  */
5743 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5744                                    struct skb_shared_hwtstamps *shhwtstamps,
5745                                    u64 regval)
5746 {
5747         u64 ns;
5748
5749         /*
5750          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this up
5751          * by 24 bits to match the clock shift we set up earlier.
5752          */
5753         if (adapter->hw.mac.type >= e1000_82580)
5754                 regval <<= IGB_82580_TSYNC_SHIFT;
5755
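        /* convert the raw counter value to nanoseconds and refresh the
         * hardware/system time comparison used for syststamp below
         */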
5756         ns = timecounter_cyc2time(&adapter->clock, regval);
5757         timecompare_update(&adapter->compare, ns);
5758         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5759         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5760         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5761 }
5762
5763 /**
5764  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5765  * @q_vector: pointer to q_vector containing needed info
5766  * @buffer: pointer to igb_tx_buffer structure
5767  *
5768  * If we were asked to do hardware stamping and such a time stamp is
5769  * available, then it must have been for this skb here because we
5770  * allow only one such packet into the queue.
5771  */
5772 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector,
5773                             struct igb_tx_buffer *buffer_info)
5774 {
5775         struct igb_adapter *adapter = q_vector->adapter;
5776         struct e1000_hw *hw = &adapter->hw;
5777         struct skb_shared_hwtstamps shhwtstamps;
5778         u64 regval;
5779
5780         /* exit if the skb was not flagged for a hw timestamp or the TX stamp is not valid */
5781         if (likely(!(buffer_info->tx_flags & IGB_TX_FLAGS_TSTAMP)) ||
5782             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5783                 return;
5784
5785         regval = rd32(E1000_TXSTMPL);
5786         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5787
5788         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5789         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5790 }
5791
5792 /**
5793  * igb_clean_tx_irq - Reclaim resources after transmit completes
5794  * @q_vector: pointer to q_vector containing needed info
5795  * returns true if ring is completely cleaned
5796  **/
5797 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5798 {
5799         struct igb_adapter *adapter = q_vector->adapter;
5800         struct igb_ring *tx_ring = q_vector->tx.ring;
5801         struct igb_tx_buffer *tx_buffer;
5802         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5803         unsigned int total_bytes = 0, total_packets = 0;
5804         unsigned int budget = q_vector->tx.work_limit;
5805         unsigned int i = tx_ring->next_to_clean;
5806
5807         if (test_bit(__IGB_DOWN, &adapter->state))
5808                 return true;
5809
5810         tx_buffer = &tx_ring->tx_buffer_info[i];
5811         tx_desc = IGB_TX_DESC(tx_ring, i);
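        /* bias i negative by the ring size so the wrap check inside the
         * loop reduces to a simple test against zero; the bias is removed
         * again before next_to_clean is written back
         */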
5812         i -= tx_ring->count;
5813
5814         for (; budget; budget--) {
5815                 eop_desc = tx_buffer->next_to_watch;
5816
5817                 /* prevent any other reads prior to eop_desc */
5818                 rmb();
5819
5820                 /* if next_to_watch is not set then there is no work pending */
5821                 if (!eop_desc)
5822                         break;
5823
5824                 /* if DD is not set pending work has not been completed */
5825                 if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)))
5826                         break;
5827
5828                 /* clear next_to_watch to prevent false hangs */
5829                 tx_buffer->next_to_watch = NULL;
5830
5831                 /* update the statistics for this packet */
5832                 total_bytes += tx_buffer->bytecount;
5833                 total_packets += tx_buffer->gso_segs;
5834
5835                 /* retrieve hardware timestamp */
5836                 igb_tx_hwtstamp(q_vector, tx_buffer);
5837
5838                 /* free the skb */
5839                 dev_kfree_skb_any(tx_buffer->skb);
5840                 tx_buffer->skb = NULL;
5841
5842                 /* unmap skb header data */
5843                 dma_unmap_single(tx_ring->dev,
5844                                  tx_buffer->dma,
5845                                  tx_buffer->length,
5846                                  DMA_TO_DEVICE);
5847
5848                 /* clear last DMA location and unmap remaining buffers */
5849                 while (tx_desc != eop_desc) {
5850                         tx_buffer->dma = 0;
5851
5852                         tx_buffer++;
5853                         tx_desc++;
5854                         i++;
5855                         if (unlikely(!i)) {
5856                                 i -= tx_ring->count;
5857                                 tx_buffer = tx_ring->tx_buffer_info;
5858                                 tx_desc = IGB_TX_DESC(tx_ring, 0);
5859                         }
5860
5861                         /* unmap any remaining paged data */
5862                         if (tx_buffer->dma) {
5863                                 dma_unmap_page(tx_ring->dev,
5864                                                tx_buffer->dma,
5865                                                tx_buffer->length,
5866                                                DMA_TO_DEVICE);
5867                         }
5868                 }
5869
5870                 /* clear last DMA location */
5871                 tx_buffer->dma = 0;
5872
5873                 /* move us one more past the eop_desc for start of next pkt */
5874                 tx_buffer++;
5875                 tx_desc++;
5876                 i++;
5877                 if (unlikely(!i)) {
5878                         i -= tx_ring->count;
5879                         tx_buffer = tx_ring->tx_buffer_info;
5880                         tx_desc = IGB_TX_DESC(tx_ring, 0);
5881                 }
5882         }
5883
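        /* report the completed bytes and packets to the byte queue limits
         * (BQL) layer
         */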
5884         netdev_tx_completed_queue(txring_txq(tx_ring),
5885                                   total_packets, total_bytes);
5886         i += tx_ring->count;
5887         tx_ring->next_to_clean = i;
5888         u64_stats_update_begin(&tx_ring->tx_syncp);
5889         tx_ring->tx_stats.bytes += total_bytes;
5890         tx_ring->tx_stats.packets += total_packets;
5891         u64_stats_update_end(&tx_ring->tx_syncp);
5892         q_vector->tx.total_bytes += total_bytes;
5893         q_vector->tx.total_packets += total_packets;
5894
5895         if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
5896                 struct e1000_hw *hw = &adapter->hw;
5897
5898                 eop_desc = tx_buffer->next_to_watch;
5899
5900                 /* Detect a transmit hang in hardware; this serializes the
5901                  * check with the clearing of time_stamp and movement of i */
5902                 clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5903                 if (eop_desc &&
5904                     time_after(jiffies, tx_buffer->time_stamp +
5905                                (adapter->tx_timeout_factor * HZ)) &&
5906                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5907
5908                         /* detected Tx unit hang */
5909                         dev_err(tx_ring->dev,
5910                                 "Detected Tx Unit Hang\n"
5911                                 "  Tx Queue             <%d>\n"
5912                                 "  TDH                  <%x>\n"
5913                                 "  TDT                  <%x>\n"
5914                                 "  next_to_use          <%x>\n"
5915                                 "  next_to_clean        <%x>\n"
5916                                 "buffer_info[next_to_clean]\n"
5917                                 "  time_stamp           <%lx>\n"
5918                                 "  next_to_watch        <%p>\n"
5919                                 "  jiffies              <%lx>\n"
5920                                 "  desc.status          <%x>\n",
5921                                 tx_ring->queue_index,
5922                                 rd32(E1000_TDH(tx_ring->reg_idx)),
5923                                 readl(tx_ring->tail),
5924                                 tx_ring->next_to_use,
5925                                 tx_ring->next_to_clean,
5926                                 tx_buffer->time_stamp,
5927                                 eop_desc,
5928                                 jiffies,
5929                                 eop_desc->wb.status);
5930                         netif_stop_subqueue(tx_ring->netdev,
5931                                             tx_ring->queue_index);
5932
5933                         /* we are about to reset, no point in enabling stuff */
5934                         return true;
5935                 }
5936         }
5937
5938         if (unlikely(total_packets &&
5939                      netif_carrier_ok(tx_ring->netdev) &&
5940                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5941                 /* Make sure that anybody stopping the queue after this
5942                  * sees the new next_to_clean.
5943                  */
5944                 smp_mb();
5945                 if (__netif_subqueue_stopped(tx_ring->netdev,
5946                                              tx_ring->queue_index) &&
5947                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5948                         netif_wake_subqueue(tx_ring->netdev,
5949                                             tx_ring->queue_index);
5950
5951                         u64_stats_update_begin(&tx_ring->tx_syncp);
5952                         tx_ring->tx_stats.restart_queue++;
5953                         u64_stats_update_end(&tx_ring->tx_syncp);
5954                 }
5955         }
5956
5957         return !!budget;
5958 }
5959
5960 static inline void igb_rx_checksum(struct igb_ring *ring,
5961                                    union e1000_adv_rx_desc *rx_desc,
5962                                    struct sk_buff *skb)
5963 {
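        /* the skb arrives with ip_summed == CHECKSUM_NONE; it is only
         * upgraded to CHECKSUM_UNNECESSARY once the descriptor reports a
         * good TCP/UDP checksum
         */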
5964         skb_checksum_none_assert(skb);
5965
5966         /* Ignore Checksum bit is set */
5967         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM))
5968                 return;
5969
5970         /* Rx checksum disabled via ethtool */
5971         if (!(ring->netdev->features & NETIF_F_RXCSUM))
5972                 return;
5973
5974         /* TCP/UDP checksum error bit is set */
5975         if (igb_test_staterr(rx_desc,
5976                              E1000_RXDEXT_STATERR_TCPE |
5977                              E1000_RXDEXT_STATERR_IPE)) {
5978                 /*
5979                  * work around errata with SCTP packets where the TCPE aka
5980                  * L4E bit is set incorrectly on 64 byte (60 byte w/o CRC)
5981                  * packets; in that case let the stack check the crc32c
5982                  */
5983                 if (!((skb->len == 60) &&
5984                       test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
5985                         u64_stats_update_begin(&ring->rx_syncp);
5986                         ring->rx_stats.csum_err++;
5987                         u64_stats_update_end(&ring->rx_syncp);
5988                 }
5989                 /* let the stack verify checksum errors */
5990                 return;
5991         }
5992         /* It must be a TCP or UDP packet with a valid checksum */
5993         if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS |
5994                                       E1000_RXD_STAT_UDPCS))
5995                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5996
5997         dev_dbg(ring->dev, "cksum success: bits %08X\n",
5998                 le32_to_cpu(rx_desc->wb.upper.status_error));
5999 }
6000
6001 static inline void igb_rx_hash(struct igb_ring *ring,
6002                                union e1000_adv_rx_desc *rx_desc,
6003                                struct sk_buff *skb)
6004 {
6005         if (ring->netdev->features & NETIF_F_RXHASH)
6006                 skb->rxhash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
6007 }
6008
6009 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector,
6010                             union e1000_adv_rx_desc *rx_desc,
6011                             struct sk_buff *skb)
6012 {
6013         struct igb_adapter *adapter = q_vector->adapter;
6014         struct e1000_hw *hw = &adapter->hw;
6015         u64 regval;
6016
6017         if (!igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP |
6018                                        E1000_RXDADV_STAT_TS))
6