x86/PCI: truncate _CRS windows with _LEN > _MAX - _MIN + 1
[linux-2.6.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
#define DRV_VERSION "2.1.0-k2"
/* driver identity strings; igb_driver_name is also used as the pci_driver name */
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

/* board-specific setup info, indexed by the board_* value from igb_pci_tbl */
static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

/* PCI device IDs this driver claims; every entry maps to the 82575-family info */
static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
/* forward declarations for the driver's internal entry points */
void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
/* interrupt handlers: legacy, MSI, and the two MSI-X flavors */
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
/* SR-IOV / virtual function management */
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
/* notifier used to hear about DCA availability changes */
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
/* 0 (the default) means SR-IOV is not enabled unless requested at load time */
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */
166
static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

/* PCI error-recovery (AER) callbacks registered through igb_driver below */
static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};


/* main driver object handed to the PCI core by igb_init_module() */
static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Managment Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);
197
/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 * @tc: the cyclecounter embedded in the adapter private structure
 *
 * Returns the raw SYSTIM counter value.  The register reads must stay
 * in this order: the hardware latches the timestamp on the read of the
 * lowest register.
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on lowest register read. For the 82580
         * the lowest register is SYSTIMR instead of SYSTIML.  However we never
         * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
         */
        if (hw->mac.type == e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        /* assemble the 64-bit stamp from the low and high halves */
        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}
223
224 #ifdef DEBUG
225 /**
226  * igb_get_hw_dev_name - return device name string
227  * used by hardware layer to print debugging information
228  **/
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
230 {
231         struct igb_adapter *adapter = hw->back;
232         return adapter->netdev->name;
233 }
234
235 /**
236  * igb_get_time_str - format current NIC and system time as string
237  */
238 static char *igb_get_time_str(struct igb_adapter *adapter,
239                               char buffer[160])
240 {
241         cycle_t hw = adapter->cycles.read(&adapter->cycles);
242         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
243         struct timespec sys;
244         struct timespec delta;
245         getnstimeofday(&sys);
246
247         delta = timespec_sub(nic, sys);
248
249         sprintf(buffer,
250                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
251                 hw,
252                 (long)nic.tv_sec, nic.tv_nsec,
253                 (long)sys.tv_sec, sys.tv_nsec,
254                 (long)delta.tv_sec, delta.tv_nsec);
255
256         return buffer;
257 }
258 #endif
259
260 /**
261  * igb_init_module - Driver Registration Routine
262  *
263  * igb_init_module is the first routine called when the driver is
264  * loaded. All it does is register with the PCI subsystem.
265  **/
266 static int __init igb_init_module(void)
267 {
268         int ret;
269         printk(KERN_INFO "%s - version %s\n",
270                igb_driver_string, igb_driver_version);
271
272         printk(KERN_INFO "%s\n", igb_copyright);
273
274 #ifdef CONFIG_IGB_DCA
275         dca_register_notify(&dca_notifier);
276 #endif
277         ret = pci_register_driver(&igb_driver);
278         return ret;
279 }
280
281 module_init(igb_init_module);
282
/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        /* stop listening for DCA notifications before the driver goes away */
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);
298
/* 82576 VMDq queue layout: queue register index for the i-th PF queue */
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * In order to avoid collision we start at the first free queue
                 * and continue consuming queues in the same sequence
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                        for (; j < adapter->rss_queues; j++)
                                adapter->tx_ring[j]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(j);
                }
                /* fall through - remaining queues get the linear mapping below */
        case e1000_82575:
        case e1000_82580:
        default:
                /* i and j keep their values, so only the not-yet-mapped
                 * queues are assigned here */
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}
337
338 static void igb_free_queues(struct igb_adapter *adapter)
339 {
340         int i;
341
342         for (i = 0; i < adapter->num_tx_queues; i++) {
343                 kfree(adapter->tx_ring[i]);
344                 adapter->tx_ring[i] = NULL;
345         }
346         for (i = 0; i < adapter->num_rx_queues; i++) {
347                 kfree(adapter->rx_ring[i]);
348                 adapter->rx_ring[i] = NULL;
349         }
350         adapter->num_rx_queues = 0;
351         adapter->num_tx_queues = 0;
352 }
353
354 /**
355  * igb_alloc_queues - Allocate memory for all rings
356  * @adapter: board private structure to initialize
357  *
358  * We allocate one ring per queue at run-time since we don't know the
359  * number of queues at compile-time.
360  **/
361 static int igb_alloc_queues(struct igb_adapter *adapter)
362 {
363         struct igb_ring *ring;
364         int i;
365
366         for (i = 0; i < adapter->num_tx_queues; i++) {
367                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
368                 if (!ring)
369                         goto err;
370                 ring->count = adapter->tx_ring_count;
371                 ring->queue_index = i;
372                 ring->pdev = adapter->pdev;
373                 ring->netdev = adapter->netdev;
374                 /* For 82575, context index must be unique per ring. */
375                 if (adapter->hw.mac.type == e1000_82575)
376                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
377                 adapter->tx_ring[i] = ring;
378         }
379
380         for (i = 0; i < adapter->num_rx_queues; i++) {
381                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
382                 if (!ring)
383                         goto err;
384                 ring->count = adapter->rx_ring_count;
385                 ring->queue_index = i;
386                 ring->pdev = adapter->pdev;
387                 ring->netdev = adapter->netdev;
388                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390                 /* set flag indicating ring supports SCTP checksum offload */
391                 if (adapter->hw.mac.type >= e1000_82576)
392                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393                 adapter->rx_ring[i] = ring;
394         }
395
396         igb_cache_ring_register(adapter);
397
398         return 0;
399
400 err:
401         igb_free_queues(adapter);
402
403         return -ENOMEM;
404 }
405
/* sentinel meaning "this q_vector has no ring of that direction" */
#define IGB_N0_QUEUE -1
/**
 * igb_assign_vector - bind a q_vector's rings to an MSI-X vector in hardware
 * @q_vector: the interrupt vector's software state
 * @msix_vector: index of the MSI-X vector to program
 *
 * Programs the per-MAC interrupt routing registers (MSIXBM on 82575,
 * IVAR tables on 82576/82580) so the q_vector's rx/tx rings raise the
 * given MSI-X vector, records the vector's EIMS bit, and arms the
 * first-interrupt itr update.
 */
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        u32 msixbm = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        u32 ivar, index;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;

        if (q_vector->rx_ring)
                rx_queue = q_vector->rx_ring->reg_idx;
        if (q_vector->tx_ring)
                tx_queue = q_vector->tx_ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                /* in non-MSI-X mode, vector 0 also carries the "other" causes */
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /* 82576 uses a table-based method for assigning vectors.
                   Each queue has a single entry in the table to which we write
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue < 8) {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        } else {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue < 8) {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        } else {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
                /* 82580 uses the same table-based approach as 82576 but has fewer
                   entries as a result we carry over for queues greater than 4. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue & 0x1) {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        } else {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue & 0x1) {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        } else {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                /* unknown MAC type: programming the wrong registers would be
                 * worse than stopping here */
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}
515
/**
 * igb_configure_msix - Configure MSI-X hardware
 * @adapter: board private structure
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.  Vector 0 is reserved for "other" causes
 * (e.g. link changes); the remaining vectors are handed to the
 * q_vectors via igb_assign_vector().
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support*/
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        /* hand out the remaining vectors, one per q_vector */
        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        /* flush the posted register writes */
        wrfl();
}
576
577 /**
578  * igb_request_msix - Initialize MSI-X interrupts
579  *
580  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
581  * kernel.
582  **/
583 static int igb_request_msix(struct igb_adapter *adapter)
584 {
585         struct net_device *netdev = adapter->netdev;
586         struct e1000_hw *hw = &adapter->hw;
587         int i, err = 0, vector = 0;
588
589         err = request_irq(adapter->msix_entries[vector].vector,
590                           igb_msix_other, 0, netdev->name, adapter);
591         if (err)
592                 goto out;
593         vector++;
594
595         for (i = 0; i < adapter->num_q_vectors; i++) {
596                 struct igb_q_vector *q_vector = adapter->q_vector[i];
597
598                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
599
600                 if (q_vector->rx_ring && q_vector->tx_ring)
601                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
602                                 q_vector->rx_ring->queue_index);
603                 else if (q_vector->tx_ring)
604                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
605                                 q_vector->tx_ring->queue_index);
606                 else if (q_vector->rx_ring)
607                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
608                                 q_vector->rx_ring->queue_index);
609                 else
610                         sprintf(q_vector->name, "%s-unused", netdev->name);
611
612                 err = request_irq(adapter->msix_entries[vector].vector,
613                                   igb_msix_ring, 0, q_vector->name,
614                                   q_vector);
615                 if (err)
616                         goto out;
617                 vector++;
618         }
619
620         igb_configure_msix(adapter);
621         return 0;
622 out:
623         return err;
624 }
625
626 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
627 {
628         if (adapter->msix_entries) {
629                 pci_disable_msix(adapter->pdev);
630                 kfree(adapter->msix_entries);
631                 adapter->msix_entries = NULL;
632         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
633                 pci_disable_msi(adapter->pdev);
634         }
635 }
636
637 /**
638  * igb_free_q_vectors - Free memory allocated for interrupt vectors
639  * @adapter: board private structure to initialize
640  *
641  * This function frees the memory allocated to the q_vectors.  In addition if
642  * NAPI is enabled it will delete any references to the NAPI struct prior
643  * to freeing the q_vector.
644  **/
645 static void igb_free_q_vectors(struct igb_adapter *adapter)
646 {
647         int v_idx;
648
649         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
650                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
651                 adapter->q_vector[v_idx] = NULL;
652                 if (!q_vector)
653                         continue;
654                 netif_napi_del(&q_vector->napi);
655                 kfree(q_vector);
656         }
657         adapter->num_q_vectors = 0;
658 }
659
/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: board private structure
 *
 * Tears everything down: frees the rings, then the q_vectors, and
 * finally releases the MSI/MSI-X capability itself, leaving the device
 * with 0 rx queues, 0 tx queues and no vectors allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}
672
/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 * @adapter: board private structure
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.  Tries MSI-X first (one
 * vector per rx queue, plus tx vectors when queues are not paired,
 * plus one for link status); on failure falls back to a single-queue
 * MSI/legacy setup, tearing down SR-IOV which requires MSI-X.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        /* MSI-X failed: release the partially-enabled state before the
         * single-vector fallback */
        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        /* fall back to a single paired rx/tx queue on one vector */
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced Tx Queue count. */
        adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
        return;
}
746
747 /**
748  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
749  * @adapter: board private structure to initialize
750  *
751  * We allocate one q_vector per queue interrupt.  If allocation fails we
752  * return -ENOMEM.
753  **/
754 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
755 {
756         struct igb_q_vector *q_vector;
757         struct e1000_hw *hw = &adapter->hw;
758         int v_idx;
759
760         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
761                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
762                 if (!q_vector)
763                         goto err_out;
764                 q_vector->adapter = adapter;
765                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
766                 q_vector->itr_val = IGB_START_ITR;
767                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
768                 adapter->q_vector[v_idx] = q_vector;
769         }
770         return 0;
771
772 err_out:
773         igb_free_q_vectors(adapter);
774         return -ENOMEM;
775 }
776
777 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
778                                       int ring_idx, int v_idx)
779 {
780         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
781
782         q_vector->rx_ring = adapter->rx_ring[ring_idx];
783         q_vector->rx_ring->q_vector = q_vector;
784         q_vector->itr_val = adapter->rx_itr_setting;
785         if (q_vector->itr_val && q_vector->itr_val <= 3)
786                 q_vector->itr_val = IGB_START_ITR;
787 }
788
789 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
790                                       int ring_idx, int v_idx)
791 {
792         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
793
794         q_vector->tx_ring = adapter->tx_ring[ring_idx];
795         q_vector->tx_ring->q_vector = q_vector;
796         q_vector->itr_val = adapter->tx_itr_setting;
797         if (q_vector->itr_val && q_vector->itr_val <= 3)
798                 q_vector->itr_val = IGB_START_ITR;
799 }
800
801 /**
802  * igb_map_ring_to_vector - maps allocated queues to vectors
803  *
804  * This function maps the recently allocated queues to vectors.
805  **/
806 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
807 {
808         int i;
809         int v_idx = 0;
810
811         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
812             (adapter->num_q_vectors < adapter->num_tx_queues))
813                 return -ENOMEM;
814
815         if (adapter->num_q_vectors >=
816             (adapter->num_rx_queues + adapter->num_tx_queues)) {
817                 for (i = 0; i < adapter->num_rx_queues; i++)
818                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
819                 for (i = 0; i < adapter->num_tx_queues; i++)
820                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
821         } else {
822                 for (i = 0; i < adapter->num_rx_queues; i++) {
823                         if (i < adapter->num_tx_queues)
824                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
825                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
826                 }
827                 for (; i < adapter->num_tx_queues; i++)
828                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
829         }
830         return 0;
831 }
832
/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 * On failure each goto label unwinds exactly what was set up before it,
 * in reverse order of acquisition.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	int err;

	/* selects MSI-X / MSI / legacy and sizes num_q_vectors */
	igb_set_interrupt_capability(adapter);

	err = igb_alloc_q_vectors(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
		goto err_alloc_q_vectors;
	}

	err = igb_alloc_queues(adapter);
	if (err) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		goto err_alloc_queues;
	}

	err = igb_map_ring_to_vector(adapter);
	if (err) {
		dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
		goto err_map_queues;
	}


	return 0;
err_map_queues:
	igb_free_queues(adapter);
err_alloc_queues:
	igb_free_q_vectors(adapter);
err_alloc_q_vectors:
	igb_reset_interrupt_capability(adapter);
	return err;
}
873
/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel: MSI-X first, then MSI,
 * finally shared legacy interrupts.  Each fallback step rebuilds the
 * queue/vector state for the reduced capability before requesting the
 * next interrupt type.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;
	int err = 0;

	if (adapter->msix_entries) {
		err = igb_request_msix(adapter);
		if (!err)
			goto request_done;
		/* fall back to MSI */
		igb_clear_interrupt_scheme(adapter);
		if (!pci_enable_msi(adapter->pdev))
			adapter->flags |= IGB_FLAG_HAS_MSI;
		/* ring resources were sized for multiple queues; tear them
		 * down and rebuild for the single-queue fallback */
		igb_free_all_tx_resources(adapter);
		igb_free_all_rx_resources(adapter);
		adapter->num_tx_queues = 1;
		adapter->num_rx_queues = 1;
		adapter->num_q_vectors = 1;
		err = igb_alloc_q_vectors(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for vectors\n");
			goto request_done;
		}
		err = igb_alloc_queues(adapter);
		if (err) {
			dev_err(&pdev->dev,
				"Unable to allocate memory for queues\n");
			igb_free_q_vectors(adapter);
			goto request_done;
		}
		igb_setup_all_tx_resources(adapter);
		igb_setup_all_rx_resources(adapter);
	} else {
		/* non-MSI-X: route everything through vector 0 */
		igb_assign_vector(adapter->q_vector[0], 0);
	}

	if (adapter->flags & IGB_FLAG_HAS_MSI) {
		err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
				  netdev->name, adapter);
		if (!err)
			goto request_done;

		/* fall back to legacy interrupts */
		igb_reset_interrupt_capability(adapter);
		adapter->flags &= ~IGB_FLAG_HAS_MSI;
	}

	err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
			  netdev->name, adapter);

	if (err)
		dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
			err);

request_done:
	return err;
}
939
940 static void igb_free_irq(struct igb_adapter *adapter)
941 {
942         if (adapter->msix_entries) {
943                 int vector = 0, i;
944
945                 free_irq(adapter->msix_entries[vector++].vector, adapter);
946
947                 for (i = 0; i < adapter->num_q_vectors; i++) {
948                         struct igb_q_vector *q_vector = adapter->q_vector[i];
949                         free_irq(adapter->msix_entries[vector++].vector,
950                                  q_vector);
951                 }
952         } else {
953                 free_irq(adapter->pdev->irq, adapter);
954         }
955 }
956
957 /**
958  * igb_irq_disable - Mask off interrupt generation on the NIC
959  * @adapter: board private structure
960  **/
961 static void igb_irq_disable(struct igb_adapter *adapter)
962 {
963         struct e1000_hw *hw = &adapter->hw;
964
965         /*
966          * we need to be careful when disabling interrupts.  The VFs are also
967          * mapped into these registers and so clearing the bits can cause
968          * issues on the VF drivers so we only need to clear what we set
969          */
970         if (adapter->msix_entries) {
971                 u32 regval = rd32(E1000_EIAM);
972                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
973                 wr32(E1000_EIMC, adapter->eims_enable_mask);
974                 regval = rd32(E1000_EIAC);
975                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
976         }
977
978         wr32(E1000_IAM, 0);
979         wr32(E1000_IMC, ~0);
980         wrfl();
981         synchronize_irq(adapter->pdev->irq);
982 }
983
/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 *
 * Only the bits in eims_enable_mask are touched in the shared EIAC/EIAM
 * registers, so VF-owned bits are left alone.
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	if (adapter->msix_entries) {
		u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
		u32 regval = rd32(E1000_EIAC);
		/* OR our vectors into auto-clear / auto-mask, preserving
		 * whatever the VFs already have set */
		wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
		regval = rd32(E1000_EIAM);
		wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
		wr32(E1000_EIMS, adapter->eims_enable_mask);
		if (adapter->vfs_allocated_count) {
			/* unmask all VF mailbox interrupts */
			wr32(E1000_MBVFIMR, 0xFF);
			ims |= E1000_IMS_VMMB;
		}
		if (adapter->hw.mac.type == e1000_82580)
			ims |= E1000_IMS_DRSTA;

		wr32(E1000_IMS, ims);
	} else {
		/* MSI/legacy: enable the default cause set plus device reset
		 * assertion, and mirror it into the auto-mask register */
		wr32(E1000_IMS, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
		wr32(E1000_IAM, IMS_ENABLE_MASK |
				E1000_IMS_DRSTA);
	}
}
1014
/**
 * igb_update_mng_vlan - sync the VLAN filter with the manageability VLAN
 * @adapter: board private structure
 *
 * Adds the VLAN id advertised by the firmware's DHCP cookie to the VLAN
 * filter table (so management traffic keeps flowing) and drops the old
 * entry when it is no longer referenced by the stack.
 **/
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u16 vid = adapter->hw.mng_cookie.vlan_id;
	u16 old_vid = adapter->mng_vlan_id;

	if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
		/* add VID to filter table */
		igb_vfta_set(hw, vid, true);
		adapter->mng_vlan_id = vid;
	} else {
		adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
	}

	/* only remove the old VID if it changed and no vlan device on the
	 * stack side still uses it */
	if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
	    (vid != old_vid) &&
	    !vlan_group_get_device(adapter->vlgrp, old_vid)) {
		/* remove VID from filter table */
		igb_vfta_set(hw, old_vid, false);
	}
}
1036
1037 /**
1038  * igb_release_hw_control - release control of the h/w to f/w
1039  * @adapter: address of board private structure
1040  *
1041  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1042  * For ASF and Pass Through versions of f/w this means that the
1043  * driver is no longer loaded.
1044  *
1045  **/
1046 static void igb_release_hw_control(struct igb_adapter *adapter)
1047 {
1048         struct e1000_hw *hw = &adapter->hw;
1049         u32 ctrl_ext;
1050
1051         /* Let firmware take over control of h/w */
1052         ctrl_ext = rd32(E1000_CTRL_EXT);
1053         wr32(E1000_CTRL_EXT,
1054                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1055 }
1056
1057 /**
1058  * igb_get_hw_control - get control of the h/w from f/w
1059  * @adapter: address of board private structure
1060  *
1061  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1062  * For ASF and Pass Through versions of f/w this means that
1063  * the driver is loaded.
1064  *
1065  **/
1066 static void igb_get_hw_control(struct igb_adapter *adapter)
1067 {
1068         struct e1000_hw *hw = &adapter->hw;
1069         u32 ctrl_ext;
1070
1071         /* Let firmware know the driver has taken over */
1072         ctrl_ext = rd32(E1000_CTRL_EXT);
1073         wr32(E1000_CTRL_EXT,
1074                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1075 }
1076
/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 *
 * Runs the full hardware programming sequence: filters/VLAN first, then
 * the Tx and Rx control paths, then buffer pre-fill.  The order of these
 * calls matters.
 **/
static void igb_configure(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	int i;

	igb_get_hw_control(adapter);
	igb_set_rx_mode(netdev);

	igb_restore_vlan(adapter);

	igb_setup_tctl(adapter);
	igb_setup_mrqc(adapter);
	igb_setup_rctl(adapter);

	igb_configure_tx(adapter);
	igb_configure_rx(adapter);

	igb_rx_fifo_flush_82575(&adapter->hw);

	/* call igb_desc_unused which always leaves
	 * at least 1 descriptor unused to make sure
	 * next_to_use != next_to_clean */
	for (i = 0; i < adapter->num_rx_queues; i++) {
		struct igb_ring *ring = adapter->rx_ring[i];
		igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
	}


	/* remember the stack's queue length so igb_down() can restore it */
	adapter->tx_queue_len = netdev->tx_queue_len;
}
1111
1112 /**
1113  * igb_power_up_link - Power up the phy/serdes link
1114  * @adapter: address of board private structure
1115  **/
1116 void igb_power_up_link(struct igb_adapter *adapter)
1117 {
1118         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1119                 igb_power_up_phy_copper(&adapter->hw);
1120         else
1121                 igb_power_up_serdes_link_82575(&adapter->hw);
1122 }
1123
1124 /**
1125  * igb_power_down_link - Power down the phy/serdes link
1126  * @adapter: address of board private structure
1127  */
1128 static void igb_power_down_link(struct igb_adapter *adapter)
1129 {
1130         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1131                 igb_power_down_phy_copper_82575(&adapter->hw);
1132         else
1133                 igb_shutdown_serdes_link_82575(&adapter->hw);
1134 }
1135
/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 *
 * Reprograms the freshly-reset hardware, enables NAPI and interrupts,
 * then starts the Tx queues and the watchdog.  Returns 0.
 **/
int igb_up(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* hardware has been reset, we need to reload some things */
	igb_configure(adapter);

	clear_bit(__IGB_DOWN, &adapter->state);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}
	if (adapter->msix_entries)
		igb_configure_msix(adapter);
	else
		igb_assign_vector(adapter->q_vector[0], 0);

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);
	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(adapter->netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;
}
1178
/**
 * igb_down - quiesce the interface
 * @adapter: board private structure
 *
 * Stops Rx/Tx in the hardware, disables NAPI, interrupts and timers,
 * records statistics, resets the hardware and drains the rings.
 **/
void igb_down(struct igb_adapter *adapter)
{
	struct net_device *netdev = adapter->netdev;
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl, rctl;
	int i;

	/* signal that we're down so the interrupt handler does not
	 * reschedule our watchdog timer */
	set_bit(__IGB_DOWN, &adapter->state);

	/* disable receives in the hardware */
	rctl = rd32(E1000_RCTL);
	wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
	/* flush and sleep below */

	netif_tx_stop_all_queues(netdev);

	/* disable transmits in the hardware */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_EN;
	wr32(E1000_TCTL, tctl);
	/* flush both disables and wait for them to finish */
	wrfl();
	msleep(10);

	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_disable(&q_vector->napi);
	}

	igb_irq_disable(adapter);

	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	/* restore the queue length saved in igb_configure() */
	netdev->tx_queue_len = adapter->tx_queue_len;
	netif_carrier_off(netdev);

	/* record the stats before reset*/
	igb_update_stats(adapter);

	adapter->link_speed = 0;
	adapter->link_duplex = 0;

	/* skip the hardware reset when the PCI channel is already gone */
	if (!pci_channel_offline(adapter->pdev))
		igb_reset(adapter);
	igb_clean_all_tx_rings(adapter);
	igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA

	/* since we reset the hardware DCA settings were cleared */
	igb_setup_dca(adapter);
#endif
}
1234
1235 void igb_reinit_locked(struct igb_adapter *adapter)
1236 {
1237         WARN_ON(in_interrupt());
1238         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1239                 msleep(1);
1240         igb_down(adapter);
1241         igb_up(adapter);
1242         clear_bit(__IGB_RESETTING, &adapter->state);
1243 }
1244
/**
 * igb_reset - bring the hardware back to a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer (PBA) for the current MTU, recomputes
 * the flow-control watermarks, quiesces any SR-IOV VFs, then resets and
 * reinitializes the MAC.  The exact register sequence below is
 * order-sensitive.
 **/
void igb_reset(struct igb_adapter *adapter)
{
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	struct e1000_mac_info *mac = &hw->mac;
	struct e1000_fc_info *fc = &hw->fc;
	u32 pba = 0, tx_space, min_tx_space, min_rx_space;
	u16 hwm;

	/* Repartition Pba for greater than 9k mtu
	 * To take effect CTRL.RST is required.
	 */
	switch (mac->type) {
	case e1000_82580:
		pba = rd32(E1000_RXPBS);
		pba = igb_rxpbs_adjust_82580(pba);
		break;
	case e1000_82576:
		pba = rd32(E1000_RXPBS);
		pba &= E1000_RXPBS_SIZE_MASK_82576;
		break;
	case e1000_82575:
	default:
		pba = E1000_PBA_34K;
		break;
	}

	/* jumbo-frame PBA rebalancing only applies to pre-82576 parts */
	if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
	    (mac->type < e1000_82576)) {
		/* adjust PBA for jumbo frames */
		wr32(E1000_PBA, pba);

		/* To maintain wire speed transmits, the Tx FIFO should be
		 * large enough to accommodate two full transmit packets,
		 * rounded up to the next 1KB and expressed in KB.  Likewise,
		 * the Rx FIFO should be large enough to accommodate at least
		 * one full receive packet and is similarly rounded up and
		 * expressed in KB. */
		pba = rd32(E1000_PBA);
		/* upper 16 bits has Tx packet buffer allocation size in KB */
		tx_space = pba >> 16;
		/* lower 16 bits has Rx packet buffer allocation size in KB */
		pba &= 0xffff;
		/* the tx fifo also stores 16 bytes of information about the tx
		 * but don't include ethernet FCS because hardware appends it */
		min_tx_space = (adapter->max_frame_size +
				sizeof(union e1000_adv_tx_desc) -
				ETH_FCS_LEN) * 2;
		min_tx_space = ALIGN(min_tx_space, 1024);
		min_tx_space >>= 10;
		/* software strips receive CRC, so leave room for it */
		min_rx_space = adapter->max_frame_size;
		min_rx_space = ALIGN(min_rx_space, 1024);
		min_rx_space >>= 10;

		/* If current Tx allocation is less than the min Tx FIFO size,
		 * and the min Tx FIFO size is less than the current Rx FIFO
		 * allocation, take space away from current Rx allocation */
		if (tx_space < min_tx_space &&
		    ((min_tx_space - tx_space) < pba)) {
			pba = pba - (min_tx_space - tx_space);

			/* if short on rx space, rx wins and must trump tx
			 * adjustment */
			if (pba < min_rx_space)
				pba = min_rx_space;
		}
		wr32(E1000_PBA, pba);
	}

	/* flow control settings */
	/* The high water mark must be low enough to fit one full frame
	 * (or the size used for early receive) above it in the Rx FIFO.
	 * Set it to the lower of:
	 * - 90% of the Rx FIFO size, or
	 * - the full Rx FIFO size minus one full frame */
	hwm = min(((pba << 10) * 9 / 10),
			((pba << 10) - 2 * adapter->max_frame_size));

	fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
	fc->low_water = fc->high_water - 16;
	fc->pause_time = 0xFFFF;
	fc->send_xon = 1;
	fc->current_mode = fc->requested_mode;

	/* disable receive for all VFs and wait one second */
	if (adapter->vfs_allocated_count) {
		int i;
		/* clear per-VF state; VFs must renegotiate after the reset */
		for (i = 0 ; i < adapter->vfs_allocated_count; i++)
			adapter->vf_data[i].flags = 0;

		/* ping all the active vfs to let them know we are going down */
		igb_ping_all_vfs(adapter);

		/* disable transmits and receives */
		wr32(E1000_VFRE, 0);
		wr32(E1000_VFTE, 0);
	}

	/* Allow time for pending master requests to run */
	hw->mac.ops.reset_hw(hw);
	wr32(E1000_WUC, 0);

	if (hw->mac.ops.init_hw(hw))
		dev_err(&pdev->dev, "Hardware Error\n");

	if (hw->mac.type == e1000_82580) {
		u32 reg = rd32(E1000_PCIEMISC);
		wr32(E1000_PCIEMISC,
				reg & ~E1000_PCIEMISC_LX_DECISION);
	}
	/* keep the link powered down while the interface is closed */
	if (!netif_running(adapter->netdev))
		igb_power_down_link(adapter);

	igb_update_mng_vlan(adapter);

	/* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
	wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);

	igb_get_phy_info(hw);
}
1366
/* net_device_ops callbacks this driver exposes to the networking core;
 * installed on the netdev in igb_probe() */
static const struct net_device_ops igb_netdev_ops = {
	.ndo_open               = igb_open,
	.ndo_stop               = igb_close,
	.ndo_start_xmit         = igb_xmit_frame_adv,
	.ndo_get_stats          = igb_get_stats,
	.ndo_set_rx_mode        = igb_set_rx_mode,
	.ndo_set_multicast_list = igb_set_rx_mode,
	.ndo_set_mac_address    = igb_set_mac,
	.ndo_change_mtu         = igb_change_mtu,
	.ndo_do_ioctl           = igb_ioctl,
	.ndo_tx_timeout         = igb_tx_timeout,
	.ndo_validate_addr      = eth_validate_addr,
	.ndo_vlan_rx_register   = igb_vlan_rx_register,
	.ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
	.ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
	/* SR-IOV VF management hooks */
	.ndo_set_vf_mac         = igb_ndo_set_vf_mac,
	.ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
	.ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
	.ndo_get_vf_config      = igb_ndo_get_vf_config,
#ifdef CONFIG_NET_POLL_CONTROLLER
	.ndo_poll_controller    = igb_netpoll,
#endif
};
1390
1391 /**
1392  * igb_probe - Device Initialization Routine
1393  * @pdev: PCI device information struct
1394  * @ent: entry in igb_pci_tbl
1395  *
1396  * Returns 0 on success, negative on failure
1397  *
1398  * igb_probe initializes an adapter identified by a pci_dev structure.
1399  * The OS initialization, configuring of the adapter private structure,
1400  * and a hardware reset occur.
1401  **/
1402 static int __devinit igb_probe(struct pci_dev *pdev,
1403                                const struct pci_device_id *ent)
1404 {
1405         struct net_device *netdev;
1406         struct igb_adapter *adapter;
1407         struct e1000_hw *hw;
1408         u16 eeprom_data = 0;
1409         static int global_quad_port_a; /* global quad port a indication */
1410         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1411         unsigned long mmio_start, mmio_len;
1412         int err, pci_using_dac;
1413         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1414         u32 part_num;
1415
1416         err = pci_enable_device_mem(pdev);
1417         if (err)
1418                 return err;
1419
1420         pci_using_dac = 0;
1421         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1422         if (!err) {
1423                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1424                 if (!err)
1425                         pci_using_dac = 1;
1426         } else {
1427                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1428                 if (err) {
1429                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1430                         if (err) {
1431                                 dev_err(&pdev->dev, "No usable DMA "
1432                                         "configuration, aborting\n");
1433                                 goto err_dma;
1434                         }
1435                 }
1436         }
1437
1438         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1439                                            IORESOURCE_MEM),
1440                                            igb_driver_name);
1441         if (err)
1442                 goto err_pci_reg;
1443
1444         pci_enable_pcie_error_reporting(pdev);
1445
1446         pci_set_master(pdev);
1447         pci_save_state(pdev);
1448
1449         err = -ENOMEM;
1450         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1451                                    IGB_ABS_MAX_TX_QUEUES);
1452         if (!netdev)
1453                 goto err_alloc_etherdev;
1454
1455         SET_NETDEV_DEV(netdev, &pdev->dev);
1456
1457         pci_set_drvdata(pdev, netdev);
1458         adapter = netdev_priv(netdev);
1459         adapter->netdev = netdev;
1460         adapter->pdev = pdev;
1461         hw = &adapter->hw;
1462         hw->back = adapter;
1463         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1464
1465         mmio_start = pci_resource_start(pdev, 0);
1466         mmio_len = pci_resource_len(pdev, 0);
1467
1468         err = -EIO;
1469         hw->hw_addr = ioremap(mmio_start, mmio_len);
1470         if (!hw->hw_addr)
1471                 goto err_ioremap;
1472
1473         netdev->netdev_ops = &igb_netdev_ops;
1474         igb_set_ethtool_ops(netdev);
1475         netdev->watchdog_timeo = 5 * HZ;
1476
1477         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1478
1479         netdev->mem_start = mmio_start;
1480         netdev->mem_end = mmio_start + mmio_len;
1481
1482         /* PCI config space info */
1483         hw->vendor_id = pdev->vendor;
1484         hw->device_id = pdev->device;
1485         hw->revision_id = pdev->revision;
1486         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1487         hw->subsystem_device_id = pdev->subsystem_device;
1488
1489         /* Copy the default MAC, PHY and NVM function pointers */
1490         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1491         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1492         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1493         /* Initialize skew-specific constants */
1494         err = ei->get_invariants(hw);
1495         if (err)
1496                 goto err_sw_init;
1497
1498         /* setup the private structure */
1499         err = igb_sw_init(adapter);
1500         if (err)
1501                 goto err_sw_init;
1502
1503         igb_get_bus_info_pcie(hw);
1504
1505         hw->phy.autoneg_wait_to_complete = false;
1506
1507         /* Copper options */
1508         if (hw->phy.media_type == e1000_media_type_copper) {
1509                 hw->phy.mdix = AUTO_ALL_MODES;
1510                 hw->phy.disable_polarity_correction = false;
1511                 hw->phy.ms_type = e1000_ms_hw_default;
1512         }
1513
1514         if (igb_check_reset_block(hw))
1515                 dev_info(&pdev->dev,
1516                         "PHY reset is blocked due to SOL/IDER session.\n");
1517
1518         netdev->features = NETIF_F_SG |
1519                            NETIF_F_IP_CSUM |
1520                            NETIF_F_HW_VLAN_TX |
1521                            NETIF_F_HW_VLAN_RX |
1522                            NETIF_F_HW_VLAN_FILTER;
1523
1524         netdev->features |= NETIF_F_IPV6_CSUM;
1525         netdev->features |= NETIF_F_TSO;
1526         netdev->features |= NETIF_F_TSO6;
1527         netdev->features |= NETIF_F_GRO;
1528
1529         netdev->vlan_features |= NETIF_F_TSO;
1530         netdev->vlan_features |= NETIF_F_TSO6;
1531         netdev->vlan_features |= NETIF_F_IP_CSUM;
1532         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1533         netdev->vlan_features |= NETIF_F_SG;
1534
1535         if (pci_using_dac)
1536                 netdev->features |= NETIF_F_HIGHDMA;
1537
1538         if (hw->mac.type >= e1000_82576)
1539                 netdev->features |= NETIF_F_SCTP_CSUM;
1540
1541         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1542
1543         /* before reading the NVM, reset the controller to put the device in a
1544          * known good starting state */
1545         hw->mac.ops.reset_hw(hw);
1546
1547         /* make sure the NVM is good */
1548         if (igb_validate_nvm_checksum(hw) < 0) {
1549                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1550                 err = -EIO;
1551                 goto err_eeprom;
1552         }
1553
1554         /* copy the MAC address out of the NVM */
1555         if (hw->mac.ops.read_mac_addr(hw))
1556                 dev_err(&pdev->dev, "NVM Read Error\n");
1557
1558         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1559         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1560
1561         if (!is_valid_ether_addr(netdev->perm_addr)) {
1562                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1563                 err = -EIO;
1564                 goto err_eeprom;
1565         }
1566
1567         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1568                     (unsigned long) adapter);
1569         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1570                     (unsigned long) adapter);
1571
1572         INIT_WORK(&adapter->reset_task, igb_reset_task);
1573         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1574
1575         /* Initialize link properties that are user-changeable */
1576         adapter->fc_autoneg = true;
1577         hw->mac.autoneg = true;
1578         hw->phy.autoneg_advertised = 0x2f;
1579
1580         hw->fc.requested_mode = e1000_fc_default;
1581         hw->fc.current_mode = e1000_fc_default;
1582
1583         igb_validate_mdi_setting(hw);
1584
1585         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1586          * enable the ACPI Magic Packet filter
1587          */
1588
1589         if (hw->bus.func == 0)
1590                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1591         else if (hw->mac.type == e1000_82580)
1592                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1593                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1594                                  &eeprom_data);
1595         else if (hw->bus.func == 1)
1596                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1597
1598         if (eeprom_data & eeprom_apme_mask)
1599                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1600
1601         /* now that we have the eeprom settings, apply the special cases where
1602          * the eeprom may be wrong or the board simply won't support wake on
1603          * lan on a particular port */
1604         switch (pdev->device) {
1605         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1606                 adapter->eeprom_wol = 0;
1607                 break;
1608         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1609         case E1000_DEV_ID_82576_FIBER:
1610         case E1000_DEV_ID_82576_SERDES:
1611                 /* Wake events only supported on port A for dual fiber
1612                  * regardless of eeprom setting */
1613                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1614                         adapter->eeprom_wol = 0;
1615                 break;
1616         case E1000_DEV_ID_82576_QUAD_COPPER:
1617                 /* if quad port adapter, disable WoL on all but port A */
1618                 if (global_quad_port_a != 0)
1619                         adapter->eeprom_wol = 0;
1620                 else
1621                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1622                 /* Reset for multiple quad port adapters */
1623                 if (++global_quad_port_a == 4)
1624                         global_quad_port_a = 0;
1625                 break;
1626         }
1627
1628         /* initialize the wol settings based on the eeprom settings */
1629         adapter->wol = adapter->eeprom_wol;
1630         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1631
1632         /* reset the hardware with the new settings */
1633         igb_reset(adapter);
1634
1635         /* let the f/w know that the h/w is now under the control of the
1636          * driver. */
1637         igb_get_hw_control(adapter);
1638
1639         strcpy(netdev->name, "eth%d");
1640         err = register_netdev(netdev);
1641         if (err)
1642                 goto err_register;
1643
1644         /* carrier off reporting is important to ethtool even BEFORE open */
1645         netif_carrier_off(netdev);
1646
1647 #ifdef CONFIG_IGB_DCA
1648         if (dca_add_requester(&pdev->dev) == 0) {
1649                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1650                 dev_info(&pdev->dev, "DCA enabled\n");
1651                 igb_setup_dca(adapter);
1652         }
1653
1654 #endif
1655         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1656         /* print bus type/speed/width info */
1657         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1658                  netdev->name,
1659                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1660                                                             "unknown"),
1661                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1662                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1663                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1664                    "unknown"),
1665                  netdev->dev_addr);
1666
1667         igb_read_part_num(hw, &part_num);
1668         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1669                 (part_num >> 8), (part_num & 0xff));
1670
1671         dev_info(&pdev->dev,
1672                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1673                 adapter->msix_entries ? "MSI-X" :
1674                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1675                 adapter->num_rx_queues, adapter->num_tx_queues);
1676
1677         return 0;
1678
1679 err_register:
1680         igb_release_hw_control(adapter);
1681 err_eeprom:
1682         if (!igb_check_reset_block(hw))
1683                 igb_reset_phy(hw);
1684
1685         if (hw->flash_address)
1686                 iounmap(hw->flash_address);
1687 err_sw_init:
1688         igb_clear_interrupt_scheme(adapter);
1689         iounmap(hw->hw_addr);
1690 err_ioremap:
1691         free_netdev(netdev);
1692 err_alloc_etherdev:
1693         pci_release_selected_regions(pdev,
1694                                      pci_select_bars(pdev, IORESOURCE_MEM));
1695 err_pci_reg:
1696 err_dma:
1697         pci_disable_device(pdev);
1698         return err;
1699 }
1700
/**
 * igb_remove - Device Removal Routine
 * @pdev: PCI device information struct
 *
 * igb_remove is called by the PCI subsystem to alert the driver
 * that it should release a PCI device.  This could be caused by a
 * Hot-Plug event, or because the driver is going to be removed from
 * memory.
 *
 * Teardown order matters here: timers and deferred work are stopped
 * first so nothing re-arms while resources are being torn down, then
 * firmware is given back control of the hardware, and only then are
 * the netdev, VF data and MMIO mappings released.
 **/
static void __devexit igb_remove(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;

	/* flush_scheduled work may reschedule our watchdog task, so
	 * explicitly disable watchdog tasks from being rescheduled  */
	set_bit(__IGB_DOWN, &adapter->state);
	del_timer_sync(&adapter->watchdog_timer);
	del_timer_sync(&adapter->phy_info_timer);

	flush_scheduled_work();

#ifdef CONFIG_IGB_DCA
	/* Undo the DCA registration done in igb_probe(). */
	if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
		dev_info(&pdev->dev, "DCA disabled\n");
		dca_remove_requester(&pdev->dev);
		adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
		wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
	}
#endif

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	unregister_netdev(netdev);

	/* Free MSI-X/MSI vectors and per-queue vector bookkeeping. */
	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PCI_IOV
	/* reclaim resources allocated to VFs */
	if (adapter->vf_data) {
		/* disable iov and allow time for transactions to clear */
		pci_disable_sriov(pdev);
		msleep(500);

		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
		wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
		msleep(100);
		dev_info(&pdev->dev, "IOV Disabled\n");
	}
#endif

	iounmap(hw->hw_addr);
	if (hw->flash_address)
		iounmap(hw->flash_address);
	pci_release_selected_regions(pdev,
	                             pci_select_bars(pdev, IORESOURCE_MEM));

	free_netdev(netdev);

	pci_disable_pcie_error_reporting(pdev);

	pci_disable_device(pdev);
}
1768
/**
 * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
 * @adapter: board private structure to initialize
 *
 * This function initializes the vf specific data storage and then attempts to
 * allocate the VFs.  The reason for ordering it this way is because it is much
 * more expensive time wise to disable SR-IOV than it is to allocate and free
 * the memory for the VFs.
 *
 * Note the unusual #ifdef layout: when CONFIG_PCI_IOV is disabled the
 * function body collapses to just "adapter->vfs_allocated_count = 0;",
 * which is also the shared failure path when SR-IOV is enabled.
 **/
static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
{
#ifdef CONFIG_PCI_IOV
	struct pci_dev *pdev = adapter->pdev;

	/* hardware supports at most 7 VFs alongside the PF */
	if (adapter->vfs_allocated_count > 7)
		adapter->vfs_allocated_count = 7;

	if (adapter->vfs_allocated_count) {
		adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
		                           sizeof(struct vf_data_storage),
		                           GFP_KERNEL);
		/* if allocation failed then we do not support SR-IOV */
		if (!adapter->vf_data) {
			adapter->vfs_allocated_count = 0;
			dev_err(&pdev->dev, "Unable to allocate memory for VF "
			        "Data Storage\n");
		}
	}

	if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
		kfree(adapter->vf_data);
		adapter->vf_data = NULL;
#endif /* CONFIG_PCI_IOV */
		adapter->vfs_allocated_count = 0;
#ifdef CONFIG_PCI_IOV
	} else {
		unsigned char mac_addr[ETH_ALEN];
		int i;
		dev_info(&pdev->dev, "%d vfs allocated\n",
		         adapter->vfs_allocated_count);
		/* seed each VF with a random MAC; admins may override later */
		for (i = 0; i < adapter->vfs_allocated_count; i++) {
			random_ether_addr(mac_addr);
			igb_set_vf_mac(adapter, i, mac_addr);
		}
	}
#endif /* CONFIG_PCI_IOV */
}
1816
1817
/**
 * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
 * @adapter: board private structure to initialize
 *
 * igb_init_hw_timer initializes the function pointer and values for the hw
 * timer found in hardware.  Per-MAC differences in the SYSTIM register
 * layout require different cyclecounter shift/mult setups.
 **/
static void igb_init_hw_timer(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;

	switch (hw->mac.type) {
	case e1000_82580:
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/*
		 * The 82580 timesync updates the system timer every 8ns by 8ns
		 * and the value cannot be shifted.  Instead we need to shift
		 * the registers to generate a 64bit timer value.  As a result
		 * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
		 * 24 in order to generate a larger value for synchronization.
		 */
		adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
		/* disable system timer temporarily by setting bit 31 */
		wr32(E1000_TSAUXC, 0x80000000);
		wrfl();

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIMR, 0x00000000);
		wr32(E1000_SYSTIML, 0x80000000);
		wr32(E1000_SYSTIMH, 0x000000FF);
		wrfl();

		/* enable system timer by clearing bit 31 */
		wr32(E1000_TSAUXC, 0x0);
		wrfl();

		/* anchor the timecounter to current wall-clock time */
		timecounter_init(&adapter->clock,
		                 &adapter->cycles,
		                 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82576:
		/*
		 * Initialize hardware timer: we keep it running just in case
		 * that some program needs it later on.
		 */
		memset(&adapter->cycles, 0, sizeof(adapter->cycles));
		adapter->cycles.read = igb_read_clock;
		adapter->cycles.mask = CLOCKSOURCE_MASK(64);
		adapter->cycles.mult = 1;
		/**
		 * Scale the NIC clock cycle by a large factor so that
		 * relatively small clock corrections can be added or
		 * substracted at each clock tick. The drawbacks of a large
		 * factor are a) that the clock register overflows more quickly
		 * (not such a big deal) and b) that the increment per tick has
		 * to fit into 24 bits.  As a result we need to use a shift of
		 * 19 so we can fit a value of 16 into the TIMINCA register.
		 */
		adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
		wr32(E1000_TIMINCA,
		                (1 << E1000_TIMINCA_16NS_SHIFT) |
		                (16 << IGB_82576_TSYNC_SHIFT));

		/* Set registers so that rollover occurs soon to test this. */
		wr32(E1000_SYSTIML, 0x00000000);
		wr32(E1000_SYSTIMH, 0xFF800000);
		wrfl();

		/* anchor the timecounter to current wall-clock time */
		timecounter_init(&adapter->clock,
		                 &adapter->cycles,
		                 ktime_to_ns(ktime_get_real()));
		/*
		 * Synchronize our NIC clock against system wall clock. NIC
		 * time stamp reading requires ~3us per sample, each sample
		 * was pretty stable even under load => only require 10
		 * samples for each offset comparison.
		 */
		memset(&adapter->compare, 0, sizeof(adapter->compare));
		adapter->compare.source = &adapter->clock;
		adapter->compare.target = ktime_get_real;
		adapter->compare.num_samples = 10;
		timecompare_update(&adapter->compare, 0);
		break;
	case e1000_82575:
		/* 82575 does not support timesync */
		/* fall through */
	default:
		break;
	}

}
1922
/**
 * igb_sw_init - Initialize general software structures (struct igb_adapter)
 * @adapter: board private structure to initialize
 *
 * igb_sw_init initializes the Adapter private data structure.
 * Fields are initialized based on PCI device information and
 * OS network device settings (MTU size).
 *
 * Returns 0 on success, -ENOMEM if the interrupt scheme (queues and
 * vectors) could not be allocated.
 **/
static int __devinit igb_sw_init(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	struct pci_dev *pdev = adapter->pdev;

	/* cache the PCI command word for later bus-mastering checks */
	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);

	/* ring sizes and interrupt throttling defaults */
	adapter->tx_ring_count = IGB_DEFAULT_TXD;
	adapter->rx_ring_count = IGB_DEFAULT_RXD;
	adapter->rx_itr_setting = IGB_DEFAULT_ITR;
	adapter->tx_itr_setting = IGB_DEFAULT_ITR;

	adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;

#ifdef CONFIG_PCI_IOV
	/* only the 82576 supports SR-IOV; max_vfs is the module parameter */
	if (hw->mac.type == e1000_82576)
		adapter->vfs_allocated_count = max_vfs;

#endif /* CONFIG_PCI_IOV */
	adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());

	/*
	 * if rss_queues > 4 or vfs are going to be allocated with rss_queues
	 * then we should combine the queues into a queue pair in order to
	 * conserve interrupts due to limited supply
	 */
	if ((adapter->rss_queues > 4) ||
	    ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
		adapter->flags |= IGB_FLAG_QUEUE_PAIRS;

	/* This call may decrease the number of queues */
	if (igb_init_interrupt_scheme(adapter)) {
		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
		return -ENOMEM;
	}

	igb_init_hw_timer(adapter);
	igb_probe_vfs(adapter);

	/* Explicitly disable IRQ since the NIC can be in any state. */
	igb_irq_disable(adapter);

	set_bit(__IGB_DOWN, &adapter->state);
	return 0;
}
1978
/**
 * igb_open - Called when a network interface is made active
 * @netdev: network interface device structure
 *
 * Returns 0 on success, negative value on failure
 *
 * The open entry point is called when a network interface is made
 * active by the system (IFF_UP).  At this point all resources needed
 * for transmit and receive operations are allocated, the interrupt
 * handler is registered with the OS, the watchdog timer is started,
 * and the stack is notified that the interface is ready.
 *
 * Failures unwind via the goto labels at the bottom in reverse order
 * of acquisition (irq -> rx rings -> tx rings).
 **/
static int igb_open(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int err;
	int i;

	/* disallow open during test */
	if (test_bit(__IGB_TESTING, &adapter->state))
		return -EBUSY;

	netif_carrier_off(netdev);

	/* allocate transmit descriptors */
	err = igb_setup_all_tx_resources(adapter);
	if (err)
		goto err_setup_tx;

	/* allocate receive descriptors */
	err = igb_setup_all_rx_resources(adapter);
	if (err)
		goto err_setup_rx;

	igb_power_up_link(adapter);

	/* before we allocate an interrupt, we must be ready to handle it.
	 * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
	 * as soon as we call pci_request_irq, so we have to setup our
	 * clean_rx handler before we do so.  */
	igb_configure(adapter);

	err = igb_request_irq(adapter);
	if (err)
		goto err_req_irq;

	/* From here on the code is the same as igb_up() */
	clear_bit(__IGB_DOWN, &adapter->state);

	/* enable NAPI polling on every queue vector */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		napi_enable(&q_vector->napi);
	}

	/* Clear any pending interrupts. */
	rd32(E1000_ICR);

	igb_irq_enable(adapter);

	/* notify VFs that reset has been completed */
	if (adapter->vfs_allocated_count) {
		u32 reg_data = rd32(E1000_CTRL_EXT);
		reg_data |= E1000_CTRL_EXT_PFRSTD;
		wr32(E1000_CTRL_EXT, reg_data);
	}

	netif_tx_start_all_queues(netdev);

	/* start the watchdog. */
	hw->mac.get_link_status = 1;
	schedule_work(&adapter->watchdog_task);

	return 0;

err_req_irq:
	igb_release_hw_control(adapter);
	igb_power_down_link(adapter);
	igb_free_all_rx_resources(adapter);
err_setup_rx:
	igb_free_all_tx_resources(adapter);
err_setup_tx:
	igb_reset(adapter);

	return err;
}
2065
/**
 * igb_close - Disables a network interface
 * @netdev: network interface device structure
 *
 * Returns 0, this is not allowed to fail
 *
 * The close entry point is called when an interface is de-activated
 * by the OS.  The hardware is still under the driver's control, but
 * needs to be disabled.  A global MAC reset is issued to stop the
 * hardware, and all transmit and receive resources are freed.
 **/
static int igb_close(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);

	/* the stack must never close us mid-reset */
	WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
	igb_down(adapter);

	/* release vectors only after the hardware is quiesced */
	igb_free_irq(adapter);

	igb_free_all_tx_resources(adapter);
	igb_free_all_rx_resources(adapter);

	return 0;
}
2091
2092 /**
2093  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2094  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2095  *
2096  * Return 0 on success, negative on failure
2097  **/
2098 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2099 {
2100         struct pci_dev *pdev = tx_ring->pdev;
2101         int size;
2102
2103         size = sizeof(struct igb_buffer) * tx_ring->count;
2104         tx_ring->buffer_info = vmalloc(size);
2105         if (!tx_ring->buffer_info)
2106                 goto err;
2107         memset(tx_ring->buffer_info, 0, size);
2108
2109         /* round up to nearest 4K */
2110         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2111         tx_ring->size = ALIGN(tx_ring->size, 4096);
2112
2113         tx_ring->desc = pci_alloc_consistent(pdev,
2114                                              tx_ring->size,
2115                                              &tx_ring->dma);
2116
2117         if (!tx_ring->desc)
2118                 goto err;
2119
2120         tx_ring->next_to_use = 0;
2121         tx_ring->next_to_clean = 0;
2122         return 0;
2123
2124 err:
2125         vfree(tx_ring->buffer_info);
2126         dev_err(&pdev->dev,
2127                 "Unable to allocate memory for the transmit descriptor ring\n");
2128         return -ENOMEM;
2129 }
2130
2131 /**
2132  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2133  *                                (Descriptors) for all queues
2134  * @adapter: board private structure
2135  *
2136  * Return 0 on success, negative on failure
2137  **/
2138 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2139 {
2140         struct pci_dev *pdev = adapter->pdev;
2141         int i, err = 0;
2142
2143         for (i = 0; i < adapter->num_tx_queues; i++) {
2144                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2145                 if (err) {
2146                         dev_err(&pdev->dev,
2147                                 "Allocation for Tx Queue %u failed\n", i);
2148                         for (i--; i >= 0; i--)
2149                                 igb_free_tx_resources(adapter->tx_ring[i]);
2150                         break;
2151                 }
2152         }
2153
2154         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2155                 int r_idx = i % adapter->num_tx_queues;
2156                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2157         }
2158         return err;
2159 }
2160
/**
 * igb_setup_tctl - configure the transmit control registers
 * @adapter: Board private structure
 *
 * Programs TCTL: pad short packets, retransmit on late collision, and
 * set the collision threshold, then enables the transmitter.
 **/
void igb_setup_tctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 tctl;

	/* disable queue 0 which is enabled by default on 82575 and 82576 */
	wr32(E1000_TXDCTL(0), 0);

	/* Program the Transmit Control Register */
	tctl = rd32(E1000_TCTL);
	tctl &= ~E1000_TCTL_CT;
	tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
		(E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);

	igb_config_collision_dist(hw);

	/* Enable transmits */
	tctl |= E1000_TCTL_EN;

	wr32(E1000_TCTL, tctl);
}
2186
/**
 * igb_configure_tx_ring - Configure transmit ring after Reset
 * @adapter: board private structure
 * @ring: tx ring to configure
 *
 * Configure a transmit ring after a reset.  The queue is disabled
 * first and given time to drain before the base address, length and
 * head/tail pointers are programmed, then it is re-enabled with the
 * prefetch/host/writeback thresholds.
 **/
void igb_configure_tx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 txdctl;
	u64 tdba = ring->dma;
	int reg_idx = ring->reg_idx;

	/* disable the queue */
	txdctl = rd32(E1000_TXDCTL(reg_idx));
	wr32(E1000_TXDCTL(reg_idx),
	                txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
	wrfl();
	/* NOTE(review): 10ms busy-wait to let in-flight DMA settle before
	 * re-programming the ring registers — confirm against datasheet */
	mdelay(10);

	/* program descriptor ring length and 64-bit DMA base address */
	wr32(E1000_TDLEN(reg_idx),
	                ring->count * sizeof(union e1000_adv_tx_desc));
	wr32(E1000_TDBAL(reg_idx),
	                tdba & 0x00000000ffffffffULL);
	wr32(E1000_TDBAH(reg_idx), tdba >> 32);

	/* cache head/tail register addresses and reset both to zero */
	ring->head = hw->hw_addr + E1000_TDH(reg_idx);
	ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
	writel(0, ring->head);
	writel(0, ring->tail);

	/* prefetch, host and write-back thresholds */
	txdctl |= IGB_TX_PTHRESH;
	txdctl |= IGB_TX_HTHRESH << 8;
	txdctl |= IGB_TX_WTHRESH << 16;

	txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
	wr32(E1000_TXDCTL(reg_idx), txdctl);
}
2227
2228 /**
2229  * igb_configure_tx - Configure transmit Unit after Reset
2230  * @adapter: board private structure
2231  *
2232  * Configure the Tx unit of the MAC after a reset.
2233  **/
2234 static void igb_configure_tx(struct igb_adapter *adapter)
2235 {
2236         int i;
2237
2238         for (i = 0; i < adapter->num_tx_queues; i++)
2239                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2240 }
2241
2242 /**
2243  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2244  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2245  *
2246  * Returns 0 on success, negative on failure
2247  **/
2248 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2249 {
2250         struct pci_dev *pdev = rx_ring->pdev;
2251         int size, desc_len;
2252
2253         size = sizeof(struct igb_buffer) * rx_ring->count;
2254         rx_ring->buffer_info = vmalloc(size);
2255         if (!rx_ring->buffer_info)
2256                 goto err;
2257         memset(rx_ring->buffer_info, 0, size);
2258
2259         desc_len = sizeof(union e1000_adv_rx_desc);
2260
2261         /* Round up to nearest 4K */
2262         rx_ring->size = rx_ring->count * desc_len;
2263         rx_ring->size = ALIGN(rx_ring->size, 4096);
2264
2265         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2266                                              &rx_ring->dma);
2267
2268         if (!rx_ring->desc)
2269                 goto err;
2270
2271         rx_ring->next_to_clean = 0;
2272         rx_ring->next_to_use = 0;
2273
2274         return 0;
2275
2276 err:
2277         vfree(rx_ring->buffer_info);
2278         rx_ring->buffer_info = NULL;
2279         dev_err(&pdev->dev, "Unable to allocate memory for "
2280                 "the receive descriptor ring\n");
2281         return -ENOMEM;
2282 }
2283
2284 /**
2285  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2286  *                                (Descriptors) for all queues
2287  * @adapter: board private structure
2288  *
2289  * Return 0 on success, negative on failure
2290  **/
2291 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2292 {
2293         struct pci_dev *pdev = adapter->pdev;
2294         int i, err = 0;
2295
2296         for (i = 0; i < adapter->num_rx_queues; i++) {
2297                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2298                 if (err) {
2299                         dev_err(&pdev->dev,
2300                                 "Allocation for Rx Queue %u failed\n", i);
2301                         for (i--; i >= 0; i--)
2302                                 igb_free_rx_resources(adapter->rx_ring[i]);
2303                         break;
2304                 }
2305         }
2306
2307         return err;
2308 }
2309
/**
 * igb_setup_mrqc - configure the multiple receive queue control registers
 * @adapter: Board private structure
 *
 * Programs the RSS key and redirection table, enables RSS hash
 * placement in the Rx descriptor, and selects the RSS/VMDq queueing
 * mode in MRQC depending on whether VFs are in use.
 **/
static void igb_setup_mrqc(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 mrqc, rxcsum;
	u32 j, num_rx_queues, shift = 0, shift2 = 0;
	union e1000_reta {
		u32 dword;
		u8  bytes[4];
	} reta;
	/* fixed 40-byte RSS hash key */
	static const u8 rsshash[40] = {
		0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
		0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
		0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
		0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };

	/* Fill out hash function seeds (RSSRK is 10 little-endian dwords) */
	for (j = 0; j < 10; j++) {
		u32 rsskey = rsshash[(j * 4)];
		rsskey |= rsshash[(j * 4) + 1] << 8;
		rsskey |= rsshash[(j * 4) + 2] << 16;
		rsskey |= rsshash[(j * 4) + 3] << 24;
		array_wr32(E1000_RSSRK(0), j, rsskey);
	}

	num_rx_queues = adapter->rss_queues;

	if (adapter->vfs_allocated_count) {
		/* 82575 and 82576 supports 2 RSS queues for VMDq */
		switch (hw->mac.type) {
		case e1000_82580:
			num_rx_queues = 1;
			shift = 0;
			break;
		case e1000_82576:
			shift = 3;
			num_rx_queues = 2;
			break;
		case e1000_82575:
			shift = 2;
			shift2 = 6;
			/* fall through */
		default:
			break;
		}
	} else {
		if (hw->mac.type == e1000_82575)
			shift = 6;
	}

	/* Program the 128-entry redirection table, 4 bytes per RETA dword */
	for (j = 0; j < (32 * 4); j++) {
		reta.bytes[j & 3] = (j % num_rx_queues) << shift;
		if (shift2)
			reta.bytes[j & 3] |= num_rx_queues << shift2;
		if ((j & 3) == 3)
			wr32(E1000_RETA(j >> 2), reta.dword);
	}

	/*
	 * Disable raw packet checksumming so that RSS hash is placed in
	 * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
	 * offloads as they are enabled by default
	 */
	rxcsum = rd32(E1000_RXCSUM);
	rxcsum |= E1000_RXCSUM_PCSD;

	if (adapter->hw.mac.type >= e1000_82576)
		/* Enable Receive Checksum Offload for SCTP */
		rxcsum |= E1000_RXCSUM_CRCOFL;

	/* Don't need to set TUOFL or IPOFL, they default to 1 */
	wr32(E1000_RXCSUM, rxcsum);

	/* If VMDq is enabled then we set the appropriate mode for that, else
	 * we default to RSS so that an RSS hash is calculated per packet even
	 * if we are only using one queue */
	if (adapter->vfs_allocated_count) {
		if (hw->mac.type > e1000_82575) {
			/* Set the default pool for the PF's first queue */
			u32 vtctl = rd32(E1000_VT_CTL);
			vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
				   E1000_VT_CTL_DISABLE_DEF_POOL);
			vtctl |= adapter->vfs_allocated_count <<
				E1000_VT_CTL_DEFAULT_POOL_SHIFT;
			wr32(E1000_VT_CTL, vtctl);
		}
		if (adapter->rss_queues > 1)
			mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
		else
			mrqc = E1000_MRQC_ENABLE_VMDQ;
	} else {
		mrqc = E1000_MRQC_ENABLE_RSS_4Q;
	}
	igb_vmm_control(adapter);

	/* hash on IPv4/IPv6 addresses plus TCP and UDP port fields */
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
		 E1000_MRQC_RSS_FIELD_IPV4_TCP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
		 E1000_MRQC_RSS_FIELD_IPV6_TCP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
		 E1000_MRQC_RSS_FIELD_IPV6_UDP);
	mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
		 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);

	wr32(E1000_MRQC, mrqc);
}
2418
/**
 * igb_setup_rctl - configure the receive control registers
 * @adapter: Board private structure
 *
 * Builds the driver's baseline receive policy in a local copy of RCTL,
 * disables Rx queue 0 until the ring setup code reconfigures it, and then
 * commits the new RCTL value to hardware in a single write.
 **/
void igb_setup_rctl(struct igb_adapter *adapter)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 rctl;

	rctl = rd32(E1000_RCTL);

	/* clear the multicast-offset field and any loopback-mode bits */
	rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
	rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);

	/* enable the receiver, accept broadcasts, use the half-ring Rx
	 * descriptor minimum threshold, and apply the multicast filter
	 * offset chosen by the shared MAC code */
	rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
		(hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);

	/*
	 * enable stripping of CRC. It's unlikely this will break BMC
	 * redirection as it did with e1000. Newer features require
	 * that the HW strips the CRC.
	 */
	rctl |= E1000_RCTL_SECRC;

	/* disable store bad packets and clear size bits. */
	rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);

	/* enable LPE to prevent packets larger than max_frame_size */
	rctl |= E1000_RCTL_LPE;

	/* disable queue 0 to prevent tail write w/o re-config */
	wr32(E1000_RXDCTL(0), 0);

	/* Attention!!!  For SR-IOV PF driver operations you must enable
	 * queue drop for all VF and PF queues to prevent head of line blocking
	 * if an un-trusted VF does not provide descriptors to hardware.
	 */
	if (adapter->vfs_allocated_count) {
		/* set all queue drop enable bits */
		wr32(E1000_QDE, ALL_QUEUES);
	}

	wr32(E1000_RCTL, rctl);
}
2463
2464 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2465                                    int vfn)
2466 {
2467         struct e1000_hw *hw = &adapter->hw;
2468         u32 vmolr;
2469
2470         /* if it isn't the PF check to see if VFs are enabled and
2471          * increase the size to support vlan tags */
2472         if (vfn < adapter->vfs_allocated_count &&
2473             adapter->vf_data[vfn].vlans_enabled)
2474                 size += VLAN_TAG_SIZE;
2475
2476         vmolr = rd32(E1000_VMOLR(vfn));
2477         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2478         vmolr |= size | E1000_VMOLR_LPE;
2479         wr32(E1000_VMOLR(vfn), vmolr);
2480
2481         return 0;
2482 }
2483
2484 /**
2485  * igb_rlpml_set - set maximum receive packet size
2486  * @adapter: board private structure
2487  *
2488  * Configure maximum receivable packet size.
2489  **/
2490 static void igb_rlpml_set(struct igb_adapter *adapter)
2491 {
2492         u32 max_frame_size = adapter->max_frame_size;
2493         struct e1000_hw *hw = &adapter->hw;
2494         u16 pf_id = adapter->vfs_allocated_count;
2495
2496         if (adapter->vlgrp)
2497                 max_frame_size += VLAN_TAG_SIZE;
2498
2499         /* if vfs are enabled we set RLPML to the largest possible request
2500          * size and set the VMOLR RLPML to the size we need */
2501         if (pf_id) {
2502                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2503                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2504         }
2505
2506         wr32(E1000_RLPML, max_frame_size);
2507 }
2508
/**
 * igb_set_vmolr - program per-pool offload/filtering options (VMOLR)
 * @adapter: board private structure
 * @vfn: pool/VF index whose VMOLR register is updated
 * @aupe: true to accept untagged packets on this pool
 **/
static inline void igb_set_vmolr(struct igb_adapter *adapter,
                                 int vfn, bool aupe)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 vmolr;

	/*
	 * This register exists only on 82576 and newer so if we are older than
	 * that we should exit and do nothing
	 */
	if (hw->mac.type < e1000_82576)
		return;

	vmolr = rd32(E1000_VMOLR(vfn));
	vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
	if (aupe)
		vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
	else
		vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */

	/* clear all bits that might not be set */
	vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);

	/* RSS is only enabled on the PF pool when multiple queues exist */
	if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
	/*
	 * for VMDq only allow the VFs and pool 0 to accept broadcast and
	 * multicast packets
	 */
	if (vfn <= adapter->vfs_allocated_count)
		vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */

	wr32(E1000_VMOLR(vfn), vmolr);
}
2543
/**
 * igb_configure_rx_ring - Configure a receive ring after Reset
 * @adapter: board private structure
 * @ring: receive ring to be configured
 *
 * Configure the Rx unit of the MAC after a reset: the queue is first
 * disabled, its DMA base/length and head/tail registers are programmed,
 * the buffer/descriptor layout (SRRCTL) is selected based on the buffer
 * size, and the queue is finally re-enabled with prefetch thresholds.
 **/
void igb_configure_rx_ring(struct igb_adapter *adapter,
                           struct igb_ring *ring)
{
	struct e1000_hw *hw = &adapter->hw;
	u64 rdba = ring->dma;
	int reg_idx = ring->reg_idx;
	u32 srrctl, rxdctl;

	/* disable the queue */
	rxdctl = rd32(E1000_RXDCTL(reg_idx));
	wr32(E1000_RXDCTL(reg_idx),
	                rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);

	/* Set DMA base address registers */
	wr32(E1000_RDBAL(reg_idx),
	     rdba & 0x00000000ffffffffULL);
	wr32(E1000_RDBAH(reg_idx), rdba >> 32);
	wr32(E1000_RDLEN(reg_idx),
	               ring->count * sizeof(union e1000_adv_rx_desc));

	/* initialize head and tail */
	ring->head = hw->hw_addr + E1000_RDH(reg_idx);
	ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
	writel(0, ring->head);
	writel(0, ring->tail);

	/* set descriptor configuration: small buffers use header split
	 * with a half-page (capped at 16K) packet buffer, larger buffers
	 * use a single advanced one-buffer descriptor */
	if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
		srrctl = ALIGN(ring->rx_buffer_len, 64) <<
		         E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
#if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
		srrctl |= IGB_RXBUFFER_16384 >>
		          E1000_SRRCTL_BSIZEPKT_SHIFT;
#else
		srrctl |= (PAGE_SIZE / 2) >>
		          E1000_SRRCTL_BSIZEPKT_SHIFT;
#endif
		srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
	} else {
		srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
		         E1000_SRRCTL_BSIZEPKT_SHIFT;
		srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
	}
	/* Only set Drop Enable if we are supporting multiple queues */
	if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
		srrctl |= E1000_SRRCTL_DROP_EN;

	wr32(E1000_SRRCTL(reg_idx), srrctl);

	/* set filtering for VMDQ pools */
	igb_set_vmolr(adapter, reg_idx & 0x7, true);

	/* enable receive descriptor fetching with PTHRESH/HTHRESH/WTHRESH
	 * prefetch and writeback thresholds */
	rxdctl = rd32(E1000_RXDCTL(reg_idx));
	rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
	rxdctl &= 0xFFF00000;
	rxdctl |= IGB_RX_PTHRESH;
	rxdctl |= IGB_RX_HTHRESH << 8;
	rxdctl |= IGB_RX_WTHRESH << 16;
	wr32(E1000_RXDCTL(reg_idx), rxdctl);
}
2612
2613 /**
2614  * igb_configure_rx - Configure receive Unit after Reset
2615  * @adapter: board private structure
2616  *
2617  * Configure the Rx unit of the MAC after a reset.
2618  **/
2619 static void igb_configure_rx(struct igb_adapter *adapter)
2620 {
2621         int i;
2622
2623         /* set UTA to appropriate mode */
2624         igb_set_uta(adapter);
2625
2626         /* set the correct pool for the PF default MAC address in entry 0 */
2627         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2628                          adapter->vfs_allocated_count);
2629
2630         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2631          * the Base and Length of the Rx Descriptor Ring */
2632         for (i = 0; i < adapter->num_rx_queues; i++)
2633                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2634 }
2635
2636 /**
2637  * igb_free_tx_resources - Free Tx Resources per Queue
2638  * @tx_ring: Tx descriptor ring for a specific queue
2639  *
2640  * Free all transmit software resources
2641  **/
2642 void igb_free_tx_resources(struct igb_ring *tx_ring)
2643 {
2644         igb_clean_tx_ring(tx_ring);
2645
2646         vfree(tx_ring->buffer_info);
2647         tx_ring->buffer_info = NULL;
2648
2649         /* if not set, then don't free */
2650         if (!tx_ring->desc)
2651                 return;
2652
2653         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2654                             tx_ring->desc, tx_ring->dma);
2655
2656         tx_ring->desc = NULL;
2657 }
2658
2659 /**
2660  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2661  * @adapter: board private structure
2662  *
2663  * Free all transmit software resources
2664  **/
2665 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2666 {
2667         int i;
2668
2669         for (i = 0; i < adapter->num_tx_queues; i++)
2670                 igb_free_tx_resources(adapter->tx_ring[i]);
2671 }
2672
2673 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2674                                     struct igb_buffer *buffer_info)
2675 {
2676         if (buffer_info->dma) {
2677                 if (buffer_info->mapped_as_page)
2678                         pci_unmap_page(tx_ring->pdev,
2679                                         buffer_info->dma,
2680                                         buffer_info->length,
2681                                         PCI_DMA_TODEVICE);
2682                 else
2683                         pci_unmap_single(tx_ring->pdev,
2684                                         buffer_info->dma,
2685                                         buffer_info->length,
2686                                         PCI_DMA_TODEVICE);
2687                 buffer_info->dma = 0;
2688         }
2689         if (buffer_info->skb) {
2690                 dev_kfree_skb_any(buffer_info->skb);
2691                 buffer_info->skb = NULL;
2692         }
2693         buffer_info->time_stamp = 0;
2694         buffer_info->length = 0;
2695         buffer_info->next_to_watch = 0;
2696         buffer_info->mapped_as_page = false;
2697 }
2698
2699 /**
2700  * igb_clean_tx_ring - Free Tx Buffers
2701  * @tx_ring: ring to be cleaned
2702  **/
2703 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2704 {
2705         struct igb_buffer *buffer_info;
2706         unsigned long size;
2707         unsigned int i;
2708
2709         if (!tx_ring->buffer_info)
2710                 return;
2711         /* Free all the Tx ring sk_buffs */
2712
2713         for (i = 0; i < tx_ring->count; i++) {
2714                 buffer_info = &tx_ring->buffer_info[i];
2715                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2716         }
2717
2718         size = sizeof(struct igb_buffer) * tx_ring->count;
2719         memset(tx_ring->buffer_info, 0, size);
2720
2721         /* Zero out the descriptor ring */
2722         memset(tx_ring->desc, 0, tx_ring->size);
2723
2724         tx_ring->next_to_use = 0;
2725         tx_ring->next_to_clean = 0;
2726 }
2727
2728 /**
2729  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2730  * @adapter: board private structure
2731  **/
2732 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2733 {
2734         int i;
2735
2736         for (i = 0; i < adapter->num_tx_queues; i++)
2737                 igb_clean_tx_ring(adapter->tx_ring[i]);
2738 }
2739
2740 /**
2741  * igb_free_rx_resources - Free Rx Resources
2742  * @rx_ring: ring to clean the resources from
2743  *
2744  * Free all receive software resources
2745  **/
2746 void igb_free_rx_resources(struct igb_ring *rx_ring)
2747 {
2748         igb_clean_rx_ring(rx_ring);
2749
2750         vfree(rx_ring->buffer_info);
2751         rx_ring->buffer_info = NULL;
2752
2753         /* if not set, then don't free */
2754         if (!rx_ring->desc)
2755                 return;
2756
2757         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2758                             rx_ring->desc, rx_ring->dma);
2759
2760         rx_ring->desc = NULL;
2761 }
2762
2763 /**
2764  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2765  * @adapter: board private structure
2766  *
2767  * Free all receive software resources
2768  **/
2769 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2770 {
2771         int i;
2772
2773         for (i = 0; i < adapter->num_rx_queues; i++)
2774                 igb_free_rx_resources(adapter->rx_ring[i]);
2775 }
2776
2777 /**
2778  * igb_clean_rx_ring - Free Rx Buffers per Queue
2779  * @rx_ring: ring to free buffers from
2780  **/
2781 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2782 {
2783         struct igb_buffer *buffer_info;
2784         unsigned long size;
2785         unsigned int i;
2786
2787         if (!rx_ring->buffer_info)
2788                 return;
2789
2790         /* Free all the Rx ring sk_buffs */
2791         for (i = 0; i < rx_ring->count; i++) {
2792                 buffer_info = &rx_ring->buffer_info[i];
2793                 if (buffer_info->dma) {
2794                         pci_unmap_single(rx_ring->pdev,
2795                                          buffer_info->dma,
2796                                          rx_ring->rx_buffer_len,
2797                                          PCI_DMA_FROMDEVICE);
2798                         buffer_info->dma = 0;
2799                 }
2800
2801                 if (buffer_info->skb) {
2802                         dev_kfree_skb(buffer_info->skb);
2803                         buffer_info->skb = NULL;
2804                 }
2805                 if (buffer_info->page_dma) {
2806                         pci_unmap_page(rx_ring->pdev,
2807                                        buffer_info->page_dma,
2808                                        PAGE_SIZE / 2,
2809                                        PCI_DMA_FROMDEVICE);
2810                         buffer_info->page_dma = 0;
2811                 }
2812                 if (buffer_info->page) {
2813                         put_page(buffer_info->page);
2814                         buffer_info->page = NULL;
2815                         buffer_info->page_offset = 0;
2816                 }
2817         }
2818
2819         size = sizeof(struct igb_buffer) * rx_ring->count;
2820         memset(rx_ring->buffer_info, 0, size);
2821
2822         /* Zero out the descriptor ring */
2823         memset(rx_ring->desc, 0, rx_ring->size);
2824
2825         rx_ring->next_to_clean = 0;
2826         rx_ring->next_to_use = 0;
2827 }
2828
2829 /**
2830  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2831  * @adapter: board private structure
2832  **/
2833 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2834 {
2835         int i;
2836
2837         for (i = 0; i < adapter->num_rx_queues; i++)
2838                 igb_clean_rx_ring(adapter->rx_ring[i]);
2839 }
2840
2841 /**
2842  * igb_set_mac - Change the Ethernet Address of the NIC
2843  * @netdev: network interface device structure
2844  * @p: pointer to an address structure
2845  *
2846  * Returns 0 on success, negative on failure
2847  **/
2848 static int igb_set_mac(struct net_device *netdev, void *p)
2849 {
2850         struct igb_adapter *adapter = netdev_priv(netdev);
2851         struct e1000_hw *hw = &adapter->hw;
2852         struct sockaddr *addr = p;
2853
2854         if (!is_valid_ether_addr(addr->sa_data))
2855                 return -EADDRNOTAVAIL;
2856
2857         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2858         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2859
2860         /* set the correct pool for the new PF MAC address in entry 0 */
2861         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2862                          adapter->vfs_allocated_count);
2863
2864         return 0;
2865 }
2866
2867 /**
2868  * igb_write_mc_addr_list - write multicast addresses to MTA
2869  * @netdev: network interface device structure
2870  *
2871  * Writes multicast address list to the MTA hash table.
2872  * Returns: -ENOMEM on failure
2873  *                0 on no addresses written
2874  *                X on writing X addresses to MTA
2875  **/
2876 static int igb_write_mc_addr_list(struct net_device *netdev)
2877 {
2878         struct igb_adapter *adapter = netdev_priv(netdev);
2879         struct e1000_hw *hw = &adapter->hw;
2880         struct dev_mc_list *mc_ptr;
2881         u8  *mta_list;
2882         int i;
2883
2884         if (netdev_mc_empty(netdev)) {
2885                 /* nothing to program, so clear mc list */
2886                 igb_update_mc_addr_list(hw, NULL, 0);
2887                 igb_restore_vf_multicasts(adapter);
2888                 return 0;
2889         }
2890
2891         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2892         if (!mta_list)
2893                 return -ENOMEM;
2894
2895         /* The shared function expects a packed array of only addresses. */
2896         i = 0;
2897         netdev_for_each_mc_addr(mc_ptr, netdev)
2898                 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2899
2900         igb_update_mc_addr_list(hw, mta_list, i);
2901         kfree(mta_list);
2902
2903         return netdev_mc_count(netdev);
2904 }
2905
/**
 * igb_write_uc_addr_list - write unicast addresses to RAR table
 * @netdev: network interface device structure
 *
 * Writes unicast address list to the RAR table.  RAR entries 0..vfn are
 * reserved (PF default MAC plus one per VF), so only the remaining
 * entries are available; rar_entries counts down as both the remaining
 * budget and the next index to program.  Leftover entries are zeroed.
 * Returns: -ENOMEM on failure/insufficient address space
 *                0 on no addresses written
 *                X on writing X addresses to the RAR table
 **/
static int igb_write_uc_addr_list(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
	int count = 0;

	/* return ENOMEM indicating insufficient memory for addresses */
	if (netdev_uc_count(netdev) > rar_entries)
		return -ENOMEM;

	if (!netdev_uc_empty(netdev) && rar_entries) {
		struct netdev_hw_addr *ha;

		/* program from the highest free entry downwards */
		netdev_for_each_uc_addr(ha, netdev) {
			if (!rar_entries)
				break;
			igb_rar_set_qsel(adapter, ha->addr,
			                 rar_entries--,
			                 vfn);
			count++;
		}
	}
	/* write the addresses in reverse order to avoid write combining */
	for (; rar_entries > 0 ; rar_entries--) {
		wr32(E1000_RAH(rar_entries), 0);
		wr32(E1000_RAL(rar_entries), 0);
	}
	wrfl();

	return count;
}
2948
/**
 * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
 * @netdev: network interface device structure
 *
 * The set_rx_mode entry point is called whenever the unicast or multicast
 * address lists or the network interface flags are updated.  This routine is
 * responsible for configuring the hardware for proper unicast, multicast,
 * promiscuous mode, and all-multi behavior.  On 82576+ parts the matching
 * per-pool VMOLR bits are kept in sync so VMDq/SR-IOV pools behave the
 * same way as the global filter.
 **/
static void igb_set_rx_mode(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	unsigned int vfn = adapter->vfs_allocated_count;
	u32 rctl, vmolr = 0;
	int count;

	/* Check for Promiscuous and All Multicast modes */
	rctl = rd32(E1000_RCTL);

	/* clear the affected bits */
	rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);

	if (netdev->flags & IFF_PROMISC) {
		rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
		vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
	} else {
		if (netdev->flags & IFF_ALLMULTI) {
			rctl |= E1000_RCTL_MPE;
			vmolr |= E1000_VMOLR_MPME;
		} else {
			/*
			 * Write addresses to the MTA, if the attempt fails
			 * then we should just turn on promiscuous mode so
			 * that we can at least receive multicast traffic
			 */
			count = igb_write_mc_addr_list(netdev);
			if (count < 0) {
				rctl |= E1000_RCTL_MPE;
				vmolr |= E1000_VMOLR_MPME;
			} else if (count) {
				vmolr |= E1000_VMOLR_ROMPE;
			}
		}
		/*
		 * Write addresses to available RAR registers, if there is not
		 * sufficient space to store all the addresses then enable
		 * unicast promiscuous mode
		 */
		count = igb_write_uc_addr_list(netdev);
		if (count < 0) {
			rctl |= E1000_RCTL_UPE;
			vmolr |= E1000_VMOLR_ROPE;
		}
		rctl |= E1000_RCTL_VFE;
	}
	wr32(E1000_RCTL, rctl);

	/*
	 * In order to support SR-IOV and eventually VMDq it is necessary to set
	 * the VMOLR to enable the appropriate modes.  Without this workaround
	 * we will have issues with VLAN tag stripping not being done for frames
	 * that are only arriving because we are the default pool
	 */
	if (hw->mac.type < e1000_82576)
		return;

	/* merge the mode bits computed above into the PF pool's VMOLR */
	vmolr |= rd32(E1000_VMOLR(vfn)) &
	         ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
	wr32(E1000_VMOLR(vfn), vmolr);
	igb_restore_vf_multicasts(adapter);
}
3021
3022 /* Need to wait a few seconds after link up to get diagnostic information from
3023  * the phy */
3024 static void igb_update_phy_info(unsigned long data)
3025 {
3026         struct igb_adapter *adapter = (struct igb_adapter *) data;
3027         igb_get_phy_info(&adapter->hw);
3028 }
3029
3030 /**
3031  * igb_has_link - check shared code for link and determine up/down
3032  * @adapter: pointer to driver private info
3033  **/
3034 bool igb_has_link(struct igb_adapter *adapter)
3035 {
3036         struct e1000_hw *hw = &adapter->hw;
3037         bool link_active = false;
3038         s32 ret_val = 0;
3039
3040         /* get_link_status is set on LSC (link status) interrupt or
3041          * rx sequence error interrupt.  get_link_status will stay
3042          * false until the e1000_check_for_link establishes link
3043          * for copper adapters ONLY
3044          */
3045         switch (hw->phy.media_type) {
3046         case e1000_media_type_copper:
3047                 if (hw->mac.get_link_status) {
3048                         ret_val = hw->mac.ops.check_for_link(hw);
3049                         link_active = !hw->mac.get_link_status;
3050                 } else {
3051                         link_active = true;
3052                 }
3053                 break;
3054         case e1000_media_type_internal_serdes:
3055                 ret_val = hw->mac.ops.check_for_link(hw);
3056                 link_active = hw->mac.serdes_has_link;
3057                 break;
3058         default:
3059         case e1000_media_type_unknown:
3060                 break;
3061         }
3062
3063         return link_active;
3064 }
3065
3066 /**
3067  * igb_watchdog - Timer Call-back
3068  * @data: pointer to adapter cast into an unsigned long
3069  **/
3070 static void igb_watchdog(unsigned long data)
3071 {
3072         struct igb_adapter *adapter = (struct igb_adapter *)data;
3073         /* Do the rest outside of interrupt context */
3074         schedule_work(&adapter->watchdog_task);
3075 }
3076
/*
 * igb_watchdog_task - periodic watchdog work (process context)
 *
 * Tracks link transitions (logging and tuning queue length / timeout
 * factor on link-up), updates statistics, detects Tx work stranded by a
 * lost link (scheduling a reset if so), arms Tx hang detection, kicks a
 * software interrupt so Rx rings get cleaned, and finally re-arms the
 * watchdog timer.
 */
static void igb_watchdog_task(struct work_struct *work)
{
	struct igb_adapter *adapter = container_of(work,
	                                           struct igb_adapter,
	                                           watchdog_task);
	struct e1000_hw *hw = &adapter->hw;
	struct net_device *netdev = adapter->netdev;
	u32 link;
	int i;

	link = igb_has_link(adapter);
	if (link) {
		/* carrier off + link up => link just came up */
		if (!netif_carrier_ok(netdev)) {
			u32 ctrl;
			hw->mac.ops.get_speed_and_duplex(hw,
			                                 &adapter->link_speed,
			                                 &adapter->link_duplex);

			ctrl = rd32(E1000_CTRL);
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
			         "Flow Control: %s\n",
			       netdev->name,
			       adapter->link_speed,
			       adapter->link_duplex == FULL_DUPLEX ?
			         "Full Duplex" : "Half Duplex",
			       ((ctrl & E1000_CTRL_TFCE) &&
			        (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
			       ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
			       ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));

			/* tweak tx_queue_len according to speed/duplex and
			 * adjust the timeout factor */
			netdev->tx_queue_len = adapter->tx_queue_len;
			adapter->tx_timeout_factor = 1;
			switch (adapter->link_speed) {
			case SPEED_10:
				netdev->tx_queue_len = 10;
				adapter->tx_timeout_factor = 14;
				break;
			case SPEED_100:
				netdev->tx_queue_len = 100;
				/* maybe add some timeout factor ? */
				break;
			}

			netif_carrier_on(netdev);

			/* let the VFs know about the link change */
			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
				          round_jiffies(jiffies + 2 * HZ));
		}
	} else {
		/* carrier on + no link => link just went down */
		if (netif_carrier_ok(netdev)) {
			adapter->link_speed = 0;
			adapter->link_duplex = 0;
			/* Links status message must follow this format */
			printk(KERN_INFO "igb: %s NIC Link is Down\n",
			       netdev->name);
			netif_carrier_off(netdev);

			igb_ping_all_vfs(adapter);

			/* link state has changed, schedule phy info update */
			if (!test_bit(__IGB_DOWN, &adapter->state))
				mod_timer(&adapter->phy_info_timer,
				          round_jiffies(jiffies + 2 * HZ));
		}
	}

	igb_update_stats(adapter);

	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *tx_ring = adapter->tx_ring[i];
		if (!netif_carrier_ok(netdev)) {
			/* We've lost link, so the controller stops DMA,
			 * but we've got queued Tx work that's never going
			 * to get done, so reset controller to flush Tx.
			 * (Do the reset outside of interrupt context). */
			if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
				adapter->tx_timeout_count++;
				schedule_work(&adapter->reset_task);
				/* return immediately since reset is imminent */
				return;
			}
		}

		/* Force detection of hung controller every watchdog period */
		tx_ring->detect_tx_hung = true;
	}

	/* Cause software interrupt to ensure rx ring is cleaned */
	if (adapter->msix_entries) {
		u32 eics = 0;
		for (i = 0; i < adapter->num_q_vectors; i++) {
			struct igb_q_vector *q_vector = adapter->q_vector[i];
			eics |= q_vector->eims_value;
		}
		wr32(E1000_EICS, eics);
	} else {
		wr32(E1000_ICS, E1000_ICS_RXDMT0);
	}

	/* Reset the timer */
	if (!test_bit(__IGB_DOWN, &adapter->state))
		mod_timer(&adapter->watchdog_timer,
		          round_jiffies(jiffies + 2 * HZ));
}
3188
/* Latency classes used by the dynamic ITR (interrupt throttle rate)
 * algorithms below; a lower class means lower latency, i.e. a higher
 * interrupt rate. */
enum latency_range {
	lowest_latency = 0,	/* ~70,000 ints/sec */
	low_latency = 1,	/* ~20,000 ints/sec */
	bulk_latency = 2,	/* ~4,000 ints/sec */
	latency_invalid = 255	/* sentinel, never a valid class */
};
3195
3196 /**
3197  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3198  *
3199  *      Stores a new ITR value based on strictly on packet size.  This
3200  *      algorithm is less sophisticated than that used in igb_update_itr,
3201  *      due to the difficulty of synchronizing statistics across multiple
3202  *      receive rings.  The divisors and thresholds used by this fuction
3203  *      were determined based on theoretical maximum wire speed and testing
3204  *      data, in order to minimize response time while increasing bulk
3205  *      throughput.
3206  *      This functionality is controlled by the InterruptThrottleRate module
3207  *      parameter (see igb_param.c)
3208  *      NOTE:  This function is called only when operating in a multiqueue
3209  *             receive environment.
3210  * @q_vector: pointer to q_vector
3211  **/
3212 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3213 {
3214         int new_val = q_vector->itr_val;
3215         int avg_wire_size = 0;
3216         struct igb_adapter *adapter = q_vector->adapter;
3217
3218         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3219          * ints/sec - ITR timer value of 120 ticks.
3220          */
3221         if (adapter->link_speed != SPEED_1000) {
3222                 new_val = 976;
3223                 goto set_itr_val;
3224         }
3225
3226         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3227                 struct igb_ring *ring = q_vector->rx_ring;
3228                 avg_wire_size = ring->total_bytes / ring->total_packets;
3229         }
3230
3231         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3232                 struct igb_ring *ring = q_vector->tx_ring;
3233                 avg_wire_size = max_t(u32, avg_wire_size,
3234                                       (ring->total_bytes /
3235                                        ring->total_packets));
3236         }
3237
3238         /* if avg_wire_size isn't set no work was done */
3239         if (!avg_wire_size)
3240                 goto clear_counts;
3241
3242         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3243         avg_wire_size += 24;
3244
3245         /* Don't starve jumbo frames */
3246         avg_wire_size = min(avg_wire_size, 3000);
3247
3248         /* Give a little boost to mid-size frames */
3249         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3250                 new_val = avg_wire_size / 3;
3251         else
3252                 new_val = avg_wire_size / 2;
3253
3254         /* when in itr mode 3 do not exceed 20K ints/sec */
3255         if (adapter->rx_itr_setting == 3 && new_val < 196)
3256                 new_val = 196;
3257
3258 set_itr_val:
3259         if (new_val != q_vector->itr_val) {
3260                 q_vector->itr_val = new_val;
3261                 q_vector->set_itr = 1;
3262         }
3263 clear_counts:
3264         if (q_vector->rx_ring) {
3265                 q_vector->rx_ring->total_bytes = 0;
3266                 q_vector->rx_ring->total_packets = 0;
3267         }
3268         if (q_vector->tx_ring) {
3269                 q_vector->tx_ring->total_bytes = 0;
3270                 q_vector->tx_ring->total_packets = 0;
3271         }
3272 }
3273
3274 /**
3275  * igb_update_itr - update the dynamic ITR value based on statistics
3276  *      Stores a new ITR value based on packets and byte
3277  *      counts during the last interrupt.  The advantage of per interrupt
3278  *      computation is faster updates and more accurate ITR for the current
3279  *      traffic pattern.  Constants in this function were computed
3280  *      based on theoretical maximum wire speed and thresholds were set based
3281  *      on testing data as well as attempting to minimize response time
3282  *      while increasing bulk throughput.
3283  *      this functionality is controlled by the InterruptThrottleRate module
3284  *      parameter (see igb_param.c)
3285  *      NOTE:  These calculations are only valid when operating in a single-
3286  *             queue environment.
3287  * @adapter: pointer to adapter
3288  * @itr_setting: current q_vector->itr_val
3289  * @packets: the number of packets during this measurement interval
3290  * @bytes: the number of bytes during this measurement interval
3291  **/
3292 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3293                                    int packets, int bytes)
3294 {
3295         unsigned int retval = itr_setting;
3296
3297         if (packets == 0)
3298                 goto update_itr_done;
3299
3300         switch (itr_setting) {
3301         case lowest_latency:
3302                 /* handle TSO and jumbo frames */
3303                 if (bytes/packets > 8000)
3304                         retval = bulk_latency;
3305                 else if ((packets < 5) && (bytes > 512))
3306                         retval = low_latency;
3307                 break;
3308         case low_latency:  /* 50 usec aka 20000 ints/s */
3309                 if (bytes > 10000) {
3310                         /* this if handles the TSO accounting */
3311                         if (bytes/packets > 8000) {
3312                                 retval = bulk_latency;
3313                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3314                                 retval = bulk_latency;
3315                         } else if ((packets > 35)) {
3316                                 retval = lowest_latency;
3317                         }
3318                 } else if (bytes/packets > 2000) {
3319                         retval = bulk_latency;
3320                 } else if (packets <= 2 && bytes < 512) {
3321                         retval = lowest_latency;
3322                 }
3323                 break;
3324         case bulk_latency: /* 250 usec aka 4000 ints/s */
3325                 if (bytes > 25000) {
3326                         if (packets > 35)
3327                                 retval = low_latency;
3328                 } else if (bytes < 1500) {
3329                         retval = low_latency;
3330                 }
3331                 break;
3332         }
3333
3334 update_itr_done:
3335         return retval;
3336 }
3337
3338 static void igb_set_itr(struct igb_adapter *adapter)
3339 {
3340         struct igb_q_vector *q_vector = adapter->q_vector[0];
3341         u16 current_itr;
3342         u32 new_itr = q_vector->itr_val;
3343
3344         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3345         if (adapter->link_speed != SPEED_1000) {
3346                 current_itr = 0;
3347                 new_itr = 4000;
3348                 goto set_itr_now;
3349         }
3350
3351         adapter->rx_itr = igb_update_itr(adapter,
3352                                     adapter->rx_itr,
3353                                     q_vector->rx_ring->total_packets,
3354                                     q_vector->rx_ring->total_bytes);
3355
3356         adapter->tx_itr = igb_update_itr(adapter,
3357                                     adapter->tx_itr,
3358                                     q_vector->tx_ring->total_packets,
3359                                     q_vector->tx_ring->total_bytes);
3360         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3361
3362         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3363         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3364                 current_itr = low_latency;
3365
3366         switch (current_itr) {
3367         /* counts and packets in update_itr are dependent on these numbers */
3368         case lowest_latency:
3369                 new_itr = 56;  /* aka 70,000 ints/sec */
3370                 break;
3371         case low_latency:
3372                 new_itr = 196; /* aka 20,000 ints/sec */
3373                 break;
3374         case bulk_latency:
3375                 new_itr = 980; /* aka 4,000 ints/sec */
3376                 break;
3377         default:
3378                 break;
3379         }
3380
3381 set_itr_now:
3382         q_vector->rx_ring->total_bytes = 0;
3383         q_vector->rx_ring->total_packets = 0;
3384         q_vector->tx_ring->total_bytes = 0;
3385         q_vector->tx_ring->total_packets = 0;
3386
3387         if (new_itr != q_vector->itr_val) {
3388                 /* this attempts to bias the interrupt rate towards Bulk
3389                  * by adding intermediate steps when interrupt rate is
3390                  * increasing */
3391                 new_itr = new_itr > q_vector->itr_val ?
3392                              max((new_itr * q_vector->itr_val) /
3393                                  (new_itr + (q_vector->itr_val >> 2)),
3394                                  new_itr) :
3395                              new_itr;
3396                 /* Don't write the value here; it resets the adapter's
3397                  * internal timer, and causes us to delay far longer than
3398                  * we should between interrupts.  Instead, we write the ITR
3399                  * value at the beginning of the next interrupt so the timing
3400                  * ends up being correct.
3401                  */
3402                 q_vector->itr_val = new_itr;
3403                 q_vector->set_itr = 1;
3404         }
3405
3406         return;
3407 }
3408
/* Per-packet flag bits accumulated in tx_flags as an skb moves through the
 * transmit path.  The upper 16 bits carry the 802.1Q VLAN tag when
 * IGB_TX_FLAGS_VLAN is set (see IGB_TX_FLAGS_VLAN_SHIFT). */
#define IGB_TX_FLAGS_CSUM               0x00000001
#define IGB_TX_FLAGS_VLAN               0x00000002
#define IGB_TX_FLAGS_TSO                0x00000004
#define IGB_TX_FLAGS_IPV4               0x00000008
#define IGB_TX_FLAGS_TSTAMP             0x00000010
#define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT                 16
3416
/**
 * igb_tso_adv - build a TSO context descriptor for a GSO packet
 * @tx_ring: ring the context descriptor is placed on
 * @skb: packet being segmented by the hardware
 * @tx_flags: accumulated IGB_TX_FLAGS_* bits for this packet
 * @hdr_len: incremented by the total header length (L2 offset + L3 + L4)
 *
 * Returns true once the context descriptor has been queued, or a negative
 * errno if a cloned header could not be unshared.  The descriptor field
 * layout follows the 82575/82576 advanced TX context descriptor format.
 **/
static inline int igb_tso_adv(struct igb_ring *tx_ring,
			      struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
{
	struct e1000_adv_tx_context_desc *context_desc;
	unsigned int i;
	int err;
	struct igb_buffer *buffer_info;
	u32 info = 0, tu_cmd = 0;
	u32 mss_l4len_idx;
	u8 l4len;

	/* the header is rewritten below, so make sure we own it */
	if (skb_header_cloned(skb)) {
		err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
		if (err)
			return err;
	}

	l4len = tcp_hdrlen(skb);
	*hdr_len += l4len;

	/* Zero the length fields and seed the TCP pseudo-header checksum;
	 * the hardware fills in per-segment lengths and final checksums. */
	if (skb->protocol == htons(ETH_P_IP)) {
		struct iphdr *iph = ip_hdr(skb);
		iph->tot_len = 0;
		iph->check = 0;
		tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
							 iph->daddr, 0,
							 IPPROTO_TCP,
							 0);
	} else if (skb_is_gso_v6(skb)) {
		ipv6_hdr(skb)->payload_len = 0;
		tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
						       &ipv6_hdr(skb)->daddr,
						       0, IPPROTO_TCP, 0);
	}

	i = tx_ring->next_to_use;

	buffer_info = &tx_ring->buffer_info[i];
	context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
	/* VLAN MACLEN IPLEN */
	if (tx_flags & IGB_TX_FLAGS_VLAN)
		info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
	info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
	*hdr_len += skb_network_offset(skb);
	info |= skb_network_header_len(skb);
	*hdr_len += skb_network_header_len(skb);
	context_desc->vlan_macip_lens = cpu_to_le32(info);

	/* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
	tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);

	if (skb->protocol == htons(ETH_P_IP))
		tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
	tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;

	context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);

	/* MSS L4LEN IDX */
	mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
	mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);

	/* For 82575, context index must be unique per ring. */
	if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
		mss_l4len_idx |= tx_ring->reg_idx << 4;

	context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
	context_desc->seqnum_seed = 0;

	/* context descriptors carry no data buffer, hence dma = 0 */
	buffer_info->time_stamp = jiffies;
	buffer_info->next_to_watch = i;
	buffer_info->dma = 0;
	i++;
	if (i == tx_ring->count)
		i = 0;

	tx_ring->next_to_use = i;

	return true;
}
3496
/**
 * igb_tx_csum_adv - build a checksum-offload context descriptor
 * @tx_ring: ring the context descriptor is placed on
 * @skb: packet requiring checksum offload and/or VLAN insertion
 * @tx_flags: accumulated IGB_TX_FLAGS_* bits for this packet
 *
 * Returns true if a context descriptor was queued (packet needs L4
 * checksum insertion or VLAN tagging), false when neither applies.
 **/
static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
				   struct sk_buff *skb, u32 tx_flags)
{
	struct e1000_adv_tx_context_desc *context_desc;
	struct pci_dev *pdev = tx_ring->pdev;
	struct igb_buffer *buffer_info;
	u32 info = 0, tu_cmd = 0;
	unsigned int i;

	if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
	    (tx_flags & IGB_TX_FLAGS_VLAN)) {
		i = tx_ring->next_to_use;
		buffer_info = &tx_ring->buffer_info[i];
		context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);

		if (tx_flags & IGB_TX_FLAGS_VLAN)
			info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);

		info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			info |= skb_network_header_len(skb);

		context_desc->vlan_macip_lens = cpu_to_le32(info);

		tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);

		if (skb->ip_summed == CHECKSUM_PARTIAL) {
			__be16 protocol;

			/* for VLAN-tagged frames, look at the encapsulated
			 * ethertype to pick the L3/L4 offload bits */
			if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
				const struct vlan_ethhdr *vhdr =
					  (const struct vlan_ethhdr*)skb->data;

				protocol = vhdr->h_vlan_encapsulated_proto;
			} else {
				protocol = skb->protocol;
			}

			switch (protocol) {
			case cpu_to_be16(ETH_P_IP):
				tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
				if (ip_hdr(skb)->protocol == IPPROTO_TCP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
				else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
				break;
			case cpu_to_be16(ETH_P_IPV6):
				/* XXX what about other V6 headers?? */
				if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
				else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
					tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
				break;
			default:
				if (unlikely(net_ratelimit()))
					dev_warn(&pdev->dev,
					    "partial checksum but proto=%x!\n",
					    skb->protocol);
				break;
			}
		}

		context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
		context_desc->seqnum_seed = 0;
		/* For 82575, context index must be unique per ring. */
		if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
			context_desc->mss_l4len_idx =
				cpu_to_le32(tx_ring->reg_idx << 4);

		/* context descriptors carry no data buffer, hence dma = 0 */
		buffer_info->time_stamp = jiffies;
		buffer_info->next_to_watch = i;
		buffer_info->dma = 0;

		i++;
		if (i == tx_ring->count)
			i = 0;
		tx_ring->next_to_use = i;

		return true;
	}
	return false;
}
3578
/* A single advanced TX data descriptor can cover at most 2^16 bytes. */
#define IGB_MAX_TXD_PWR 16
#define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3581
/**
 * igb_tx_map_adv - DMA-map an skb's head and fragments onto the ring
 * @tx_ring: ring whose buffer_info array is filled in
 * @skb: packet to map
 * @first: ring index of the packet's first descriptor slot (for rewind
 *         by the caller and for next_to_watch linkage)
 *
 * Returns the number of descriptors consumed (head + fragments), or 0 if
 * any DMA mapping failed, in which case every mapping made so far has
 * been unwound.  Note: tx_ring->next_to_use is NOT advanced here; the
 * caller does that when queueing the descriptors.
 **/
static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
				 unsigned int first)
{
	struct igb_buffer *buffer_info;
	struct pci_dev *pdev = tx_ring->pdev;
	unsigned int len = skb_headlen(skb);
	unsigned int count = 0, i;
	unsigned int f;

	i = tx_ring->next_to_use;

	/* map the linear (head) portion of the skb */
	buffer_info = &tx_ring->buffer_info[i];
	BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
	buffer_info->length = len;
	/* set time_stamp *before* dma to help avoid a possible race */
	buffer_info->time_stamp = jiffies;
	buffer_info->next_to_watch = i;
	buffer_info->dma = pci_map_single(pdev, skb->data, len,
					  PCI_DMA_TODEVICE);
	if (pci_dma_mapping_error(pdev, buffer_info->dma))
		goto dma_error;

	/* map each page fragment; count is incremented before the mapping
	 * so the unwind loop below also covers a partially-set-up slot */
	for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
		struct skb_frag_struct *frag;

		count++;
		i++;
		if (i == tx_ring->count)
			i = 0;

		frag = &skb_shinfo(skb)->frags[f];
		len = frag->size;

		buffer_info = &tx_ring->buffer_info[i];
		BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
		buffer_info->length = len;
		buffer_info->time_stamp = jiffies;
		buffer_info->next_to_watch = i;
		buffer_info->mapped_as_page = true;
		buffer_info->dma = pci_map_page(pdev,
						frag->page,
						frag->page_offset,
						len,
						PCI_DMA_TODEVICE);
		if (pci_dma_mapping_error(pdev, buffer_info->dma))
			goto dma_error;

	}

	/* the skb and gso count live on the last descriptor; the first
	 * descriptor's next_to_watch points at it for completion checks */
	tx_ring->buffer_info[i].skb = skb;
	tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
	tx_ring->buffer_info[first].next_to_watch = i;

	/* +1 accounts for the head descriptor mapped above */
	return ++count;

dma_error:
	dev_err(&pdev->dev, "TX DMA map failed\n");

	/* clear timestamp and dma mappings for failed buffer_info mapping */
	buffer_info->dma = 0;
	buffer_info->time_stamp = 0;
	buffer_info->length = 0;
	buffer_info->next_to_watch = 0;
	buffer_info->mapped_as_page = false;

	/* clear timestamp and dma mappings for remaining portion of packet */
	while (count--) {
		if (i == 0)
			i = tx_ring->count;
		i--;
		buffer_info = &tx_ring->buffer_info[i];
		igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
	}

	return 0;
}
3658
/**
 * igb_tx_queue_adv - write the data descriptors and kick the hardware
 * @tx_ring: ring to place descriptors on
 * @tx_flags: accumulated IGB_TX_FLAGS_* bits for this packet
 * @count: number of buffer_info entries mapped by igb_tx_map_adv
 * @paylen: total packet length (skb->len)
 * @hdr_len: header length computed during TSO setup (0 otherwise)
 *
 * Fills one advanced TX data descriptor per mapped buffer, marks the last
 * one end-of-packet, then advances the tail register.  The statement order
 * here (descriptor writes, wmb, tail write, mmiowb) is deliberate and must
 * not be rearranged.
 **/
static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
				    u32 tx_flags, int count, u32 paylen,
				    u8 hdr_len)
{
	union e1000_adv_tx_desc *tx_desc;
	struct igb_buffer *buffer_info;
	u32 olinfo_status = 0, cmd_type_len;
	unsigned int i = tx_ring->next_to_use;

	cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
			E1000_ADVTXD_DCMD_DEXT);

	if (tx_flags & IGB_TX_FLAGS_VLAN)
		cmd_type_len |= E1000_ADVTXD_DCMD_VLE;

	if (tx_flags & IGB_TX_FLAGS_TSTAMP)
		cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;

	if (tx_flags & IGB_TX_FLAGS_TSO) {
		cmd_type_len |= E1000_ADVTXD_DCMD_TSE;

		/* insert tcp checksum */
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;

		/* insert ip checksum */
		if (tx_flags & IGB_TX_FLAGS_IPV4)
			olinfo_status |= E1000_TXD_POPTS_IXSM << 8;

	} else if (tx_flags & IGB_TX_FLAGS_CSUM) {
		olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
	}

	/* rings needing a per-ring context index (82575) encode it here so
	 * the data descriptors reference the matching context descriptor */
	if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
	    (tx_flags & (IGB_TX_FLAGS_CSUM |
			 IGB_TX_FLAGS_TSO |
			 IGB_TX_FLAGS_VLAN)))
		olinfo_status |= tx_ring->reg_idx << 4;

	olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);

	/* one data descriptor per mapped buffer */
	do {
		buffer_info = &tx_ring->buffer_info[i];
		tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
		tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
		tx_desc->read.cmd_type_len =
			cpu_to_le32(cmd_type_len | buffer_info->length);
		tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
		count--;
		i++;
		if (i == tx_ring->count)
			i = 0;
	} while (count > 0);

	/* mark the final descriptor as end-of-packet */
	tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
	/* Force memory writes to complete before letting h/w
	 * know there are new descriptors to fetch.  (Only
	 * applicable for weak-ordered memory model archs,
	 * such as IA-64). */
	wmb();

	tx_ring->next_to_use = i;
	writel(i, tx_ring->tail);
	/* we need this if more than one processor can write to our tail
	 * at a time, it synchronizes IO on IA64/Altix systems */
	mmiowb();
}
3725
3726 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3727 {
3728         struct net_device *netdev = tx_ring->netdev;
3729
3730         netif_stop_subqueue(netdev, tx_ring->queue_index);
3731
3732         /* Herbert's original patch had:
3733          *  smp_mb__after_netif_stop_queue();
3734          * but since that doesn't exist yet, just open code it. */
3735         smp_mb();
3736
3737         /* We need to check again in a case another CPU has just
3738          * made room available. */
3739         if (igb_desc_unused(tx_ring) < size)
3740                 return -EBUSY;
3741
3742         /* A reprieve! */
3743         netif_wake_subqueue(netdev, tx_ring->queue_index);
3744         tx_ring->tx_stats.restart_queue++;
3745         return 0;
3746 }
3747
/* Fast path: 0 when at least @size descriptors are free, otherwise fall
 * through to the stop-and-recheck slow path. */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	return (igb_desc_unused(tx_ring) >= size) ?
		0 : __igb_maybe_stop_tx(tx_ring, size);
}
3754
/**
 * igb_xmit_frame_ring_adv - transmit one skb on a specific ring
 * @skb: packet to send
 * @tx_ring: ring to place descriptors on
 *
 * Builds the optional context descriptor (TSO or checksum offload) plus
 * the data descriptors for @skb.  Returns NETDEV_TX_OK, or NETDEV_TX_BUSY
 * when the ring lacks free descriptors.  On TSO setup failure or DMA
 * mapping failure the skb is dropped and NETDEV_TX_OK is returned.
 **/
netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
				    struct igb_ring *tx_ring)
{
	struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
	int tso = 0, count;
	u32 tx_flags = 0;
	u16 first;
	u8 hdr_len = 0;
	union skb_shared_tx *shtx = skb_tx(skb);

	/* need: 1 descriptor per page,
	 *       + 2 desc gap to keep tail from touching head,
	 *       + 1 desc for skb->data,
	 *       + 1 desc for context descriptor,
	 * otherwise try next time */
	if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
		/* this is a hard error */
		return NETDEV_TX_BUSY;
	}

	/* the stack requested a hardware transmit timestamp */
	if (unlikely(shtx->hardware)) {
		shtx->in_progress = 1;
		tx_flags |= IGB_TX_FLAGS_TSTAMP;
	}

	/* stash the VLAN tag in the upper 16 bits of tx_flags */
	if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
		tx_flags |= IGB_TX_FLAGS_VLAN;
		tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
	}

	if (skb->protocol == htons(ETH_P_IP))
		tx_flags |= IGB_TX_FLAGS_IPV4;

	/* remember where this packet starts so a failure can rewind */
	first = tx_ring->next_to_use;
	if (skb_is_gso(skb)) {
		tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);

		if (tso < 0) {
			dev_kfree_skb_any(skb);
			return NETDEV_TX_OK;
		}
	}

	if (tso)
		tx_flags |= IGB_TX_FLAGS_TSO;
	else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
	         (skb->ip_summed == CHECKSUM_PARTIAL))
		tx_flags |= IGB_TX_FLAGS_CSUM;

	/*
	 * count reflects descriptors mapped, if 0 or less then mapping error
	 * has occurred and we need to rewind the descriptor queue
	 */
	count = igb_tx_map_adv(tx_ring, skb, first);
	if (!count) {
		dev_kfree_skb_any(skb);
		tx_ring->buffer_info[first].time_stamp = 0;
		tx_ring->next_to_use = first;
		return NETDEV_TX_OK;
	}

	igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);

	/* Make sure there is space in the ring for the next send. */
	igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);

	return NETDEV_TX_OK;
}
3823
3824 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3825                                       struct net_device *netdev)
3826 {
3827         struct igb_adapter *adapter = netdev_priv(netdev);
3828         struct igb_ring *tx_ring;
3829         int r_idx = 0;
3830
3831         if (test_bit(__IGB_DOWN, &adapter->state)) {
3832                 dev_kfree_skb_any(skb);
3833                 return NETDEV_TX_OK;
3834         }
3835
3836         if (skb->len <= 0) {
3837                 dev_kfree_skb_any(skb);
3838                 return NETDEV_TX_OK;
3839         }
3840
3841         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3842         tx_ring = adapter->multi_tx_table[r_idx];
3843
3844         /* This goes back to the question of how to logically map a tx queue
3845          * to a flow.  Right now, performance is impacted slightly negatively
3846          * if using multiple tx queues.  If the stack breaks away from a
3847          * single qdisc implementation, we can look at this again. */
3848         return igb_xmit_frame_ring_adv(skb, tx_ring);
3849 }
3850
3851 /**
3852  * igb_tx_timeout - Respond to a Tx Hang
3853  * @netdev: network interface device structure
3854  **/
3855 static void igb_tx_timeout(struct net_device *netdev)
3856 {
3857         struct igb_adapter *adapter = netdev_priv(netdev);
3858         struct e1000_hw *hw = &adapter->hw;
3859
3860         /* Do the reset outside of interrupt context */
3861         adapter->tx_timeout_count++;
3862
3863         if (hw->mac.type == e1000_82580)
3864                 hw->dev_spec._82575.global_device_reset = true;
3865
3866         schedule_work(&adapter->reset_task);
3867         wr32(E1000_EICS,
3868              (adapter->eims_enable_mask & ~adapter->eims_other));
3869 }
3870
3871 static void igb_reset_task(struct work_struct *work)
3872 {
3873         struct igb_adapter *adapter;
3874         adapter = container_of(work, struct igb_adapter, reset_task);
3875
3876         igb_reinit_locked(adapter);
3877 }
3878
3879 /**
3880  * igb_get_stats - Get System Network Statistics
3881  * @netdev: network interface device structure
3882  *
3883  * Returns the address of the device statistics structure.
3884  * The statistics are actually updated from the timer callback.
3885  **/
3886 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3887 {
3888         /* only return the current stats */
3889         return &netdev->stats;
3890 }
3891
3892 /**
3893  * igb_change_mtu - Change the Maximum Transfer Unit
3894  * @netdev: network interface device structure
3895  * @new_mtu: new value for maximum frame size
3896  *
3897  * Returns 0 on success, negative on failure
3898  **/
3899 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3900 {
3901         struct igb_adapter *adapter = netdev_priv(netdev);
3902         struct pci_dev *pdev = adapter->pdev;
3903         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3904         u32 rx_buffer_len, i;
3905
3906         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3907                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3908                 return -EINVAL;
3909         }
3910
3911         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3912                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3913                 return -EINVAL;
3914         }
3915
3916         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3917                 msleep(1);
3918
3919         /* igb_down has a dependency on max_frame_size */
3920         adapter->max_frame_size = max_frame;
3921
3922         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3923          * means we reserve 2 more, this pushes us to allocate from the next
3924          * larger slab size.
3925          * i.e. RXBUFFER_2048 --> size-4096 slab
3926          */
3927
3928         if (max_frame <= IGB_RXBUFFER_1024)
3929                 rx_buffer_len = IGB_RXBUFFER_1024;
3930         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3931                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3932         else
3933                 rx_buffer_len = IGB_RXBUFFER_128;
3934
3935         if (netif_running(netdev))
3936                 igb_down(adapter);
3937
3938         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3939                  netdev->mtu, new_mtu);
3940         netdev->mtu = new_mtu;
3941
3942         for (i = 0; i < adapter->num_rx_queues; i++)
3943                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3944
3945         if (netif_running(netdev))
3946                 igb_up(adapter);
3947         else
3948                 igb_reset(adapter);
3949
3950         clear_bit(__IGB_RESETTING, &adapter->state);
3951
3952         return 0;
3953 }
3954
3955 /**
3956  * igb_update_stats - Update the board statistics counters
3957  * @adapter: board private structure
3958  **/
3959
void igb_update_stats(struct igb_adapter *adapter)
{
	struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct pci_dev *pdev = adapter->pdev;
	u32 rnbc, reg;
	u16 phy_tmp;
	int i;
	u64 bytes, packets;

#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF

	/*
	 * Prevent stats update while adapter is being reset, or if the pci
	 * connection is down.
	 */
	if (adapter->link_speed == 0)
		return;
	if (pci_channel_offline(pdev))
		return;

	/* accumulate per-queue software rx counters into the netdev stats */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_rx_queues; i++) {
		/* RQDPC counts packets dropped on this queue; low 12 bits */
		u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
		struct igb_ring *ring = adapter->rx_ring[i];
		ring->rx_stats.drops += rqdpc_tmp;
		net_stats->rx_fifo_errors += rqdpc_tmp;
		bytes += ring->rx_stats.bytes;
		packets += ring->rx_stats.packets;
	}

	net_stats->rx_bytes = bytes;
	net_stats->rx_packets = packets;

	/* same accumulation for the tx queues */
	bytes = 0;
	packets = 0;
	for (i = 0; i < adapter->num_tx_queues; i++) {
		struct igb_ring *ring = adapter->tx_ring[i];
		bytes += ring->tx_stats.bytes;
		packets += ring->tx_stats.packets;
	}
	net_stats->tx_bytes = bytes;
	net_stats->tx_packets = packets;

	/* read stats registers (many are clear-on-read; order matters) */
	adapter->stats.crcerrs += rd32(E1000_CRCERRS);
	adapter->stats.gprc += rd32(E1000_GPRC);
	adapter->stats.gorc += rd32(E1000_GORCL);
	rd32(E1000_GORCH); /* clear GORCL */
	adapter->stats.bprc += rd32(E1000_BPRC);
	adapter->stats.mprc += rd32(E1000_MPRC);
	adapter->stats.roc += rd32(E1000_ROC);

	adapter->stats.prc64 += rd32(E1000_PRC64);
	adapter->stats.prc127 += rd32(E1000_PRC127);
	adapter->stats.prc255 += rd32(E1000_PRC255);
	adapter->stats.prc511 += rd32(E1000_PRC511);
	adapter->stats.prc1023 += rd32(E1000_PRC1023);
	adapter->stats.prc1522 += rd32(E1000_PRC1522);
	adapter->stats.symerrs += rd32(E1000_SYMERRS);
	adapter->stats.sec += rd32(E1000_SEC);

	adapter->stats.mpc += rd32(E1000_MPC);
	adapter->stats.scc += rd32(E1000_SCC);
	adapter->stats.ecol += rd32(E1000_ECOL);
	adapter->stats.mcc += rd32(E1000_MCC);
	adapter->stats.latecol += rd32(E1000_LATECOL);
	adapter->stats.dc += rd32(E1000_DC);
	adapter->stats.rlec += rd32(E1000_RLEC);
	adapter->stats.xonrxc += rd32(E1000_XONRXC);
	adapter->stats.xontxc += rd32(E1000_XONTXC);
	adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
	adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
	adapter->stats.fcruc += rd32(E1000_FCRUC);
	adapter->stats.gptc += rd32(E1000_GPTC);
	adapter->stats.gotc += rd32(E1000_GOTCL);
	rd32(E1000_GOTCH); /* clear GOTCL */
	/* "receive no buffers" also counts toward rx fifo errors */
	rnbc = rd32(E1000_RNBC);
	adapter->stats.rnbc += rnbc;
	net_stats->rx_fifo_errors += rnbc;
	adapter->stats.ruc += rd32(E1000_RUC);
	adapter->stats.rfc += rd32(E1000_RFC);
	adapter->stats.rjc += rd32(E1000_RJC);
	adapter->stats.tor += rd32(E1000_TORH);
	adapter->stats.tot += rd32(E1000_TOTH);
	adapter->stats.tpr += rd32(E1000_TPR);

	adapter->stats.ptc64 += rd32(E1000_PTC64);
	adapter->stats.ptc127 += rd32(E1000_PTC127);
	adapter->stats.ptc255 += rd32(E1000_PTC255);
	adapter->stats.ptc511 += rd32(E1000_PTC511);
	adapter->stats.ptc1023 += rd32(E1000_PTC1023);
	adapter->stats.ptc1522 += rd32(E1000_PTC1522);

	adapter->stats.mptc += rd32(E1000_MPTC);
	adapter->stats.bptc += rd32(E1000_BPTC);

	adapter->stats.tpt += rd32(E1000_TPT);
	adapter->stats.colc += rd32(E1000_COLC);

	adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
	/* read internal phy specific stats (only valid in internal-phy mode) */
	reg = rd32(E1000_CTRL_EXT);
	if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
		adapter->stats.rxerrc += rd32(E1000_RXERRC);
		adapter->stats.tncrs += rd32(E1000_TNCRS);
	}

	adapter->stats.tsctc += rd32(E1000_TSCTC);
	adapter->stats.tsctfc += rd32(E1000_TSCTFC);

	adapter->stats.iac += rd32(E1000_IAC);
	adapter->stats.icrxoc += rd32(E1000_ICRXOC);
	adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
	adapter->stats.icrxatc += rd32(E1000_ICRXATC);
	adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
	adapter->stats.ictxatc += rd32(E1000_ICTXATC);
	adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
	adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
	adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);

	/* Fill out the OS statistics structure */
	net_stats->multicast = adapter->stats.mprc;
	net_stats->collisions = adapter->stats.colc;

	/* Rx Errors */

	/* RLEC on some newer hardware can be incorrect so build
	 * our own version based on RUC and ROC */
	net_stats->rx_errors = adapter->stats.rxerrc +
		adapter->stats.crcerrs + adapter->stats.algnerrc +
		adapter->stats.ruc + adapter->stats.roc +
		adapter->stats.cexterr;
	net_stats->rx_length_errors = adapter->stats.ruc +
				      adapter->stats.roc;
	net_stats->rx_crc_errors = adapter->stats.crcerrs;
	net_stats->rx_frame_errors = adapter->stats.algnerrc;
	net_stats->rx_missed_errors = adapter->stats.mpc;

	/* Tx Errors */
	net_stats->tx_errors = adapter->stats.ecol +
			       adapter->stats.latecol;
	net_stats->tx_aborted_errors = adapter->stats.ecol;
	net_stats->tx_window_errors = adapter->stats.latecol;
	net_stats->tx_carrier_errors = adapter->stats.tncrs;

	/* Tx Dropped needs to be maintained elsewhere */

	/* Phy Stats */
	if (hw->phy.media_type == e1000_media_type_copper) {
		if ((adapter->link_speed == SPEED_1000) &&
		   (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
			phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
			adapter->phy_stats.idle_errors += phy_tmp;
		}
	}

	/* Management Stats */
	adapter->stats.mgptc += rd32(E1000_MGTPTC);
	adapter->stats.mgprc += rd32(E1000_MGTPRC);
	adapter->stats.mgpdc += rd32(E1000_MGTPDC);
}
4123
4124 static irqreturn_t igb_msix_other(int irq, void *data)
4125 {
4126         struct igb_adapter *adapter = data;
4127         struct e1000_hw *hw = &adapter->hw;
4128         u32 icr = rd32(E1000_ICR);
4129         /* reading ICR causes bit 31 of EICR to be cleared */
4130
4131         if (icr & E1000_ICR_DRSTA)
4132                 schedule_work(&adapter->reset_task);
4133
4134         if (icr & E1000_ICR_DOUTSYNC) {
4135                 /* HW is reporting DMA is out of sync */
4136                 adapter->stats.doosync++;
4137         }
4138
4139         /* Check for a mailbox event */
4140         if (icr & E1000_ICR_VMMB)
4141                 igb_msg_task(adapter);
4142
4143         if (icr & E1000_ICR_LSC) {
4144                 hw->mac.get_link_status = 1;
4145                 /* guard against interrupt when we're going down */
4146                 if (!test_bit(__IGB_DOWN, &adapter->state))
4147                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4148         }
4149
4150         if (adapter->vfs_allocated_count)
4151                 wr32(E1000_IMS, E1000_IMS_LSC |
4152                                 E1000_IMS_VMMB |
4153                                 E1000_IMS_DOUTSYNC);
4154         else
4155                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4156         wr32(E1000_EIMS, adapter->eims_other);
4157
4158         return IRQ_HANDLED;
4159 }
4160
4161 static void igb_write_itr(struct igb_q_vector *q_vector)
4162 {
4163         struct igb_adapter *adapter = q_vector->adapter;
4164         u32 itr_val = q_vector->itr_val & 0x7FFC;
4165
4166         if (!q_vector->set_itr)
4167                 return;
4168
4169         if (!itr_val)
4170                 itr_val = 0x4;
4171
4172         if (adapter->hw.mac.type == e1000_82575)
4173                 itr_val |= itr_val << 16;
4174         else
4175                 itr_val |= 0x8000000;
4176
4177         writel(itr_val, q_vector->itr_register);
4178         q_vector->set_itr = 0;
4179 }
4180
4181 static irqreturn_t igb_msix_ring(int irq, void *data)
4182 {
4183         struct igb_q_vector *q_vector = data;
4184
4185         /* Write the ITR value calculated from the previous interrupt. */
4186         igb_write_itr(q_vector);
4187
4188         napi_schedule(&q_vector->napi);
4189
4190         return IRQ_HANDLED;
4191 }
4192
4193 #ifdef CONFIG_IGB_DCA
/*
 * igb_update_dca - reprogram a vector's DCA tags for the current CPU
 * @q_vector: vector whose tx/rx rings should target the running CPU's cache
 *
 * Rewrites the DCA_TXCTRL/DCA_RXCTRL registers so descriptor (and, for rx,
 * header/payload) writebacks are tagged for the CPU this code runs on.
 * No-op if the vector is already bound to the current CPU.  Runs with
 * preemption disabled between get_cpu()/put_cpu().
 */
static void igb_update_dca(struct igb_q_vector *q_vector)
{
	struct igb_adapter *adapter = q_vector->adapter;
	struct e1000_hw *hw = &adapter->hw;
	int cpu = get_cpu();

	if (q_vector->cpu == cpu)
		goto out_no_update;

	if (q_vector->tx_ring) {
		int q = q_vector->tx_ring->reg_idx;
		u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
		/* 82575 uses a different CPUID field layout than 82576+ */
		if (hw->mac.type == e1000_82575) {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
			dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_TXCTRL_CPUID_SHIFT;
		}
		dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
		wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
	}
	if (q_vector->rx_ring) {
		int q = q_vector->rx_ring->reg_idx;
		u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
		if (hw->mac.type == e1000_82575) {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
		} else {
			dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
			dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
				      E1000_DCA_RXCTRL_CPUID_SHIFT;
		}
		/* rx gets descriptor, header and payload DCA */
		dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
		dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
		wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
	}
	q_vector->cpu = cpu;
out_no_update:
	put_cpu();
}
4237
4238 static void igb_setup_dca(struct igb_adapter *adapter)
4239 {
4240         struct e1000_hw *hw = &adapter->hw;
4241         int i;
4242
4243         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4244                 return;
4245
4246         /* Always use CB2 mode, difference is masked in the CB driver. */
4247         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4248
4249         for (i = 0; i < adapter->num_q_vectors; i++) {
4250                 adapter->q_vector[i]->cpu = -1;
4251                 igb_update_dca(adapter->q_vector[i]);
4252         }
4253 }
4254
/*
 * __igb_notify_dca - per-device DCA provider add/remove handler
 * @dev:  device whose drvdata is the igb netdev
 * @data: pointer to the DCA event (DCA_PROVIDER_ADD/REMOVE)
 *
 * Called via driver_for_each_device() from igb_notify_dca().  On ADD,
 * registers this device as a DCA requester and enables DCA; if the add
 * fails it deliberately falls through to the REMOVE path to tear down
 * any partial state.  Always returns 0.
 */
static int __igb_notify_dca(struct device *dev, void *data)
{
	struct net_device *netdev = dev_get_drvdata(dev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct pci_dev *pdev = adapter->pdev;
	struct e1000_hw *hw = &adapter->hw;
	unsigned long event = *(unsigned long *)data;

	switch (event) {
	case DCA_PROVIDER_ADD:
		/* if already enabled, don't do it again */
		if (adapter->flags & IGB_FLAG_DCA_ENABLED)
			break;
		if (dca_add_requester(dev) == 0) {
			adapter->flags |= IGB_FLAG_DCA_ENABLED;
			dev_info(&pdev->dev, "DCA enabled\n");
			igb_setup_dca(adapter);
			break;
		}
		/* Fall Through since DCA is disabled. */
	case DCA_PROVIDER_REMOVE:
		if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
			/* without this a class_device is left
			 * hanging around in the sysfs model */
			dca_remove_requester(dev);
			dev_info(&pdev->dev, "DCA disabled\n");
			adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
			wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
		}
		break;
	}

	return 0;
}
4289
4290 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4291                           void *p)
4292 {
4293         int ret_val;
4294
4295         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4296                                          __igb_notify_dca);
4297
4298         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4299 }
4300 #endif /* CONFIG_IGB_DCA */
4301
4302 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4303 {
4304         struct e1000_hw *hw = &adapter->hw;
4305         u32 ping;
4306         int i;
4307
4308         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4309                 ping = E1000_PF_CONTROL_MSG;
4310                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4311                         ping |= E1000_VT_MSGTYPE_CTS;
4312                 igb_write_mbx(hw, &ping, 1, i);
4313         }
4314 }
4315
4316 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4317 {
4318         struct e1000_hw *hw = &adapter->hw;
4319         u32 vmolr = rd32(E1000_VMOLR(vf));
4320         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4321
4322         vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4323                             IGB_VF_FLAG_MULTI_PROMISC);
4324         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4325
4326         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4327                 vmolr |= E1000_VMOLR_MPME;
4328                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4329         } else {
4330                 /*
4331                  * if we have hashes and we are clearing a multicast promisc
4332                  * flag we need to write the hashes to the MTA as this step
4333                  * was previously skipped
4334                  */
4335                 if (vf_data->num_vf_mc_hashes > 30) {
4336                         vmolr |= E1000_VMOLR_MPME;
4337                 } else if (vf_data->num_vf_mc_hashes) {
4338                         int j;
4339                         vmolr |= E1000_VMOLR_ROMPE;
4340                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4341                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4342                 }
4343         }
4344
4345         wr32(E1000_VMOLR(vf), vmolr);
4346
4347         /* there are flags left unprocessed, likely not supported */
4348         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4349                 return -EINVAL;
4350
4351         return 0;
4352
4353 }
4354
/*
 * igb_set_vf_multicasts - record a VF's multicast hash list
 * @adapter: board private structure
 * @msgbuf:  mailbox message; word 0 carries the count, words 1+ the hashes
 * @vf:      VF index
 *
 * Stores the VF's multicast hashes so they can be replayed whenever the
 * PF's own multicast list changes, then refreshes the MTA.  Returns 0.
 */
static int igb_set_vf_multicasts(struct igb_adapter *adapter,
                                  u32 *msgbuf, u32 vf)
{
	int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
	u16 *hash_list = (u16 *)&msgbuf[1];
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	int i;

	/* salt away the number of multicast addresses assigned
	 * to this VF for later use to restore when the PF multi cast
	 * list changes
	 */
	/* deliberately stored before capping: the promisc/restore paths
	 * test "> 30" on this field to fall back to multicast promisc */
	vf_data->num_vf_mc_hashes = n;

	/* only up to 30 hash values supported */
	if (n > 30)
		n = 30;

	/* store the hashes for later use */
	for (i = 0; i < n; i++)
		vf_data->vf_mc_hashes[i] = hash_list[i];

	/* Flush and reset the mta with the new values */
	igb_set_rx_mode(adapter->netdev);

	return 0;
}
4382
4383 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4384 {
4385         struct e1000_hw *hw = &adapter->hw;
4386         struct vf_data_storage *vf_data;
4387         int i, j;
4388
4389         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4390                 u32 vmolr = rd32(E1000_VMOLR(i));
4391                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4392
4393                 vf_data = &adapter->vf_data[i];
4394
4395                 if ((vf_data->num_vf_mc_hashes > 30) ||
4396                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4397                         vmolr |= E1000_VMOLR_MPME;
4398                 } else if (vf_data->num_vf_mc_hashes) {
4399                         vmolr |= E1000_VMOLR_ROMPE;
4400                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4401                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4402                 }
4403                 wr32(E1000_VMOLR(i), vmolr);
4404         }
4405 }
4406
4407 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4408 {
4409         struct e1000_hw *hw = &adapter->hw;
4410         u32 pool_mask, reg, vid;
4411         int i;
4412
4413         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4414
4415         /* Find the vlan filter for this id */
4416         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4417                 reg = rd32(E1000_VLVF(i));
4418
4419                 /* remove the vf from the pool */
4420                 reg &= ~pool_mask;
4421
4422                 /* if pool is empty then remove entry from vfta */
4423                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4424                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4425                         reg = 0;
4426                         vid = reg & E1000_VLVF_VLANID_MASK;
4427                         igb_vfta_set(hw, vid, false);
4428                 }
4429
4430                 wr32(E1000_VLVF(i), reg);
4431         }
4432
4433         adapter->vf_data[vf].vlans_enabled = 0;
4434 }
4435
/*
 * igb_vlvf_set - add or remove a VF from the VLVF filter for a VLAN
 * @adapter: board private structure
 * @vid:     VLAN id
 * @add:     true to join the VLAN's pool, false to leave it
 * @vf:      pool index (a VF index, or >= vfs_allocated_count for the PF)
 *
 * Maintains the shared VLVF/VFTA tables and, for real VFs, grows or
 * shrinks the VMOLR receive length limit by 4 bytes to account for the
 * VLAN tag on the first/last enabled VLAN.  Returns 0 on success, -1 if
 * the hardware has no VLVF table or VMDq is not in use.
 */
static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
{
	struct e1000_hw *hw = &adapter->hw;
	u32 reg, i;

	/* The vlvf table only exists on 82576 hardware and newer */
	if (hw->mac.type < e1000_82576)
		return -1;

	/* we only need to do this if VMDq is enabled */
	if (!adapter->vfs_allocated_count)
		return -1;

	/* Find the vlan filter for this id */
	for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
		reg = rd32(E1000_VLVF(i));
		if ((reg & E1000_VLVF_VLANID_ENABLE) &&
		    vid == (reg & E1000_VLVF_VLANID_MASK))
			break;
	}

	if (add) {
		if (i == E1000_VLVF_ARRAY_SIZE) {
			/* Did not find a matching VLAN ID entry that was
			 * enabled.  Search for a free filter entry, i.e.
			 * one without the enable bit set
			 */
			for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
				reg = rd32(E1000_VLVF(i));
				if (!(reg & E1000_VLVF_VLANID_ENABLE))
					break;
			}
		}
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* Found an enabled/available entry */
			reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);

			/* if !enabled we need to set this up in vfta */
			if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
				/* add VID to filter table */
				igb_vfta_set(hw, vid, true);
				reg |= E1000_VLVF_VLANID_ENABLE;
			}
			reg &= ~E1000_VLVF_VLANID_MASK;
			reg |= vid;
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/* first VLAN for this VF: allow 4 more bytes for the tag */
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size += 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}

			adapter->vf_data[vf].vlans_enabled++;
			return 0;
		}
	} else {
		if (i < E1000_VLVF_ARRAY_SIZE) {
			/* remove vf from the pool */
			reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
			/* if pool is empty then remove entry from vfta */
			if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
				reg = 0;
				igb_vfta_set(hw, vid, false);
			}
			wr32(E1000_VLVF(i), reg);

			/* do not modify RLPML for PF devices */
			if (vf >= adapter->vfs_allocated_count)
				return 0;

			/* last VLAN removed: shrink the limit back down */
			adapter->vf_data[vf].vlans_enabled--;
			if (!adapter->vf_data[vf].vlans_enabled) {
				u32 size;
				reg = rd32(E1000_VMOLR(vf));
				size = reg & E1000_VMOLR_RLPML_MASK;
				size -= 4;
				reg &= ~E1000_VMOLR_RLPML_MASK;
				reg |= size;
				wr32(E1000_VMOLR(vf), reg);
			}
		}
	}
	return 0;
}
4529
4530 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4531 {
4532         struct e1000_hw *hw = &adapter->hw;
4533
4534         if (vid)
4535                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4536         else
4537                 wr32(E1000_VMVIR(vf), 0);
4538 }
4539
4540 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4541                                int vf, u16 vlan, u8 qos)
4542 {
4543         int err = 0;
4544         struct igb_adapter *adapter = netdev_priv(netdev);
4545
4546         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4547                 return -EINVAL;
4548         if (vlan || qos) {
4549                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4550                 if (err)
4551                         goto out;
4552                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4553                 igb_set_vmolr(adapter, vf, !vlan);
4554                 adapter->vf_data[vf].pf_vlan = vlan;
4555                 adapter->vf_data[vf].pf_qos = qos;
4556                 dev_info(&adapter->pdev->dev,
4557                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4558                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4559                         dev_warn(&adapter->pdev->dev,
4560                                  "The VF VLAN has been set,"
4561                                  " but the PF device is not up.\n");
4562                         dev_warn(&adapter->pdev->dev,
4563                                  "Bring the PF device up before"
4564                                  " attempting to use the VF device.\n");
4565                 }
4566         } else {
4567                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4568                                    false, vf);
4569                 igb_set_vmvir(adapter, vlan, vf);
4570                 igb_set_vmolr(adapter, vf, true);
4571                 adapter->vf_data[vf].pf_vlan = 0;
4572                 adapter->vf_data[vf].pf_qos = 0;
4573        }
4574 out:
4575        return err;
4576 }
4577
4578 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4579 {
4580         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4581         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4582
4583         return igb_vlvf_set(adapter, vid, add, vf);
4584 }
4585
4586 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4587 {
4588         /* clear flags */
4589         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4590         adapter->vf_data[vf].last_nack = jiffies;
4591
4592         /* reset offloads to defaults */
4593         igb_set_vmolr(adapter, vf, true);
4594
4595         /* reset vlans for device */
4596         igb_clear_vf_vfta(adapter, vf);
4597         if (adapter->vf_data[vf].pf_vlan)
4598                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4599                                     adapter->vf_data[vf].pf_vlan,
4600                                     adapter->vf_data[vf].pf_qos);
4601         else
4602                 igb_clear_vf_vfta(adapter, vf);
4603
4604         /* reset multicast table array for vf */
4605         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4606
4607         /* Flush and reset the mta with the new values */
4608         igb_set_rx_mode(adapter->netdev);
4609 }
4610
4611 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4612 {
4613         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4614
4615         /* generate a new mac address as we were hotplug removed/added */
4616         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4617                 random_ether_addr(vf_mac);
4618
4619         /* process remaining reset events */
4620         igb_vf_reset(adapter, vf);
4621 }
4622
4623 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4624 {
4625         struct e1000_hw *hw = &adapter->hw;
4626         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4627         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4628         u32 reg, msgbuf[3];
4629         u8 *addr = (u8 *)(&msgbuf[1]);
4630
4631         /* process all the same items cleared in a function level reset */
4632         igb_vf_reset(adapter, vf);
4633
4634         /* set vf mac address */
4635         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4636
4637         /* enable transmit and receive for vf */
4638         reg = rd32(E1000_VFTE);
4639         wr32(E1000_VFTE, reg | (1 << vf));
4640         reg = rd32(E1000_VFRE);
4641         wr32(E1000_VFRE, reg | (1 << vf));
4642
4643         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4644
4645         /* reply to reset with ack and vf mac address */
4646         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4647         memcpy(addr, vf_mac, 6);
4648         igb_write_mbx(hw, msgbuf, 3, vf);
4649 }
4650
4651 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4652 {
4653         unsigned char *addr = (char *)&msg[1];
4654         int err = -1;
4655
4656         if (is_valid_ether_addr(addr))
4657                 err = igb_set_vf_mac(adapter, vf, addr);
4658
4659         return err;
4660 }
4661
4662 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4663 {
4664         struct e1000_hw *hw = &adapter->hw;
4665         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4666         u32 msg = E1000_VT_MSGTYPE_NACK;
4667
4668         /* if device isn't clear to send it shouldn't be reading either */
4669         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4670             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4671                 igb_write_mbx(hw, &msg, 1, vf);
4672                 vf_data->last_nack = jiffies;
4673         }
4674 }
4675
/*
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf:      VF index
 *
 * Reads the VF's mailbox and dispatches on the message type.  A VF that
 * has not completed a reset (no CTS flag) gets NACKed, rate-limited to
 * one NACK per 2 seconds.  Every handled message is answered with the
 * original message word plus an ACK or NACK bit.
 */
static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
{
	struct pci_dev *pdev = adapter->pdev;
	u32 msgbuf[E1000_VFMAILBOX_SIZE];
	struct e1000_hw *hw = &adapter->hw;
	struct vf_data_storage *vf_data = &adapter->vf_data[vf];
	s32 retval;

	retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);

	if (retval) {
		/* if receive failed revoke VF CTS stats and restart init */
		dev_err(&pdev->dev, "Error receiving message from VF\n");
		vf_data->flags &= ~IGB_VF_FLAG_CTS;
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		goto out;
	}

	/* this is a message we already processed, do nothing */
	if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
		return;

	/*
	 * until the vf completes a reset it should not be
	 * allowed to start any configuration.
	 */

	if (msgbuf[0] == E1000_VF_RESET) {
		igb_vf_reset_msg(adapter, vf);
		return;
	}

	if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
		/* suppress the NACK if one was sent within the last 2s */
		if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
			return;
		retval = -1;
		goto out;
	}

	switch ((msgbuf[0] & 0xFFFF)) {
	case E1000_VF_SET_MAC_ADDR:
		retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_PROMISC:
		retval = igb_set_vf_promisc(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_MULTICAST:
		retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
		break;
	case E1000_VF_SET_LPE:
		retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
		break;
	case E1000_VF_SET_VLAN:
		/* a PF-administered VLAN overrides VF requests */
		if (adapter->vf_data[vf].pf_vlan)
			retval = -1;
		else
			retval = igb_set_vf_vlan(adapter, msgbuf, vf);
		break;
	default:
		dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
		retval = -1;
		break;
	}

	msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
out:
	/* notify the VF of the results of what it sent us */
	if (retval)
		msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
	else
		msgbuf[0] |= E1000_VT_MSGTYPE_ACK;

	igb_write_mbx(hw, msgbuf, 1, vf);
}
4751
4752 static void igb_msg_task(struct igb_adapter *adapter)
4753 {
4754         struct e1000_hw *hw = &adapter->hw;
4755         u32 vf;
4756
4757         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4758                 /* process any reset requests */
4759                 if (!igb_check_for_rst(hw, vf))
4760                         igb_vf_reset_event(adapter, vf);
4761
4762                 /* process any messages pending */
4763                 if (!igb_check_for_msg(hw, vf))
4764                         igb_rcv_msg_from_vf(adapter, vf);
4765
4766                 /* process any acks */
4767                 if (!igb_check_for_ack(hw, vf))
4768                         igb_rcv_ack_from_vf(adapter, vf);
4769         }
4770 }
4771
4772 /**
4773  *  igb_set_uta - Set unicast filter table address
4774  *  @adapter: board private structure
4775  *
4776  *  The unicast table address is a register array of 32-bit registers.
4777  *  The table is meant to be used in a way similar to how the MTA is used
4778  *  however due to certain limitations in the hardware it is necessary to
4779  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4780  *  enable bit to allow vlan tag stripping when promiscous mode is enabled
4781  **/
4782 static void igb_set_uta(struct igb_adapter *adapter)
4783 {
4784         struct e1000_hw *hw = &adapter->hw;
4785         int i;
4786
4787         /* The UTA table only exists on 82576 hardware and newer */
4788         if (hw->mac.type < e1000_82576)
4789                 return;
4790
4791         /* we only need to do this if VMDq is enabled */
4792         if (!adapter->vfs_allocated_count)
4793                 return;
4794
4795         for (i = 0; i < hw->mac.uta_reg_count; i++)
4796                 array_wr32(E1000_UTA, i, ~0);
4797 }
4798
4799 /**
4800  * igb_intr_msi - Interrupt Handler
4801  * @irq: interrupt number
4802  * @data: pointer to a network interface device structure
4803  **/
4804 static irqreturn_t igb_intr_msi(int irq, void *data)
4805 {
4806         struct igb_adapter *adapter = data;
4807         struct igb_q_vector *q_vector = adapter->q_vector[0];
4808         struct e1000_hw *hw = &adapter->hw;
4809         /* read ICR disables interrupts using IAM */
4810         u32 icr = rd32(E1000_ICR);
4811
4812         igb_write_itr(q_vector);
4813
4814         if (icr & E1000_ICR_DRSTA)
4815                 schedule_work(&adapter->reset_task);
4816
4817         if (icr & E1000_ICR_DOUTSYNC) {
4818                 /* HW is reporting DMA is out of sync */
4819                 adapter->stats.doosync++;
4820         }
4821
4822         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4823                 hw->mac.get_link_status = 1;
4824                 if (!test_bit(__IGB_DOWN, &adapter->state))
4825                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4826         }
4827
4828         napi_schedule(&q_vector->napi);
4829
4830         return IRQ_HANDLED;
4831 }
4832
4833 /**
4834  * igb_intr - Legacy Interrupt Handler
4835  * @irq: interrupt number
4836  * @data: pointer to a network interface device structure
4837  **/
4838 static irqreturn_t igb_intr(int irq, void *data)
4839 {
4840         struct igb_adapter *adapter = data;
4841         struct igb_q_vector *q_vector = adapter->q_vector[0];
4842         struct e1000_hw *hw = &adapter->hw;
4843         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4844          * need for the IMC write */
4845         u32 icr = rd32(E1000_ICR);
4846         if (!icr)
4847                 return IRQ_NONE;  /* Not our interrupt */
4848
4849         igb_write_itr(q_vector);
4850
4851         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4852          * not set, then the adapter didn't send an interrupt */
4853         if (!(icr & E1000_ICR_INT_ASSERTED))
4854                 return IRQ_NONE;
4855
4856         if (icr & E1000_ICR_DRSTA)
4857                 schedule_work(&adapter->reset_task);
4858
4859         if (icr & E1000_ICR_DOUTSYNC) {
4860                 /* HW is reporting DMA is out of sync */
4861                 adapter->stats.doosync++;
4862         }
4863
4864         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4865                 hw->mac.get_link_status = 1;
4866                 /* guard against interrupt when we're going down */
4867                 if (!test_bit(__IGB_DOWN, &adapter->state))
4868                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4869         }
4870
4871         napi_schedule(&q_vector->napi);
4872
4873         return IRQ_HANDLED;
4874 }
4875
4876 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4877 {
4878         struct igb_adapter *adapter = q_vector->adapter;
4879         struct e1000_hw *hw = &adapter->hw;
4880
4881         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4882             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4883                 if (!adapter->msix_entries)
4884                         igb_set_itr(adapter);
4885                 else
4886                         igb_update_ring_itr(q_vector);
4887         }
4888
4889         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4890                 if (adapter->msix_entries)
4891                         wr32(E1000_EIMS, q_vector->eims_value);
4892                 else
4893                         igb_irq_enable(adapter);
4894         }
4895 }
4896
4897 /**
4898  * igb_poll - NAPI Rx polling callback
4899  * @napi: napi polling structure
4900  * @budget: count of how many packets we should handle
4901  **/
4902 static int igb_poll(struct napi_struct *napi, int budget)
4903 {
4904         struct igb_q_vector *q_vector = container_of(napi,
4905                                                      struct igb_q_vector,
4906                                                      napi);
4907         int tx_clean_complete = 1, work_done = 0;
4908
4909 #ifdef CONFIG_IGB_DCA
4910         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4911                 igb_update_dca(q_vector);
4912 #endif
4913         if (q_vector->tx_ring)
4914                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4915
4916         if (q_vector->rx_ring)
4917                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4918
4919         if (!tx_clean_complete)
4920                 work_done = budget;
4921
4922         /* If not enough Rx work done, exit the polling mode */
4923         if (work_done < budget) {
4924                 napi_complete(napi);
4925                 igb_ring_irq_enable(q_vector);
4926         }
4927
4928         return work_done;
4929 }
4930
4931 /**
4932  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4933  * @adapter: board private structure
4934  * @shhwtstamps: timestamp structure to update
4935  * @regval: unsigned 64bit system time value.
4936  *
4937  * We need to convert the system time value stored in the RX/TXSTMP registers
4938  * into a hwtstamp which can be used by the upper level timestamping functions
4939  */
4940 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4941                                    struct skb_shared_hwtstamps *shhwtstamps,
4942                                    u64 regval)
4943 {
4944         u64 ns;
4945
4946         /*
4947          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4948          * 24 to match clock shift we setup earlier.
4949          */
4950         if (adapter->hw.mac.type == e1000_82580)
4951                 regval <<= IGB_82580_TSYNC_SHIFT;
4952
4953         ns = timecounter_cyc2time(&adapter->clock, regval);
4954         timecompare_update(&adapter->compare, ns);
4955         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4956         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4957         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4958 }
4959
4960 /**
4961  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4962  * @q_vector: pointer to q_vector containing needed info
4963  * @skb: packet that was just sent
4964  *
4965  * If we were asked to do hardware stamping and such a time stamp is
4966  * available, then it must have been for this skb here because we only
4967  * allow only one such packet into the queue.
4968  */
4969 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4970 {
4971         struct igb_adapter *adapter = q_vector->adapter;
4972         union skb_shared_tx *shtx = skb_tx(skb);
4973         struct e1000_hw *hw = &adapter->hw;
4974         struct skb_shared_hwtstamps shhwtstamps;
4975         u64 regval;
4976
4977         /* if skb does not support hw timestamp or TX stamp not valid exit */
4978         if (likely(!shtx->hardware) ||
4979             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4980                 return;
4981
4982         regval = rd32(E1000_TXSTMPL);
4983         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4984
4985         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4986         skb_tstamp_tx(skb, &shhwtstamps);
4987 }
4988
4989 /**
4990  * igb_clean_tx_irq - Reclaim resources after transmit completes
4991  * @q_vector: pointer to q_vector containing needed info
4992  * returns true if ring is completely cleaned
4993  **/
4994 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4995 {
4996         struct igb_adapter *adapter = q_vector->adapter;
4997         struct igb_ring *tx_ring = q_vector->tx_ring;
4998         struct net_device *netdev = tx_ring->netdev;
4999         struct e1000_hw *hw = &adapter->hw;
5000         struct igb_buffer *buffer_info;
5001         struct sk_buff *skb;
5002         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5003         unsigned int total_bytes = 0, total_packets = 0;
5004         unsigned int i, eop, count = 0;
5005         bool cleaned = false;
5006
5007         i = tx_ring->next_to_clean;
5008         eop = tx_ring->buffer_info[i].next_to_watch;
5009         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5010
5011         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5012                (count < tx_ring->count)) {
5013                 for (cleaned = false; !cleaned; count++) {
5014                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5015                         buffer_info = &tx_ring->buffer_info[i];
5016                         cleaned = (i == eop);
5017                         skb = buffer_info->skb;
5018
5019                         if (skb) {
5020                                 unsigned int segs, bytecount;
5021                                 /* gso_segs is currently only valid for tcp */
5022                                 segs = buffer_info->gso_segs;
5023                                 /* multiply data chunks by size of headers */
5024                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5025                                             skb->len;
5026                                 total_packets += segs;
5027                                 total_bytes += bytecount;
5028
5029                                 igb_tx_hwtstamp(q_vector, skb);
5030                         }
5031
5032                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5033                         tx_desc->wb.status = 0;
5034
5035                         i++;
5036                         if (i == tx_ring->count)
5037                                 i = 0;
5038                 }
5039                 eop = tx_ring->buffer_info[i].next_to_watch;
5040                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5041         }
5042
5043         tx_ring->next_to_clean = i;
5044
5045         if (unlikely(count &&
5046                      netif_carrier_ok(netdev) &&
5047                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5048                 /* Make sure that anybody stopping the queue after this
5049                  * sees the new next_to_clean.
5050                  */
5051                 smp_mb();
5052                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5053                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5054                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5055                         tx_ring->tx_stats.restart_queue++;
5056                 }
5057         }
5058
5059         if (tx_ring->detect_tx_hung) {
5060                 /* Detect a transmit hang in hardware, this serializes the
5061                  * check with the clearing of time_stamp and movement of i */
5062                 tx_ring->detect_tx_hung = false;
5063                 if (tx_ring->buffer_info[i].time_stamp &&
5064                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5065                                (adapter->tx_timeout_factor * HZ)) &&
5066                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5067
5068                         /* detected Tx unit hang */
5069                         dev_err(&tx_ring->pdev->dev,
5070                                 "Detected Tx Unit Hang\n"
5071                                 "  Tx Queue             <%d>\n"
5072                                 "  TDH                  <%x>\n"
5073                                 "  TDT                  <%x>\n"
5074                                 "  next_to_use          <%x>\n"
5075                                 "  next_to_clean        <%x>\n"
5076                                 "buffer_info[next_to_clean]\n"
5077                                 "  time_stamp           <%lx>\n"
5078                                 "  next_to_watch        <%x>\n"
5079                                 "  jiffies              <%lx>\n"
5080                                 "  desc.status          <%x>\n",
5081                                 tx_ring->queue_index,
5082                                 readl(tx_ring->head),
5083                                 readl(tx_ring->tail),
5084                                 tx_ring->next_to_use,
5085                                 tx_ring->next_to_clean,
5086                                 tx_ring->buffer_info[eop].time_stamp,
5087                                 eop,
5088                                 jiffies,
5089                                 eop_desc->wb.status);
5090                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5091                 }
5092         }
5093         tx_ring->total_bytes += total_bytes;
5094         tx_ring->total_packets += total_packets;
5095         tx_ring->tx_stats.bytes += total_bytes;
5096         tx_ring->tx_stats.packets += total_packets;
5097         return (count < tx_ring->count);
5098 }
5099
5100 /**
5101  * igb_receive_skb - helper function to handle rx indications
5102  * @q_vector: structure containing interrupt and ring information
5103  * @skb: packet to send up
5104  * @vlan_tag: vlan tag for packet
5105  **/
5106 static void igb_receive_skb(struct igb_q_vector *q_vector,
5107                             struct sk_buff *skb,
5108                             u16 vlan_tag)
5109 {
5110         struct igb_adapter *adapter = q_vector->adapter;
5111
5112         if (vlan_tag)
5113                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5114                                  vlan_tag, skb);
5115         else
5116                 napi_gro_receive(&q_vector->napi, skb);
5117 }
5118
5119 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5120                                        u32 status_err, struct sk_buff *skb)
5121 {
5122         skb->ip_summed = CHECKSUM_NONE;
5123
5124         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5125         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5126              (status_err & E1000_RXD_STAT_IXSM))
5127                 return;
5128
5129         /* TCP/UDP checksum error bit is set */
5130         if (status_err &
5131             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5132                 /*
5133                  * work around errata with sctp packets where the TCPE aka
5134                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5135                  * packets, (aka let the stack check the crc32c)
5136                  */
5137                 if ((skb->len == 60) &&
5138                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5139                         ring->rx_stats.csum_err++;
5140
5141                 /* let the stack verify checksum errors */
5142                 return;
5143         }
5144         /* It must be a TCP or UDP packet with a valid checksum */
5145         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5146                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5147
5148         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5149 }
5150
5151 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5152                                    struct sk_buff *skb)
5153 {
5154         struct igb_adapter *adapter = q_vector->adapter;
5155         struct e1000_hw *hw = &adapter->hw;
5156         u64 regval;
5157
5158         /*
5159          * If this bit is set, then the RX registers contain the time stamp. No
5160          * other packet will be time stamped until we read these registers, so
5161          * read the registers to make them available again. Because only one
5162          * packet can be time stamped at a time, we know that the register
5163          * values must belong to this one here and therefore we don't need to
5164          * compare any of the additional attributes stored for it.
5165          *
5166          * If nothing went wrong, then it should have a skb_shared_tx that we
5167          * can turn into a skb_shared_hwtstamps.
5168          */
5169         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5170                 return;
5171         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5172                 return;
5173
5174         regval = rd32(E1000_RXSTMPL);
5175         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5176
5177         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5178 }
5179 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5180                                union e1000_adv_rx_desc *rx_desc)
5181 {
5182         /* HW will not DMA in data larger than the given buffer, even if it
5183          * parses the (NFS, of course) header to be larger.  In that case, it
5184          * fills the header buffer and spills the rest into the page.
5185          */
5186         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5187                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5188         if (hlen > rx_ring->rx_buffer_len)
5189                 hlen = rx_ring->rx_buffer_len;
5190         return hlen;
5191 }
5192
5193 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5194                                  int *work_done, int budget)
5195 {
5196         struct igb_ring *rx_ring = q_vector->rx_ring;
5197         struct net_device *netdev = rx_ring->netdev;
5198         struct pci_dev *pdev = rx_ring->pdev;
5199         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5200         struct igb_buffer *buffer_info , *next_buffer;
5201         struct sk_buff *skb;
5202         bool cleaned = false;
5203         int cleaned_count = 0;
5204         int current_node = numa_node_id();
5205         unsigned int total_bytes = 0, total_packets = 0;
5206         unsigned int i;
5207         u32 staterr;
5208         u16 length;
5209         u16 vlan_tag;
5210
5211         i = rx_ring->next_to_clean;
5212         buffer_info = &rx_ring->buffer_info[i];
5213         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5214         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5215
5216         while (staterr & E1000_RXD_STAT_DD) {
5217                 if (*work_done >= budget)
5218                         break;
5219                 (*work_done)++;
5220
5221                 skb = buffer_info->skb;
5222                 prefetch(skb->data - NET_IP_ALIGN);
5223                 buffer_info->skb = NULL;
5224
5225                 i++;
5226                 if (i == rx_ring->count)
5227                         i = 0;
5228
5229                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5230                 prefetch(next_rxd);
5231                 next_buffer = &rx_ring->buffer_info[i];
5232
5233                 length = le16_to_cpu(rx_desc->wb.upper.length);
5234                 cleaned = true;
5235                 cleaned_count++;
5236
5237                 if (buffer_info->dma) {
5238                         pci_unmap_single(pdev, buffer_info->dma,
5239                                          rx_ring->rx_buffer_len,
5240                                          PCI_DMA_FROMDEVICE);
5241                         buffer_info->dma = 0;
5242                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5243                                 skb_put(skb, length);
5244                                 goto send_up;
5245                         }
5246                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5247                 }
5248
5249                 if (length) {
5250                         pci_unmap_page(pdev, buffer_info->page_dma,
5251                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5252                         buffer_info->page_dma = 0;
5253
5254                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5255                                                 buffer_info->page,
5256                                                 buffer_info->page_offset,
5257                                                 length);
5258
5259                         if ((page_count(buffer_info->page) != 1) ||
5260                             (page_to_nid(buffer_info->page) != current_node))
5261                                 buffer_info->page = NULL;
5262                         else
5263                                 get_page(buffer_info->page);
5264
5265                         skb->len += length;
5266                         skb->data_len += length;
5267                         skb->truesize += length;
5268                 }
5269
5270                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5271                         buffer_info->skb = next_buffer->skb;
5272                         buffer_info->dma = next_buffer->dma;
5273                         next_buffer->skb = skb;
5274                         next_buffer->dma = 0;
5275                         goto next_desc;
5276                 }
5277 send_up:
5278                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5279                         dev_kfree_skb_irq(skb);
5280                         goto next_desc;
5281                 }
5282
5283                 igb_rx_hwtstamp(q_vector, staterr, skb);
5284                 total_bytes += skb->len;
5285                 total_packets++;
5286
5287                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5288
5289                 skb->protocol = eth_type_trans(skb, netdev);
5290                 skb_record_rx_queue(skb, rx_ring->queue_index);
5291
5292                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5293                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5294
5295                 igb_receive_skb(q_vector, skb, vlan_tag);
5296
5297 next_desc:
5298                 rx_desc->wb.upper.status_error = 0;
5299
5300                 /* return some buffers to hardware, one at a time is too slow */
5301                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5302                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5303                         cleaned_count = 0;
5304                 }
5305
5306                 /* use prefetched values */
5307                 rx_desc = next_rxd;
5308                 buffer_info = next_buffer;
5309                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5310         }
5311
5312         rx_ring->next_to_clean = i;
5313         cleaned_count = igb_desc_unused(rx_ring);
5314
5315         if (cleaned_count)
5316                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5317
5318         rx_ring->total_packets += total_packets;
5319         rx_ring->total_bytes += total_bytes;
5320         rx_ring->rx_stats.packets += total_packets;
5321         rx_ring->rx_stats.bytes += total_bytes;
5322         return cleaned;
5323 }
5324
5325 /**
5326  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5327  * @adapter: address of board private structure
5328  **/
5329 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5330 {
5331         struct net_device *netdev = rx_ring->netdev;
5332         union e1000_adv_rx_desc *rx_desc;
5333         struct igb_buffer *buffer_info;
5334         struct sk_buff *skb;
5335         unsigned int i;
5336         int bufsz;
5337
5338         i = rx_ring->next_to_use;
5339         buffer_info = &rx_ring->buffer_info[i];
5340
5341         bufsz = rx_ring->rx_buffer_len;
5342
5343         while (cleaned_count--) {
5344                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5345
5346                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5347                         if (!buffer_info->page) {
5348                                 buffer_info->page = netdev_alloc_page(netdev);
5349                                 if (!buffer_info->page) {
5350                                         rx_ring->rx_stats.alloc_failed++;
5351                                         goto no_buffers;
5352                                 }
5353                                 buffer_info->page_offset = 0;
5354                         } else {
5355                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5356                         }
5357                         buffer_info->page_dma =
5358                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5359                                              buffer_info->page_offset,
5360                                              PAGE_SIZE / 2,
5361                                              PCI_DMA_FROMDEVICE);
5362                         if (pci_dma_mapping_error(rx_ring->pdev,
5363                                                   buffer_info->page_dma)) {
5364                                 buffer_info->page_dma = 0;
5365                                 rx_ring->rx_stats.alloc_failed++;
5366                                 goto no_buffers;
5367                         }
5368                 }
5369
5370                 skb = buffer_info->skb;
5371                 if (!skb) {
5372                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5373                         if (!skb) {
5374                                 rx_ring->rx_stats.alloc_failed++;
5375                                 goto no_buffers;
5376                         }
5377
5378                         buffer_info->skb = skb;
5379                 }
5380                 if (!buffer_info->dma) {
5381                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5382                                                           skb->data,
5383                                                           bufsz,
5384                                                           PCI_DMA_FROMDEVICE);
5385                         if (pci_dma_mapping_error(rx_ring->pdev,
5386                                                   buffer_info->dma)) {
5387                                 buffer_info->dma = 0;
5388                                 rx_ring->rx_stats.alloc_failed++;
5389                                 goto no_buffers;
5390                         }
5391                 }
5392                 /* Refresh the desc even if buffer_addrs didn't change because
5393                  * each write-back erases this info. */
5394                 if (bufsz < IGB_RXBUFFER_1024) {
5395                         rx_desc->read.pkt_addr =
5396                              cpu_to_le64(buffer_info->page_dma);
5397                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5398                 } else {
5399                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5400                         rx_desc->read.hdr_addr = 0;
5401                 }
5402
5403                 i++;
5404                 if (i == rx_ring->count)
5405                         i = 0;
5406                 buffer_info = &rx_ring->buffer_info[i];
5407         }
5408
5409 no_buffers:
5410         if (rx_ring->next_to_use != i) {
5411                 rx_ring->next_to_use = i;
5412                 if (i == 0)
5413                         i = (rx_ring->count - 1);
5414                 else
5415                         i--;
5416
5417                 /* Force memory writes to complete before letting h/w
5418                  * know there are new descriptors to fetch.  (Only
5419                  * applicable for weak-ordered memory model archs,
5420                  * such as IA-64). */
5421                 wmb();
5422                 writel(i, rx_ring->tail);
5423         }
5424 }
5425
5426 /**
5427  * igb_mii_ioctl -
5428  * @netdev:
5429  * @ifreq:
5430  * @cmd:
5431  **/
5432 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5433 {
5434         struct igb_adapter *adapter = netdev_priv(netdev);
5435         struct mii_ioctl_data *data = if_mii(ifr);
5436
5437         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5438                 return -EOPNOTSUPP;
5439
5440         switch (cmd) {
5441         case SIOCGMIIPHY:
5442                 data->phy_id = adapter->hw.phy.addr;
5443                 break;
5444         case SIOCGMIIREG:
5445                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5446                                      &data->val_out))
5447                         return -EIO;
5448                 break;
5449         case SIOCSMIIREG:
5450         default:
5451                 return -EOPNOTSUPP;
5452         }
5453         return 0;
5454 }
5455
/**
 * igb_hwtstamp_ioctl - control hardware time stamping
 * @netdev: network interface device structure
 * @ifr: ioctl request carrying a struct hwtstamp_config
 * @cmd: ioctl command (SIOCSHWTSTAMP)
 *
 * Outgoing time stamping can be enabled and disabled. Play nice and
 * disable it when requested, although it shouldn't cause any overhead
 * when no packet needs it. At most one packet in the queue may be
 * marked for time stamping, otherwise it would be impossible to tell
 * for sure to which packet the hardware time stamp belongs.
 *
 * Incoming time stamping has to be configured via the hardware
 * filters. Not all combinations are supported, in particular event
 * type has to be specified. Matching the kind of event packet is
 * not supported, with the exception of "all V2 events regardless of
 * level 2 or 4".
 *
 **/
static int igb_hwtstamp_ioctl(struct net_device *netdev,
			      struct ifreq *ifr, int cmd)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	struct hwtstamp_config config;
	/* start from "enabled" and clear the bits below when disabling */
	u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
	u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
	u32 tsync_rx_cfg = 0;
	bool is_l4 = false;
	bool is_l2 = false;
	u32 regval;

	if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
		return -EFAULT;

	/* reserved for future extensions */
	if (config.flags)
		return -EINVAL;

	switch (config.tx_type) {
	case HWTSTAMP_TX_OFF:
		tsync_tx_ctl = 0;
		/* fall through - OFF clears the enable bit, ON keeps it */
	case HWTSTAMP_TX_ON:
		break;
	default:
		return -ERANGE;
	}

	switch (config.rx_filter) {
	case HWTSTAMP_FILTER_NONE:
		tsync_rx_ctl = 0;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
	case HWTSTAMP_FILTER_ALL:
		/*
		 * register TSYNCRXCFG must be set, therefore it is not
		 * possible to time stamp both Sync and Delay_Req messages
		 * => fall back to time stamping all packets
		 */
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
		config.rx_filter = HWTSTAMP_FILTER_ALL;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
		is_l4 = true;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		/* report back that only a subset is time stamped */
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
		tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
		is_l2 = true;
		is_l4 = true;
		config.rx_filter = HWTSTAMP_FILTER_SOME;
		break;
	case HWTSTAMP_FILTER_PTP_V2_EVENT:
	case HWTSTAMP_FILTER_PTP_V2_SYNC:
	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
		tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
		config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
		is_l2 = true;
		break;
	default:
		return -ERANGE;
	}

	/* on 82575, any request that would enable time stamping in either
	 * direction is rejected (bitwise OR is intentional: both operands
	 * are register bitmasks used here as booleans) */
	if (hw->mac.type == e1000_82575) {
		if (tsync_rx_ctl | tsync_tx_ctl)
			return -EINVAL;
		return 0;
	}

	/* enable/disable TX */
	regval = rd32(E1000_TSYNCTXCTL);
	regval &= ~E1000_TSYNCTXCTL_ENABLED;
	regval |= tsync_tx_ctl;
	wr32(E1000_TSYNCTXCTL, regval);

	/* enable/disable RX */
	regval = rd32(E1000_TSYNCRXCTL);
	regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
	regval |= tsync_rx_ctl;
	wr32(E1000_TSYNCRXCTL, regval);

	/* define which PTP packets are time stamped */
	wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

	/* define ethertype filter for timestamped packets */
	if (is_l2)
		wr32(E1000_ETQF(3),
				(E1000_ETQF_FILTER_ENABLE | /* enable filter */
				 E1000_ETQF_1588 | /* enable timestamping */
				 ETH_P_1588));     /* 1588 eth protocol type */
	else
		wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
	/* L4 Queue Filter[3]: filter by destination port and protocol */
	if (is_l4) {
		u32 ftqf = (IPPROTO_UDP /* UDP */
			| E1000_FTQF_VF_BP /* VF not compared */
			| E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
			| E1000_FTQF_MASK); /* mask all inputs */
		ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

		wr32(E1000_IMIR(3), htons(PTP_PORT));
		wr32(E1000_IMIREXT(3),
		     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
		if (hw->mac.type == e1000_82576) {
			/* enable source port check */
			wr32(E1000_SPQF(3), htons(PTP_PORT));
			ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
		}
		wr32(E1000_FTQF(3), ftqf);
	} else {
		wr32(E1000_FTQF(3), E1000_FTQF_MASK);
	}
	wrfl();

	/* remember the accepted configuration for SIOCGHWTSTAMP-style reads */
	adapter->hwtstamp_config = config;

	/* clear TX/RX time stamp registers, just to be sure */
	regval = rd32(E1000_TXSTMPH);
	regval = rd32(E1000_RXSTMPH);

	/* report the (possibly adjusted) configuration back to user space */
	return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
		-EFAULT : 0;
}
5619
5620 /**
5621  * igb_ioctl -
5622  * @netdev:
5623  * @ifreq:
5624  * @cmd:
5625  **/
5626 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5627 {
5628         switch (cmd) {
5629         case SIOCGMIIPHY:
5630         case SIOCGMIIREG:
5631         case SIOCSMIIREG:
5632                 return igb_mii_ioctl(netdev, ifr, cmd);
5633         case SIOCSHWTSTAMP:
5634                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5635         default:
5636                 return -EOPNOTSUPP;
5637         }
5638 }
5639
5640 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5641 {
5642         struct igb_adapter *adapter = hw->back;
5643         u16 cap_offset;
5644
5645         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5646         if (!cap_offset)
5647                 return -E1000_ERR_CONFIG;
5648
5649         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5650
5651         return 0;
5652 }
5653
5654 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5655 {
5656         struct igb_adapter *adapter = hw->back;
5657         u16 cap_offset;
5658
5659         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5660         if (!cap_offset)
5661                 return -E1000_ERR_CONFIG;
5662
5663         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5664
5665         return 0;
5666 }
5667
/**
 * igb_vlan_rx_register - install a VLAN group and toggle tag stripping
 * @netdev: network interface device structure
 * @grp: new VLAN group, or NULL to disable VLAN tag insert/strip
 **/
static void igb_vlan_rx_register(struct net_device *netdev,
				 struct vlan_group *grp)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl;

	/* quiesce interrupts while the group pointer is swapped */
	igb_irq_disable(adapter);
	adapter->vlgrp = grp;

	if (grp) {
		/* enable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl |= E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);

		/* Disable CFI check */
		rctl = rd32(E1000_RCTL);
		rctl &= ~E1000_RCTL_CFIEN;
		wr32(E1000_RCTL, rctl);
	} else {
		/* disable VLAN tag insert/strip */
		ctrl = rd32(E1000_CTRL);
		ctrl &= ~E1000_CTRL_VME;
		wr32(E1000_CTRL, ctrl);
	}

	/* re-program max packet length (tag presence changes frame size) */
	igb_rlpml_set(adapter);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);
}
5700
5701 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5702 {
5703         struct igb_adapter *adapter = netdev_priv(netdev);
5704         struct e1000_hw *hw = &adapter->hw;
5705         int pf_id = adapter->vfs_allocated_count;
5706
5707         /* attempt to add filter to vlvf array */
5708         igb_vlvf_set(adapter, vid, true, pf_id);
5709
5710         /* add the filter since PF can receive vlans w/o entry in vlvf */
5711         igb_vfta_set(hw, vid, true);
5712 }
5713
/**
 * igb_vlan_rx_kill_vid - remove a VLAN id from the hardware filter tables
 * @netdev: network interface device structure
 * @vid: VLAN id to remove
 **/
static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int pf_id = adapter->vfs_allocated_count;
	s32 err;

	/* interrupts off while the per-vid group slot is cleared */
	igb_irq_disable(adapter);
	vlan_group_set_device(adapter->vlgrp, vid, NULL);

	if (!test_bit(__IGB_DOWN, &adapter->state))
		igb_irq_enable(adapter);

	/* remove vlan from VLVF table array */
	err = igb_vlvf_set(adapter, vid, false, pf_id);

	/* if vid was not present in VLVF just remove it from table */
	if (err)
		igb_vfta_set(hw, vid, false);
}
5734
5735 static void igb_restore_vlan(struct igb_adapter *adapter)
5736 {
5737         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5738
5739         if (adapter->vlgrp) {
5740                 u16 vid;
5741                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5742                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5743                                 continue;
5744                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5745                 }
5746         }
5747 }
5748
5749 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5750 {
5751         struct pci_dev *pdev = adapter->pdev;
5752         struct e1000_mac_info *mac = &adapter->hw.mac;
5753
5754         mac->autoneg = 0;
5755
5756         switch (spddplx) {
5757         case SPEED_10 + DUPLEX_HALF:
5758                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5759                 break;
5760         case SPEED_10 + DUPLEX_FULL:
5761                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5762                 break;
5763         case SPEED_100 + DUPLEX_HALF:
5764                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5765                 break;
5766         case SPEED_100 + DUPLEX_FULL:
5767                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5768                 break;
5769         case SPEED_1000 + DUPLEX_FULL:
5770                 mac->autoneg = 1;
5771                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5772                 break;
5773         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5774         default:
5775                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5776                 return -EINVAL;
5777         }
5778         return 0;
5779 }
5780
/**
 * __igb_shutdown - common suspend/poweroff path
 * @pdev: PCI device
 * @enable_wake: out parameter; set true when a wake source (WoL filter
 *	or manageability) requires the link to stay powered
 *
 * Stops the interface, programs the wake-up filter registers (WUC/WUFC)
 * and powers the link up or down accordingly. Returns 0, or the error
 * from pci_save_state() when CONFIG_PM is set.
 **/
static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	u32 ctrl, rctl, status;
	u32 wufc = adapter->wol;
#ifdef CONFIG_PM
	int retval = 0;
#endif

	netif_device_detach(netdev);

	if (netif_running(netdev))
		igb_close(netdev);

	igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
	retval = pci_save_state(pdev);
	if (retval)
		return retval;
#endif

	status = rd32(E1000_STATUS);
	/* link is currently up: don't arm wake-on-link-change */
	if (status & E1000_STATUS_LU)
		wufc &= ~E1000_WUFC_LNKC;

	if (wufc) {
		/* receiver must keep running so wake packets can match */
		igb_setup_rctl(adapter);
		igb_set_rx_mode(netdev);

		/* turn on all-multi mode if wake on multicast is enabled */
		if (wufc & E1000_WUFC_MC) {
			rctl = rd32(E1000_RCTL);
			rctl |= E1000_RCTL_MPE;
			wr32(E1000_RCTL, rctl);
		}

		ctrl = rd32(E1000_CTRL);
		/* advertise wake from D3Cold */
		#define E1000_CTRL_ADVD3WUC 0x00100000
		/* phy power management enable */
		#define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
		ctrl |= E1000_CTRL_ADVD3WUC;
		wr32(E1000_CTRL, ctrl);

		/* Allow time for pending master requests to run */
		igb_disable_pcie_master(hw);

		wr32(E1000_WUC, E1000_WUC_PME_EN);
		wr32(E1000_WUFC, wufc);
	} else {
		/* no wake sources: clear the wake-up control registers */
		wr32(E1000_WUC, 0);
		wr32(E1000_WUFC, 0);
	}

	/* manageability (en_mng_pt) also requires the port powered */
	*enable_wake = wufc || adapter->en_mng_pt;
	if (!*enable_wake)
		igb_power_down_link(adapter);
	else
		igb_power_up_link(adapter);

	/* Release control of h/w to f/w.  If f/w is AMT enabled, this
	 * would have already happened in close and is redundant. */
	igb_release_hw_control(adapter);

	pci_disable_device(pdev);

	return 0;
}
5852
5853 #ifdef CONFIG_PM
5854 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5855 {
5856         int retval;
5857         bool wake;
5858
5859         retval = __igb_shutdown(pdev, &wake);
5860         if (retval)
5861                 return retval;
5862
5863         if (wake) {
5864                 pci_prepare_to_sleep(pdev);
5865         } else {
5866                 pci_wake_from_d3(pdev, false);
5867                 pci_set_power_state(pdev, PCI_D3hot);
5868         }
5869
5870         return 0;
5871 }
5872
5873 static int igb_resume(struct pci_dev *pdev)
5874 {
5875         struct net_device *netdev = pci_get_drvdata(pdev);
5876         struct igb_adapter *adapter = netdev_priv(netdev);
5877         struct e1000_hw *hw = &adapter->hw;
5878         u32 err;
5879
5880         pci_set_power_state(pdev, PCI_D0);
5881         pci_restore_state(pdev);
5882         pci_save_state(pdev);
5883
5884         err = pci_enable_device_mem(pdev);
5885         if (err) {
5886                 dev_err(&pdev->dev,
5887                         "igb: Cannot enable PCI device from suspend\n");
5888                 return err;
5889         }
5890         pci_set_master(pdev);
5891
5892         pci_enable_wake(pdev, PCI_D3hot, 0);
5893         pci_enable_wake(pdev, PCI_D3cold, 0);
5894
5895         if (igb_init_interrupt_scheme(adapter)) {
5896                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5897                 return -ENOMEM;
5898         }
5899
5900         igb_reset(adapter);
5901
5902         /* let the f/w know that the h/w is now under the control of the
5903          * driver. */
5904         igb_get_hw_control(adapter);
5905
5906         wr32(E1000_WUS, ~0);
5907
5908         if (netif_running(netdev)) {
5909                 err = igb_open(netdev);
5910                 if (err)
5911                         return err;
5912         }
5913
5914         netif_device_attach(netdev);
5915
5916         return 0;
5917 }
5918 #endif
5919
5920 static void igb_shutdown(struct pci_dev *pdev)
5921 {
5922         bool wake;
5923
5924         __igb_shutdown(pdev, &wake);
5925
5926         if (system_state == SYSTEM_POWER_OFF) {
5927                 pci_wake_from_d3(pdev, wake);
5928                 pci_set_power_state(pdev, PCI_D3hot);
5929         }
5930 }
5931
5932 #ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	int i;

	/* legacy/MSI mode: one shared interrupt, only q_vector[0] exists */
	if (!adapter->msix_entries) {
		struct igb_q_vector *q_vector = adapter->q_vector[0];
		igb_irq_disable(adapter);
		napi_schedule(&q_vector->napi);
		return;
	}

	/* MSI-X: mask each vector's interrupt (EIMC) and kick its NAPI */
	for (i = 0; i < adapter->num_q_vectors; i++) {
		struct igb_q_vector *q_vector = adapter->q_vector[i];
		wr32(E1000_EIMC, q_vector->eims_value);
		napi_schedule(&q_vector->napi);
	}
}
5957 #endif /* CONFIG_NET_POLL_CONTROLLER */
5958
/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
					      pci_channel_state_t state)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	netif_device_detach(netdev);

	/* permanent failure: tell the PCI core to give up on the device */
	if (state == pci_channel_io_perm_failure)
		return PCI_ERS_RESULT_DISCONNECT;

	if (netif_running(netdev))
		igb_down(adapter);
	pci_disable_device(pdev);

	/* Request a slot reset. */
	return PCI_ERS_RESULT_NEED_RESET;
}
5985
/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first-half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);
	struct e1000_hw *hw = &adapter->hw;
	pci_ers_result_t result;
	int err;

	if (pci_enable_device_mem(pdev)) {
		dev_err(&pdev->dev,
			"Cannot re-enable PCI device after reset.\n");
		result = PCI_ERS_RESULT_DISCONNECT;
	} else {
		pci_set_master(pdev);
		pci_restore_state(pdev);
		pci_save_state(pdev);

		/* wake sources are re-armed later if needed */
		pci_enable_wake(pdev, PCI_D3hot, 0);
		pci_enable_wake(pdev, PCI_D3cold, 0);

		igb_reset(adapter);
		/* clear any stale wake-up status bits */
		wr32(E1000_WUS, ~0);
		result = PCI_ERS_RESULT_RECOVERED;
	}

	/* clear AER uncorrectable error status left over from the fault */
	err = pci_cleanup_aer_uncorrect_error_status(pdev);
	if (err) {
		dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
			"failed 0x%0x\n", err);
		/* non-fatal, continue */
	}

	return result;
}
6027
/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * its OK to resume normal operation. Implementation resembles the
 * second-half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igb_adapter *adapter = netdev_priv(netdev);

	if (netif_running(netdev)) {
		if (igb_up(adapter)) {
			dev_err(&pdev->dev, "igb_up failed after reset\n");
			return;
		}
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver. */
	igb_get_hw_control(adapter);
}
6054
6055 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6056                              u8 qsel)
6057 {
6058         u32 rar_low, rar_high;
6059         struct e1000_hw *hw = &adapter->hw;
6060
6061         /* HW expects these in little endian so we reverse the byte order
6062          * from network order (big endian) to little endian
6063          */
6064         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6065                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6066         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6067
6068         /* Indicate to hardware the Address is Valid. */
6069         rar_high |= E1000_RAH_AV;
6070
6071         if (hw->mac.type == e1000_82575)
6072                 rar_high |= E1000_RAH_POOL_1 * qsel;
6073         else
6074                 rar_high |= E1000_RAH_POOL_1 << qsel;