drivers/net/igb/igb_main.c
/*******************************************************************************

  Intel(R) Gigabit Ethernet Linux driver
  Copyright(c) 2007-2009 Intel Corporation.

  This program is free software; you can redistribute it and/or modify it
  under the terms and conditions of the GNU General Public License,
  version 2, as published by the Free Software Foundation.

  This program is distributed in the hope it will be useful, but WITHOUT
  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  more details.

  You should have received a copy of the GNU General Public License along with
  this program; if not, write to the Free Software Foundation, Inc.,
  51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.

  The full GNU General Public License is included in this distribution in
  the file called "COPYING".

  Contact Information:
  e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497

*******************************************************************************/

#include <linux/module.h>
#include <linux/types.h>
#include <linux/init.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/netdevice.h>
#include <linux/ipv6.h>
#include <linux/slab.h>
#include <net/checksum.h>
#include <net/ip6_checksum.h>
#include <linux/net_tstamp.h>
#include <linux/mii.h>
#include <linux/ethtool.h>
#include <linux/if_vlan.h>
#include <linux/pci.h>
#include <linux/pci-aspm.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/if_ether.h>
#include <linux/aer.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
#include "igb.h"

#define DRV_VERSION "2.1.0-k2"
char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;
static const char igb_driver_string[] =
                                "Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";

static const struct e1000_info *igb_info_tbl[] = {
        [board_82575] = &e1000_82575_info,
};

static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
        { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
        /* required last entry */
        {0, }
};

MODULE_DEVICE_TABLE(pci, igb_pci_tbl);

void igb_reset(struct igb_adapter *);
static int igb_setup_all_tx_resources(struct igb_adapter *);
static int igb_setup_all_rx_resources(struct igb_adapter *);
static void igb_free_all_tx_resources(struct igb_adapter *);
static void igb_free_all_rx_resources(struct igb_adapter *);
static void igb_setup_mrqc(struct igb_adapter *);
void igb_update_stats(struct igb_adapter *);
static int igb_probe(struct pci_dev *, const struct pci_device_id *);
static void __devexit igb_remove(struct pci_dev *pdev);
static int igb_sw_init(struct igb_adapter *);
static int igb_open(struct net_device *);
static int igb_close(struct net_device *);
static void igb_configure_tx(struct igb_adapter *);
static void igb_configure_rx(struct igb_adapter *);
static void igb_clean_all_tx_rings(struct igb_adapter *);
static void igb_clean_all_rx_rings(struct igb_adapter *);
static void igb_clean_tx_ring(struct igb_ring *);
static void igb_clean_rx_ring(struct igb_ring *);
static void igb_set_rx_mode(struct net_device *);
static void igb_update_phy_info(unsigned long);
static void igb_watchdog(unsigned long);
static void igb_watchdog_task(struct work_struct *);
static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
static struct net_device_stats *igb_get_stats(struct net_device *);
static int igb_change_mtu(struct net_device *, int);
static int igb_set_mac(struct net_device *, void *);
static void igb_set_uta(struct igb_adapter *adapter);
static irqreturn_t igb_intr(int irq, void *);
static irqreturn_t igb_intr_msi(int irq, void *);
static irqreturn_t igb_msix_other(int irq, void *);
static irqreturn_t igb_msix_ring(int irq, void *);
#ifdef CONFIG_IGB_DCA
static void igb_update_dca(struct igb_q_vector *);
static void igb_setup_dca(struct igb_adapter *);
#endif /* CONFIG_IGB_DCA */
static bool igb_clean_tx_irq(struct igb_q_vector *);
static int igb_poll(struct napi_struct *, int);
static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
static void igb_tx_timeout(struct net_device *);
static void igb_reset_task(struct work_struct *);
static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
static void igb_vlan_rx_add_vid(struct net_device *, u16);
static void igb_vlan_rx_kill_vid(struct net_device *, u16);
static void igb_restore_vlan(struct igb_adapter *);
static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
static void igb_ping_all_vfs(struct igb_adapter *);
static void igb_msg_task(struct igb_adapter *);
static void igb_vmm_control(struct igb_adapter *);
static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
static int igb_ndo_set_vf_vlan(struct net_device *netdev,
                               int vf, u16 vlan, u8 qos);
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
                                 struct ifla_vf_info *ivi);

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *, pm_message_t);
static int igb_resume(struct pci_dev *);
#endif
static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
static struct notifier_block dca_notifier = {
        .notifier_call  = igb_notify_dca,
        .next           = NULL,
        .priority       = 0
};
#endif
#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *);
#endif
#ifdef CONFIG_PCI_IOV
static unsigned int max_vfs = 0;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
                 "per physical function");
#endif /* CONFIG_PCI_IOV */

static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
                     pci_channel_state_t);
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
static void igb_io_resume(struct pci_dev *);

static struct pci_error_handlers igb_err_handler = {
        .error_detected = igb_io_error_detected,
        .slot_reset = igb_io_slot_reset,
        .resume = igb_io_resume,
};

static struct pci_driver igb_driver = {
        .name     = igb_driver_name,
        .id_table = igb_pci_tbl,
        .probe    = igb_probe,
        .remove   = __devexit_p(igb_remove),
#ifdef CONFIG_PM
        /* Power Management Hooks */
        .suspend  = igb_suspend,
        .resume   = igb_resume,
#endif
        .shutdown = igb_shutdown,
        .err_handler = &igb_err_handler
};

MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_VERSION(DRV_VERSION);

/**
 * igb_read_clock - read raw cycle counter (to be used by time counter)
 */
static cycle_t igb_read_clock(const struct cyclecounter *tc)
{
        struct igb_adapter *adapter =
                container_of(tc, struct igb_adapter, cycles);
        struct e1000_hw *hw = &adapter->hw;
        u64 stamp = 0;
        int shift = 0;

        /*
         * The timestamp latches on the lowest register read.  For the 82580,
         * the lowest register is SYSTIMR instead of SYSTIML.  However, we
         * never adjusted TIMINCA, so SYSTIMR just reads as all 0s and can
         * be ignored.
         */
        if (hw->mac.type == e1000_82580) {
                stamp = rd32(E1000_SYSTIMR) >> 8;
                shift = IGB_82580_TSYNC_SHIFT;
        }

        stamp |= (u64)rd32(E1000_SYSTIML) << shift;
        stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
        return stamp;
}

#ifdef DEBUG
/**
 * igb_get_hw_dev_name - return device name string
 * used by hardware layer to print debugging information
 **/
char *igb_get_hw_dev_name(struct e1000_hw *hw)
{
        struct igb_adapter *adapter = hw->back;
        return adapter->netdev->name;
}

/**
 * igb_get_time_str - format current NIC and system time as string
 */
static char *igb_get_time_str(struct igb_adapter *adapter,
                              char buffer[160])
{
        cycle_t hw = adapter->cycles.read(&adapter->cycles);
        struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
        struct timespec sys;
        struct timespec delta;
        getnstimeofday(&sys);

        delta = timespec_sub(nic, sys);

        sprintf(buffer,
                "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
                hw,
                (long)nic.tv_sec, nic.tv_nsec,
                (long)sys.tv_sec, sys.tv_nsec,
                (long)delta.tv_sec, delta.tv_nsec);

        return buffer;
}
#endif

/**
 * igb_init_module - Driver Registration Routine
 *
 * igb_init_module is the first routine called when the driver is
 * loaded. All it does is register with the PCI subsystem.
 **/
static int __init igb_init_module(void)
{
        int ret;
        printk(KERN_INFO "%s - version %s\n",
               igb_driver_string, igb_driver_version);

        printk(KERN_INFO "%s\n", igb_copyright);

#ifdef CONFIG_IGB_DCA
        dca_register_notify(&dca_notifier);
#endif
        ret = pci_register_driver(&igb_driver);
        return ret;
}

module_init(igb_init_module);

/**
 * igb_exit_module - Driver Exit Cleanup Routine
 *
 * igb_exit_module is called just before the driver is removed
 * from memory.
 **/
static void __exit igb_exit_module(void)
{
#ifdef CONFIG_IGB_DCA
        dca_unregister_notify(&dca_notifier);
#endif
        pci_unregister_driver(&igb_driver);
}

module_exit(igb_exit_module);

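/*
 * Q_IDX_82576() interleaves queue indices into the two register banks used
 * by the 82576 in VF mode: even indices map to the low bank and odd indices
 * to the high bank, e.g. 0 -> 0, 1 -> 8, 2 -> 1, 3 -> 9, 4 -> 2, ...
 */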
#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
/**
 * igb_cache_ring_register - Descriptor ring to register mapping
 * @adapter: board private structure to initialize
 *
 * Once we know the feature-set enabled for the device, we'll cache
 * the register offset the descriptor ring is assigned to.
 **/
static void igb_cache_ring_register(struct igb_adapter *adapter)
{
        int i = 0, j = 0;
        u32 rbase_offset = adapter->vfs_allocated_count;

        switch (adapter->hw.mac.type) {
        case e1000_82576:
                /* The queues are allocated for virtualization such that VF 0
                 * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
                 * To avoid collisions we start at the first free queue
                 * and continue consuming queues in the same sequence.
                 */
                if (adapter->vfs_allocated_count) {
                        for (; i < adapter->rss_queues; i++)
                                adapter->rx_ring[i]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(i);
                        for (; j < adapter->rss_queues; j++)
                                adapter->tx_ring[j]->reg_idx = rbase_offset +
                                                               Q_IDX_82576(j);
                }
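                /* Fall through - any remaining queues get the sequential
                 * mapping below */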
        case e1000_82575:
        case e1000_82580:
        default:
                for (; i < adapter->num_rx_queues; i++)
                        adapter->rx_ring[i]->reg_idx = rbase_offset + i;
                for (; j < adapter->num_tx_queues; j++)
                        adapter->tx_ring[j]->reg_idx = rbase_offset + j;
                break;
        }
}

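/**
 * igb_free_queues - Free memory for all rings
 * @adapter: board private structure to clean up
 **/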
static void igb_free_queues(struct igb_adapter *adapter)
{
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                kfree(adapter->tx_ring[i]);
                adapter->tx_ring[i] = NULL;
        }
        for (i = 0; i < adapter->num_rx_queues; i++) {
                kfree(adapter->rx_ring[i]);
                adapter->rx_ring[i] = NULL;
        }
        adapter->num_rx_queues = 0;
        adapter->num_tx_queues = 0;
}

/**
 * igb_alloc_queues - Allocate memory for all rings
 * @adapter: board private structure to initialize
 *
 * We allocate one ring per queue at run-time since we don't know the
 * number of queues at compile-time.
 **/
static int igb_alloc_queues(struct igb_adapter *adapter)
{
        struct igb_ring *ring;
        int i;

        for (i = 0; i < adapter->num_tx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->tx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                /* For 82575, context index must be unique per ring. */
                if (adapter->hw.mac.type == e1000_82575)
                        ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
                adapter->tx_ring[i] = ring;
        }

        for (i = 0; i < adapter->num_rx_queues; i++) {
                ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
                if (!ring)
                        goto err;
                ring->count = adapter->rx_ring_count;
                ring->queue_index = i;
                ring->pdev = adapter->pdev;
                ring->netdev = adapter->netdev;
                ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
                ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
                /* set flag indicating ring supports SCTP checksum offload */
                if (adapter->hw.mac.type >= e1000_82576)
                        ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
                adapter->rx_ring[i] = ring;
        }

        igb_cache_ring_register(adapter);

        return 0;

err:
        igb_free_queues(adapter);

        return -ENOMEM;
}

#define IGB_N0_QUEUE -1
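/**
 * igb_assign_vector - route a queue pair's interrupts to an MSI-X vector
 * @q_vector: vector to configure
 * @msix_vector: MSI-X vector number to assign
 *
 * Programs the MSIXBM bitmask (82575) or the IVAR table (82576/82580) so
 * that interrupts for the rings owned by this q_vector are raised on the
 * given vector.
 **/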
static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
{
        u32 msixbm = 0;
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
        u32 ivar, index;
        int rx_queue = IGB_N0_QUEUE;
        int tx_queue = IGB_N0_QUEUE;

        if (q_vector->rx_ring)
                rx_queue = q_vector->rx_ring->reg_idx;
        if (q_vector->tx_ring)
                tx_queue = q_vector->tx_ring->reg_idx;

        switch (hw->mac.type) {
        case e1000_82575:
                /* The 82575 assigns vectors using a bitmask, which matches the
                   bitmask for the EICR/EIMS/EIMC registers.  To assign one
                   or more queues to a vector, we write the appropriate bits
                   into the MSIXBM register for that vector. */
                if (rx_queue > IGB_N0_QUEUE)
                        msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
                if (tx_queue > IGB_N0_QUEUE)
                        msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
                if (!adapter->msix_entries && msix_vector == 0)
                        msixbm |= E1000_EIMS_OTHER;
                array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
                q_vector->eims_value = msixbm;
                break;
        case e1000_82576:
                /* 82576 uses a table-based method for assigning vectors.
                   Each queue has a single entry in the table to which we write
                   a vector number along with a "valid" bit.  Sadly, the layout
                   of the table is somewhat counterintuitive. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue < 8) {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        } else {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue & 0x7);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue < 8) {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        } else {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        case e1000_82580:
                /* 82580 uses the same table-based approach as the 82576, but
                   has fewer entries; as a result, two queues share each table
                   entry, with odd-numbered queues carried in the upper bytes. */
                if (rx_queue > IGB_N0_QUEUE) {
                        index = (rx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (rx_queue & 0x1) {
                                /* vector goes into third byte of register */
                                ivar = ivar & 0xFF00FFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
                        } else {
                                /* vector goes into low byte of register */
                                ivar = ivar & 0xFFFFFF00;
                                ivar |= msix_vector | E1000_IVAR_VALID;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                if (tx_queue > IGB_N0_QUEUE) {
                        index = (tx_queue >> 1);
                        ivar = array_rd32(E1000_IVAR0, index);
                        if (tx_queue & 0x1) {
                                /* vector goes into high byte of register */
                                ivar = ivar & 0x00FFFFFF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
                        } else {
                                /* vector goes into second byte of register */
                                ivar = ivar & 0xFFFF00FF;
                                ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
                        }
                        array_wr32(E1000_IVAR0, index, ivar);
                }
                q_vector->eims_value = 1 << msix_vector;
                break;
        default:
                BUG();
                break;
        }

        /* add q_vector eims value to global eims_enable_mask */
        adapter->eims_enable_mask |= q_vector->eims_value;

        /* configure q_vector to set itr on first interrupt */
        q_vector->set_itr = 1;
}

/**
 * igb_configure_msix - Configure MSI-X hardware
 *
 * igb_configure_msix sets up the hardware to properly
 * generate MSI-X interrupts.
 **/
static void igb_configure_msix(struct igb_adapter *adapter)
{
        u32 tmp;
        int i, vector = 0;
        struct e1000_hw *hw = &adapter->hw;

        adapter->eims_enable_mask = 0;

        /* set vector for other causes, i.e. link changes */
        switch (hw->mac.type) {
        case e1000_82575:
                tmp = rd32(E1000_CTRL_EXT);
                /* enable MSI-X PBA support */
                tmp |= E1000_CTRL_EXT_PBA_CLR;

                /* Auto-Mask interrupts upon ICR read. */
                tmp |= E1000_CTRL_EXT_EIAME;
                tmp |= E1000_CTRL_EXT_IRCA;

                wr32(E1000_CTRL_EXT, tmp);

                /* enable msix_other interrupt */
                array_wr32(E1000_MSIXBM(0), vector++,
                                      E1000_EIMS_OTHER);
                adapter->eims_other = E1000_EIMS_OTHER;

                break;

        case e1000_82576:
        case e1000_82580:
                /* Turn on MSI-X capability first, or our settings
                 * won't stick.  And it will take days to debug. */
                wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
                                E1000_GPIE_PBA | E1000_GPIE_EIAME |
                                E1000_GPIE_NSICR);

                /* enable msix_other interrupt */
                adapter->eims_other = 1 << vector;
                tmp = (vector++ | E1000_IVAR_VALID) << 8;

                wr32(E1000_IVAR_MISC, tmp);
                break;
        default:
                /* do nothing, since nothing else supports MSI-X */
                break;
        } /* switch (hw->mac.type) */

        adapter->eims_enable_mask |= adapter->eims_other;

        for (i = 0; i < adapter->num_q_vectors; i++)
                igb_assign_vector(adapter->q_vector[i], vector++);

        wrfl();
}

/**
 * igb_request_msix - Initialize MSI-X interrupts
 *
 * igb_request_msix allocates MSI-X vectors and requests interrupts from the
 * kernel.
 **/
static int igb_request_msix(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        int i, err = 0, vector = 0;

        err = request_irq(adapter->msix_entries[vector].vector,
                          igb_msix_other, 0, netdev->name, adapter);
        if (err)
                goto out;
        vector++;

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];

                q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);

                if (q_vector->rx_ring && q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else if (q_vector->tx_ring)
                        sprintf(q_vector->name, "%s-tx-%u", netdev->name,
                                q_vector->tx_ring->queue_index);
                else if (q_vector->rx_ring)
                        sprintf(q_vector->name, "%s-rx-%u", netdev->name,
                                q_vector->rx_ring->queue_index);
                else
                        sprintf(q_vector->name, "%s-unused", netdev->name);

                err = request_irq(adapter->msix_entries[vector].vector,
                                  igb_msix_ring, 0, q_vector->name,
                                  q_vector);
                if (err)
                        goto out;
                vector++;
        }

        igb_configure_msix(adapter);
        return 0;
out:
        return err;
}

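/**
 * igb_reset_interrupt_capability - release MSI-X/MSI resources
 * @adapter: board private structure
 *
 * Returns the interrupt configuration to its initial state so that the
 * interrupt mode can be renegotiated with the PCI layer.
 **/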
static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                pci_disable_msix(adapter->pdev);
                kfree(adapter->msix_entries);
                adapter->msix_entries = NULL;
        } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
                pci_disable_msi(adapter->pdev);
        }
}

/**
 * igb_free_q_vectors - Free memory allocated for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * This function frees the memory allocated to the q_vectors.  In addition if
 * NAPI is enabled it will delete any references to the NAPI struct prior
 * to freeing the q_vector.
 **/
static void igb_free_q_vectors(struct igb_adapter *adapter)
{
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
                adapter->q_vector[v_idx] = NULL;
                if (!q_vector)
                        continue;
                netif_napi_del(&q_vector->napi);
                kfree(q_vector);
        }
        adapter->num_q_vectors = 0;
}

/**
 * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
 *
 * This function resets the device so that it has 0 rx queues, tx queues, and
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
        igb_free_queues(adapter);
        igb_free_q_vectors(adapter);
        igb_reset_interrupt_capability(adapter);
}

/**
 * igb_set_interrupt_capability - set MSI or MSI-X if supported
 *
 * Attempt to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static void igb_set_interrupt_capability(struct igb_adapter *adapter)
{
        int err;
        int numvecs, i;

        /* Number of supported queues. */
        adapter->num_rx_queues = adapter->rss_queues;
        adapter->num_tx_queues = adapter->rss_queues;

        /* start with one vector for every rx queue */
        numvecs = adapter->num_rx_queues;

        /* if tx handler is separate add 1 for every tx queue */
        if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
                numvecs += adapter->num_tx_queues;

        /* store the number of vectors reserved for queues */
        adapter->num_q_vectors = numvecs;

        /* add 1 vector for link status interrupts */
        numvecs++;
        adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
                                        GFP_KERNEL);
        if (!adapter->msix_entries)
                goto msi_only;

        for (i = 0; i < numvecs; i++)
                adapter->msix_entries[i].entry = i;

        err = pci_enable_msix(adapter->pdev,
                              adapter->msix_entries,
                              numvecs);
        if (err == 0)
                goto out;

        igb_reset_interrupt_capability(adapter);

        /* If we can't do MSI-X, try MSI */
msi_only:
#ifdef CONFIG_PCI_IOV
        /* disable SR-IOV for non MSI-X configurations */
        if (adapter->vf_data) {
                struct e1000_hw *hw = &adapter->hw;
                /* disable iov and allow time for transactions to clear */
                pci_disable_sriov(adapter->pdev);
                msleep(500);

                kfree(adapter->vf_data);
                adapter->vf_data = NULL;
                wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
                msleep(100);
                dev_info(&adapter->pdev->dev, "IOV Disabled\n");
        }
#endif
        adapter->vfs_allocated_count = 0;
        adapter->rss_queues = 1;
        adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
        adapter->num_rx_queues = 1;
        adapter->num_tx_queues = 1;
        adapter->num_q_vectors = 1;
        if (!pci_enable_msi(adapter->pdev))
                adapter->flags |= IGB_FLAG_HAS_MSI;
out:
        /* Notify the stack of the (possibly) reduced Tx Queue count. */
        adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
        return;
}

/**
 * igb_alloc_q_vectors - Allocate memory for interrupt vectors
 * @adapter: board private structure to initialize
 *
 * We allocate one q_vector per queue interrupt.  If allocation fails we
 * return -ENOMEM.
 **/
static int igb_alloc_q_vectors(struct igb_adapter *adapter)
{
        struct igb_q_vector *q_vector;
        struct e1000_hw *hw = &adapter->hw;
        int v_idx;

        for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
                q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
                if (!q_vector)
                        goto err_out;
                q_vector->adapter = adapter;
                q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
                q_vector->itr_val = IGB_START_ITR;
                netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
                adapter->q_vector[v_idx] = q_vector;
        }
        return 0;

err_out:
        igb_free_q_vectors(adapter);
        return -ENOMEM;
}

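/**
 * igb_map_rx_ring_to_vector - associate an Rx ring with a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the Rx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/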
static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->rx_ring = adapter->rx_ring[ring_idx];
        q_vector->rx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->rx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

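/**
 * igb_map_tx_ring_to_vector - associate a Tx ring with a q_vector
 * @adapter: board private structure
 * @ring_idx: index of the Tx ring to map
 * @v_idx: index of the q_vector that will service the ring
 **/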
static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
                                      int ring_idx, int v_idx)
{
        struct igb_q_vector *q_vector = adapter->q_vector[v_idx];

        q_vector->tx_ring = adapter->tx_ring[ring_idx];
        q_vector->tx_ring->q_vector = q_vector;
        q_vector->itr_val = adapter->tx_itr_setting;
        if (q_vector->itr_val && q_vector->itr_val <= 3)
                q_vector->itr_val = IGB_START_ITR;
}

/**
 * igb_map_ring_to_vector - maps allocated queues to vectors
 *
 * This function maps the recently allocated queues to vectors.
 **/
static int igb_map_ring_to_vector(struct igb_adapter *adapter)
{
        int i;
        int v_idx = 0;

        if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
            (adapter->num_q_vectors < adapter->num_tx_queues))
                return -ENOMEM;

        if (adapter->num_q_vectors >=
            (adapter->num_rx_queues + adapter->num_tx_queues)) {
                for (i = 0; i < adapter->num_rx_queues; i++)
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                for (i = 0; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        } else {
                for (i = 0; i < adapter->num_rx_queues; i++) {
                        if (i < adapter->num_tx_queues)
                                igb_map_tx_ring_to_vector(adapter, i, v_idx);
                        igb_map_rx_ring_to_vector(adapter, i, v_idx++);
                }
                for (; i < adapter->num_tx_queues; i++)
                        igb_map_tx_ring_to_vector(adapter, i, v_idx++);
        }
        return 0;
}

/**
 * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
 *
 * This function initializes the interrupts and allocates all of the queues.
 **/
static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        int err;

        igb_set_interrupt_capability(adapter);

        err = igb_alloc_q_vectors(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
                goto err_alloc_q_vectors;
        }

        err = igb_alloc_queues(adapter);
        if (err) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                goto err_alloc_queues;
        }

        err = igb_map_ring_to_vector(adapter);
        if (err) {
                dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
                goto err_map_queues;
        }

        return 0;
err_map_queues:
        igb_free_queues(adapter);
err_alloc_queues:
        igb_free_q_vectors(adapter);
err_alloc_q_vectors:
        igb_reset_interrupt_capability(adapter);
        return err;
}

/**
 * igb_request_irq - initialize interrupts
 *
 * Attempts to configure interrupts using the best available
 * capabilities of the hardware and kernel.
 **/
static int igb_request_irq(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct pci_dev *pdev = adapter->pdev;
        int err = 0;

        if (adapter->msix_entries) {
                err = igb_request_msix(adapter);
                if (!err)
                        goto request_done;
                /* fall back to MSI */
                igb_clear_interrupt_scheme(adapter);
                if (!pci_enable_msi(adapter->pdev))
                        adapter->flags |= IGB_FLAG_HAS_MSI;
                igb_free_all_tx_resources(adapter);
                igb_free_all_rx_resources(adapter);
                adapter->num_tx_queues = 1;
                adapter->num_rx_queues = 1;
                adapter->num_q_vectors = 1;
                err = igb_alloc_q_vectors(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for vectors\n");
                        goto request_done;
                }
                err = igb_alloc_queues(adapter);
                if (err) {
                        dev_err(&pdev->dev,
                                "Unable to allocate memory for queues\n");
                        igb_free_q_vectors(adapter);
                        goto request_done;
                }
                igb_setup_all_tx_resources(adapter);
                igb_setup_all_rx_resources(adapter);
        } else {
                igb_assign_vector(adapter->q_vector[0], 0);
        }

        if (adapter->flags & IGB_FLAG_HAS_MSI) {
                err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
                                  netdev->name, adapter);
                if (!err)
                        goto request_done;

                /* fall back to legacy interrupts */
                igb_reset_interrupt_capability(adapter);
                adapter->flags &= ~IGB_FLAG_HAS_MSI;
        }

        err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
                          netdev->name, adapter);

        if (err)
                dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
                        err);

request_done:
        return err;
}

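/**
 * igb_free_irq - release the IRQs requested by igb_request_irq
 * @adapter: board private structure
 **/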
static void igb_free_irq(struct igb_adapter *adapter)
{
        if (adapter->msix_entries) {
                int vector = 0, i;

                free_irq(adapter->msix_entries[vector++].vector, adapter);

                for (i = 0; i < adapter->num_q_vectors; i++) {
                        struct igb_q_vector *q_vector = adapter->q_vector[i];
                        free_irq(adapter->msix_entries[vector++].vector,
                                 q_vector);
                }
        } else {
                free_irq(adapter->pdev->irq, adapter);
        }
}

/**
 * igb_irq_disable - Mask off interrupt generation on the NIC
 * @adapter: board private structure
 **/
static void igb_irq_disable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        /*
         * We need to be careful when disabling interrupts.  The VFs are also
         * mapped into these registers, so clearing the bits can cause
         * issues for the VF drivers; only clear the bits we set ourselves.
         */
        if (adapter->msix_entries) {
                u32 regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
                wr32(E1000_EIMC, adapter->eims_enable_mask);
                regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
        }

        wr32(E1000_IAM, 0);
        wr32(E1000_IMC, ~0);
        wrfl();
        synchronize_irq(adapter->pdev->irq);
}

/**
 * igb_irq_enable - Enable default interrupt generation settings
 * @adapter: board private structure
 **/
static void igb_irq_enable(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;

        if (adapter->msix_entries) {
                u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
                u32 regval = rd32(E1000_EIAC);
                wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
                regval = rd32(E1000_EIAM);
                wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
                wr32(E1000_EIMS, adapter->eims_enable_mask);
                if (adapter->vfs_allocated_count) {
                        wr32(E1000_MBVFIMR, 0xFF);
                        ims |= E1000_IMS_VMMB;
                }
                if (adapter->hw.mac.type == e1000_82580)
                        ims |= E1000_IMS_DRSTA;

                wr32(E1000_IMS, ims);
        } else {
                wr32(E1000_IMS, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
                wr32(E1000_IAM, IMS_ENABLE_MASK |
                                E1000_IMS_DRSTA);
        }
}

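/**
 * igb_update_mng_vlan - update the management VLAN filter entry
 * @adapter: board private structure
 *
 * Keeps the VLAN filter table in sync with the VLAN id advertised in the
 * firmware's DHCP cookie, removing the stale entry when it changes.
 **/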
static void igb_update_mng_vlan(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u16 vid = adapter->hw.mng_cookie.vlan_id;
        u16 old_vid = adapter->mng_vlan_id;

        if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
                /* add VID to filter table */
                igb_vfta_set(hw, vid, true);
                adapter->mng_vlan_id = vid;
        } else {
                adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
        }

        if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
            (vid != old_vid) &&
            !vlan_group_get_device(adapter->vlgrp, old_vid)) {
                /* remove VID from filter table */
                igb_vfta_set(hw, old_vid, false);
        }
}

/**
 * igb_release_hw_control - release control of the h/w to f/w
 * @adapter: address of board private structure
 *
 * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that the
 * driver is no longer loaded.
 *
 **/
static void igb_release_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware take over control of h/w */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_get_hw_control - get control of the h/w from f/w
 * @adapter: address of board private structure
 *
 * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
 * For ASF and Pass Through versions of f/w this means that
 * the driver is loaded.
 *
 **/
static void igb_get_hw_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl_ext;

        /* Let firmware know the driver has taken over */
        ctrl_ext = rd32(E1000_CTRL_EXT);
        wr32(E1000_CTRL_EXT,
                        ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
}

/**
 * igb_configure - configure the hardware for RX and TX
 * @adapter: private board structure
 **/
static void igb_configure(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        int i;

        igb_get_hw_control(adapter);
        igb_set_rx_mode(netdev);

        igb_restore_vlan(adapter);

        igb_setup_tctl(adapter);
        igb_setup_mrqc(adapter);
        igb_setup_rctl(adapter);

        igb_configure_tx(adapter);
        igb_configure_rx(adapter);

        igb_rx_fifo_flush_82575(&adapter->hw);

        /* call igb_desc_unused which always leaves
         * at least 1 descriptor unused to make sure
         * next_to_use != next_to_clean */
        for (i = 0; i < adapter->num_rx_queues; i++) {
                struct igb_ring *ring = adapter->rx_ring[i];
                igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
        }
}

/**
 * igb_power_up_link - Power up the phy/serdes link
 * @adapter: address of board private structure
 **/
void igb_power_up_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                igb_power_up_phy_copper(&adapter->hw);
        else
                igb_power_up_serdes_link_82575(&adapter->hw);
}

/**
 * igb_power_down_link - Power down the phy/serdes link
 * @adapter: address of board private structure
 */
static void igb_power_down_link(struct igb_adapter *adapter)
{
        if (adapter->hw.phy.media_type == e1000_media_type_copper)
                igb_power_down_phy_copper_82575(&adapter->hw);
        else
                igb_shutdown_serdes_link_82575(&adapter->hw);
}

/**
 * igb_up - Open the interface and prepare it to handle traffic
 * @adapter: board private structure
 **/
int igb_up(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        int i;

        /* hardware has been reset, we need to reload some things */
        igb_configure(adapter);

        clear_bit(__IGB_DOWN, &adapter->state);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_enable(&q_vector->napi);
        }
        if (adapter->msix_entries)
                igb_configure_msix(adapter);
        else
                igb_assign_vector(adapter->q_vector[0], 0);

        /* Clear any pending interrupts. */
        rd32(E1000_ICR);
        igb_irq_enable(adapter);

        /* notify VFs that reset has been completed */
        if (adapter->vfs_allocated_count) {
                u32 reg_data = rd32(E1000_CTRL_EXT);
                reg_data |= E1000_CTRL_EXT_PFRSTD;
                wr32(E1000_CTRL_EXT, reg_data);
        }

        netif_tx_start_all_queues(adapter->netdev);

        /* start the watchdog. */
        hw->mac.get_link_status = 1;
        schedule_work(&adapter->watchdog_task);

        return 0;
}

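/**
 * igb_down - Close the interface and stop all traffic
 * @adapter: board private structure
 **/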
void igb_down(struct igb_adapter *adapter)
{
        struct net_device *netdev = adapter->netdev;
        struct e1000_hw *hw = &adapter->hw;
        u32 tctl, rctl;
        int i;

        /* signal that we're down so the interrupt handler does not
         * reschedule our watchdog timer */
        set_bit(__IGB_DOWN, &adapter->state);

        /* disable receives in the hardware */
        rctl = rd32(E1000_RCTL);
        wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
        /* flush and sleep below */

        netif_tx_stop_all_queues(netdev);

        /* disable transmits in the hardware */
        tctl = rd32(E1000_TCTL);
        tctl &= ~E1000_TCTL_EN;
        wr32(E1000_TCTL, tctl);
        /* flush both disables and wait for them to finish */
        wrfl();
        msleep(10);

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                napi_disable(&q_vector->napi);
        }

        igb_irq_disable(adapter);

        del_timer_sync(&adapter->watchdog_timer);
        del_timer_sync(&adapter->phy_info_timer);

        netif_carrier_off(netdev);

        /* record the stats before reset */
        igb_update_stats(adapter);

        adapter->link_speed = 0;
        adapter->link_duplex = 0;

        if (!pci_channel_offline(adapter->pdev))
                igb_reset(adapter);
        igb_clean_all_tx_rings(adapter);
        igb_clean_all_rx_rings(adapter);
#ifdef CONFIG_IGB_DCA
        /* since we reset the hardware DCA settings were cleared */
        igb_setup_dca(adapter);
#endif
}

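/**
 * igb_reinit_locked - restart the interface, serialized against other resets
 * @adapter: board private structure
 **/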
void igb_reinit_locked(struct igb_adapter *adapter)
{
        WARN_ON(in_interrupt());
        while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
                msleep(1);
        igb_down(adapter);
        igb_up(adapter);
        clear_bit(__IGB_RESETTING, &adapter->state);
}

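/**
 * igb_reset - bring the hardware into a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer, reprograms flow control, resets the MAC
 * and brings the PHY/firmware state back in sync with the driver.
 **/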
void igb_reset(struct igb_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_hw *hw = &adapter->hw;
        struct e1000_mac_info *mac = &hw->mac;
        struct e1000_fc_info *fc = &hw->fc;
        u32 pba = 0, tx_space, min_tx_space, min_rx_space;
        u16 hwm;

        /* Repartition PBA for MTUs greater than 9k.
         * CTRL.RST is required for the change to take effect.
         */
        switch (mac->type) {
        case e1000_82580:
                pba = rd32(E1000_RXPBS);
                pba = igb_rxpbs_adjust_82580(pba);
                break;
        case e1000_82576:
                pba = rd32(E1000_RXPBS);
                pba &= E1000_RXPBS_SIZE_MASK_82576;
                break;
        case e1000_82575:
        default:
                pba = E1000_PBA_34K;
                break;
        }

        if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
            (mac->type < e1000_82576)) {
                /* adjust PBA for jumbo frames */
                wr32(E1000_PBA, pba);

                /* To maintain wire speed transmits, the Tx FIFO should be
                 * large enough to accommodate two full transmit packets,
                 * rounded up to the next 1KB and expressed in KB.  Likewise,
                 * the Rx FIFO should be large enough to accommodate at least
                 * one full receive packet and is similarly rounded up and
                 * expressed in KB. */
                pba = rd32(E1000_PBA);
                /* upper 16 bits has Tx packet buffer allocation size in KB */
                tx_space = pba >> 16;
                /* lower 16 bits has Rx packet buffer allocation size in KB */
                pba &= 0xffff;
                /* the Tx FIFO also stores 16 bytes of descriptor information
                 * per packet, but doesn't need room for the Ethernet FCS
                 * because hardware appends it */
1288                 min_tx_space = (adapter->max_frame_size +
1289                                 sizeof(union e1000_adv_tx_desc) -
1290                                 ETH_FCS_LEN) * 2;
1291                 min_tx_space = ALIGN(min_tx_space, 1024);
1292                 min_tx_space >>= 10;
1293                 /* software strips receive CRC, so leave room for it */
1294                 min_rx_space = adapter->max_frame_size;
1295                 min_rx_space = ALIGN(min_rx_space, 1024);
1296                 min_rx_space >>= 10;
1297
1298                 /* If current Tx allocation is less than the min Tx FIFO size,
1299                  * and the min Tx FIFO size is less than the current Rx FIFO
1300                  * allocation, take space away from current Rx allocation */
1301                 if (tx_space < min_tx_space &&
1302                     ((min_tx_space - tx_space) < pba)) {
1303                         pba = pba - (min_tx_space - tx_space);
1304
1305                         /* if short on rx space, rx wins and must trump tx
1306                          * adjustment */
1307                         if (pba < min_rx_space)
1308                                 pba = min_rx_space;
1309                 }
1310                 wr32(E1000_PBA, pba);
1311         }
1312
1313         /* flow control settings */
1314         /* The high water mark must be low enough to fit one full frame
1315          * (or the size used for early receive) above it in the Rx FIFO.
1316          * Set it to the lower of:
1317          * - 90% of the Rx FIFO size, or
1318          * - the full Rx FIFO size minus one full frame */
1319         hwm = min(((pba << 10) * 9 / 10),
1320                         ((pba << 10) - 2 * adapter->max_frame_size));
1321
1322         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1323         fc->low_water = fc->high_water - 16;
1324         fc->pause_time = 0xFFFF;
1325         fc->send_xon = 1;
1326         fc->current_mode = fc->requested_mode;
1327
1328         /* disable receive for all VFs and wait one second */
1329         if (adapter->vfs_allocated_count) {
1330                 int i;
1331                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1332                         adapter->vf_data[i].flags = 0;
1333
1334                 /* ping all the active vfs to let them know we are going down */
1335                 igb_ping_all_vfs(adapter);
1336
1337                 /* disable transmits and receives */
1338                 wr32(E1000_VFRE, 0);
1339                 wr32(E1000_VFTE, 0);
1340         }
1341
1342         /* Allow time for pending master requests to run */
1343         hw->mac.ops.reset_hw(hw);
1344         wr32(E1000_WUC, 0);
1345
1346         if (hw->mac.ops.init_hw(hw))
1347                 dev_err(&pdev->dev, "Hardware Error\n");
1348
1349         if (hw->mac.type == e1000_82580) {
1350                 u32 reg = rd32(E1000_PCIEMISC);
1351                 wr32(E1000_PCIEMISC,
1352                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1353         }
1354         if (!netif_running(adapter->netdev))
1355                 igb_power_down_link(adapter);
1356
1357         igb_update_mng_vlan(adapter);
1358
1359         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1360         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1361
1362         igb_get_phy_info(hw);
1363 }
1364
1365 static const struct net_device_ops igb_netdev_ops = {
1366         .ndo_open               = igb_open,
1367         .ndo_stop               = igb_close,
1368         .ndo_start_xmit         = igb_xmit_frame_adv,
1369         .ndo_get_stats          = igb_get_stats,
1370         .ndo_set_rx_mode        = igb_set_rx_mode,
1371         .ndo_set_multicast_list = igb_set_rx_mode,
1372         .ndo_set_mac_address    = igb_set_mac,
1373         .ndo_change_mtu         = igb_change_mtu,
1374         .ndo_do_ioctl           = igb_ioctl,
1375         .ndo_tx_timeout         = igb_tx_timeout,
1376         .ndo_validate_addr      = eth_validate_addr,
1377         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1378         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1379         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1380         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1381         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1382         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1383         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1384 #ifdef CONFIG_NET_POLL_CONTROLLER
1385         .ndo_poll_controller    = igb_netpoll,
1386 #endif
1387 };
1388
1389 /**
1390  * igb_probe - Device Initialization Routine
1391  * @pdev: PCI device information struct
1392  * @ent: entry in igb_pci_tbl
1393  *
1394  * Returns 0 on success, negative on failure
1395  *
1396  * igb_probe initializes an adapter identified by a pci_dev structure.
1397  * The OS initialization, configuring of the adapter private structure,
1398  * and a hardware reset occur.
1399  **/
1400 static int __devinit igb_probe(struct pci_dev *pdev,
1401                                const struct pci_device_id *ent)
1402 {
1403         struct net_device *netdev;
1404         struct igb_adapter *adapter;
1405         struct e1000_hw *hw;
1406         u16 eeprom_data = 0;
1407         static int global_quad_port_a; /* probe counter; 0 means this is port A of a quad-port adapter */
1408         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1409         unsigned long mmio_start, mmio_len;
1410         int err, pci_using_dac;
1411         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1412         u32 part_num;
1413
1414         err = pci_enable_device_mem(pdev);
1415         if (err)
1416                 return err;
1417
1418         pci_using_dac = 0;
1419         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1420         if (!err) {
1421                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1422                 if (!err)
1423                         pci_using_dac = 1;
1424         } else {
1425                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1426                 if (err) {
1427                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1428                         if (err) {
1429                                 dev_err(&pdev->dev, "No usable DMA "
1430                                         "configuration, aborting\n");
1431                                 goto err_dma;
1432                         }
1433                 }
1434         }
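
        /*
         * Mask sketch (illustration only): DMA_BIT_MASK(n) expands to the
         * lowest n bits set, so the two configurations attempted above are
         *
         *   DMA_BIT_MASK(64) == 0xffffffffffffffffULL  (no addressing limit)
         *   DMA_BIT_MASK(32) == 0x00000000ffffffffULL  (buffers below 4 GB)
         *
         * Only when both 64-bit masks stick is pci_using_dac set, which in
         * turn enables NETIF_F_HIGHDMA for the netdev further down.
         */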
1435
1436         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1437                                            IORESOURCE_MEM),
1438                                            igb_driver_name);
1439         if (err)
1440                 goto err_pci_reg;
1441
1442         pci_enable_pcie_error_reporting(pdev);
1443
1444         pci_set_master(pdev);
1445         pci_save_state(pdev);
1446
1447         err = -ENOMEM;
1448         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1449                                    IGB_ABS_MAX_TX_QUEUES);
1450         if (!netdev)
1451                 goto err_alloc_etherdev;
1452
1453         SET_NETDEV_DEV(netdev, &pdev->dev);
1454
1455         pci_set_drvdata(pdev, netdev);
1456         adapter = netdev_priv(netdev);
1457         adapter->netdev = netdev;
1458         adapter->pdev = pdev;
1459         hw = &adapter->hw;
1460         hw->back = adapter;
1461         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1462
1463         mmio_start = pci_resource_start(pdev, 0);
1464         mmio_len = pci_resource_len(pdev, 0);
1465
1466         err = -EIO;
1467         hw->hw_addr = ioremap(mmio_start, mmio_len);
1468         if (!hw->hw_addr)
1469                 goto err_ioremap;
1470
1471         netdev->netdev_ops = &igb_netdev_ops;
1472         igb_set_ethtool_ops(netdev);
1473         netdev->watchdog_timeo = 5 * HZ;
1474
1475         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1476
1477         netdev->mem_start = mmio_start;
1478         netdev->mem_end = mmio_start + mmio_len;
1479
1480         /* PCI config space info */
1481         hw->vendor_id = pdev->vendor;
1482         hw->device_id = pdev->device;
1483         hw->revision_id = pdev->revision;
1484         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1485         hw->subsystem_device_id = pdev->subsystem_device;
1486
1487         /* Copy the default MAC, PHY and NVM function pointers */
1488         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1489         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1490         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1491         /* Initialize skew-specific constants */
1492         err = ei->get_invariants(hw);
1493         if (err)
1494                 goto err_sw_init;
1495
1496         /* setup the private structure */
1497         err = igb_sw_init(adapter);
1498         if (err)
1499                 goto err_sw_init;
1500
1501         igb_get_bus_info_pcie(hw);
1502
1503         hw->phy.autoneg_wait_to_complete = false;
1504
1505         /* Copper options */
1506         if (hw->phy.media_type == e1000_media_type_copper) {
1507                 hw->phy.mdix = AUTO_ALL_MODES;
1508                 hw->phy.disable_polarity_correction = false;
1509                 hw->phy.ms_type = e1000_ms_hw_default;
1510         }
1511
1512         if (igb_check_reset_block(hw))
1513                 dev_info(&pdev->dev,
1514                         "PHY reset is blocked due to SOL/IDER session.\n");
1515
1516         netdev->features = NETIF_F_SG |
1517                            NETIF_F_IP_CSUM |
1518                            NETIF_F_HW_VLAN_TX |
1519                            NETIF_F_HW_VLAN_RX |
1520                            NETIF_F_HW_VLAN_FILTER;
1521
1522         netdev->features |= NETIF_F_IPV6_CSUM;
1523         netdev->features |= NETIF_F_TSO;
1524         netdev->features |= NETIF_F_TSO6;
1525         netdev->features |= NETIF_F_GRO;
1526
1527         netdev->vlan_features |= NETIF_F_TSO;
1528         netdev->vlan_features |= NETIF_F_TSO6;
1529         netdev->vlan_features |= NETIF_F_IP_CSUM;
1530         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1531         netdev->vlan_features |= NETIF_F_SG;
1532
1533         if (pci_using_dac)
1534                 netdev->features |= NETIF_F_HIGHDMA;
1535
1536         if (hw->mac.type >= e1000_82576)
1537                 netdev->features |= NETIF_F_SCTP_CSUM;
1538
1539         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1540
1541         /* before reading the NVM, reset the controller to put the device in a
1542          * known good starting state */
1543         hw->mac.ops.reset_hw(hw);
1544
1545         /* make sure the NVM is good */
1546         if (igb_validate_nvm_checksum(hw) < 0) {
1547                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1548                 err = -EIO;
1549                 goto err_eeprom;
1550         }
1551
1552         /* copy the MAC address out of the NVM */
1553         if (hw->mac.ops.read_mac_addr(hw))
1554                 dev_err(&pdev->dev, "NVM Read Error\n");
1555
1556         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1557         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1558
1559         if (!is_valid_ether_addr(netdev->perm_addr)) {
1560                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1561                 err = -EIO;
1562                 goto err_eeprom;
1563         }
1564
1565         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1566                     (unsigned long) adapter);
1567         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1568                     (unsigned long) adapter);
1569
1570         INIT_WORK(&adapter->reset_task, igb_reset_task);
1571         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1572
1573         /* Initialize link properties that are user-changeable */
1574         adapter->fc_autoneg = true;
1575         hw->mac.autoneg = true;
1576         hw->phy.autoneg_advertised = 0x2f;
1577
1578         hw->fc.requested_mode = e1000_fc_default;
1579         hw->fc.current_mode = e1000_fc_default;
1580
1581         igb_validate_mdi_setting(hw);
1582
1583         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1584          * enable the ACPI Magic Packet filter.
1585          */
1586
1587         if (hw->bus.func == 0)
1588                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1589         else if (hw->mac.type == e1000_82580)
1590                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1591                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1592                                  &eeprom_data);
1593         else if (hw->bus.func == 1)
1594                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1595
1596         if (eeprom_data & eeprom_apme_mask)
1597                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1598
1599         /* now that we have the eeprom settings, apply the special cases where
1600          * the eeprom may be wrong or the board simply won't support wake on
1601          * lan on a particular port */
1602         switch (pdev->device) {
1603         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1604                 adapter->eeprom_wol = 0;
1605                 break;
1606         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1607         case E1000_DEV_ID_82576_FIBER:
1608         case E1000_DEV_ID_82576_SERDES:
1609                 /* Wake events only supported on port A for dual fiber
1610                  * regardless of eeprom setting */
1611                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1612                         adapter->eeprom_wol = 0;
1613                 break;
1614         case E1000_DEV_ID_82576_QUAD_COPPER:
1615                 /* if quad port adapter, disable WoL on all but port A */
1616                 if (global_quad_port_a != 0)
1617                         adapter->eeprom_wol = 0;
1618                 else
1619                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1620                 /* Reset for multiple quad port adapters */
1621                 if (++global_quad_port_a == 4)
1622                         global_quad_port_a = 0;
1623                 break;
1624         }
1625
1626         /* initialize the wol settings based on the eeprom settings */
1627         adapter->wol = adapter->eeprom_wol;
1628         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1629
1630         /* reset the hardware with the new settings */
1631         igb_reset(adapter);
1632
1633         /* let the f/w know that the h/w is now under the control of the
1634          * driver. */
1635         igb_get_hw_control(adapter);
1636
1637         strcpy(netdev->name, "eth%d");
1638         err = register_netdev(netdev);
1639         if (err)
1640                 goto err_register;
1641
1642         /* carrier off reporting is important to ethtool even BEFORE open */
1643         netif_carrier_off(netdev);
1644
1645 #ifdef CONFIG_IGB_DCA
1646         if (dca_add_requester(&pdev->dev) == 0) {
1647                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1648                 dev_info(&pdev->dev, "DCA enabled\n");
1649                 igb_setup_dca(adapter);
1650         }
1651
1652 #endif
1653         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1654         /* print bus type/speed/width info */
1655         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1656                  netdev->name,
1657                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1658                                                             "unknown"),
1659                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1660                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1661                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1662                    "unknown"),
1663                  netdev->dev_addr);
1664
1665         igb_read_part_num(hw, &part_num);
1666         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1667                 (part_num >> 8), (part_num & 0xff));
1668
1669         dev_info(&pdev->dev,
1670                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1671                 adapter->msix_entries ? "MSI-X" :
1672                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1673                 adapter->num_rx_queues, adapter->num_tx_queues);
1674
1675         return 0;
1676
1677 err_register:
1678         igb_release_hw_control(adapter);
1679 err_eeprom:
1680         if (!igb_check_reset_block(hw))
1681                 igb_reset_phy(hw);
1682
1683         if (hw->flash_address)
1684                 iounmap(hw->flash_address);
1685 err_sw_init:
1686         igb_clear_interrupt_scheme(adapter);
1687         iounmap(hw->hw_addr);
1688 err_ioremap:
1689         free_netdev(netdev);
1690 err_alloc_etherdev:
1691         pci_release_selected_regions(pdev,
1692                                      pci_select_bars(pdev, IORESOURCE_MEM));
1693 err_pci_reg:
1694 err_dma:
1695         pci_disable_device(pdev);
1696         return err;
1697 }
1698
1699 /**
1700  * igb_remove - Device Removal Routine
1701  * @pdev: PCI device information struct
1702  *
1703  * igb_remove is called by the PCI subsystem to alert the driver
1704  * that it should release a PCI device.  This could be caused by a
1705  * Hot-Plug event, or because the driver is going to be removed from
1706  * memory.
1707  **/
1708 static void __devexit igb_remove(struct pci_dev *pdev)
1709 {
1710         struct net_device *netdev = pci_get_drvdata(pdev);
1711         struct igb_adapter *adapter = netdev_priv(netdev);
1712         struct e1000_hw *hw = &adapter->hw;
1713
1714         /* flush_scheduled_work() may reschedule our watchdog task, so
1715          * explicitly disable the watchdog tasks from being rescheduled */
1716         set_bit(__IGB_DOWN, &adapter->state);
1717         del_timer_sync(&adapter->watchdog_timer);
1718         del_timer_sync(&adapter->phy_info_timer);
1719
1720         flush_scheduled_work();
1721
1722 #ifdef CONFIG_IGB_DCA
1723         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1724                 dev_info(&pdev->dev, "DCA disabled\n");
1725                 dca_remove_requester(&pdev->dev);
1726                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1727                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1728         }
1729 #endif
1730
1731         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1732          * would have already happened in close and is redundant. */
1733         igb_release_hw_control(adapter);
1734
1735         unregister_netdev(netdev);
1736
1737         igb_clear_interrupt_scheme(adapter);
1738
1739 #ifdef CONFIG_PCI_IOV
1740         /* reclaim resources allocated to VFs */
1741         if (adapter->vf_data) {
1742                 /* disable iov and allow time for transactions to clear */
1743                 pci_disable_sriov(pdev);
1744                 msleep(500);
1745
1746                 kfree(adapter->vf_data);
1747                 adapter->vf_data = NULL;
1748                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1749                 msleep(100);
1750                 dev_info(&pdev->dev, "IOV Disabled\n");
1751         }
1752 #endif
1753
1754         iounmap(hw->hw_addr);
1755         if (hw->flash_address)
1756                 iounmap(hw->flash_address);
1757         pci_release_selected_regions(pdev,
1758                                      pci_select_bars(pdev, IORESOURCE_MEM));
1759
1760         free_netdev(netdev);
1761
1762         pci_disable_pcie_error_reporting(pdev);
1763
1764         pci_disable_device(pdev);
1765 }
1766
1767 /**
1768  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1769  * @adapter: board private structure to initialize
1770  *
1771  * This function initializes the VF-specific data storage and then attempts
1772  * to allocate the VFs.  It is ordered this way because it is much more
1773  * expensive time-wise to disable SR-IOV than it is to allocate and free
1774  * the memory for the VFs.
1775  **/
1776 static void __devinit igb_probe_vfs(struct igb_adapter *adapter)
1777 {
1778 #ifdef CONFIG_PCI_IOV
1779         struct pci_dev *pdev = adapter->pdev;
1780
1781         if (adapter->vfs_allocated_count > 7)
1782                 adapter->vfs_allocated_count = 7;
1783
1784         if (adapter->vfs_allocated_count) {
1785                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1786                                            sizeof(struct vf_data_storage),
1787                                            GFP_KERNEL);
1788                 /* if allocation failed then we do not support SR-IOV */
1789                 if (!adapter->vf_data) {
1790                         adapter->vfs_allocated_count = 0;
1791                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1792                                 "Data Storage\n");
1793                 }
1794         }
1795
1796         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1797                 kfree(adapter->vf_data);
1798                 adapter->vf_data = NULL;
1799 #endif /* CONFIG_PCI_IOV */
1800                 adapter->vfs_allocated_count = 0;
1801 #ifdef CONFIG_PCI_IOV
1802         } else {
1803                 unsigned char mac_addr[ETH_ALEN];
1804                 int i;
1805                 dev_info(&pdev->dev, "%d vfs allocated\n",
1806                          adapter->vfs_allocated_count);
1807                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1808                         random_ether_addr(mac_addr);
1809                         igb_set_vf_mac(adapter, i, mac_addr);
1810                 }
1811         }
1812 #endif /* CONFIG_PCI_IOV */
1813 }
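
/*
 * Usage sketch (illustrative assumption): the VF count comes from the
 * "max_vfs" module parameter consumed in igb_sw_init(), e.g.:
 *
 *   modprobe igb max_vfs=7
 *
 * which makes this function allocate vf_data for up to seven VFs, call
 * pci_enable_sriov() to create them in PCI config space, and give each VF
 * a random initial MAC via random_ether_addr().
 */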
1814
1815
1816 /**
1817  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1818  * @adapter: board private structure to initialize
1819  *
1820  * igb_init_hw_timer initializes the function pointer and values for the hw
1821  * timer found in hardware.
1822  **/
1823 static void igb_init_hw_timer(struct igb_adapter *adapter)
1824 {
1825         struct e1000_hw *hw = &adapter->hw;
1826
1827         switch (hw->mac.type) {
1828         case e1000_82580:
1829                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1830                 adapter->cycles.read = igb_read_clock;
1831                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1832                 adapter->cycles.mult = 1;
1833                 /*
1834                  * The 82580 timesync advances the system timer in 8ns steps
1835                  * and the value cannot be scaled.  Instead we need to shift
1836                  * the registers to generate a 64-bit timer value.  As a result
1837                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1838                  * 24 in order to generate a value large enough for synchronization.
1839                  */
1840                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1841                 /* disable system timer temporarily by setting bit 31 */
1842                 wr32(E1000_TSAUXC, 0x80000000);
1843                 wrfl();
1844
1845                 /* Set registers so that rollover occurs soon to test this. */
1846                 wr32(E1000_SYSTIMR, 0x00000000);
1847                 wr32(E1000_SYSTIML, 0x80000000);
1848                 wr32(E1000_SYSTIMH, 0x000000FF);
1849                 wrfl();
1850
1851                 /* enable system timer by clearing bit 31 */
1852                 wr32(E1000_TSAUXC, 0x0);
1853                 wrfl();
1854
1855                 timecounter_init(&adapter->clock,
1856                                  &adapter->cycles,
1857                                  ktime_to_ns(ktime_get_real()));
1858                 /*
1859                  * Synchronize our NIC clock against the system wall clock.
1860                  * Reading a NIC time stamp takes ~3us per sample, and the
1861                  * samples proved stable even under load, so 10 samples per
1862                  * offset comparison are sufficient.
1863                  */
1864                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1865                 adapter->compare.source = &adapter->clock;
1866                 adapter->compare.target = ktime_get_real;
1867                 adapter->compare.num_samples = 10;
1868                 timecompare_update(&adapter->compare, 0);
1869                 break;
1870         case e1000_82576:
1871                 /*
1872                  * Initialize the hardware timer: we keep it running in case
1873                  * some program needs it later on.
1874                  */
1875                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1876                 adapter->cycles.read = igb_read_clock;
1877                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1878                 adapter->cycles.mult = 1;
1879                 /*
1880                  * Scale the NIC clock cycle by a large factor so that
1881                  * relatively small clock corrections can be added or
1882                  * subtracted at each clock tick. The drawbacks of a large
1883                  * factor are a) that the clock register overflows more quickly
1884                  * (not such a big deal) and b) that the increment per tick has
1885                  * to fit into 24 bits.  As a result we need to use a shift of
1886                  * 19 so we can fit a value of 16 into the TIMINCA register.
1887                  */
1888                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1889                 wr32(E1000_TIMINCA,
1890                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1891                                 (16 << IGB_82576_TSYNC_SHIFT));
1892
1893                 /* Set registers so that rollover occurs soon to test this. */
1894                 wr32(E1000_SYSTIML, 0x00000000);
1895                 wr32(E1000_SYSTIMH, 0xFF800000);
1896                 wrfl();
1897
1898                 timecounter_init(&adapter->clock,
1899                                  &adapter->cycles,
1900                                  ktime_to_ns(ktime_get_real()));
1901                 /*
1902                  * Synchronize our NIC clock against the system wall clock.
1903                  * Reading a NIC time stamp takes ~3us per sample, and the
1904                  * samples proved stable even under load, so 10 samples per
1905                  * offset comparison are sufficient.
1906                  */
1907                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1908                 adapter->compare.source = &adapter->clock;
1909                 adapter->compare.target = ktime_get_real;
1910                 adapter->compare.num_samples = 10;
1911                 timecompare_update(&adapter->compare, 0);
1912                 break;
1913         case e1000_82575:
1914                 /* 82575 does not support timesync */
1915         default:
1916                 break;
1917         }
1918
1919 }
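
/*
 * Worked example (illustration only): the kernel's timecounter layer
 * converts raw deltas from adapter->cycles.read() to nanoseconds as
 *
 *   ns = (delta * cycles.mult) >> cycles.shift;
 *
 * With mult == 1 and shift == 19 (IGB_82576_TSYNC_SHIFT), a raw delta of
 * 1 << 19 reads back as exactly 1ns, matching the TIMINCA programming in
 * the 82576 case above, where each 16ns tick adds 16 << 19 to SYSTIM.
 */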
1920
1921 /**
1922  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1923  * @adapter: board private structure to initialize
1924  *
1925  * igb_sw_init initializes the Adapter private data structure.
1926  * Fields are initialized based on PCI device information and
1927  * OS network device settings (MTU size).
1928  **/
1929 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1930 {
1931         struct e1000_hw *hw = &adapter->hw;
1932         struct net_device *netdev = adapter->netdev;
1933         struct pci_dev *pdev = adapter->pdev;
1934
1935         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1936
1937         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1938         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1939         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1940         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1941
1942         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1943         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1944
1945 #ifdef CONFIG_PCI_IOV
1946         if (hw->mac.type == e1000_82576)
1947                 adapter->vfs_allocated_count = max_vfs;
1948
1949 #endif /* CONFIG_PCI_IOV */
1950         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1951
1952         /*
1953          * if rss_queues > 4, or if VFs are going to be allocated alongside
1954          * multiple RSS queues, combine the queues into queue pairs in order
1955          * to conserve the limited supply of interrupt vectors
1956          */
1957         if ((adapter->rss_queues > 4) ||
1958             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1959                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1960
1961         /* This call may decrease the number of queues */
1962         if (igb_init_interrupt_scheme(adapter)) {
1963                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1964                 return -ENOMEM;
1965         }
1966
1967         igb_init_hw_timer(adapter);
1968         igb_probe_vfs(adapter);
1969
1970         /* Explicitly disable IRQ since the NIC can be in any state. */
1971         igb_irq_disable(adapter);
1972
1973         set_bit(__IGB_DOWN, &adapter->state);
1974         return 0;
1975 }
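
/*
 * Heuristic sketch (illustration only): on an 8-CPU system, and assuming
 * IGB_MAX_RX_QUEUES permits it, rss_queues becomes 8; since 8 > 4,
 * IGB_FLAG_QUEUE_PAIRS is set above, each q_vector then services one Tx
 * and one Rx ring together, and igb_init_interrupt_scheme() requests
 * roughly half as many MSI-X vectors.
 */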
1976
1977 /**
1978  * igb_open - Called when a network interface is made active
1979  * @netdev: network interface device structure
1980  *
1981  * Returns 0 on success, negative value on failure
1982  *
1983  * The open entry point is called when a network interface is made
1984  * active by the system (IFF_UP).  At this point all resources needed
1985  * for transmit and receive operations are allocated, the interrupt
1986  * handler is registered with the OS, the watchdog timer is started,
1987  * and the stack is notified that the interface is ready.
1988  **/
1989 static int igb_open(struct net_device *netdev)
1990 {
1991         struct igb_adapter *adapter = netdev_priv(netdev);
1992         struct e1000_hw *hw = &adapter->hw;
1993         int err;
1994         int i;
1995
1996         /* disallow open during test */
1997         if (test_bit(__IGB_TESTING, &adapter->state))
1998                 return -EBUSY;
1999
2000         netif_carrier_off(netdev);
2001
2002         /* allocate transmit descriptors */
2003         err = igb_setup_all_tx_resources(adapter);
2004         if (err)
2005                 goto err_setup_tx;
2006
2007         /* allocate receive descriptors */
2008         err = igb_setup_all_rx_resources(adapter);
2009         if (err)
2010                 goto err_setup_rx;
2011
2012         igb_power_up_link(adapter);
2013
2014         /* before we allocate an interrupt, we must be ready to handle it.
2015          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2016          * as soon as we call pci_request_irq, so we have to set up our
2017          * clean_rx handler before we do so.  */
2018         igb_configure(adapter);
2019
2020         err = igb_request_irq(adapter);
2021         if (err)
2022                 goto err_req_irq;
2023
2024         /* From here on the code is the same as igb_up() */
2025         clear_bit(__IGB_DOWN, &adapter->state);
2026
2027         for (i = 0; i < adapter->num_q_vectors; i++) {
2028                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2029                 napi_enable(&q_vector->napi);
2030         }
2031
2032         /* Clear any pending interrupts. */
2033         rd32(E1000_ICR);
2034
2035         igb_irq_enable(adapter);
2036
2037         /* notify VFs that reset has been completed */
2038         if (adapter->vfs_allocated_count) {
2039                 u32 reg_data = rd32(E1000_CTRL_EXT);
2040                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2041                 wr32(E1000_CTRL_EXT, reg_data);
2042         }
2043
2044         netif_tx_start_all_queues(netdev);
2045
2046         /* start the watchdog. */
2047         hw->mac.get_link_status = 1;
2048         schedule_work(&adapter->watchdog_task);
2049
2050         return 0;
2051
2052 err_req_irq:
2053         igb_release_hw_control(adapter);
2054         igb_power_down_link(adapter);
2055         igb_free_all_rx_resources(adapter);
2056 err_setup_rx:
2057         igb_free_all_tx_resources(adapter);
2058 err_setup_tx:
2059         igb_reset(adapter);
2060
2061         return err;
2062 }
2063
2064 /**
2065  * igb_close - Disables a network interface
2066  * @netdev: network interface device structure
2067  *
2068  * Returns 0, this is not allowed to fail
2069  *
2070  * The close entry point is called when an interface is de-activated
2071  * by the OS.  The hardware is still under the driver's control, but
2072  * needs to be disabled.  A global MAC reset is issued to stop the
2073  * hardware, and all transmit and receive resources are freed.
2074  **/
2075 static int igb_close(struct net_device *netdev)
2076 {
2077         struct igb_adapter *adapter = netdev_priv(netdev);
2078
2079         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2080         igb_down(adapter);
2081
2082         igb_free_irq(adapter);
2083
2084         igb_free_all_tx_resources(adapter);
2085         igb_free_all_rx_resources(adapter);
2086
2087         return 0;
2088 }
2089
2090 /**
2091  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2092  * @tx_ring: tx descriptor ring (for a specific queue) to set up
2093  *
2094  * Return 0 on success, negative on failure
2095  **/
2096 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2097 {
2098         struct pci_dev *pdev = tx_ring->pdev;
2099         int size;
2100
2101         size = sizeof(struct igb_buffer) * tx_ring->count;
2102         tx_ring->buffer_info = vmalloc(size);
2103         if (!tx_ring->buffer_info)
2104                 goto err;
2105         memset(tx_ring->buffer_info, 0, size);
2106
2107         /* round up to nearest 4K */
2108         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2109         tx_ring->size = ALIGN(tx_ring->size, 4096);
2110
2111         tx_ring->desc = pci_alloc_consistent(pdev,
2112                                              tx_ring->size,
2113                                              &tx_ring->dma);
2114
2115         if (!tx_ring->desc)
2116                 goto err;
2117
2118         tx_ring->next_to_use = 0;
2119         tx_ring->next_to_clean = 0;
2120         return 0;
2121
2122 err:
2123         vfree(tx_ring->buffer_info);
2124         dev_err(&pdev->dev,
2125                 "Unable to allocate memory for the transmit descriptor ring\n");
2126         return -ENOMEM;
2127 }
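
/*
 * Sizing sketch (illustration only, assuming the IGB_DEFAULT_TXD default
 * of 256 descriptors): each advanced Tx descriptor is 16 bytes, so
 *
 *   tx_ring->size = 256 * sizeof(union e1000_adv_tx_desc)
 *                 = 256 * 16 = 4096 bytes,
 *
 * already a whole 4K page; ALIGN(size, 4096) only rounds up for counts
 * that do not land exactly on a page boundary.
 */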
2128
2129 /**
2130  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2131  *                                (Descriptors) for all queues
2132  * @adapter: board private structure
2133  *
2134  * Return 0 on success, negative on failure
2135  **/
2136 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2137 {
2138         struct pci_dev *pdev = adapter->pdev;
2139         int i, err = 0;
2140
2141         for (i = 0; i < adapter->num_tx_queues; i++) {
2142                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2143                 if (err) {
2144                         dev_err(&pdev->dev,
2145                                 "Allocation for Tx Queue %u failed\n", i);
2146                         for (i--; i >= 0; i--)
2147                                 igb_free_tx_resources(adapter->tx_ring[i]);
2148                         break;
2149                 }
2150         }
2151
2152         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2153                 int r_idx = i % adapter->num_tx_queues;
2154                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2155         }
2156         return err;
2157 }
2158
2159 /**
2160  * igb_setup_tctl - configure the transmit control registers
2161  * @adapter: Board private structure
2162  **/
2163 void igb_setup_tctl(struct igb_adapter *adapter)
2164 {
2165         struct e1000_hw *hw = &adapter->hw;
2166         u32 tctl;
2167
2168         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2169         wr32(E1000_TXDCTL(0), 0);
2170
2171         /* Program the Transmit Control Register */
2172         tctl = rd32(E1000_TCTL);
2173         tctl &= ~E1000_TCTL_CT;
2174         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2175                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2176
2177         igb_config_collision_dist(hw);
2178
2179         /* Enable transmits */
2180         tctl |= E1000_TCTL_EN;
2181
2182         wr32(E1000_TCTL, tctl);
2183 }
2184
2185 /**
2186  * igb_configure_tx_ring - Configure transmit ring after Reset
2187  * @adapter: board private structure
2188  * @ring: tx ring to configure
2189  *
2190  * Configure a transmit ring after a reset.
2191  **/
2192 void igb_configure_tx_ring(struct igb_adapter *adapter,
2193                            struct igb_ring *ring)
2194 {
2195         struct e1000_hw *hw = &adapter->hw;
2196         u32 txdctl;
2197         u64 tdba = ring->dma;
2198         int reg_idx = ring->reg_idx;
2199
2200         /* disable the queue */
2201         txdctl = rd32(E1000_TXDCTL(reg_idx));
2202         wr32(E1000_TXDCTL(reg_idx),
2203                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2204         wrfl();
2205         mdelay(10);
2206
2207         wr32(E1000_TDLEN(reg_idx),
2208                         ring->count * sizeof(union e1000_adv_tx_desc));
2209         wr32(E1000_TDBAL(reg_idx),
2210                         tdba & 0x00000000ffffffffULL);
2211         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2212
2213         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2214         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2215         writel(0, ring->head);
2216         writel(0, ring->tail);
2217
2218         txdctl |= IGB_TX_PTHRESH;
2219         txdctl |= IGB_TX_HTHRESH << 8;
2220         txdctl |= IGB_TX_WTHRESH << 16;
2221
2222         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2223         wr32(E1000_TXDCTL(reg_idx), txdctl);
2224 }
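
/*
 * Register sketch (illustration only): the 64-bit ring base address is
 * split across two 32-bit registers, e.g. for tdba == 0x0000000123456000:
 *
 *   TDBAL = tdba & 0xffffffff = 0x23456000  (low 32 bits)
 *   TDBAH = tdba >> 32        = 0x00000001  (high 32 bits)
 *
 * Head and tail are then zeroed so the queue starts empty before the
 * prefetch/host/writeback thresholds are set and the queue is re-enabled.
 */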
2225
2226 /**
2227  * igb_configure_tx - Configure transmit Unit after Reset
2228  * @adapter: board private structure
2229  *
2230  * Configure the Tx unit of the MAC after a reset.
2231  **/
2232 static void igb_configure_tx(struct igb_adapter *adapter)
2233 {
2234         int i;
2235
2236         for (i = 0; i < adapter->num_tx_queues; i++)
2237                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2238 }
2239
2240 /**
2241  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2242  * @rx_ring:    rx descriptor ring (for a specific queue) to set up
2243  *
2244  * Returns 0 on success, negative on failure
2245  **/
2246 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2247 {
2248         struct pci_dev *pdev = rx_ring->pdev;
2249         int size, desc_len;
2250
2251         size = sizeof(struct igb_buffer) * rx_ring->count;
2252         rx_ring->buffer_info = vmalloc(size);
2253         if (!rx_ring->buffer_info)
2254                 goto err;
2255         memset(rx_ring->buffer_info, 0, size);
2256
2257         desc_len = sizeof(union e1000_adv_rx_desc);
2258
2259         /* Round up to nearest 4K */
2260         rx_ring->size = rx_ring->count * desc_len;
2261         rx_ring->size = ALIGN(rx_ring->size, 4096);
2262
2263         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2264                                              &rx_ring->dma);
2265
2266         if (!rx_ring->desc)
2267                 goto err;
2268
2269         rx_ring->next_to_clean = 0;
2270         rx_ring->next_to_use = 0;
2271
2272         return 0;
2273
2274 err:
2275         vfree(rx_ring->buffer_info);
2276         rx_ring->buffer_info = NULL;
2277         dev_err(&pdev->dev, "Unable to allocate memory for "
2278                 "the receive descriptor ring\n");
2279         return -ENOMEM;
2280 }
2281
2282 /**
2283  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2284  *                                (Descriptors) for all queues
2285  * @adapter: board private structure
2286  *
2287  * Return 0 on success, negative on failure
2288  **/
2289 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2290 {
2291         struct pci_dev *pdev = adapter->pdev;
2292         int i, err = 0;
2293
2294         for (i = 0; i < adapter->num_rx_queues; i++) {
2295                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2296                 if (err) {
2297                         dev_err(&pdev->dev,
2298                                 "Allocation for Rx Queue %u failed\n", i);
2299                         for (i--; i >= 0; i--)
2300                                 igb_free_rx_resources(adapter->rx_ring[i]);
2301                         break;
2302                 }
2303         }
2304
2305         return err;
2306 }
2307
2308 /**
2309  * igb_setup_mrqc - configure the multiple receive queue control registers
2310  * @adapter: Board private structure
2311  **/
2312 static void igb_setup_mrqc(struct igb_adapter *adapter)
2313 {
2314         struct e1000_hw *hw = &adapter->hw;
2315         u32 mrqc, rxcsum;
2316         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2317         union e1000_reta {
2318                 u32 dword;
2319                 u8  bytes[4];
2320         } reta;
2321         static const u8 rsshash[40] = {
2322                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2323                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2324                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2325                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2326
2327         /* Fill out hash function seeds */
2328         for (j = 0; j < 10; j++) {
2329                 u32 rsskey = rsshash[(j * 4)];
2330                 rsskey |= rsshash[(j * 4) + 1] << 8;
2331                 rsskey |= rsshash[(j * 4) + 2] << 16;
2332                 rsskey |= rsshash[(j * 4) + 3] << 24;
2333                 array_wr32(E1000_RSSRK(0), j, rsskey);
2334         }
2335
2336         num_rx_queues = adapter->rss_queues;
2337
2338         if (adapter->vfs_allocated_count) {
2339                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2340                 switch (hw->mac.type) {
2341                 case e1000_82580:
2342                         num_rx_queues = 1;
2343                         shift = 0;
2344                         break;
2345                 case e1000_82576:
2346                         shift = 3;
2347                         num_rx_queues = 2;
2348                         break;
2349                 case e1000_82575:
2350                         shift = 2;
2351                         shift2 = 6;
2352                 default:
2353                         break;
2354                 }
2355         } else {
2356                 if (hw->mac.type == e1000_82575)
2357                         shift = 6;
2358         }
2359
2360         for (j = 0; j < (32 * 4); j++) {
2361                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2362                 if (shift2)
2363                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2364                 if ((j & 3) == 3)
2365                         wr32(E1000_RETA(j >> 2), reta.dword);
2366         }
2367
2368         /*
2369          * Disable raw packet checksumming so that RSS hash is placed in
2370          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2371          * offloads as they are enabled by default
2372          */
2373         rxcsum = rd32(E1000_RXCSUM);
2374         rxcsum |= E1000_RXCSUM_PCSD;
2375
2376         if (adapter->hw.mac.type >= e1000_82576)
2377                 /* Enable Receive Checksum Offload for SCTP */
2378                 rxcsum |= E1000_RXCSUM_CRCOFL;
2379
2380         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2381         wr32(E1000_RXCSUM, rxcsum);
2382
2383         /* If VMDq is enabled, set the appropriate mode for it; otherwise
2384          * default to RSS so that an RSS hash is calculated per packet even
2385          * if we are only using one queue */
2386         if (adapter->vfs_allocated_count) {
2387                 if (hw->mac.type > e1000_82575) {
2388                         /* Set the default pool for the PF's first queue */
2389                         u32 vtctl = rd32(E1000_VT_CTL);
2390                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2391                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2392                         vtctl |= adapter->vfs_allocated_count <<
2393                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2394                         wr32(E1000_VT_CTL, vtctl);
2395                 }
2396                 if (adapter->rss_queues > 1)
2397                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2398                 else
2399                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2400         } else {
2401                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2402         }
2403         igb_vmm_control(adapter);
2404
2405         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2406                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2407         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2408                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2409         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2410                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2411         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2412                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2413
2414         wr32(E1000_MRQC, mrqc);
2415 }
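
/*
 * RETA sketch (illustration only): with VFs allocated on an 82576,
 * num_rx_queues == 2 and shift == 3, so the loop above fills the
 * 128-entry redirection table with the repeating byte pattern
 *
 *   (0 % 2) << 3 = 0x00,  (1 % 2) << 3 = 0x08,  0x00, 0x08, ...
 *
 * alternating RSS hash results between queues 0 and 1 of the PF's pool;
 * the shift places the queue index where VMDq mode expects it.
 */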
2416
2417 /**
2418  * igb_setup_rctl - configure the receive control registers
2419  * @adapter: Board private structure
2420  **/
2421 void igb_setup_rctl(struct igb_adapter *adapter)
2422 {
2423         struct e1000_hw *hw = &adapter->hw;
2424         u32 rctl;
2425
2426         rctl = rd32(E1000_RCTL);
2427
2428         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2429         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2430
2431         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2432                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2433
2434         /*
2435          * enable stripping of CRC. It's unlikely this will break BMC
2436          * redirection as it did with e1000. Newer features require
2437          * that the HW strips the CRC.
2438          */
2439         rctl |= E1000_RCTL_SECRC;
2440
2441         /* disable store bad packets and clear size bits. */
2442         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2443
2444         /* enable LPE to prevent packets larger than max_frame_size */
2445         rctl |= E1000_RCTL_LPE;
2446
2447         /* disable queue 0 to prevent tail write w/o re-config */
2448         wr32(E1000_RXDCTL(0), 0);
2449
2450         /* Attention!  For SR-IOV PF driver operations you must enable
2451          * queue drop for all VF and PF queues to prevent head-of-line blocking
2452          * if an untrusted VF does not provide descriptors to hardware.
2453          */
2454         if (adapter->vfs_allocated_count) {
2455                 /* set all queue drop enable bits */
2456                 wr32(E1000_QDE, ALL_QUEUES);
2457         }
2458
2459         wr32(E1000_RCTL, rctl);
2460 }
2461
2462 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2463                                    int vfn)
2464 {
2465         struct e1000_hw *hw = &adapter->hw;
2466         u32 vmolr;
2467
2468         /* if this is a VF (not the PF), check whether it has VLANs
2469          * enabled and, if so, increase the size to allow for a VLAN tag */
2470         if (vfn < adapter->vfs_allocated_count &&
2471             adapter->vf_data[vfn].vlans_enabled)
2472                 size += VLAN_TAG_SIZE;
2473
2474         vmolr = rd32(E1000_VMOLR(vfn));
2475         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2476         vmolr |= size | E1000_VMOLR_LPE;
2477         wr32(E1000_VMOLR(vfn), vmolr);
2478
2479         return 0;
2480 }
2481
2482 /**
2483  * igb_rlpml_set - set maximum receive packet size
2484  * @adapter: board private structure
2485  *
2486  * Configure maximum receivable packet size.
2487  **/
2488 static void igb_rlpml_set(struct igb_adapter *adapter)
2489 {
2490         u32 max_frame_size = adapter->max_frame_size;
2491         struct e1000_hw *hw = &adapter->hw;
2492         u16 pf_id = adapter->vfs_allocated_count;
2493
2494         if (adapter->vlgrp)
2495                 max_frame_size += VLAN_TAG_SIZE;
2496
2497         /* if VFs are enabled, set RLPML to the largest possible request
2498          * size and set the per-pool VMOLR RLPML to the size we need */
2499         if (pf_id) {
2500                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2501                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2502         }
2503
2504         wr32(E1000_RLPML, max_frame_size);
2505 }
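
/*
 * Worked numbers (illustration only): with the default MTU of 1500,
 * igb_sw_init() computes
 *
 *   max_frame_size = 1500 + ETH_HLEN (14) + ETH_FCS_LEN (4) = 1518
 *
 * and a registered VLAN group adds VLAN_TAG_SIZE (4) here, so RLPML is
 * written with 1522 on a non-SR-IOV setup.
 */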
2506
2507 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2508                                  int vfn, bool aupe)
2509 {
2510         struct e1000_hw *hw = &adapter->hw;
2511         u32 vmolr;
2512
2513         /*
2514          * This register exists only on 82576 and newer hardware, so on
2515          * older parts there is nothing to do
2516          */
2517         if (hw->mac.type < e1000_82576)
2518                 return;
2519
2520         vmolr = rd32(E1000_VMOLR(vfn));
2521         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2522         if (aupe)
2523                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2524         else
2525                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2526
2527         /* clear all bits that might not be set */
2528         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2529
2530         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2531                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2532         /*
2533          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2534          * multicast packets
2535          */
2536         if (vfn <= adapter->vfs_allocated_count)
2537                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2538
2539         wr32(E1000_VMOLR(vfn), vmolr);
2540 }
2541
2542 /**
2543  * igb_configure_rx_ring - Configure a receive ring after Reset
2544  * @adapter: board private structure
2545  * @ring: receive ring to be configured
2546  *
2547  * Configure the Rx unit of the MAC after a reset.
2548  **/
2549 void igb_configure_rx_ring(struct igb_adapter *adapter,
2550                            struct igb_ring *ring)
2551 {
2552         struct e1000_hw *hw = &adapter->hw;
2553         u64 rdba = ring->dma;
2554         int reg_idx = ring->reg_idx;
2555         u32 srrctl, rxdctl;
2556
2557         /* disable the queue */
2558         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2559         wr32(E1000_RXDCTL(reg_idx),
2560                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2561
2562         /* Set DMA base address registers */
2563         wr32(E1000_RDBAL(reg_idx),
2564              rdba & 0x00000000ffffffffULL);
2565         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2566         wr32(E1000_RDLEN(reg_idx),
2567                        ring->count * sizeof(union e1000_adv_rx_desc));
2568
2569         /* initialize head and tail */
2570         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2571         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2572         writel(0, ring->head);
2573         writel(0, ring->tail);
2574
2575         /* set descriptor configuration */
2576         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2577                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2578                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2579 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2580                 srrctl |= IGB_RXBUFFER_16384 >>
2581                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2582 #else
2583                 srrctl |= (PAGE_SIZE / 2) >>
2584                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2585 #endif
2586                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2587         } else {
2588                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2589                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2590                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2591         }
2592         /* Only set Drop Enable if we are supporting multiple queues */
2593         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2594                 srrctl |= E1000_SRRCTL_DROP_EN;
2595
2596         wr32(E1000_SRRCTL(reg_idx), srrctl);
2597
2598         /* set filtering for VMDQ pools */
2599         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2600
2601         /* enable receive descriptor fetching */
2602         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2603         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2604         rxdctl &= 0xFFF00000;
2605         rxdctl |= IGB_RX_PTHRESH;
2606         rxdctl |= IGB_RX_HTHRESH << 8;
2607         rxdctl |= IGB_RX_WTHRESH << 16;
2608         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2609 }
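
/*
 * SRRCTL sketch (illustration only): BSIZEPKT is programmed in 1 KB units
 * via E1000_SRRCTL_BSIZEPKT_SHIFT (a right shift of 10), so for the
 * one-buffer path with rx_buffer_len == 2048:
 *
 *   srrctl = ALIGN(2048, 1024) >> 10 = 2
 *
 * i.e. 2 KB packet buffers; the header-split path instead encodes the
 * header size in 64-byte units and uses half a page for the packet data.
 */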
2610
2611 /**
2612  * igb_configure_rx - Configure receive Unit after Reset
2613  * @adapter: board private structure
2614  *
2615  * Configure the Rx unit of the MAC after a reset.
2616  **/
2617 static void igb_configure_rx(struct igb_adapter *adapter)
2618 {
2619         int i;
2620
2621         /* set UTA to appropriate mode */
2622         igb_set_uta(adapter);
2623
2624         /* set the correct pool for the PF default MAC address in entry 0 */
2625         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2626                          adapter->vfs_allocated_count);
2627
2628         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2629          * the Base and Length of the Rx Descriptor Ring */
2630         for (i = 0; i < adapter->num_rx_queues; i++)
2631                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2632 }
2633
2634 /**
2635  * igb_free_tx_resources - Free Tx Resources per Queue
2636  * @tx_ring: Tx descriptor ring for a specific queue
2637  *
2638  * Free all transmit software resources
2639  **/
2640 void igb_free_tx_resources(struct igb_ring *tx_ring)
2641 {
2642         igb_clean_tx_ring(tx_ring);
2643
2644         vfree(tx_ring->buffer_info);
2645         tx_ring->buffer_info = NULL;
2646
2647         /* if not set, then don't free */
2648         if (!tx_ring->desc)
2649                 return;
2650
2651         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2652                             tx_ring->desc, tx_ring->dma);
2653
2654         tx_ring->desc = NULL;
2655 }
2656
2657 /**
2658  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2659  * @adapter: board private structure
2660  *
2661  * Free all transmit software resources
2662  **/
2663 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2664 {
2665         int i;
2666
2667         for (i = 0; i < adapter->num_tx_queues; i++)
2668                 igb_free_tx_resources(adapter->tx_ring[i]);
2669 }
2670
2671 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2672                                     struct igb_buffer *buffer_info)
2673 {
2674         if (buffer_info->dma) {
2675                 if (buffer_info->mapped_as_page)
2676                         pci_unmap_page(tx_ring->pdev,
2677                                         buffer_info->dma,
2678                                         buffer_info->length,
2679                                         PCI_DMA_TODEVICE);
2680                 else
2681                         pci_unmap_single(tx_ring->pdev,
2682                                         buffer_info->dma,
2683                                         buffer_info->length,
2684                                         PCI_DMA_TODEVICE);
2685                 buffer_info->dma = 0;
2686         }
2687         if (buffer_info->skb) {
2688                 dev_kfree_skb_any(buffer_info->skb);
2689                 buffer_info->skb = NULL;
2690         }
2691         buffer_info->time_stamp = 0;
2692         buffer_info->length = 0;
2693         buffer_info->next_to_watch = 0;
2694         buffer_info->mapped_as_page = false;
2695 }
2696
2697 /**
2698  * igb_clean_tx_ring - Free Tx Buffers
2699  * @tx_ring: ring to be cleaned
2700  **/
2701 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2702 {
2703         struct igb_buffer *buffer_info;
2704         unsigned long size;
2705         unsigned int i;
2706
2707         if (!tx_ring->buffer_info)
2708                 return;
2709
2710         /* Free all the Tx ring sk_buffs */
2711         for (i = 0; i < tx_ring->count; i++) {
2712                 buffer_info = &tx_ring->buffer_info[i];
2713                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2714         }
2715
2716         size = sizeof(struct igb_buffer) * tx_ring->count;
2717         memset(tx_ring->buffer_info, 0, size);
2718
2719         /* Zero out the descriptor ring */
2720         memset(tx_ring->desc, 0, tx_ring->size);
2721
2722         tx_ring->next_to_use = 0;
2723         tx_ring->next_to_clean = 0;
2724 }
2725
2726 /**
2727  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2728  * @adapter: board private structure
2729  **/
2730 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2731 {
2732         int i;
2733
2734         for (i = 0; i < adapter->num_tx_queues; i++)
2735                 igb_clean_tx_ring(adapter->tx_ring[i]);
2736 }
2737
2738 /**
2739  * igb_free_rx_resources - Free Rx Resources
2740  * @rx_ring: ring to clean the resources from
2741  *
2742  * Free all receive software resources
2743  **/
2744 void igb_free_rx_resources(struct igb_ring *rx_ring)
2745 {
2746         igb_clean_rx_ring(rx_ring);
2747
2748         vfree(rx_ring->buffer_info);
2749         rx_ring->buffer_info = NULL;
2750
2751         /* if not set, then don't free */
2752         if (!rx_ring->desc)
2753                 return;
2754
2755         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2756                             rx_ring->desc, rx_ring->dma);
2757
2758         rx_ring->desc = NULL;
2759 }
2760
2761 /**
2762  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2763  * @adapter: board private structure
2764  *
2765  * Free all receive software resources
2766  **/
2767 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2768 {
2769         int i;
2770
2771         for (i = 0; i < adapter->num_rx_queues; i++)
2772                 igb_free_rx_resources(adapter->rx_ring[i]);
2773 }
2774
2775 /**
2776  * igb_clean_rx_ring - Free Rx Buffers per Queue
2777  * @rx_ring: ring to free buffers from
2778  **/
2779 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2780 {
2781         struct igb_buffer *buffer_info;
2782         unsigned long size;
2783         unsigned int i;
2784
2785         if (!rx_ring->buffer_info)
2786                 return;
2787
2788         /* Free all the Rx ring sk_buffs */
2789         for (i = 0; i < rx_ring->count; i++) {
2790                 buffer_info = &rx_ring->buffer_info[i];
2791                 if (buffer_info->dma) {
2792                         pci_unmap_single(rx_ring->pdev,
2793                                          buffer_info->dma,
2794                                          rx_ring->rx_buffer_len,
2795                                          PCI_DMA_FROMDEVICE);
2796                         buffer_info->dma = 0;
2797                 }
2798
2799                 if (buffer_info->skb) {
2800                         dev_kfree_skb(buffer_info->skb);
2801                         buffer_info->skb = NULL;
2802                 }
2803                 if (buffer_info->page_dma) {
2804                         pci_unmap_page(rx_ring->pdev,
2805                                        buffer_info->page_dma,
2806                                        PAGE_SIZE / 2,
2807                                        PCI_DMA_FROMDEVICE);
2808                         buffer_info->page_dma = 0;
2809                 }
2810                 if (buffer_info->page) {
2811                         put_page(buffer_info->page);
2812                         buffer_info->page = NULL;
2813                         buffer_info->page_offset = 0;
2814                 }
2815         }
2816
2817         size = sizeof(struct igb_buffer) * rx_ring->count;
2818         memset(rx_ring->buffer_info, 0, size);
2819
2820         /* Zero out the descriptor ring */
2821         memset(rx_ring->desc, 0, rx_ring->size);
2822
2823         rx_ring->next_to_clean = 0;
2824         rx_ring->next_to_use = 0;
2825 }
2826
2827 /**
2828  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2829  * @adapter: board private structure
2830  **/
2831 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2832 {
2833         int i;
2834
2835         for (i = 0; i < adapter->num_rx_queues; i++)
2836                 igb_clean_rx_ring(adapter->rx_ring[i]);
2837 }
2838
2839 /**
2840  * igb_set_mac - Change the Ethernet Address of the NIC
2841  * @netdev: network interface device structure
2842  * @p: pointer to an address structure
2843  *
2844  * Returns 0 on success, negative on failure
2845  **/
2846 static int igb_set_mac(struct net_device *netdev, void *p)
2847 {
2848         struct igb_adapter *adapter = netdev_priv(netdev);
2849         struct e1000_hw *hw = &adapter->hw;
2850         struct sockaddr *addr = p;
2851
2852         if (!is_valid_ether_addr(addr->sa_data))
2853                 return -EADDRNOTAVAIL;
2854
2855         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2856         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2857
2858         /* set the correct pool for the new PF MAC address in entry 0 */
2859         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2860                          adapter->vfs_allocated_count);
2861
2862         return 0;
2863 }
2864
2865 /**
2866  * igb_write_mc_addr_list - write multicast addresses to MTA
2867  * @netdev: network interface device structure
2868  *
2869  * Writes multicast address list to the MTA hash table.
2870  * Returns: -ENOMEM on failure
2871  *                0 on no addresses written
2872  *                X on writing X addresses to MTA
2873  **/
2874 static int igb_write_mc_addr_list(struct net_device *netdev)
2875 {
2876         struct igb_adapter *adapter = netdev_priv(netdev);
2877         struct e1000_hw *hw = &adapter->hw;
2878         struct dev_mc_list *mc_ptr;
2879         u8  *mta_list;
2880         int i;
2881
2882         if (netdev_mc_empty(netdev)) {
2883                 /* nothing to program, so clear mc list */
2884                 igb_update_mc_addr_list(hw, NULL, 0);
2885                 igb_restore_vf_multicasts(adapter);
2886                 return 0;
2887         }
2888
2889         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2890         if (!mta_list)
2891                 return -ENOMEM;
2892
2893         /* The shared function expects a packed array of only addresses. */
2894         i = 0;
2895         netdev_for_each_mc_addr(mc_ptr, netdev)
2896                 memcpy(mta_list + (i++ * ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2897
2898         igb_update_mc_addr_list(hw, mta_list, i);
2899         kfree(mta_list);
2900
2901         return netdev_mc_count(netdev);
2902 }
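/*
 * A minimal sketch (kept out of the build) of the packed layout the
 * shared code expects from the list built above: N six-byte addresses
 * laid out back to back with no padding between entries.  The names
 * below are illustrative only.
 */
#if 0
static void example_pack_mc_list(u8 *mta_list,
				 const u8 addrs[][ETH_ALEN], int count)
{
	int i;

	/* one ETH_ALEN-sized slot per address, no metadata in between */
	for (i = 0; i < count; i++)
		memcpy(mta_list + i * ETH_ALEN, addrs[i], ETH_ALEN);
}
#endif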
2903
2904 /**
2905  * igb_write_uc_addr_list - write unicast addresses to RAR table
2906  * @netdev: network interface device structure
2907  *
2908  * Writes unicast address list to the RAR table.
2909  * Returns: -ENOMEM on failure/insufficient address space
2910  *                0 on no addresses written
2911  *                X on writing X addresses to the RAR table
2912  **/
2913 static int igb_write_uc_addr_list(struct net_device *netdev)
2914 {
2915         struct igb_adapter *adapter = netdev_priv(netdev);
2916         struct e1000_hw *hw = &adapter->hw;
2917         unsigned int vfn = adapter->vfs_allocated_count;
2918         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2919         int count = 0;
2920
2921         /* return -ENOMEM if the unicast list will not fit in the RAR entries */
2922         if (netdev_uc_count(netdev) > rar_entries)
2923                 return -ENOMEM;
2924
2925         if (!netdev_uc_empty(netdev) && rar_entries) {
2926                 struct netdev_hw_addr *ha;
2927
2928                 netdev_for_each_uc_addr(ha, netdev) {
2929                         if (!rar_entries)
2930                                 break;
2931                         igb_rar_set_qsel(adapter, ha->addr,
2932                                          rar_entries--,
2933                                          vfn);
2934                         count++;
2935                 }
2936         }
2937         /* zero any remaining RAR entries, in reverse order to avoid write combining */
2938         for (; rar_entries > 0 ; rar_entries--) {
2939                 wr32(E1000_RAH(rar_entries), 0);
2940                 wr32(E1000_RAL(rar_entries), 0);
2941         }
2942         wrfl();
2943
2944         return count;
2945 }
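/*
 * Worked example of the RAR budget above, assuming an adapter with 24
 * receive-address registers and 7 VFs allocated: entry 0 holds the PF
 * MAC address, one entry is reserved per VF, and the remaining
 * 24 - (7 + 1) = 16 entries are available for the unicast list.
 */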
2946
2947 /**
2948  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2949  * @netdev: network interface device structure
2950  *
2951  * The set_rx_mode entry point is called whenever the unicast or multicast
2952  * address lists or the network interface flags are updated.  This routine is
2953  * responsible for configuring the hardware for proper unicast, multicast,
2954  * promiscuous mode, and all-multi behavior.
2955  **/
2956 static void igb_set_rx_mode(struct net_device *netdev)
2957 {
2958         struct igb_adapter *adapter = netdev_priv(netdev);
2959         struct e1000_hw *hw = &adapter->hw;
2960         unsigned int vfn = adapter->vfs_allocated_count;
2961         u32 rctl, vmolr = 0;
2962         int count;
2963
2964         /* Check for Promiscuous and All Multicast modes */
2965         rctl = rd32(E1000_RCTL);
2966
2967         /* clear the affected bits */
2968         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2969
2970         if (netdev->flags & IFF_PROMISC) {
2971                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2972                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2973         } else {
2974                 if (netdev->flags & IFF_ALLMULTI) {
2975                         rctl |= E1000_RCTL_MPE;
2976                         vmolr |= E1000_VMOLR_MPME;
2977                 } else {
2978                         /*
2979                          * Write addresses to the MTA, if the attempt fails
2980                          * then we should just turn on promiscuous mode so
2981                          * that we can at least receive multicast traffic
2982                          */
2983                         count = igb_write_mc_addr_list(netdev);
2984                         if (count < 0) {
2985                                 rctl |= E1000_RCTL_MPE;
2986                                 vmolr |= E1000_VMOLR_MPME;
2987                         } else if (count) {
2988                                 vmolr |= E1000_VMOLR_ROMPE;
2989                         }
2990                 }
2991                 /*
2992                  * Write addresses to available RAR registers, if there is not
2993                  * sufficient space to store all the addresses then enable
2994                  * unicast promiscuous mode
2995                  */
2996                 count = igb_write_uc_addr_list(netdev);
2997                 if (count < 0) {
2998                         rctl |= E1000_RCTL_UPE;
2999                         vmolr |= E1000_VMOLR_ROPE;
3000                 }
3001                 rctl |= E1000_RCTL_VFE;
3002         }
3003         wr32(E1000_RCTL, rctl);
3004
3005         /*
3006          * In order to support SR-IOV and eventually VMDq it is necessary to set
3007          * the VMOLR to enable the appropriate modes.  Without this workaround
3008          * we will have issues with VLAN tag stripping not being done for frames
3009          * that are only arriving because we are the default pool
3010          */
3011         if (hw->mac.type < e1000_82576)
3012                 return;
3013
3014         vmolr |= rd32(E1000_VMOLR(vfn)) &
3015                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3016         wr32(E1000_VMOLR(vfn), vmolr);
3017         igb_restore_vf_multicasts(adapter);
3018 }
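/*
 * Rough summary of the RCTL/VMOLR decisions above:
 *
 *   IFF_PROMISC          -> RCTL UPE+MPE, VMOLR ROPE+MPME
 *   IFF_ALLMULTI         -> RCTL MPE,     VMOLR MPME
 *   MTA write failed     -> RCTL MPE,     VMOLR MPME
 *   MTA write succeeded  ->               VMOLR ROMPE
 *   unicast list too big -> RCTL UPE,     VMOLR ROPE
 */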
3019
3020 /* Need to wait a few seconds after link up to get diagnostic information from
3021  * the phy */
3022 static void igb_update_phy_info(unsigned long data)
3023 {
3024         struct igb_adapter *adapter = (struct igb_adapter *) data;
3025         igb_get_phy_info(&adapter->hw);
3026 }
3027
3028 /**
3029  * igb_has_link - check shared code for link and determine up/down
3030  * @adapter: pointer to driver private info
3031  **/
3032 bool igb_has_link(struct igb_adapter *adapter)
3033 {
3034         struct e1000_hw *hw = &adapter->hw;
3035         bool link_active = false;
3036         s32 ret_val = 0;
3037
3038         /* get_link_status is set on LSC (link status) interrupt or
3039          * rx sequence error interrupt.  get_link_status will stay
3040          * set until e1000_check_for_link establishes link; this
3041          * applies to copper adapters ONLY
3042          */
3043         switch (hw->phy.media_type) {
3044         case e1000_media_type_copper:
3045                 if (hw->mac.get_link_status) {
3046                         ret_val = hw->mac.ops.check_for_link(hw);
3047                         link_active = !hw->mac.get_link_status;
3048                 } else {
3049                         link_active = true;
3050                 }
3051                 break;
3052         case e1000_media_type_internal_serdes:
3053                 ret_val = hw->mac.ops.check_for_link(hw);
3054                 link_active = hw->mac.serdes_has_link;
3055                 break;
3056         default:
3057         case e1000_media_type_unknown:
3058                 break;
3059         }
3060
3061         return link_active;
3062 }
3063
3064 /**
3065  * igb_watchdog - Timer Call-back
3066  * @data: pointer to adapter cast into an unsigned long
3067  **/
3068 static void igb_watchdog(unsigned long data)
3069 {
3070         struct igb_adapter *adapter = (struct igb_adapter *)data;
3071         /* Do the rest outside of interrupt context */
3072         schedule_work(&adapter->watchdog_task);
3073 }
3074
3075 static void igb_watchdog_task(struct work_struct *work)
3076 {
3077         struct igb_adapter *adapter = container_of(work,
3078                                                    struct igb_adapter,
3079                                                    watchdog_task);
3080         struct e1000_hw *hw = &adapter->hw;
3081         struct net_device *netdev = adapter->netdev;
3082         u32 link;
3083         int i;
3084
3085         link = igb_has_link(adapter);
3086         if (link) {
3087                 if (!netif_carrier_ok(netdev)) {
3088                         u32 ctrl;
3089                         hw->mac.ops.get_speed_and_duplex(hw,
3090                                                          &adapter->link_speed,
3091                                                          &adapter->link_duplex);
3092
3093                         ctrl = rd32(E1000_CTRL);
3094                         /* Link status message must follow this format */
3095                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3096                                  "Flow Control: %s\n",
3097                                netdev->name,
3098                                adapter->link_speed,
3099                                adapter->link_duplex == FULL_DUPLEX ?
3100                                  "Full Duplex" : "Half Duplex",
3101                                ((ctrl & E1000_CTRL_TFCE) &&
3102                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3103                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3104                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3105
3106                         /* adjust timeout factor according to speed/duplex */
3107                         adapter->tx_timeout_factor = 1;
3108                         switch (adapter->link_speed) {
3109                         case SPEED_10:
3110                                 adapter->tx_timeout_factor = 14;
3111                                 break;
3112                         case SPEED_100:
3113                                 /* maybe add some timeout factor ? */
3114                                 break;
3115                         }
3116
3117                         netif_carrier_on(netdev);
3118
3119                         igb_ping_all_vfs(adapter);
3120
3121                         /* link state has changed, schedule phy info update */
3122                         if (!test_bit(__IGB_DOWN, &adapter->state))
3123                                 mod_timer(&adapter->phy_info_timer,
3124                                           round_jiffies(jiffies + 2 * HZ));
3125                 }
3126         } else {
3127                 if (netif_carrier_ok(netdev)) {
3128                         adapter->link_speed = 0;
3129                         adapter->link_duplex = 0;
3130                         /* Link status message must follow this format */
3131                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3132                                netdev->name);
3133                         netif_carrier_off(netdev);
3134
3135                         igb_ping_all_vfs(adapter);
3136
3137                         /* link state has changed, schedule phy info update */
3138                         if (!test_bit(__IGB_DOWN, &adapter->state))
3139                                 mod_timer(&adapter->phy_info_timer,
3140                                           round_jiffies(jiffies + 2 * HZ));
3141                 }
3142         }
3143
3144         igb_update_stats(adapter);
3145
3146         for (i = 0; i < adapter->num_tx_queues; i++) {
3147                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3148                 if (!netif_carrier_ok(netdev)) {
3149                         /* We've lost link, so the controller stops DMA,
3150                          * but we've got queued Tx work that's never going
3151                          * to get done, so reset controller to flush Tx.
3152                          * (Do the reset outside of interrupt context). */
3153                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3154                                 adapter->tx_timeout_count++;
3155                                 schedule_work(&adapter->reset_task);
3156                                 /* return immediately since reset is imminent */
3157                                 return;
3158                         }
3159                 }
3160
3161                 /* Force detection of hung controller every watchdog period */
3162                 tx_ring->detect_tx_hung = true;
3163         }
3164
3165         /* Cause software interrupt to ensure rx ring is cleaned */
3166         if (adapter->msix_entries) {
3167                 u32 eics = 0;
3168                 for (i = 0; i < adapter->num_q_vectors; i++) {
3169                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3170                         eics |= q_vector->eims_value;
3171                 }
3172                 wr32(E1000_EICS, eics);
3173         } else {
3174                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3175         }
3176
3177         /* Reset the timer */
3178         if (!test_bit(__IGB_DOWN, &adapter->state))
3179                 mod_timer(&adapter->watchdog_timer,
3180                           round_jiffies(jiffies + 2 * HZ));
3181 }
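/*
 * Note on the hang check above: igb_desc_unused() returns count - 1 on
 * an empty ring, so "unused + 1 < count" simply means descriptors are
 * still queued.  For example, with a 256-entry ring an empty ring gives
 * 255 + 1 == 256 (no reset), while even one outstanding descriptor
 * gives 254 + 1 < 256 and, with the carrier down, schedules the reset.
 */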
3182
3183 enum latency_range {
3184         lowest_latency = 0,
3185         low_latency = 1,
3186         bulk_latency = 2,
3187         latency_invalid = 255
3188 };
3189
3190 /**
3191  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3192  *
3193  *      Stores a new ITR value based strictly on packet size.  This
3194  *      algorithm is less sophisticated than that used in igb_update_itr,
3195  *      due to the difficulty of synchronizing statistics across multiple
3196  *      receive rings.  The divisors and thresholds used by this function
3197  *      were determined based on theoretical maximum wire speed and testing
3198  *      data, in order to minimize response time while increasing bulk
3199  *      throughput.
3200  *      This functionality is controlled by the InterruptThrottleRate module
3201  *      parameter (see igb_param.c)
3202  *      NOTE:  This function is called only when operating in a multiqueue
3203  *             receive environment.
3204  * @q_vector: pointer to q_vector
3205  **/
3206 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3207 {
3208         int new_val = q_vector->itr_val;
3209         int avg_wire_size = 0;
3210         struct igb_adapter *adapter = q_vector->adapter;
3211
3212         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3213          * ints/sec - an ITR value of 976 (in 256 ns units).
3214          */
3215         if (adapter->link_speed != SPEED_1000) {
3216                 new_val = 976;
3217                 goto set_itr_val;
3218         }
3219
3220         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3221                 struct igb_ring *ring = q_vector->rx_ring;
3222                 avg_wire_size = ring->total_bytes / ring->total_packets;
3223         }
3224
3225         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3226                 struct igb_ring *ring = q_vector->tx_ring;
3227                 avg_wire_size = max_t(u32, avg_wire_size,
3228                                       (ring->total_bytes /
3229                                        ring->total_packets));
3230         }
3231
3232         /* if avg_wire_size isn't set, no work was done */
3233         if (!avg_wire_size)
3234                 goto clear_counts;
3235
3236         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3237         avg_wire_size += 24;
3238
3239         /* Don't starve jumbo frames */
3240         avg_wire_size = min(avg_wire_size, 3000);
3241
3242         /* Give a little boost to mid-size frames */
3243         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3244                 new_val = avg_wire_size / 3;
3245         else
3246                 new_val = avg_wire_size / 2;
3247
3248         /* when in itr mode 3 do not exceed 20K ints/sec */
3249         if (adapter->rx_itr_setting == 3 && new_val < 196)
3250                 new_val = 196;
3251
3252 set_itr_val:
3253         if (new_val != q_vector->itr_val) {
3254                 q_vector->itr_val = new_val;
3255                 q_vector->set_itr = 1;
3256         }
3257 clear_counts:
3258         if (q_vector->rx_ring) {
3259                 q_vector->rx_ring->total_bytes = 0;
3260                 q_vector->rx_ring->total_packets = 0;
3261         }
3262         if (q_vector->tx_ring) {
3263                 q_vector->tx_ring->total_bytes = 0;
3264                 q_vector->tx_ring->total_packets = 0;
3265         }
3266 }
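/*
 * Worked example of the sizing above: with 600-byte frames on average,
 * avg_wire_size = 600 + 24 = 624; that lands in the mid-size boost
 * range (300..1200), so new_val = 624 / 3 = 208.  In the hardware's
 * 256 ns ITR units that is roughly 53 usec between interrupts, or
 * about 19,000 ints/sec.
 */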
3267
3268 /**
3269  * igb_update_itr - update the dynamic ITR value based on statistics
3270  *      Stores a new ITR value based on packet and byte
3271  *      counts during the last interrupt.  The advantage of per interrupt
3272  *      computation is faster updates and more accurate ITR for the current
3273  *      traffic pattern.  Constants in this function were computed
3274  *      based on theoretical maximum wire speed and thresholds were set based
3275  *      on testing data as well as attempting to minimize response time
3276  *      while increasing bulk throughput.
3277  *      This functionality is controlled by the InterruptThrottleRate module
3278  *      parameter (see igb_param.c)
3279  *      NOTE:  These calculations are only valid when operating in a single-
3280  *             queue environment.
3281  * @adapter: pointer to adapter
3282  * @itr_setting: current q_vector->itr_val
3283  * @packets: the number of packets during this measurement interval
3284  * @bytes: the number of bytes during this measurement interval
3285  **/
3286 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3287                                    int packets, int bytes)
3288 {
3289         unsigned int retval = itr_setting;
3290
3291         if (packets == 0)
3292                 goto update_itr_done;
3293
3294         switch (itr_setting) {
3295         case lowest_latency:
3296                 /* handle TSO and jumbo frames */
3297                 if (bytes/packets > 8000)
3298                         retval = bulk_latency;
3299                 else if ((packets < 5) && (bytes > 512))
3300                         retval = low_latency;
3301                 break;
3302         case low_latency:  /* 50 usec aka 20000 ints/s */
3303                 if (bytes > 10000) {
3304                         /* this if handles the TSO accounting */
3305                         if (bytes/packets > 8000) {
3306                                 retval = bulk_latency;
3307                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3308                                 retval = bulk_latency;
3309                         } else if (packets > 35) {
3310                                 retval = lowest_latency;
3311                         }
3312                 } else if (bytes/packets > 2000) {
3313                         retval = bulk_latency;
3314                 } else if (packets <= 2 && bytes < 512) {
3315                         retval = lowest_latency;
3316                 }
3317                 break;
3318         case bulk_latency: /* 250 usec aka 4000 ints/s */
3319                 if (bytes > 25000) {
3320                         if (packets > 35)
3321                                 retval = low_latency;
3322                 } else if (bytes < 1500) {
3323                         retval = low_latency;
3324                 }
3325                 break;
3326         }
3327
3328 update_itr_done:
3329         return retval;
3330 }
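/*
 * The ranges above form a small state machine; roughly:
 *
 *   lowest_latency -> bulk_latency    on large (TSO/jumbo) averages
 *   lowest_latency -> low_latency     on a few large packets
 *   low_latency    -> bulk_latency    on sustained heavy traffic
 *   low_latency    -> lowest_latency  on small trickles
 *   bulk_latency   -> low_latency     when traffic tapers off
 */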
3331
3332 static void igb_set_itr(struct igb_adapter *adapter)
3333 {
3334         struct igb_q_vector *q_vector = adapter->q_vector[0];
3335         u16 current_itr;
3336         u32 new_itr = q_vector->itr_val;
3337
3338         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3339         if (adapter->link_speed != SPEED_1000) {
3340                 current_itr = 0;
3341                 new_itr = 4000;
3342                 goto set_itr_now;
3343         }
3344
3345         adapter->rx_itr = igb_update_itr(adapter,
3346                                     adapter->rx_itr,
3347                                     q_vector->rx_ring->total_packets,
3348                                     q_vector->rx_ring->total_bytes);
3349
3350         adapter->tx_itr = igb_update_itr(adapter,
3351                                     adapter->tx_itr,
3352                                     q_vector->tx_ring->total_packets,
3353                                     q_vector->tx_ring->total_bytes);
3354         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3355
3356         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3357         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3358                 current_itr = low_latency;
3359
3360         switch (current_itr) {
3361         /* counts and packets in update_itr are dependent on these numbers */
3362         case lowest_latency:
3363                 new_itr = 56;  /* aka 70,000 ints/sec */
3364                 break;
3365         case low_latency:
3366                 new_itr = 196; /* aka 20,000 ints/sec */
3367                 break;
3368         case bulk_latency:
3369                 new_itr = 980; /* aka 4,000 ints/sec */
3370                 break;
3371         default:
3372                 break;
3373         }
3374
3375 set_itr_now:
3376         q_vector->rx_ring->total_bytes = 0;
3377         q_vector->rx_ring->total_packets = 0;
3378         q_vector->tx_ring->total_bytes = 0;
3379         q_vector->tx_ring->total_packets = 0;
3380
3381         if (new_itr != q_vector->itr_val) {
3382                 /* this attempts to bias the interrupt rate towards Bulk
3383                  * by adding intermediate steps when interrupt rate is
3384                  * increasing */
3385                 new_itr = new_itr > q_vector->itr_val ?
3386                              max((new_itr * q_vector->itr_val) /
3387                                  (new_itr + (q_vector->itr_val >> 2)),
3388                                  new_itr) :
3389                              new_itr;
3390                 /* Don't write the value here; it resets the adapter's
3391                  * internal timer, and causes us to delay far longer than
3392                  * we should between interrupts.  Instead, we write the ITR
3393                  * value at the beginning of the next interrupt so the timing
3394                  * ends up being correct.
3395                  */
3396                 q_vector->itr_val = new_itr;
3397                 q_vector->set_itr = 1;
3398         }
3401 }
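/*
 * The ITR register counts in 256 ns units, which is where the "aka"
 * figures above come from:
 *
 *   56  * 256 ns ~=  14.3 usec -> ~70,000 ints/sec
 *   196 * 256 ns ~=  50.2 usec -> ~20,000 ints/sec
 *   980 * 256 ns ~= 250.9 usec ->  ~4,000 ints/sec
 */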
3402
3403 #define IGB_TX_FLAGS_CSUM               0x00000001
3404 #define IGB_TX_FLAGS_VLAN               0x00000002
3405 #define IGB_TX_FLAGS_TSO                0x00000004
3406 #define IGB_TX_FLAGS_IPV4               0x00000008
3407 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3408 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3409 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3410
3411 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3412                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3413 {
3414         struct e1000_adv_tx_context_desc *context_desc;
3415         unsigned int i;
3416         int err;
3417         struct igb_buffer *buffer_info;
3418         u32 info = 0, tu_cmd = 0;
3419         u32 mss_l4len_idx;
3420         u8 l4len;
3421
3422         if (skb_header_cloned(skb)) {
3423                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3424                 if (err)
3425                         return err;
3426         }
3427
3428         l4len = tcp_hdrlen(skb);
3429         *hdr_len += l4len;
3430
3431         if (skb->protocol == htons(ETH_P_IP)) {
3432                 struct iphdr *iph = ip_hdr(skb);
3433                 iph->tot_len = 0;
3434                 iph->check = 0;
3435                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3436                                                          iph->daddr, 0,
3437                                                          IPPROTO_TCP,
3438                                                          0);
3439         } else if (skb_is_gso_v6(skb)) {
3440                 ipv6_hdr(skb)->payload_len = 0;
3441                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3442                                                        &ipv6_hdr(skb)->daddr,
3443                                                        0, IPPROTO_TCP, 0);
3444         }
3445
3446         i = tx_ring->next_to_use;
3447
3448         buffer_info = &tx_ring->buffer_info[i];
3449         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3450         /* VLAN MACLEN IPLEN */
3451         if (tx_flags & IGB_TX_FLAGS_VLAN)
3452                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3453         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3454         *hdr_len += skb_network_offset(skb);
3455         info |= skb_network_header_len(skb);
3456         *hdr_len += skb_network_header_len(skb);
3457         context_desc->vlan_macip_lens = cpu_to_le32(info);
3458
3459         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3460         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3461
3462         if (skb->protocol == htons(ETH_P_IP))
3463                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3464         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3465
3466         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3467
3468         /* MSS L4LEN IDX */
3469         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3470         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3471
3472         /* For 82575, context index must be unique per ring. */
3473         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3474                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3475
3476         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3477         context_desc->seqnum_seed = 0;
3478
3479         buffer_info->time_stamp = jiffies;
3480         buffer_info->next_to_watch = i;
3481         buffer_info->dma = 0;
3482         i++;
3483         if (i == tx_ring->count)
3484                 i = 0;
3485
3486         tx_ring->next_to_use = i;
3487
3488         return true;
3489 }
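/*
 * Layout of the vlan_macip_lens word built above for the advanced
 * context descriptor: bits 31:16 carry the VLAN tag, bits 15:9 the MAC
 * header length (via E1000_ADVTXD_MACLEN_SHIFT) and bits 8:0 the IP
 * header length.
 */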
3490
3491 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3492                                    struct sk_buff *skb, u32 tx_flags)
3493 {
3494         struct e1000_adv_tx_context_desc *context_desc;
3495         struct pci_dev *pdev = tx_ring->pdev;
3496         struct igb_buffer *buffer_info;
3497         u32 info = 0, tu_cmd = 0;
3498         unsigned int i;
3499
3500         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3501             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3502                 i = tx_ring->next_to_use;
3503                 buffer_info = &tx_ring->buffer_info[i];
3504                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3505
3506                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3507                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3508
3509                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3510                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3511                         info |= skb_network_header_len(skb);
3512
3513                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3514
3515                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3516
3517                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3518                         __be16 protocol;
3519
3520                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3521                                 const struct vlan_ethhdr *vhdr =
3522                                           (const struct vlan_ethhdr*)skb->data;
3523
3524                                 protocol = vhdr->h_vlan_encapsulated_proto;
3525                         } else {
3526                                 protocol = skb->protocol;
3527                         }
3528
3529                         switch (protocol) {
3530                         case cpu_to_be16(ETH_P_IP):
3531                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3532                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3533                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3534                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3535                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3536                                 break;
3537                         case cpu_to_be16(ETH_P_IPV6):
3538                                 /* XXX what about other V6 headers?? */
3539                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3540                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3541                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3542                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3543                                 break;
3544                         default:
3545                                 if (unlikely(net_ratelimit()))
3546                                         dev_warn(&pdev->dev,
3547                                             "partial checksum but proto=%x!\n",
3548                                             skb->protocol);
3549                                 break;
3550                         }
3551                 }
3552
3553                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3554                 context_desc->seqnum_seed = 0;
3555                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3556                         context_desc->mss_l4len_idx =
3557                                 cpu_to_le32(tx_ring->reg_idx << 4);
3558
3559                 buffer_info->time_stamp = jiffies;
3560                 buffer_info->next_to_watch = i;
3561                 buffer_info->dma = 0;
3562
3563                 i++;
3564                 if (i == tx_ring->count)
3565                         i = 0;
3566                 tx_ring->next_to_use = i;
3567
3568                 return true;
3569         }
3570         return false;
3571 }
3572
3573 #define IGB_MAX_TXD_PWR 16
3574 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3575
3576 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3577                                  unsigned int first)
3578 {
3579         struct igb_buffer *buffer_info;
3580         struct pci_dev *pdev = tx_ring->pdev;
3581         unsigned int len = skb_headlen(skb);
3582         unsigned int count = 0, i;
3583         unsigned int f;
3584
3585         i = tx_ring->next_to_use;
3586
3587         buffer_info = &tx_ring->buffer_info[i];
3588         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3589         buffer_info->length = len;
3590         /* set time_stamp *before* dma to help avoid a possible race */
3591         buffer_info->time_stamp = jiffies;
3592         buffer_info->next_to_watch = i;
3593         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3594                                           PCI_DMA_TODEVICE);
3595         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3596                 goto dma_error;
3597
3598         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3599                 struct skb_frag_struct *frag;
3600
3601                 count++;
3602                 i++;
3603                 if (i == tx_ring->count)
3604                         i = 0;
3605
3606                 frag = &skb_shinfo(skb)->frags[f];
3607                 len = frag->size;
3608
3609                 buffer_info = &tx_ring->buffer_info[i];
3610                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3611                 buffer_info->length = len;
3612                 buffer_info->time_stamp = jiffies;
3613                 buffer_info->next_to_watch = i;
3614                 buffer_info->mapped_as_page = true;
3615                 buffer_info->dma = pci_map_page(pdev,
3616                                                 frag->page,
3617                                                 frag->page_offset,
3618                                                 len,
3619                                                 PCI_DMA_TODEVICE);
3620                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3621                         goto dma_error;
3622
3623         }
3624
3625         tx_ring->buffer_info[i].skb = skb;
3626         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3627         tx_ring->buffer_info[first].next_to_watch = i;
3628
3629         return ++count;
3630
3631 dma_error:
3632         dev_err(&pdev->dev, "TX DMA map failed\n");
3633
3634         /* clear timestamp and dma mappings for failed buffer_info mapping */
3635         buffer_info->dma = 0;
3636         buffer_info->time_stamp = 0;
3637         buffer_info->length = 0;
3638         buffer_info->next_to_watch = 0;
3639         buffer_info->mapped_as_page = false;
3640
3641         /* clear timestamp and dma mappings for remaining portion of packet */
3642         while (count--) {
3643                 if (i == 0)
3644                         i = tx_ring->count;
3645                 i--;
3646                 buffer_info = &tx_ring->buffer_info[i];
3647                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3648         }
3649
3650         return 0;
3651 }
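/*
 * Example of the unwind above: if the third page fragment of an skb
 * fails to map, count is 3 (the head and the first two fragments were
 * mapped).  The failed slot is cleared explicitly, then the while loop
 * steps i backwards three slots, unmapping and freeing the head and
 * both fragments so the ring is left exactly as it was found.
 */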
3652
3653 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3654                                     u32 tx_flags, int count, u32 paylen,
3655                                     u8 hdr_len)
3656 {
3657         union e1000_adv_tx_desc *tx_desc;
3658         struct igb_buffer *buffer_info;
3659         u32 olinfo_status = 0, cmd_type_len;
3660         unsigned int i = tx_ring->next_to_use;
3661
3662         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3663                         E1000_ADVTXD_DCMD_DEXT);
3664
3665         if (tx_flags & IGB_TX_FLAGS_VLAN)
3666                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3667
3668         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3669                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3670
3671         if (tx_flags & IGB_TX_FLAGS_TSO) {
3672                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3673
3674                 /* insert tcp checksum */
3675                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3676
3677                 /* insert ip checksum */
3678                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3679                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3680
3681         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3682                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3683         }
3684
3685         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3686             (tx_flags & (IGB_TX_FLAGS_CSUM |
3687                          IGB_TX_FLAGS_TSO |
3688                          IGB_TX_FLAGS_VLAN)))
3689                 olinfo_status |= tx_ring->reg_idx << 4;
3690
3691         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3692
3693         do {
3694                 buffer_info = &tx_ring->buffer_info[i];
3695                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3696                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3697                 tx_desc->read.cmd_type_len =
3698                         cpu_to_le32(cmd_type_len | buffer_info->length);
3699                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3700                 count--;
3701                 i++;
3702                 if (i == tx_ring->count)
3703                         i = 0;
3704         } while (count > 0);
3705
3706         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3707         /* Force memory writes to complete before letting h/w
3708          * know there are new descriptors to fetch.  (Only
3709          * applicable for weak-ordered memory model archs,
3710          * such as IA-64). */
3711         wmb();
3712
3713         tx_ring->next_to_use = i;
3714         writel(i, tx_ring->tail);
3715         /* we need this if more than one processor can write to our tail
3716          * at a time, it synchronizes IO on IA64/Altix systems */
3717         mmiowb();
3718 }
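/*
 * Only the final descriptor has IGB_ADVTXD_DCMD (end-of-packet plus
 * report-status) OR'd in, so the hardware writes back completion
 * status once per frame rather than once per descriptor.
 */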
3719
3720 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3721 {
3722         struct net_device *netdev = tx_ring->netdev;
3723
3724         netif_stop_subqueue(netdev, tx_ring->queue_index);
3725
3726         /* Herbert's original patch had:
3727          *  smp_mb__after_netif_stop_queue();
3728          * but since that doesn't exist yet, just open code it. */
3729         smp_mb();
3730
3731         /* We need to check again in case another CPU has just
3732          * made room available. */
3733         if (igb_desc_unused(tx_ring) < size)
3734                 return -EBUSY;
3735
3736         /* A reprieve! */
3737         netif_wake_subqueue(netdev, tx_ring->queue_index);
3738         tx_ring->tx_stats.restart_queue++;
3739         return 0;
3740 }
3741
3742 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3743 {
3744         if (igb_desc_unused(tx_ring) >= size)
3745                 return 0;
3746         return __igb_maybe_stop_tx(tx_ring, size);
3747 }
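/*
 * The stop/recheck/wake sequence above is the standard lockless
 * pattern: stop the queue first, use smp_mb() so the stop is visible
 * before free space is rechecked, and if the cleanup path has freed
 * descriptors in the meantime wake the queue again rather than stall
 * until the next Tx interrupt.
 */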
3748
3749 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3750                                     struct igb_ring *tx_ring)
3751 {
3752         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3753         int tso = 0, count;
3754         u32 tx_flags = 0;
3755         u16 first;
3756         u8 hdr_len = 0;
3757         union skb_shared_tx *shtx = skb_tx(skb);
3758
3759         /* need: 1 descriptor per page,
3760          *       + 2 desc gap to keep tail from touching head,
3761          *       + 1 desc for skb->data,
3762          *       + 1 desc for context descriptor,
3763          * otherwise try next time */
3764         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3765                 /* this is a hard error */
3766                 return NETDEV_TX_BUSY;
3767         }
3768
3769         if (unlikely(shtx->hardware)) {
3770                 shtx->in_progress = 1;
3771                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3772         }
3773
3774         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3775                 tx_flags |= IGB_TX_FLAGS_VLAN;
3776                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3777         }
3778
3779         if (skb->protocol == htons(ETH_P_IP))
3780                 tx_flags |= IGB_TX_FLAGS_IPV4;
3781
3782         first = tx_ring->next_to_use;
3783         if (skb_is_gso(skb)) {
3784                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3785
3786                 if (tso < 0) {
3787                         dev_kfree_skb_any(skb);
3788                         return NETDEV_TX_OK;
3789                 }
3790         }
3791
3792         if (tso)
3793                 tx_flags |= IGB_TX_FLAGS_TSO;
3794         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3795                  (skb->ip_summed == CHECKSUM_PARTIAL))
3796                 tx_flags |= IGB_TX_FLAGS_CSUM;
3797
3798         /*
3799          * count reflects descriptors mapped; if 0 or less then a mapping
3800          * error has occurred and we need to rewind the descriptor queue
3801          */
3802         count = igb_tx_map_adv(tx_ring, skb, first);
3803         if (!count) {
3804                 dev_kfree_skb_any(skb);
3805                 tx_ring->buffer_info[first].time_stamp = 0;
3806                 tx_ring->next_to_use = first;
3807                 return NETDEV_TX_OK;
3808         }
3809
3810         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3811
3812         /* Make sure there is space in the ring for the next send. */
3813         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3814
3815         return NETDEV_TX_OK;
3816 }
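/*
 * Worked example of the "nr_frags + 4" reservation above: a frame with
 * three page fragments needs one context descriptor, one descriptor
 * for skb->data, three for the fragments, and a two-descriptor gap to
 * keep tail from touching head, i.e. 3 + 4 = 7 free descriptors.
 */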
3817
3818 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3819                                       struct net_device *netdev)
3820 {
3821         struct igb_adapter *adapter = netdev_priv(netdev);
3822         struct igb_ring *tx_ring;
3823         int r_idx = 0;
3824
3825         if (test_bit(__IGB_DOWN, &adapter->state)) {
3826                 dev_kfree_skb_any(skb);
3827                 return NETDEV_TX_OK;
3828         }
3829
3830         if (skb->len <= 0) {
3831                 dev_kfree_skb_any(skb);
3832                 return NETDEV_TX_OK;
3833         }
3834
3835         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3836         tx_ring = adapter->multi_tx_table[r_idx];
3837
3838         /* This goes back to the question of how to logically map a tx queue
3839          * to a flow.  Right now, performance is slightly degraded
3840          * when using multiple tx queues.  If the stack breaks away from a
3841          * single qdisc implementation, we can look at this again. */
3842         return igb_xmit_frame_ring_adv(skb, tx_ring);
3843 }
3844
3845 /**
3846  * igb_tx_timeout - Respond to a Tx Hang
3847  * @netdev: network interface device structure
3848  **/
3849 static void igb_tx_timeout(struct net_device *netdev)
3850 {
3851         struct igb_adapter *adapter = netdev_priv(netdev);
3852         struct e1000_hw *hw = &adapter->hw;
3853
3854         /* Do the reset outside of interrupt context */
3855         adapter->tx_timeout_count++;
3856
3857         if (hw->mac.type == e1000_82580)
3858                 hw->dev_spec._82575.global_device_reset = true;
3859
3860         schedule_work(&adapter->reset_task);
3861         wr32(E1000_EICS,
3862              (adapter->eims_enable_mask & ~adapter->eims_other));
3863 }
3864
3865 static void igb_reset_task(struct work_struct *work)
3866 {
3867         struct igb_adapter *adapter;
3868         adapter = container_of(work, struct igb_adapter, reset_task);
3869
3870         igb_reinit_locked(adapter);
3871 }
3872
3873 /**
3874  * igb_get_stats - Get System Network Statistics
3875  * @netdev: network interface device structure
3876  *
3877  * Returns the address of the device statistics structure.
3878  * The statistics are actually updated from the timer callback.
3879  **/
3880 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3881 {
3882         /* only return the current stats */
3883         return &netdev->stats;
3884 }
3885
3886 /**
3887  * igb_change_mtu - Change the Maximum Transfer Unit
3888  * @netdev: network interface device structure
3889  * @new_mtu: new value for maximum frame size
3890  *
3891  * Returns 0 on success, negative on failure
3892  **/
3893 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3894 {
3895         struct igb_adapter *adapter = netdev_priv(netdev);
3896         struct pci_dev *pdev = adapter->pdev;
3897         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3898         u32 rx_buffer_len, i;
3899
3900         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3901                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3902                 return -EINVAL;
3903         }
3904
3905         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3906                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3907                 return -EINVAL;
3908         }
3909
3910         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3911                 msleep(1);
3912
3913         /* igb_down has a dependency on max_frame_size */
3914         adapter->max_frame_size = max_frame;
3915
3916         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3917          * means we reserve 2 more, this pushes us to allocate from the next
3918          * larger slab size.
3919          * i.e. RXBUFFER_2048 --> size-4096 slab
3920          */
3921
3922         if (max_frame <= IGB_RXBUFFER_1024)
3923                 rx_buffer_len = IGB_RXBUFFER_1024;
3924         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3925                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3926         else
3927                 rx_buffer_len = IGB_RXBUFFER_128;
3928
3929         if (netif_running(netdev))
3930                 igb_down(adapter);
3931
3932         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3933                  netdev->mtu, new_mtu);
3934         netdev->mtu = new_mtu;
3935
3936         for (i = 0; i < adapter->num_rx_queues; i++)
3937                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3938
3939         if (netif_running(netdev))
3940                 igb_up(adapter);
3941         else
3942                 igb_reset(adapter);
3943
3944         clear_bit(__IGB_RESETTING, &adapter->state);
3945
3946         return 0;
3947 }
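/*
 * Example of the buffer sizing above: MTU 1500 gives max_frame
 * 1500 + 14 + 4 = 1518, which exceeds IGB_RXBUFFER_1024 but fits
 * MAXIMUM_ETHERNET_VLAN_SIZE, so the standard VLAN-sized buffer is
 * used.  A jumbo MTU instead selects the 128-byte header buffer and
 * the Rx path places the rest of the frame in half-page buffers.
 */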
3948
3949 /**
3950  * igb_update_stats - Update the board statistics counters
3951  * @adapter: board private structure
3952  **/
3953
3954 void igb_update_stats(struct igb_adapter *adapter)
3955 {
3956         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3957         struct e1000_hw *hw = &adapter->hw;
3958         struct pci_dev *pdev = adapter->pdev;
3959         u32 reg, mpc;
3960         u16 phy_tmp;
3961         int i;
3962         u64 bytes, packets;
3963
3964 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3965
3966         /*
3967          * Prevent stats update while adapter is being reset, or if the pci
3968          * connection is down.
3969          */
3970         if (adapter->link_speed == 0)
3971                 return;
3972         if (pci_channel_offline(pdev))
3973                 return;
3974
3975         bytes = 0;
3976         packets = 0;
3977         for (i = 0; i < adapter->num_rx_queues; i++) {
3978                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3979                 struct igb_ring *ring = adapter->rx_ring[i];
3980                 ring->rx_stats.drops += rqdpc_tmp;
3981                 net_stats->rx_fifo_errors += rqdpc_tmp;
3982                 bytes += ring->rx_stats.bytes;
3983                 packets += ring->rx_stats.packets;
3984         }
3985
3986         net_stats->rx_bytes = bytes;
3987         net_stats->rx_packets = packets;
3988
3989         bytes = 0;
3990         packets = 0;
3991         for (i = 0; i < adapter->num_tx_queues; i++) {
3992                 struct igb_ring *ring = adapter->tx_ring[i];
3993                 bytes += ring->tx_stats.bytes;
3994                 packets += ring->tx_stats.packets;
3995         }
3996         net_stats->tx_bytes = bytes;
3997         net_stats->tx_packets = packets;
3998
3999         /* read stats registers */
4000         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4001         adapter->stats.gprc += rd32(E1000_GPRC);
4002         adapter->stats.gorc += rd32(E1000_GORCL);
4003         rd32(E1000_GORCH); /* clear GORCL */
4004         adapter->stats.bprc += rd32(E1000_BPRC);
4005         adapter->stats.mprc += rd32(E1000_MPRC);
4006         adapter->stats.roc += rd32(E1000_ROC);
4007
4008         adapter->stats.prc64 += rd32(E1000_PRC64);
4009         adapter->stats.prc127 += rd32(E1000_PRC127);
4010         adapter->stats.prc255 += rd32(E1000_PRC255);
4011         adapter->stats.prc511 += rd32(E1000_PRC511);
4012         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4013         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4014         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4015         adapter->stats.sec += rd32(E1000_SEC);
4016
4017         mpc = rd32(E1000_MPC);
4018         adapter->stats.mpc += mpc;
4019         net_stats->rx_fifo_errors += mpc;
4020         adapter->stats.scc += rd32(E1000_SCC);
4021         adapter->stats.ecol += rd32(E1000_ECOL);
4022         adapter->stats.mcc += rd32(E1000_MCC);
4023         adapter->stats.latecol += rd32(E1000_LATECOL);
4024         adapter->stats.dc += rd32(E1000_DC);
4025         adapter->stats.rlec += rd32(E1000_RLEC);
4026         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4027         adapter->stats.xontxc += rd32(E1000_XONTXC);
4028         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4029         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4030         adapter->stats.fcruc += rd32(E1000_FCRUC);
4031         adapter->stats.gptc += rd32(E1000_GPTC);
4032         adapter->stats.gotc += rd32(E1000_GOTCL);
4033         rd32(E1000_GOTCH); /* clear GOTCL */
4034         adapter->stats.rnbc += rd32(E1000_RNBC);
4035         adapter->stats.ruc += rd32(E1000_RUC);
4036         adapter->stats.rfc += rd32(E1000_RFC);
4037         adapter->stats.rjc += rd32(E1000_RJC);
4038         adapter->stats.tor += rd32(E1000_TORH);
4039         adapter->stats.tot += rd32(E1000_TOTH);
4040         adapter->stats.tpr += rd32(E1000_TPR);
4041
4042         adapter->stats.ptc64 += rd32(E1000_PTC64);
4043         adapter->stats.ptc127 += rd32(E1000_PTC127);
4044         adapter->stats.ptc255 += rd32(E1000_PTC255);
4045         adapter->stats.ptc511 += rd32(E1000_PTC511);
4046         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4047         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4048
4049         adapter->stats.mptc += rd32(E1000_MPTC);
4050         adapter->stats.bptc += rd32(E1000_BPTC);
4051
4052         adapter->stats.tpt += rd32(E1000_TPT);
4053         adapter->stats.colc += rd32(E1000_COLC);
4054
4055         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4056         /* read internal phy specific stats */
4057         reg = rd32(E1000_CTRL_EXT);
4058         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4059                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4060                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4061         }
4062
4063         adapter->stats.tsctc += rd32(E1000_TSCTC);
4064         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4065
4066         adapter->stats.iac += rd32(E1000_IAC);
4067         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4068         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4069         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4070         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4071         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4072         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4073         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4074         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4075
4076         /* Fill out the OS statistics structure */
4077         net_stats->multicast = adapter->stats.mprc;
4078         net_stats->collisions = adapter->stats.colc;
4079
4080         /* Rx Errors */
4081
4082         /* RLEC on some newer hardware can be incorrect so build
4083          * our own version based on RUC and ROC */
4084         net_stats->rx_errors = adapter->stats.rxerrc +
4085                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4086                 adapter->stats.ruc + adapter->stats.roc +
4087                 adapter->stats.cexterr;
4088         net_stats->rx_length_errors = adapter->stats.ruc +
4089                                       adapter->stats.roc;
4090         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4091         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4092         net_stats->rx_missed_errors = adapter->stats.mpc;
4093
4094         /* Tx Errors */
4095         net_stats->tx_errors = adapter->stats.ecol +
4096                                adapter->stats.latecol;
4097         net_stats->tx_aborted_errors = adapter->stats.ecol;
4098         net_stats->tx_window_errors = adapter->stats.latecol;
4099         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4100
4101         /* Tx Dropped needs to be maintained elsewhere */
4102
4103         /* Phy Stats */
4104         if (hw->phy.media_type == e1000_media_type_copper) {
4105                 if ((adapter->link_speed == SPEED_1000) &&
4106                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4107                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4108                         adapter->phy_stats.idle_errors += phy_tmp;
4109                 }
4110         }
4111
4112         /* Management Stats */
4113         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4114         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4115         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4116 }
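/*
 * Note on the octet counters above: GORC/GOTC are 64-bit counters
 * split across low and high registers, and reading the high half
 * clears the pair; that is why E1000_GORCH and E1000_GOTCH are read
 * and discarded immediately after the low halves are accumulated.
 */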
4117
4118 static irqreturn_t igb_msix_other(int irq, void *data)
4119 {
4120         struct igb_adapter *adapter = data;
4121         struct e1000_hw *hw = &adapter->hw;
4122         u32 icr = rd32(E1000_ICR);
4123         /* reading ICR causes bit 31 of EICR to be cleared */
4124
4125         if (icr & E1000_ICR_DRSTA)
4126                 schedule_work(&adapter->reset_task);
4127
4128         if (icr & E1000_ICR_DOUTSYNC) {
4129                 /* HW is reporting DMA is out of sync */
4130                 adapter->stats.doosync++;
4131         }
4132
4133         /* Check for a mailbox event */
4134         if (icr & E1000_ICR_VMMB)
4135                 igb_msg_task(adapter);
4136
4137         if (icr & E1000_ICR_LSC) {
4138                 hw->mac.get_link_status = 1;
4139                 /* guard against interrupt when we're going down */
4140                 if (!test_bit(__IGB_DOWN, &adapter->state))
4141                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4142         }
4143
4144         if (adapter->vfs_allocated_count)
4145                 wr32(E1000_IMS, E1000_IMS_LSC |
4146                                 E1000_IMS_VMMB |
4147                                 E1000_IMS_DOUTSYNC);
4148         else
4149                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4150         wr32(E1000_EIMS, adapter->eims_other);
4151
4152         return IRQ_HANDLED;
4153 }
4154
4155 static void igb_write_itr(struct igb_q_vector *q_vector)
4156 {
4157         struct igb_adapter *adapter = q_vector->adapter;
4158         u32 itr_val = q_vector->itr_val & 0x7FFC;
4159
4160         if (!q_vector->set_itr)
4161                 return;
4162
4163         if (!itr_val)
4164                 itr_val = 0x4;
4165
4166         if (adapter->hw.mac.type == e1000_82575)
4167                 itr_val |= itr_val << 16;
4168         else
4169                 itr_val |= 0x8000000;
4170
4171         writel(itr_val, q_vector->itr_register);
4172         q_vector->set_itr = 0;
4173 }
4174
4175 static irqreturn_t igb_msix_ring(int irq, void *data)
4176 {
4177         struct igb_q_vector *q_vector = data;
4178
4179         /* Write the ITR value calculated from the previous interrupt. */
4180         igb_write_itr(q_vector);
4181
4182         napi_schedule(&q_vector->napi);
4183
4184         return IRQ_HANDLED;
4185 }
4186
4187 #ifdef CONFIG_IGB_DCA
4188 static void igb_update_dca(struct igb_q_vector *q_vector)
4189 {
4190         struct igb_adapter *adapter = q_vector->adapter;
4191         struct e1000_hw *hw = &adapter->hw;
4192         int cpu = get_cpu();
4193
4194         if (q_vector->cpu == cpu)
4195                 goto out_no_update;
4196
4197         if (q_vector->tx_ring) {
4198                 int q = q_vector->tx_ring->reg_idx;
4199                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4200                 if (hw->mac.type == e1000_82575) {
4201                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4202                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4203                 } else {
4204                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4205                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4206                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4207                 }
4208                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4209                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4210         }
4211         if (q_vector->rx_ring) {
4212                 int q = q_vector->rx_ring->reg_idx;
4213                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4214                 if (hw->mac.type == e1000_82575) {
4215                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4216                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4217                 } else {
4218                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4219                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4220                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4221                 }
4222                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4223                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4224                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4225                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4226         }
4227         q_vector->cpu = cpu;
4228 out_no_update:
4229         put_cpu();
4230 }
4231
4232 static void igb_setup_dca(struct igb_adapter *adapter)
4233 {
4234         struct e1000_hw *hw = &adapter->hw;
4235         int i;
4236
4237         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4238                 return;
4239
4240         /* Always use CB2 mode, difference is masked in the CB driver. */
4241         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4242
4243         for (i = 0; i < adapter->num_q_vectors; i++) {
4244                 adapter->q_vector[i]->cpu = -1;
4245                 igb_update_dca(adapter->q_vector[i]);
4246         }
4247 }
4248
4249 static int __igb_notify_dca(struct device *dev, void *data)
4250 {
4251         struct net_device *netdev = dev_get_drvdata(dev);
4252         struct igb_adapter *adapter = netdev_priv(netdev);
4253         struct pci_dev *pdev = adapter->pdev;
4254         struct e1000_hw *hw = &adapter->hw;
4255         unsigned long event = *(unsigned long *)data;
4256
4257         switch (event) {
4258         case DCA_PROVIDER_ADD:
4259                 /* if already enabled, don't do it again */
4260                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4261                         break;
4262                 if (dca_add_requester(dev) == 0) {
4263                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4264                         dev_info(&pdev->dev, "DCA enabled\n");
4265                         igb_setup_dca(adapter);
4266                         break;
4267                 }
4268                 /* Fall Through since DCA is disabled. */
4269         case DCA_PROVIDER_REMOVE:
4270                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4271                         /* without this a class_device is left
4272                          * hanging around in the sysfs model */
4273                         dca_remove_requester(dev);
4274                         dev_info(&pdev->dev, "DCA disabled\n");
4275                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4276                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4277                 }
4278                 break;
4279         }
4280
4281         return 0;
4282 }
4283
4284 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4285                           void *p)
4286 {
4287         int ret_val;
4288
4289         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4290                                          __igb_notify_dca);
4291
4292         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4293 }
4294 #endif /* CONFIG_IGB_DCA */
4295
4296 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4297 {
4298         struct e1000_hw *hw = &adapter->hw;
4299         u32 ping;
4300         int i;
4301
4302         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4303                 ping = E1000_PF_CONTROL_MSG;
4304                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4305                         ping |= E1000_VT_MSGTYPE_CTS;
4306                 igb_write_mbx(hw, &ping, 1, i);
4307         }
4308 }
4309
4310 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4311 {
4312         struct e1000_hw *hw = &adapter->hw;
4313         u32 vmolr = rd32(E1000_VMOLR(vf));
4314         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4315
4316         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4317                             IGB_VF_FLAG_MULTI_PROMISC);
4318         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4319
4320         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4321                 vmolr |= E1000_VMOLR_MPME;
4322                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4323         } else {
4324                 /*
4325                  * if we have hashes and we are clearing a multicast promisc
4326                  * flag we need to write the hashes to the MTA as this step
4327                  * was previously skipped
4328                  */
4329                 if (vf_data->num_vf_mc_hashes > 30) {
4330                         vmolr |= E1000_VMOLR_MPME;
4331                 } else if (vf_data->num_vf_mc_hashes) {
4332                         int j;
4333                         vmolr |= E1000_VMOLR_ROMPE;
4334                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4335                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4336                 }
4337         }
4338
4339         wr32(E1000_VMOLR(vf), vmolr);
4340
4341         /* there are flags left unprocessed, likely not supported */
4342         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4343                 return -EINVAL;
4344
4345         return 0;
4346
4347 }
4348
4349 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4350                                   u32 *msgbuf, u32 vf)
4351 {
4352         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4353         u16 *hash_list = (u16 *)&msgbuf[1];
4354         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4355         int i;
4356
4357         /* salt away the number of multicast addresses assigned
4358          * to this VF for later use to restore when the PF multicast
4359          * list changes
4360          */
4361         vf_data->num_vf_mc_hashes = n;
4362
4363         /* only up to 30 hash values supported */
4364         if (n > 30)
4365                 n = 30;
4366
4367         /* store the hashes for later use */
4368         for (i = 0; i < n; i++)
4369                 vf_data->vf_mc_hashes[i] = hash_list[i];
4370
4371         /* Flush and reset the mta with the new values */
4372         igb_set_rx_mode(adapter->netdev);
4373
4374         return 0;
4375 }
4376
4377 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4378 {
4379         struct e1000_hw *hw = &adapter->hw;
4380         struct vf_data_storage *vf_data;
4381         int i, j;
4382
4383         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4384                 u32 vmolr = rd32(E1000_VMOLR(i));
4385                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4386
4387                 vf_data = &adapter->vf_data[i];
4388
4389                 if ((vf_data->num_vf_mc_hashes > 30) ||
4390                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4391                         vmolr |= E1000_VMOLR_MPME;
4392                 } else if (vf_data->num_vf_mc_hashes) {
4393                         vmolr |= E1000_VMOLR_ROMPE;
4394                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4395                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4396                 }
4397                 wr32(E1000_VMOLR(i), vmolr);
4398         }
4399 }
4400
4401 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4402 {
4403         struct e1000_hw *hw = &adapter->hw;
4404         u32 pool_mask, reg, vid;
4405         int i;
4406
4407         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4408
4409         /* Find the vlan filter for this id */
4410         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4411                 reg = rd32(E1000_VLVF(i));
4412
4413                 /* remove the vf from the pool */
4414                 reg &= ~pool_mask;
4415
4416                 /* if pool is empty then remove entry from vfta */
4417                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4418                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4419                         vid = reg & E1000_VLVF_VLANID_MASK;
4420                         igb_vfta_set(hw, vid, false);
4421                         reg = 0;
4422                 }
4423
4424                 wr32(E1000_VLVF(i), reg);
4425         }
4426
4427         adapter->vf_data[vf].vlans_enabled = 0;
4428 }
4429
4430 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4431 {
4432         struct e1000_hw *hw = &adapter->hw;
4433         u32 reg, i;
4434
4435         /* The vlvf table only exists on 82576 hardware and newer */
4436         if (hw->mac.type < e1000_82576)
4437                 return -1;
4438
4439         /* we only need to do this if VMDq is enabled */
4440         if (!adapter->vfs_allocated_count)
4441                 return -1;
4442
4443         /* Find the vlan filter for this id */
4444         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4445                 reg = rd32(E1000_VLVF(i));
4446                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4447                     vid == (reg & E1000_VLVF_VLANID_MASK))
4448                         break;
4449         }
4450
4451         if (add) {
4452                 if (i == E1000_VLVF_ARRAY_SIZE) {
4453                         /* Did not find a matching VLAN ID entry that was
4454                          * enabled.  Search for a free filter entry, i.e.
4455                          * one without the enable bit set
4456                          */
4457                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4458                                 reg = rd32(E1000_VLVF(i));
4459                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4460                                         break;
4461                         }
4462                 }
4463                 if (i < E1000_VLVF_ARRAY_SIZE) {
4464                         /* Found an enabled/available entry */
4465                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4466
4467                         /* if !enabled we need to set this up in vfta */
4468                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4469                                 /* add VID to filter table */
4470                                 igb_vfta_set(hw, vid, true);
4471                                 reg |= E1000_VLVF_VLANID_ENABLE;
4472                         }
4473                         reg &= ~E1000_VLVF_VLANID_MASK;
4474                         reg |= vid;
4475                         wr32(E1000_VLVF(i), reg);
4476
4477                         /* do not modify RLPML for PF devices */
4478                         if (vf >= adapter->vfs_allocated_count)
4479                                 return 0;
4480
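                        /* first VLAN for this VF: grow its max frame size
                         * (RLPML) by 4 bytes to make room for the tag */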
4481                         if (!adapter->vf_data[vf].vlans_enabled) {
4482                                 u32 size;
4483                                 reg = rd32(E1000_VMOLR(vf));
4484                                 size = reg & E1000_VMOLR_RLPML_MASK;
4485                                 size += 4;
4486                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4487                                 reg |= size;
4488                                 wr32(E1000_VMOLR(vf), reg);
4489                         }
4490
4491                         adapter->vf_data[vf].vlans_enabled++;
4492                         return 0;
4493                 }
4494         } else {
4495                 if (i < E1000_VLVF_ARRAY_SIZE) {
4496                         /* remove vf from the pool */
4497                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4498                         /* if pool is empty then remove entry from vfta */
4499                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4500                                 reg = 0;
4501                                 igb_vfta_set(hw, vid, false);
4502                         }
4503                         wr32(E1000_VLVF(i), reg);
4504
4505                         /* do not modify RLPML for PF devices */
4506                         if (vf >= adapter->vfs_allocated_count)
4507                                 return 0;
4508
4509                         adapter->vf_data[vf].vlans_enabled--;
4510                         if (!adapter->vf_data[vf].vlans_enabled) {
4511                                 u32 size;
4512                                 reg = rd32(E1000_VMOLR(vf));
4513                                 size = reg & E1000_VMOLR_RLPML_MASK;
4514                                 size -= 4;
4515                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4516                                 reg |= size;
4517                                 wr32(E1000_VMOLR(vf), reg);
4518                         }
4519                 }
4520         }
4521         return 0;
4522 }
4523
4524 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4525 {
4526         struct e1000_hw *hw = &adapter->hw;
4527
4528         if (vid)
4529                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4530         else
4531                 wr32(E1000_VMVIR(vf), 0);
4532 }
4533
4534 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4535                                int vf, u16 vlan, u8 qos)
4536 {
4537         int err = 0;
4538         struct igb_adapter *adapter = netdev_priv(netdev);
4539
4540         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4541                 return -EINVAL;
4542         if (vlan || qos) {
4543                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4544                 if (err)
4545                         goto out;
4546                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4547                 igb_set_vmolr(adapter, vf, !vlan);
4548                 adapter->vf_data[vf].pf_vlan = vlan;
4549                 adapter->vf_data[vf].pf_qos = qos;
4550                 dev_info(&adapter->pdev->dev,
4551                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4552                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4553                         dev_warn(&adapter->pdev->dev,
4554                                  "The VF VLAN has been set,"
4555                                  " but the PF device is not up.\n");
4556                         dev_warn(&adapter->pdev->dev,
4557                                  "Bring the PF device up before"
4558                                  " attempting to use the VF device.\n");
4559                 }
4560         } else {
4561                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4562                                    false, vf);
4563                 igb_set_vmvir(adapter, vlan, vf);
4564                 igb_set_vmolr(adapter, vf, true);
4565                 adapter->vf_data[vf].pf_vlan = 0;
4566                 adapter->vf_data[vf].pf_qos = 0;
4567         }
4568 out:
4569         return err;
4570 }
4571
4572 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4573 {
4574         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4575         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4576
4577         return igb_vlvf_set(adapter, vid, add, vf);
4578 }
4579
4580 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4581 {
4582         /* clear flags */
4583         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4584         adapter->vf_data[vf].last_nack = jiffies;
4585
4586         /* reset offloads to defaults */
4587         igb_set_vmolr(adapter, vf, true);
4588
4589         /* reset vlans for device */
4590         igb_clear_vf_vfta(adapter, vf);
4591         if (adapter->vf_data[vf].pf_vlan)
4592                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4593                                     adapter->vf_data[vf].pf_vlan,
4594                                     adapter->vf_data[vf].pf_qos);
4595         else
4596                 igb_clear_vf_vfta(adapter, vf);
4597
4598         /* reset multicast table array for vf */
4599         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4600
4601         /* Flush and reset the mta with the new values */
4602         igb_set_rx_mode(adapter->netdev);
4603 }
4604
4605 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4606 {
4607         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4608
4609         /* generate a new mac address as we were hotplug removed/added */
4610         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4611                 random_ether_addr(vf_mac);
4612
4613         /* process remaining reset events */
4614         igb_vf_reset(adapter, vf);
4615 }
4616
4617 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4618 {
4619         struct e1000_hw *hw = &adapter->hw;
4620         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4621         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4622         u32 reg, msgbuf[3];
4623         u8 *addr = (u8 *)(&msgbuf[1]);
4624
4625         /* process all the same items cleared in a function level reset */
4626         igb_vf_reset(adapter, vf);
4627
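        /* VF receive-address filters are carved from the top of the RAR
         * table, one entry per VF, leaving the low entries for the PF */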
4628         /* set vf mac address */
4629         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4630
4631         /* enable transmit and receive for vf */
4632         reg = rd32(E1000_VFTE);
4633         wr32(E1000_VFTE, reg | (1 << vf));
4634         reg = rd32(E1000_VFRE);
4635         wr32(E1000_VFRE, reg | (1 << vf));
4636
4637         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4638
4639         /* reply to reset with ack and vf mac address */
4640         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4641         memcpy(addr, vf_mac, 6);
4642         igb_write_mbx(hw, msgbuf, 3, vf);
4643 }
4644
4645 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4646 {
4647         unsigned char *addr = (char *)&msg[1];
4648         int err = -1;
4649
4650         if (is_valid_ether_addr(addr))
4651                 err = igb_set_vf_mac(adapter, vf, addr);
4652
4653         return err;
4654 }
4655
4656 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4657 {
4658         struct e1000_hw *hw = &adapter->hw;
4659         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4660         u32 msg = E1000_VT_MSGTYPE_NACK;
4661
4662         /* if device isn't clear to send it shouldn't be reading either */
4663         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4664             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4665                 igb_write_mbx(hw, &msg, 1, vf);
4666                 vf_data->last_nack = jiffies;
4667         }
4668 }
4669
4670 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4671 {
4672         struct pci_dev *pdev = adapter->pdev;
4673         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4674         struct e1000_hw *hw = &adapter->hw;
4675         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4676         s32 retval;
4677
4678         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4679
4680         if (retval) {
4681                 /* if receive failed revoke VF CTS stats and restart init */
4682                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4683                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4684                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4685                         return;
4686                 goto out;
4687         }
4688
4689         /* this is a message we already processed, do nothing */
4690         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4691                 return;
4692
4693         /*
4694          * until the vf completes a reset it should not be
4695          * allowed to start any configuration.
4696          */
4697
4698         if (msgbuf[0] == E1000_VF_RESET) {
4699                 igb_vf_reset_msg(adapter, vf);
4700                 return;
4701         }
4702
4703         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4704                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4705                         return;
4706                 retval = -1;
4707                 goto out;
4708         }
4709
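        /* msgbuf[0] carries the request in its low 16 bits and the
         * E1000_VT_MSGTYPE_* status flags in the high bits; any payload
         * (MAC address, VLAN id, frame size) follows in msgbuf[1..] */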
4710         switch ((msgbuf[0] & 0xFFFF)) {
4711         case E1000_VF_SET_MAC_ADDR:
4712                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4713                 break;
4714         case E1000_VF_SET_PROMISC:
4715                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4716                 break;
4717         case E1000_VF_SET_MULTICAST:
4718                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4719                 break;
4720         case E1000_VF_SET_LPE:
4721                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4722                 break;
4723         case E1000_VF_SET_VLAN:
4724                 if (adapter->vf_data[vf].pf_vlan)
4725                         retval = -1;
4726                 else
4727                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4728                 break;
4729         default:
4730                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4731                 retval = -1;
4732                 break;
4733         }
4734
4735         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4736 out:
4737         /* notify the VF of the results of what it sent us */
4738         if (retval)
4739                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4740         else
4741                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4742
4743         igb_write_mbx(hw, msgbuf, 1, vf);
4744 }
4745
4746 static void igb_msg_task(struct igb_adapter *adapter)
4747 {
4748         struct e1000_hw *hw = &adapter->hw;
4749         u32 vf;
4750
4751         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4752                 /* process any reset requests */
4753                 if (!igb_check_for_rst(hw, vf))
4754                         igb_vf_reset_event(adapter, vf);
4755
4756                 /* process any messages pending */
4757                 if (!igb_check_for_msg(hw, vf))
4758                         igb_rcv_msg_from_vf(adapter, vf);
4759
4760                 /* process any acks */
4761                 if (!igb_check_for_ack(hw, vf))
4762                         igb_rcv_ack_from_vf(adapter, vf);
4763         }
4764 }
4765
4766 /**
4767  *  igb_set_uta - Set unicast filter table address
4768  *  @adapter: board private structure
4769  *
4770  *  The unicast table address is a register array of 32-bit registers.
4771  *  The table is meant to be used in a way similar to how the MTA is used;
4772  *  however, due to certain limitations in the hardware it is necessary to
4773  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4774  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled.
4775  **/
4776 static void igb_set_uta(struct igb_adapter *adapter)
4777 {
4778         struct e1000_hw *hw = &adapter->hw;
4779         int i;
4780
4781         /* The UTA table only exists on 82576 hardware and newer */
4782         if (hw->mac.type < e1000_82576)
4783                 return;
4784
4785         /* we only need to do this if VMDq is enabled */
4786         if (!adapter->vfs_allocated_count)
4787                 return;
4788
4789         for (i = 0; i < hw->mac.uta_reg_count; i++)
4790                 array_wr32(E1000_UTA, i, ~0);
4791 }
4792
4793 /**
4794  * igb_intr_msi - Interrupt Handler
4795  * @irq: interrupt number
4796  * @data: pointer to a network interface device structure
4797  **/
4798 static irqreturn_t igb_intr_msi(int irq, void *data)
4799 {
4800         struct igb_adapter *adapter = data;
4801         struct igb_q_vector *q_vector = adapter->q_vector[0];
4802         struct e1000_hw *hw = &adapter->hw;
4803         /* read ICR disables interrupts using IAM */
4804         u32 icr = rd32(E1000_ICR);
4805
4806         igb_write_itr(q_vector);
4807
4808         if (icr & E1000_ICR_DRSTA)
4809                 schedule_work(&adapter->reset_task);
4810
4811         if (icr & E1000_ICR_DOUTSYNC) {
4812                 /* HW is reporting DMA is out of sync */
4813                 adapter->stats.doosync++;
4814         }
4815
4816         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4817                 hw->mac.get_link_status = 1;
4818                 if (!test_bit(__IGB_DOWN, &adapter->state))
4819                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4820         }
4821
4822         napi_schedule(&q_vector->napi);
4823
4824         return IRQ_HANDLED;
4825 }
4826
4827 /**
4828  * igb_intr - Legacy Interrupt Handler
4829  * @irq: interrupt number
4830  * @data: pointer to a network interface device structure
4831  **/
4832 static irqreturn_t igb_intr(int irq, void *data)
4833 {
4834         struct igb_adapter *adapter = data;
4835         struct igb_q_vector *q_vector = adapter->q_vector[0];
4836         struct e1000_hw *hw = &adapter->hw;
4837         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4838          * need for the IMC write */
4839         u32 icr = rd32(E1000_ICR);
4840         if (!icr)
4841                 return IRQ_NONE;  /* Not our interrupt */
4842
4843         igb_write_itr(q_vector);
4844
4845         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4846          * not set, then the adapter didn't send an interrupt */
4847         if (!(icr & E1000_ICR_INT_ASSERTED))
4848                 return IRQ_NONE;
4849
4850         if (icr & E1000_ICR_DRSTA)
4851                 schedule_work(&adapter->reset_task);
4852
4853         if (icr & E1000_ICR_DOUTSYNC) {
4854                 /* HW is reporting DMA is out of sync */
4855                 adapter->stats.doosync++;
4856         }
4857
4858         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4859                 hw->mac.get_link_status = 1;
4860                 /* guard against interrupt when we're going down */
4861                 if (!test_bit(__IGB_DOWN, &adapter->state))
4862                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4863         }
4864
4865         napi_schedule(&q_vector->napi);
4866
4867         return IRQ_HANDLED;
4868 }
4869
4870 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4871 {
4872         struct igb_adapter *adapter = q_vector->adapter;
4873         struct e1000_hw *hw = &adapter->hw;
4874
4875         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4876             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4877                 if (!adapter->msix_entries)
4878                         igb_set_itr(adapter);
4879                 else
4880                         igb_update_ring_itr(q_vector);
4881         }
4882
4883         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4884                 if (adapter->msix_entries)
4885                         wr32(E1000_EIMS, q_vector->eims_value);
4886                 else
4887                         igb_irq_enable(adapter);
4888         }
4889 }
4890
4891 /**
4892  * igb_poll - NAPI Rx polling callback
4893  * @napi: napi polling structure
4894  * @budget: count of how many packets we should handle
4895  **/
4896 static int igb_poll(struct napi_struct *napi, int budget)
4897 {
4898         struct igb_q_vector *q_vector = container_of(napi,
4899                                                      struct igb_q_vector,
4900                                                      napi);
4901         int tx_clean_complete = 1, work_done = 0;
4902
4903 #ifdef CONFIG_IGB_DCA
4904         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4905                 igb_update_dca(q_vector);
4906 #endif
4907         if (q_vector->tx_ring)
4908                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4909
4910         if (q_vector->rx_ring)
4911                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4912
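        /* an incomplete Tx clean keeps this vector in polling mode by
         * reporting the full budget as consumed */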
4913         if (!tx_clean_complete)
4914                 work_done = budget;
4915
4916         /* If not enough Rx work done, exit the polling mode */
4917         if (work_done < budget) {
4918                 napi_complete(napi);
4919                 igb_ring_irq_enable(q_vector);
4920         }
4921
4922         return work_done;
4923 }
4924
4925 /**
4926  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4927  * @adapter: board private structure
4928  * @shhwtstamps: timestamp structure to update
4929  * @regval: unsigned 64bit system time value.
4930  *
4931  * We need to convert the system time value stored in the RX/TXSTMP registers
4932  * into a hwtstamp which can be used by the upper level timestamping functions
4933  */
4934 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4935                                    struct skb_shared_hwtstamps *shhwtstamps,
4936                                    u64 regval)
4937 {
4938         u64 ns;
4939
4940         /*
4941          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift it up by
4942          * 24 bits to match the clock shift we set up earlier.
4943          */
4944         if (adapter->hw.mac.type == e1000_82580)
4945                 regval <<= IGB_82580_TSYNC_SHIFT;
4946
4947         ns = timecounter_cyc2time(&adapter->clock, regval);
4948         timecompare_update(&adapter->compare, ns);
4949         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4950         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4951         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4952 }
4953
4954 /**
4955  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4956  * @q_vector: pointer to q_vector containing needed info
4957  * @skb: packet that was just sent
4958  *
4959  * If we were asked to do hardware stamping and such a time stamp is
4960  * available, then it must have been for this skb here because we only
4961  * allow only one such packet into the queue.
4962  */
4963 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4964 {
4965         struct igb_adapter *adapter = q_vector->adapter;
4966         union skb_shared_tx *shtx = skb_tx(skb);
4967         struct e1000_hw *hw = &adapter->hw;
4968         struct skb_shared_hwtstamps shhwtstamps;
4969         u64 regval;
4970
4971         /* if skb does not support hw timestamp or TX stamp not valid exit */
4972         if (likely(!shtx->hardware) ||
4973             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4974                 return;
4975
4976         regval = rd32(E1000_TXSTMPL);
4977         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4978
4979         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4980         skb_tstamp_tx(skb, &shhwtstamps);
4981 }
4982
4983 /**
4984  * igb_clean_tx_irq - Reclaim resources after transmit completes
4985  * @q_vector: pointer to q_vector containing needed info
4986  * returns true if ring is completely cleaned
4987  **/
4988 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4989 {
4990         struct igb_adapter *adapter = q_vector->adapter;
4991         struct igb_ring *tx_ring = q_vector->tx_ring;
4992         struct net_device *netdev = tx_ring->netdev;
4993         struct e1000_hw *hw = &adapter->hw;
4994         struct igb_buffer *buffer_info;
4995         struct sk_buff *skb;
4996         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4997         unsigned int total_bytes = 0, total_packets = 0;
4998         unsigned int i, eop, count = 0;
4999         bool cleaned = false;
5000
5001         i = tx_ring->next_to_clean;
5002         eop = tx_ring->buffer_info[i].next_to_watch;
5003         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5004
5005         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5006                (count < tx_ring->count)) {
5007                 for (cleaned = false; !cleaned; count++) {
5008                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5009                         buffer_info = &tx_ring->buffer_info[i];
5010                         cleaned = (i == eop);
5011                         skb = buffer_info->skb;
5012
5013                         if (skb) {
5014                                 unsigned int segs, bytecount;
5015                                 /* gso_segs is currently only valid for tcp */
5016                                 segs = buffer_info->gso_segs;
5017                                 /* multiply data chunks by size of headers */
5018                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5019                                             skb->len;
5020                                 total_packets += segs;
5021                                 total_bytes += bytecount;
5022
5023                                 igb_tx_hwtstamp(q_vector, skb);
5024                         }
5025
5026                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5027                         tx_desc->wb.status = 0;
5028
5029                         i++;
5030                         if (i == tx_ring->count)
5031                                 i = 0;
5032                 }
5033                 eop = tx_ring->buffer_info[i].next_to_watch;
5034                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5035         }
5036
5037         tx_ring->next_to_clean = i;
5038
5039         if (unlikely(count &&
5040                      netif_carrier_ok(netdev) &&
5041                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5042                 /* Make sure that anybody stopping the queue after this
5043                  * sees the new next_to_clean.
5044                  */
5045                 smp_mb();
5046                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5047                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5048                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5049                         tx_ring->tx_stats.restart_queue++;
5050                 }
5051         }
5052
5053         if (tx_ring->detect_tx_hung) {
5054                 /* Detect a transmit hang in hardware, this serializes the
5055                  * check with the clearing of time_stamp and movement of i */
5056                 tx_ring->detect_tx_hung = false;
5057                 if (tx_ring->buffer_info[i].time_stamp &&
5058                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5059                                (adapter->tx_timeout_factor * HZ)) &&
5060                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5061
5062                         /* detected Tx unit hang */
5063                         dev_err(&tx_ring->pdev->dev,
5064                                 "Detected Tx Unit Hang\n"
5065                                 "  Tx Queue             <%d>\n"
5066                                 "  TDH                  <%x>\n"
5067                                 "  TDT                  <%x>\n"
5068                                 "  next_to_use          <%x>\n"
5069                                 "  next_to_clean        <%x>\n"
5070                                 "buffer_info[next_to_clean]\n"
5071                                 "  time_stamp           <%lx>\n"
5072                                 "  next_to_watch        <%x>\n"
5073                                 "  jiffies              <%lx>\n"
5074                                 "  desc.status          <%x>\n",
5075                                 tx_ring->queue_index,
5076                                 readl(tx_ring->head),
5077                                 readl(tx_ring->tail),
5078                                 tx_ring->next_to_use,
5079                                 tx_ring->next_to_clean,
5080                                 tx_ring->buffer_info[eop].time_stamp,
5081                                 eop,
5082                                 jiffies,
5083                                 eop_desc->wb.status);
5084                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5085                 }
5086         }
5087         tx_ring->total_bytes += total_bytes;
5088         tx_ring->total_packets += total_packets;
5089         tx_ring->tx_stats.bytes += total_bytes;
5090         tx_ring->tx_stats.packets += total_packets;
5091         return (count < tx_ring->count);
5092 }
5093
5094 /**
5095  * igb_receive_skb - helper function to handle rx indications
5096  * @q_vector: structure containing interrupt and ring information
5097  * @skb: packet to send up
5098  * @vlan_tag: vlan tag for packet
5099  **/
5100 static void igb_receive_skb(struct igb_q_vector *q_vector,
5101                             struct sk_buff *skb,
5102                             u16 vlan_tag)
5103 {
5104         struct igb_adapter *adapter = q_vector->adapter;
5105
5106         if (vlan_tag && adapter->vlgrp)
5107                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5108                                  vlan_tag, skb);
5109         else
5110                 napi_gro_receive(&q_vector->napi, skb);
5111 }
5112
5113 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5114                                        u32 status_err, struct sk_buff *skb)
5115 {
5116         skb->ip_summed = CHECKSUM_NONE;
5117
5118         /* the Ignore Checksum (IXSM) bit is set or checksum is disabled via ethtool */
5119         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5120              (status_err & E1000_RXD_STAT_IXSM))
5121                 return;
5122
5123         /* TCP/UDP checksum error bit is set */
5124         if (status_err &
5125             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5126                 /*
5127                  * work around errata with sctp packets where the TCPE aka
5128                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5129                  * packets, (aka let the stack check the crc32c)
5130                  */
5131                 if ((skb->len == 60) &&
5132                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5133                         ring->rx_stats.csum_err++;
5134
5135                 /* let the stack verify checksum errors */
5136                 return;
5137         }
5138         /* It must be a TCP or UDP packet with a valid checksum */
5139         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5140                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5141
5142         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5143 }
5144
5145 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5146                                    struct sk_buff *skb)
5147 {
5148         struct igb_adapter *adapter = q_vector->adapter;
5149         struct e1000_hw *hw = &adapter->hw;
5150         u64 regval;
5151
5152         /*
5153          * If this bit is set, then the RX registers contain the time stamp. No
5154          * other packet will be time stamped until we read these registers, so
5155          * read the registers to make them available again. Because only one
5156          * packet can be time stamped at a time, we know that the register
5157          * values must belong to this one here and therefore we don't need to
5158          * compare any of the additional attributes stored for it.
5159          *
5160          * If nothing went wrong, then it should have a skb_shared_tx that we
5161          * can turn into a skb_shared_hwtstamps.
5162          */
5163         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5164                 return;
5165         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5166                 return;
5167
5168         regval = rd32(E1000_RXSTMPL);
5169         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5170
5171         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5172 }
5173 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5174                                union e1000_adv_rx_desc *rx_desc)
5175 {
5176         /* HW will not DMA in data larger than the given buffer, even if it
5177          * parses the (NFS, of course) header to be larger.  In that case, it
5178          * fills the header buffer and spills the rest into the page.
5179          */
5180         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5181                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5182         if (hlen > rx_ring->rx_buffer_len)
5183                 hlen = rx_ring->rx_buffer_len;
5184         return hlen;
5185 }
5186
5187 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5188                                  int *work_done, int budget)
5189 {
5190         struct igb_ring *rx_ring = q_vector->rx_ring;
5191         struct net_device *netdev = rx_ring->netdev;
5192         struct pci_dev *pdev = rx_ring->pdev;
5193         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5194         struct igb_buffer *buffer_info, *next_buffer;
5195         struct sk_buff *skb;
5196         bool cleaned = false;
5197         int cleaned_count = 0;
5198         int current_node = numa_node_id();
5199         unsigned int total_bytes = 0, total_packets = 0;
5200         unsigned int i;
5201         u32 staterr;
5202         u16 length;
5203         u16 vlan_tag;
5204
5205         i = rx_ring->next_to_clean;
5206         buffer_info = &rx_ring->buffer_info[i];
5207         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5208         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5209
5210         while (staterr & E1000_RXD_STAT_DD) {
5211                 if (*work_done >= budget)
5212                         break;
5213                 (*work_done)++;
5214
5215                 skb = buffer_info->skb;
5216                 prefetch(skb->data - NET_IP_ALIGN);
5217                 buffer_info->skb = NULL;
5218
5219                 i++;
5220                 if (i == rx_ring->count)
5221                         i = 0;
5222
5223                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5224                 prefetch(next_rxd);
5225                 next_buffer = &rx_ring->buffer_info[i];
5226
5227                 length = le16_to_cpu(rx_desc->wb.upper.length);
5228                 cleaned = true;
5229                 cleaned_count++;
5230
5231                 if (buffer_info->dma) {
5232                         pci_unmap_single(pdev, buffer_info->dma,
5233                                          rx_ring->rx_buffer_len,
5234                                          PCI_DMA_FROMDEVICE);
5235                         buffer_info->dma = 0;
5236                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5237                                 skb_put(skb, length);
5238                                 goto send_up;
5239                         }
5240                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5241                 }
5242
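                /* header split: any payload beyond the header buffer was
                 * DMA'd into the half-page fragment mapped at page_dma */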
5243                 if (length) {
5244                         pci_unmap_page(pdev, buffer_info->page_dma,
5245                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5246                         buffer_info->page_dma = 0;
5247
5248                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5249                                                 buffer_info->page,
5250                                                 buffer_info->page_offset,
5251                                                 length);
5252
5253                         if ((page_count(buffer_info->page) != 1) ||
5254                             (page_to_nid(buffer_info->page) != current_node))
5255                                 buffer_info->page = NULL;
5256                         else
5257                                 get_page(buffer_info->page);
5258
5259                         skb->len += length;
5260                         skb->data_len += length;
5261                         skb->truesize += length;
5262                 }
5263
5264                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5265                         buffer_info->skb = next_buffer->skb;
5266                         buffer_info->dma = next_buffer->dma;
5267                         next_buffer->skb = skb;
5268                         next_buffer->dma = 0;
5269                         goto next_desc;
5270                 }
5271 send_up:
5272                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5273                         dev_kfree_skb_irq(skb);
5274                         goto next_desc;
5275                 }
5276
5277                 igb_rx_hwtstamp(q_vector, staterr, skb);
5278                 total_bytes += skb->len;
5279                 total_packets++;
5280
5281                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5282
5283                 skb->protocol = eth_type_trans(skb, netdev);
5284                 skb_record_rx_queue(skb, rx_ring->queue_index);
5285
5286                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5287                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5288
5289                 igb_receive_skb(q_vector, skb, vlan_tag);
5290
5291 next_desc:
5292                 rx_desc->wb.upper.status_error = 0;
5293
5294                 /* return some buffers to hardware, one at a time is too slow */
5295                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5296                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5297                         cleaned_count = 0;
5298                 }
5299
5300                 /* use prefetched values */
5301                 rx_desc = next_rxd;
5302                 buffer_info = next_buffer;
5303                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5304         }
5305
5306         rx_ring->next_to_clean = i;
5307         cleaned_count = igb_desc_unused(rx_ring);
5308
5309         if (cleaned_count)
5310                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5311
5312         rx_ring->total_packets += total_packets;
5313         rx_ring->total_bytes += total_bytes;
5314         rx_ring->rx_stats.packets += total_packets;
5315         rx_ring->rx_stats.bytes += total_bytes;
5316         return cleaned;
5317 }
5318
5319 /**
5320  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5321  * @rx_ring: ring to place buffers on; @cleaned_count: number of buffers to replace
5322  **/
5323 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5324 {
5325         struct net_device *netdev = rx_ring->netdev;
5326         union e1000_adv_rx_desc *rx_desc;
5327         struct igb_buffer *buffer_info;
5328         struct sk_buff *skb;
5329         unsigned int i;
5330         int bufsz;
5331
5332         i = rx_ring->next_to_use;
5333         buffer_info = &rx_ring->buffer_info[i];
5334
5335         bufsz = rx_ring->rx_buffer_len;
5336
5337         while (cleaned_count--) {
5338                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5339
5340                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5341                         if (!buffer_info->page) {
5342                                 buffer_info->page = netdev_alloc_page(netdev);
5343                                 if (!buffer_info->page) {
5344                                         rx_ring->rx_stats.alloc_failed++;
5345                                         goto no_buffers;
5346                                 }
5347                                 buffer_info->page_offset = 0;
5348                         } else {
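                                /* each page is split into two rx buffers;
                                 * flip to the unused half */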
5349                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5350                         }
5351                         buffer_info->page_dma =
5352                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5353                                              buffer_info->page_offset,
5354                                              PAGE_SIZE / 2,
5355                                              PCI_DMA_FROMDEVICE);
5356                         if (pci_dma_mapping_error(rx_ring->pdev,
5357                                                   buffer_info->page_dma)) {
5358                                 buffer_info->page_dma = 0;
5359                                 rx_ring->rx_stats.alloc_failed++;
5360                                 goto no_buffers;
5361                         }
5362                 }
5363
5364                 skb = buffer_info->skb;
5365                 if (!skb) {
5366                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5367                         if (!skb) {
5368                                 rx_ring->rx_stats.alloc_failed++;
5369                                 goto no_buffers;
5370                         }
5371
5372                         buffer_info->skb = skb;
5373                 }
5374                 if (!buffer_info->dma) {
5375                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5376                                                           skb->data,
5377                                                           bufsz,
5378                                                           PCI_DMA_FROMDEVICE);
5379                         if (pci_dma_mapping_error(rx_ring->pdev,
5380                                                   buffer_info->dma)) {
5381                                 buffer_info->dma = 0;
5382                                 rx_ring->rx_stats.alloc_failed++;
5383                                 goto no_buffers;
5384                         }
5385                 }
5386                 /* Refresh the desc even if buffer_addrs didn't change because
5387                  * each write-back erases this info. */
5388                 if (bufsz < IGB_RXBUFFER_1024) {
5389                         rx_desc->read.pkt_addr =
5390                              cpu_to_le64(buffer_info->page_dma);
5391                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5392                 } else {
5393                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5394                         rx_desc->read.hdr_addr = 0;
5395                 }
5396
5397                 i++;
5398                 if (i == rx_ring->count)
5399                         i = 0;
5400                 buffer_info = &rx_ring->buffer_info[i];
5401         }
5402
5403 no_buffers:
5404         if (rx_ring->next_to_use != i) {
5405                 rx_ring->next_to_use = i;
5406                 if (i == 0)
5407                         i = (rx_ring->count - 1);
5408                 else
5409                         i--;
5410
5411                 /* Force memory writes to complete before letting h/w
5412                  * know there are new descriptors to fetch.  (Only
5413                  * applicable for weak-ordered memory model archs,
5414                  * such as IA-64). */
5415                 wmb();
5416                 writel(i, rx_ring->tail);
5417         }
5418 }
5419
5420 /**
5421  * igb_mii_ioctl - read/write PHY registers for copper devices
5422  * @netdev: network interface device structure
5423  * @ifr: pointer to the interface request structure
5424  * @cmd: MII ioctl command (SIOCGMIIPHY, SIOCGMIIREG, SIOCSMIIREG)
5425  **/
5426 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5427 {
5428         struct igb_adapter *adapter = netdev_priv(netdev);
5429         struct mii_ioctl_data *data = if_mii(ifr);
5430
5431         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5432                 return -EOPNOTSUPP;
5433
5434         switch (cmd) {
5435         case SIOCGMIIPHY:
5436                 data->phy_id = adapter->hw.phy.addr;
5437                 break;
5438         case SIOCGMIIREG:
5439                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5440                                      &data->val_out))
5441                         return -EIO;
5442                 break;
5443         case SIOCSMIIREG:
5444         default:
5445                 return -EOPNOTSUPP;
5446         }
5447         return 0;
5448 }
5449
5450 /**
5451  * igb_hwtstamp_ioctl - control hardware time stamping
5452  * @netdev: network interface device structure
5453  * @ifr: pointer to the interface request structure
5454  * @cmd: ioctl command (SIOCSHWTSTAMP)
5455  *
5456  * Outgoing time stamping can be enabled and disabled. Play nice and
5457  * disable it when requested, although it shouldn't cause any overhead
5458  * when no packet needs it. At most one packet in the queue may be
5459  * marked for time stamping, otherwise it would be impossible to tell
5460  * for sure to which packet the hardware time stamp belongs.
5461  *
5462  * Incoming time stamping has to be configured via the hardware
5463  * filters. Not all combinations are supported, in particular event
5464  * type has to be specified. Matching the kind of event packet is
5465  * not supported, with the exception of "all V2 events regardless of
5466  * level 2 or 4".
5467  *
5468  **/
5469 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5470                               struct ifreq *ifr, int cmd)
5471 {
5472         struct igb_adapter *adapter = netdev_priv(netdev);
5473         struct e1000_hw *hw = &adapter->hw;
5474         struct hwtstamp_config config;
5475         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5476         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5477         u32 tsync_rx_cfg = 0;
5478         bool is_l4 = false;
5479         bool is_l2 = false;
5480         u32 regval;
5481
5482         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5483                 return -EFAULT;
5484
5485         /* reserved for future extensions */
5486         if (config.flags)
5487                 return -EINVAL;
5488
5489         switch (config.tx_type) {
5490         case HWTSTAMP_TX_OFF:
5491                 tsync_tx_ctl = 0;
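                /* fall through - both OFF and ON are valid settings */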
5492         case HWTSTAMP_TX_ON:
5493                 break;
5494         default:
5495                 return -ERANGE;
5496         }

        switch (config.rx_filter) {
        case HWTSTAMP_FILTER_NONE:
                tsync_rx_ctl = 0;
                break;
        case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
        case HWTSTAMP_FILTER_ALL:
                /*
                 * register TSYNCRXCFG must be set, therefore it is not
                 * possible to time stamp both Sync and Delay_Req messages
                 * => fall back to time stamping all packets
                 */
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
                config.rx_filter = HWTSTAMP_FILTER_ALL;
                break;
        case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
                is_l4 = true;
                break;
        case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
                is_l4 = true;
                break;
        case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
                is_l2 = true;
                is_l4 = true;
                config.rx_filter = HWTSTAMP_FILTER_SOME;
                break;
        case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
        case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
                tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
                is_l2 = true;
                is_l4 = true;
                config.rx_filter = HWTSTAMP_FILTER_SOME;
                break;
        case HWTSTAMP_FILTER_PTP_V2_EVENT:
        case HWTSTAMP_FILTER_PTP_V2_SYNC:
        case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
                tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
                config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
                is_l2 = true;
                break;
        default:
                return -ERANGE;
        }

        /* 82575 does not support hardware time stamping */
        if (hw->mac.type == e1000_82575) {
                if (tsync_rx_ctl | tsync_tx_ctl)
                        return -EINVAL;
                return 0;
        }

        /* enable/disable TX */
        regval = rd32(E1000_TSYNCTXCTL);
        regval &= ~E1000_TSYNCTXCTL_ENABLED;
        regval |= tsync_tx_ctl;
        wr32(E1000_TSYNCTXCTL, regval);

        /* enable/disable RX */
        regval = rd32(E1000_TSYNCRXCTL);
        regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
        regval |= tsync_rx_ctl;
        wr32(E1000_TSYNCRXCTL, regval);

        /* define which PTP packets are time stamped */
        wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);

        /* define ethertype filter for timestamped packets */
        if (is_l2)
                wr32(E1000_ETQF(3),
                                (E1000_ETQF_FILTER_ENABLE | /* enable filter */
                                 E1000_ETQF_1588 | /* enable timestamping */
                                 ETH_P_1588));     /* 1588 eth protocol type */
        else
                wr32(E1000_ETQF(3), 0);

#define PTP_PORT 319
        /* L4 Queue Filter[3]: filter by destination port and protocol */
        if (is_l4) {
                u32 ftqf = (IPPROTO_UDP /* UDP */
                        | E1000_FTQF_VF_BP /* VF not compared */
                        | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
                        | E1000_FTQF_MASK); /* mask all inputs */
                ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */

                wr32(E1000_IMIR(3), htons(PTP_PORT));
                wr32(E1000_IMIREXT(3),
                     (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
                if (hw->mac.type == e1000_82576) {
                        /* enable source port check */
                        wr32(E1000_SPQF(3), htons(PTP_PORT));
                        ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
                }
                wr32(E1000_FTQF(3), ftqf);
        } else {
                wr32(E1000_FTQF(3), E1000_FTQF_MASK);
        }
        wrfl();

        adapter->hwtstamp_config = config;

        /* clear TX/RX time stamp registers, just to be sure */
        regval = rd32(E1000_TXSTMPH);
        regval = rd32(E1000_RXSTMPH);

        return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
                -EFAULT : 0;
}
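
/*
 * Editor's illustration (not part of the driver): the handler above is
 * reached from userspace through the SIOCSHWTSTAMP ioctl, with ifr_data
 * pointing at a struct hwtstamp_config (<linux/net_tstamp.h>). Note that
 * the driver may rewrite rx_filter (e.g. to HWTSTAMP_FILTER_ALL or
 * HWTSTAMP_FILTER_SOME above), so callers should re-read the config that
 * is copied back. A minimal sketch; "eth0" is a placeholder name.
 */
#if 0   /* userspace sketch only, never compiled into the driver */
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/socket.h>

static int request_hw_timestamps(const char *ifname)
{
        struct hwtstamp_config cfg;
        struct ifreq ifr;
        int fd, err;

        fd = socket(AF_INET, SOCK_DGRAM, 0);
        if (fd < 0)
                return -1;

        memset(&cfg, 0, sizeof(cfg));
        cfg.tx_type = HWTSTAMP_TX_ON;
        cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_L4_SYNC;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&cfg;

        err = ioctl(fd, SIOCSHWTSTAMP, &ifr); /* lands in igb_hwtstamp_ioctl() */
        /* on success, cfg.rx_filter now reports what the hardware will do
         * (HWTSTAMP_FILTER_SOME for the V2 L4 Sync request above) */
        return err;
}
#endif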

/**
 * igb_ioctl - handle device-specific ioctl requests
 * @netdev: network interface device structure
 * @ifr: interface request structure
 * @cmd: ioctl command to execute
 **/
static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
{
        switch (cmd) {
        case SIOCGMIIPHY:
        case SIOCGMIIREG:
        case SIOCSMIIREG:
                return igb_mii_ioctl(netdev, ifr, cmd);
        case SIOCSHWTSTAMP:
                return igb_hwtstamp_ioctl(netdev, ifr, cmd);
        default:
                return -EOPNOTSUPP;
        }
}
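
/*
 * Editor's illustration (not part of the driver): the SIOCGMIIPHY /
 * SIOCGMIIREG pair dispatched above reads PHY registers through a
 * struct mii_ioctl_data that lives inline in the ifreq union, hence the
 * cast of &ifr.ifr_data rather than a pointer assignment. A sketch of
 * the conventional userspace idiom (as used by mii-tool):
 */
#if 0   /* userspace sketch only */
#include <linux/mii.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

static int read_phy_reg(int fd, const char *ifname, int reg, unsigned int *val)
{
        struct ifreq ifr;
        struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);

        if (ioctl(fd, SIOCGMIIPHY, &ifr))       /* fills mii->phy_id */
                return -1;
        mii->reg_num = reg;
        if (ioctl(fd, SIOCGMIIREG, &ifr))       /* fills mii->val_out */
                return -1;
        *val = mii->val_out;
        return 0;
}
#endif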

s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
        struct igb_adapter *adapter = hw->back;
        u16 cap_offset;

        cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
        if (!cap_offset)
                return -E1000_ERR_CONFIG;

        pci_read_config_word(adapter->pdev, cap_offset + reg, value);

        return 0;
}

s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
{
        struct igb_adapter *adapter = hw->back;
        u16 cap_offset;

        cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
        if (!cap_offset)
                return -E1000_ERR_CONFIG;

        pci_write_config_word(adapter->pdev, cap_offset + reg, *value);

        return 0;
}
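
/*
 * Editor's note: in both helpers above, "reg" is an offset *within* the
 * PCI Express capability structure (hence the cap_offset + reg math), so
 * callers pass PCI_EXP_* offsets from <linux/pci_regs.h>. A hypothetical
 * caller (no such call site in this file) reading the negotiated link
 * status might look like this:
 */
#if 0   /* illustration only; hw/adapter come from the caller's context */
        u16 lnksta;

        if (!igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &lnksta))
                dev_info(&adapter->pdev->dev,
                         "PCIe link status: 0x%04x\n", lnksta);
#endif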

static void igb_vlan_rx_register(struct net_device *netdev,
                                 struct vlan_group *grp)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl, rctl;

        igb_irq_disable(adapter);
        adapter->vlgrp = grp;

        if (grp) {
                /* enable VLAN tag insert/strip */
                ctrl = rd32(E1000_CTRL);
                ctrl |= E1000_CTRL_VME;
                wr32(E1000_CTRL, ctrl);

                /* Disable CFI check */
                rctl = rd32(E1000_RCTL);
                rctl &= ~E1000_RCTL_CFIEN;
                wr32(E1000_RCTL, rctl);
        } else {
                /* disable VLAN tag insert/strip */
                ctrl = rd32(E1000_CTRL);
                ctrl &= ~E1000_CTRL_VME;
                wr32(E1000_CTRL, ctrl);
        }

        igb_rlpml_set(adapter);

        if (!test_bit(__IGB_DOWN, &adapter->state))
                igb_irq_enable(adapter);
}

static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int pf_id = adapter->vfs_allocated_count;

        /* attempt to add filter to vlvf array */
        igb_vlvf_set(adapter, vid, true, pf_id);

        /* add the filter since PF can receive vlans w/o entry in vlvf */
        igb_vfta_set(hw, vid, true);
}

static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int pf_id = adapter->vfs_allocated_count;
        s32 err;

        igb_irq_disable(adapter);
        vlan_group_set_device(adapter->vlgrp, vid, NULL);

        if (!test_bit(__IGB_DOWN, &adapter->state))
                igb_irq_enable(adapter);

        /* remove vlan from VLVF table array */
        err = igb_vlvf_set(adapter, vid, false, pf_id);

        /* if vid was not present in VLVF just remove it from table */
        if (err)
                igb_vfta_set(hw, vid, false);
}

static void igb_restore_vlan(struct igb_adapter *adapter)
{
        igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);

        if (adapter->vlgrp) {
                u16 vid;
                for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
                        if (!vlan_group_get_device(adapter->vlgrp, vid))
                                continue;
                        igb_vlan_rx_add_vid(adapter->netdev, vid);
                }
        }
}

int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
{
        struct pci_dev *pdev = adapter->pdev;
        struct e1000_mac_info *mac = &adapter->hw.mac;

        mac->autoneg = 0;

        switch (spddplx) {
        case SPEED_10 + DUPLEX_HALF:
                mac->forced_speed_duplex = ADVERTISE_10_HALF;
                break;
        case SPEED_10 + DUPLEX_FULL:
                mac->forced_speed_duplex = ADVERTISE_10_FULL;
                break;
        case SPEED_100 + DUPLEX_HALF:
                mac->forced_speed_duplex = ADVERTISE_100_HALF;
                break;
        case SPEED_100 + DUPLEX_FULL:
                mac->forced_speed_duplex = ADVERTISE_100_FULL;
                break;
        case SPEED_1000 + DUPLEX_FULL:
                mac->autoneg = 1;
                adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
                break;
        case SPEED_1000 + DUPLEX_HALF: /* not supported */
        default:
                dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
                return -EINVAL;
        }
        return 0;
}
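
/*
 * Editor's note: spddplx is the arithmetic sum of an ethtool SPEED_*
 * value (10, 100 or 1000) and a DUPLEX_* value (0 for half, 1 for full),
 * so the case labels above are plain integers such as 101 == 100/full.
 * Note that 1000 Mb/s keeps autoneg enabled rather than forcing the
 * speed. A hypothetical ethtool-path caller (sketch, not a call site
 * in this file):
 */
#if 0   /* illustration only */
        /* force 100 Mb/s full duplex, i.e. spddplx == 101 */
        int err = igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL);
#endif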

static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        u32 ctrl, rctl, status;
        u32 wufc = adapter->wol;
#ifdef CONFIG_PM
        int retval = 0;
#endif

        netif_device_detach(netdev);

        if (netif_running(netdev))
                igb_close(netdev);

        igb_clear_interrupt_scheme(adapter);

#ifdef CONFIG_PM
        retval = pci_save_state(pdev);
        if (retval)
                return retval;
#endif

        status = rd32(E1000_STATUS);
        if (status & E1000_STATUS_LU)
                wufc &= ~E1000_WUFC_LNKC;

        if (wufc) {
                igb_setup_rctl(adapter);
                igb_set_rx_mode(netdev);

                /* turn on all-multi mode if wake on multicast is enabled */
                if (wufc & E1000_WUFC_MC) {
                        rctl = rd32(E1000_RCTL);
                        rctl |= E1000_RCTL_MPE;
                        wr32(E1000_RCTL, rctl);
                }

                ctrl = rd32(E1000_CTRL);
                /* advertise wake from D3Cold */
                #define E1000_CTRL_ADVD3WUC 0x00100000
                /* phy power management enable */
                #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
                ctrl |= E1000_CTRL_ADVD3WUC;
                wr32(E1000_CTRL, ctrl);

                /* Allow time for pending master requests to run */
                igb_disable_pcie_master(hw);

                wr32(E1000_WUC, E1000_WUC_PME_EN);
                wr32(E1000_WUFC, wufc);
        } else {
                wr32(E1000_WUC, 0);
                wr32(E1000_WUFC, 0);
        }

        *enable_wake = wufc || adapter->en_mng_pt;
        if (!*enable_wake)
                igb_power_down_link(adapter);
        else
                igb_power_up_link(adapter);

        /* Release control of h/w to f/w.  If f/w is AMT enabled, this
         * would have already happened in close and is redundant. */
        igb_release_hw_control(adapter);

        pci_disable_device(pdev);

        return 0;
}
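
/*
 * Editor's illustration (not part of the driver): the wufc value consumed
 * above comes from adapter->wol, which userspace typically configures via
 * the ETHTOOL_SWOL ioctl (the programmatic equivalent of
 * "ethtool -s <dev> wol g"). A minimal sketch:
 */
#if 0   /* userspace sketch only */
#include <linux/ethtool.h>
#include <linux/sockios.h>
#include <net/if.h>
#include <string.h>
#include <sys/ioctl.h>

static int enable_magic_packet_wol(int fd, const char *ifname)
{
        struct ethtool_wolinfo wol = {
                .cmd = ETHTOOL_SWOL,
                .wolopts = WAKE_MAGIC,  /* wake on magic packet */
        };
        struct ifreq ifr;

        memset(&ifr, 0, sizeof(ifr));
        strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
        ifr.ifr_data = (void *)&wol;
        return ioctl(fd, SIOCETHTOOL, &ifr);
}
#endif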

#ifdef CONFIG_PM
static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
{
        int retval;
        bool wake;

        retval = __igb_shutdown(pdev, &wake);
        if (retval)
                return retval;

        if (wake) {
                pci_prepare_to_sleep(pdev);
        } else {
                pci_wake_from_d3(pdev, false);
                pci_set_power_state(pdev, PCI_D3hot);
        }

        return 0;
}

static int igb_resume(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int err;

        pci_set_power_state(pdev, PCI_D0);
        pci_restore_state(pdev);
        pci_save_state(pdev);

        err = pci_enable_device_mem(pdev);
        if (err) {
                dev_err(&pdev->dev,
                        "igb: Cannot enable PCI device from suspend\n");
                return err;
        }
        pci_set_master(pdev);

        pci_enable_wake(pdev, PCI_D3hot, 0);
        pci_enable_wake(pdev, PCI_D3cold, 0);

        if (igb_init_interrupt_scheme(adapter)) {
                dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
                return -ENOMEM;
        }

        igb_reset(adapter);

        /* let the f/w know that the h/w is now under the control of the
         * driver. */
        igb_get_hw_control(adapter);

        wr32(E1000_WUS, ~0);

        if (netif_running(netdev)) {
                err = igb_open(netdev);
                if (err)
                        return err;
        }

        netif_device_attach(netdev);

        return 0;
}
#endif

static void igb_shutdown(struct pci_dev *pdev)
{
        bool wake;

        __igb_shutdown(pdev, &wake);

        if (system_state == SYSTEM_POWER_OFF) {
                pci_wake_from_d3(pdev, wake);
                pci_set_power_state(pdev, PCI_D3hot);
        }
}

#ifdef CONFIG_NET_POLL_CONTROLLER
/*
 * Polling 'interrupt' - used by things like netconsole to send skbs
 * without having to re-enable interrupts. It's not called while
 * the interrupt routine is executing.
 */
static void igb_netpoll(struct net_device *netdev)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        int i;

        if (!adapter->msix_entries) {
                struct igb_q_vector *q_vector = adapter->q_vector[0];
                igb_irq_disable(adapter);
                napi_schedule(&q_vector->napi);
                return;
        }

        for (i = 0; i < adapter->num_q_vectors; i++) {
                struct igb_q_vector *q_vector = adapter->q_vector[i];
                wr32(E1000_EIMC, q_vector->eims_value);
                napi_schedule(&q_vector->napi);
        }
}
#endif /* CONFIG_NET_POLL_CONTROLLER */

/**
 * igb_io_error_detected - called when PCI error is detected
 * @pdev: Pointer to PCI device
 * @state: The current pci connection state
 *
 * This function is called after a PCI bus error affecting
 * this device has been detected.
 */
static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
                                              pci_channel_state_t state)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);

        netif_device_detach(netdev);

        if (state == pci_channel_io_perm_failure)
                return PCI_ERS_RESULT_DISCONNECT;

        if (netif_running(netdev))
                igb_down(adapter);
        pci_disable_device(pdev);

        /* Request a slot reset. */
        return PCI_ERS_RESULT_NEED_RESET;
}

/**
 * igb_io_slot_reset - called after the pci bus has been reset.
 * @pdev: Pointer to PCI device
 *
 * Restart the card from scratch, as if from a cold-boot. Implementation
 * resembles the first half of the igb_resume routine.
 */
static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);
        struct e1000_hw *hw = &adapter->hw;
        pci_ers_result_t result;
        int err;

        if (pci_enable_device_mem(pdev)) {
                dev_err(&pdev->dev,
                        "Cannot re-enable PCI device after reset.\n");
                result = PCI_ERS_RESULT_DISCONNECT;
        } else {
                pci_set_master(pdev);
                pci_restore_state(pdev);
                pci_save_state(pdev);

                pci_enable_wake(pdev, PCI_D3hot, 0);
                pci_enable_wake(pdev, PCI_D3cold, 0);

                igb_reset(adapter);
                wr32(E1000_WUS, ~0);
                result = PCI_ERS_RESULT_RECOVERED;
        }

        err = pci_cleanup_aer_uncorrect_error_status(pdev);
        if (err) {
                dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
                        "failed 0x%x\n", err);
                /* non-fatal, continue */
        }

        return result;
}

/**
 * igb_io_resume - called when traffic can start flowing again.
 * @pdev: Pointer to PCI device
 *
 * This callback is called when the error recovery driver tells us that
 * it's OK to resume normal operation. Implementation resembles the
 * second half of the igb_resume routine.
 */
static void igb_io_resume(struct pci_dev *pdev)
{
        struct net_device *netdev = pci_get_drvdata(pdev);
        struct igb_adapter *adapter = netdev_priv(netdev);

        if (netif_running(netdev)) {
                if (igb_up(adapter)) {
                        dev_err(&pdev->dev, "igb_up failed after reset\n");
                        return;
                }
        }

        netif_device_attach(netdev);

        /* let the f/w know that the h/w is now under the control of the
         * driver. */
        igb_get_hw_control(adapter);
}

static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
                             u8 qsel)
{
        u32 rar_low, rar_high;
        struct e1000_hw *hw = &adapter->hw;

        /* HW expects these in little endian so we reverse the byte order
         * from network order (big endian) to little endian
         */
        rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
                  ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
        rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));

        /* Indicate to hardware the Address is Valid. */
        rar_high |= E1000_RAH_AV;

        if (hw->mac.type == e1000_82575)
                rar_high |= E1000_RAH_POOL_1 * qsel;
        else
                rar_high |= E1000_RAH_POOL_1 << qsel;

        wr32(E1000_RAL(index), rar_low);
        wrfl();
        wr32(E1000_RAH(index), rar_high);
        wrfl();
}
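
/*
 * Editor's note: a worked example of the packing above, assuming the
 * MAC address 00:11:22:33:44:55 (addr[0] == 0x00 ... addr[5] == 0x55):
 *
 *   rar_low  = 0x00 | 0x11 << 8 | 0x22 << 16 | 0x33 << 24 = 0x33221100
 *   rar_high = 0x44 | 0x55 << 8                           = 0x00005544
 *
 * before E1000_RAH_AV and the pool-select bits are OR'd into rar_high.
 */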

static int igb_set_vf_mac(struct igb_adapter *adapter,
                          int vf, unsigned char *mac_addr)
{
        struct e1000_hw *hw = &adapter->hw;
        /* VF MAC addresses start at the end of the receive address registers
         * and move towards the first; as a result a collision should not be
         * possible */
        int rar_entry = hw->mac.rar_entry_count - (vf + 1);

        memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);

        igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);

        return 0;
}

static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
                return -EINVAL;
        adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
        dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
        dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
                                      " change effective.\n");
        if (test_bit(__IGB_DOWN, &adapter->state)) {
                dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
                         " but the PF device is not up.\n");
                dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
                         " attempting to use the VF device.\n");
        }
        return igb_set_vf_mac(adapter, vf, mac);
}

static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
        return -EOPNOTSUPP;
}

static int igb_ndo_get_vf_config(struct net_device *netdev,
                                 int vf, struct ifla_vf_info *ivi)
{
        struct igb_adapter *adapter = netdev_priv(netdev);
        if (vf >= adapter->vfs_allocated_count)
                return -EINVAL;
        ivi->vf = vf;
        memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
        ivi->tx_rate = 0;
        ivi->vlan = adapter->vf_data[vf].pf_vlan;
        ivi->qos = adapter->vf_data[vf].pf_qos;
        return 0;
}

static void igb_vmm_control(struct igb_adapter *adapter)
{
        struct e1000_hw *hw = &adapter->hw;
        u32 reg;

        /* replication is not supported for 82575 */
        if (hw->mac.type == e1000_82575)
                return;

        /* enable replication vlan tag stripping */
        reg = rd32(E1000_RPLOLR);
        reg |= E1000_RPLOLR_STRVLAN;
        wr32(E1000_RPLOLR, reg);

        /* notify HW that the MAC is adding vlan tags */
        reg = rd32(E1000_DTXCTL);
        reg |= E1000_DTXCTL_VLAN_ADDED;
        wr32(E1000_DTXCTL, reg);

        if (adapter->vfs_allocated_count) {
                igb_vmdq_set_loopback_pf(hw, true);
                igb_vmdq_set_replication_pf(hw, true);
        } else {
                igb_vmdq_set_loopback_pf(hw, false);
                igb_vmdq_set_replication_pf(hw, false);
        }
}

/* igb_main.c */