Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[sfrench/cifs-2.6.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
/*
 * Driver identification strings.  The name and version arrays have external
 * linkage; the banner and copyright strings are private to this file and
 * are printed by igb_init_module().
 */
#define DRV_VERSION "2.1.0-k2"

char igb_driver_name[] = "igb";
char igb_driver_version[] = DRV_VERSION;

static const char igb_driver_string[] =
	"Intel(R) Gigabit Ethernet Network Driver";
static const char igb_copyright[] =
	"Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
85         /* required last entry */
86         {0, }
87 };
88
89 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
90
91 void igb_reset(struct igb_adapter *);
92 static int igb_setup_all_tx_resources(struct igb_adapter *);
93 static int igb_setup_all_rx_resources(struct igb_adapter *);
94 static void igb_free_all_tx_resources(struct igb_adapter *);
95 static void igb_free_all_rx_resources(struct igb_adapter *);
96 static void igb_setup_mrqc(struct igb_adapter *);
97 void igb_update_stats(struct igb_adapter *);
98 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
99 static void __devexit igb_remove(struct pci_dev *pdev);
100 static int igb_sw_init(struct igb_adapter *);
101 static int igb_open(struct net_device *);
102 static int igb_close(struct net_device *);
103 static void igb_configure_tx(struct igb_adapter *);
104 static void igb_configure_rx(struct igb_adapter *);
105 static void igb_clean_all_tx_rings(struct igb_adapter *);
106 static void igb_clean_all_rx_rings(struct igb_adapter *);
107 static void igb_clean_tx_ring(struct igb_ring *);
108 static void igb_clean_rx_ring(struct igb_ring *);
109 static void igb_set_rx_mode(struct net_device *);
110 static void igb_update_phy_info(unsigned long);
111 static void igb_watchdog(unsigned long);
112 static void igb_watchdog_task(struct work_struct *);
113 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
114 static struct net_device_stats *igb_get_stats(struct net_device *);
115 static int igb_change_mtu(struct net_device *, int);
116 static int igb_set_mac(struct net_device *, void *);
117 static void igb_set_uta(struct igb_adapter *adapter);
118 static irqreturn_t igb_intr(int irq, void *);
119 static irqreturn_t igb_intr_msi(int irq, void *);
120 static irqreturn_t igb_msix_other(int irq, void *);
121 static irqreturn_t igb_msix_ring(int irq, void *);
122 #ifdef CONFIG_IGB_DCA
123 static void igb_update_dca(struct igb_q_vector *);
124 static void igb_setup_dca(struct igb_adapter *);
125 #endif /* CONFIG_IGB_DCA */
126 static bool igb_clean_tx_irq(struct igb_q_vector *);
127 static int igb_poll(struct napi_struct *, int);
128 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
129 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
130 static void igb_tx_timeout(struct net_device *);
131 static void igb_reset_task(struct work_struct *);
132 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
133 static void igb_vlan_rx_add_vid(struct net_device *, u16);
134 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
135 static void igb_restore_vlan(struct igb_adapter *);
136 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
137 static void igb_ping_all_vfs(struct igb_adapter *);
138 static void igb_msg_task(struct igb_adapter *);
139 static void igb_vmm_control(struct igb_adapter *);
140 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
141 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
142 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
143 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
144                                int vf, u16 vlan, u8 qos);
145 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
146 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
147                                  struct ifla_vf_info *ivi);
148
149 #ifdef CONFIG_PM
150 static int igb_suspend(struct pci_dev *, pm_message_t);
151 static int igb_resume(struct pci_dev *);
152 #endif
153 static void igb_shutdown(struct pci_dev *);
#ifdef CONFIG_IGB_DCA
static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
			  void *p);

/*
 * DCA notifier block.  The .next and .priority members are left to the
 * implicit zero-initialisation of static storage (NULL and 0).
 */
static struct notifier_block dca_notifier = {
	.notifier_call	= igb_notify_dca,
};
#endif /* CONFIG_IGB_DCA */

#ifdef CONFIG_NET_POLL_CONTROLLER
/* for netdump / net console */
static void igb_netpoll(struct net_device *netdev);
#endif /* CONFIG_NET_POLL_CONTROLLER */

#ifdef CONFIG_PCI_IOV
/* Module parameter; implicitly zero, and given no sysfs permission bits. */
static unsigned int max_vfs;
module_param(max_vfs, uint, 0);
MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
		 "per physical function");
#endif /* CONFIG_PCI_IOV */
172
173 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
174                      pci_channel_state_t);
175 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
176 static void igb_io_resume(struct pci_dev *);
177
178 static struct pci_error_handlers igb_err_handler = {
179         .error_detected = igb_io_error_detected,
180         .slot_reset = igb_io_slot_reset,
181         .resume = igb_io_resume,
182 };
183
184
185 static struct pci_driver igb_driver = {
186         .name     = igb_driver_name,
187         .id_table = igb_pci_tbl,
188         .probe    = igb_probe,
189         .remove   = __devexit_p(igb_remove),
190 #ifdef CONFIG_PM
191         /* Power Managment Hooks */
192         .suspend  = igb_suspend,
193         .resume   = igb_resume,
194 #endif
195         .shutdown = igb_shutdown,
196         .err_handler = &igb_err_handler
197 };
198
199 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
200 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
201 MODULE_LICENSE("GPL");
202 MODULE_VERSION(DRV_VERSION);
203
204 struct igb_reg_info {
205         u32 ofs;
206         char *name;
207 };
208
209 static const struct igb_reg_info igb_reg_info_tbl[] = {
210
211         /* General Registers */
212         {E1000_CTRL, "CTRL"},
213         {E1000_STATUS, "STATUS"},
214         {E1000_CTRL_EXT, "CTRL_EXT"},
215
216         /* Interrupt Registers */
217         {E1000_ICR, "ICR"},
218
219         /* RX Registers */
220         {E1000_RCTL, "RCTL"},
221         {E1000_RDLEN(0), "RDLEN"},
222         {E1000_RDH(0), "RDH"},
223         {E1000_RDT(0), "RDT"},
224         {E1000_RXDCTL(0), "RXDCTL"},
225         {E1000_RDBAL(0), "RDBAL"},
226         {E1000_RDBAH(0), "RDBAH"},
227
228         /* TX Registers */
229         {E1000_TCTL, "TCTL"},
230         {E1000_TDBAL(0), "TDBAL"},
231         {E1000_TDBAH(0), "TDBAH"},
232         {E1000_TDLEN(0), "TDLEN"},
233         {E1000_TDH(0), "TDH"},
234         {E1000_TDT(0), "TDT"},
235         {E1000_TXDCTL(0), "TXDCTL"},
236         {E1000_TDFH, "TDFH"},
237         {E1000_TDFT, "TDFT"},
238         {E1000_TDFHS, "TDFHS"},
239         {E1000_TDFPC, "TDFPC"},
240
241         /* List Terminator */
242         {}
243 };
244
245 /*
246  * igb_regdump - register printout routine
247  */
248 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
249 {
250         int n = 0;
251         char rname[16];
252         u32 regs[8];
253
254         switch (reginfo->ofs) {
255         case E1000_RDLEN(0):
256                 for (n = 0; n < 4; n++)
257                         regs[n] = rd32(E1000_RDLEN(n));
258                 break;
259         case E1000_RDH(0):
260                 for (n = 0; n < 4; n++)
261                         regs[n] = rd32(E1000_RDH(n));
262                 break;
263         case E1000_RDT(0):
264                 for (n = 0; n < 4; n++)
265                         regs[n] = rd32(E1000_RDT(n));
266                 break;
267         case E1000_RXDCTL(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RXDCTL(n));
270                 break;
271         case E1000_RDBAL(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RDBAL(n));
274                 break;
275         case E1000_RDBAH(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDBAH(n));
278                 break;
279         case E1000_TDBAL(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_RDBAL(n));
282                 break;
283         case E1000_TDBAH(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_TDBAH(n));
286                 break;
287         case E1000_TDLEN(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_TDLEN(n));
290                 break;
291         case E1000_TDH(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_TDH(n));
294                 break;
295         case E1000_TDT(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDT(n));
298                 break;
299         case E1000_TXDCTL(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TXDCTL(n));
302                 break;
303         default:
304                 printk(KERN_INFO "%-15s %08x\n",
305                         reginfo->name, rd32(reginfo->ofs));
306                 return;
307         }
308
309         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
310         printk(KERN_INFO "%-15s ", rname);
311         for (n = 0; n < 4; n++)
312                 printk(KERN_CONT "%08x ", regs[n]);
313         printk(KERN_CONT "\n");
314 }
315
316 /*
317  * igb_dump - Print registers, tx-rings and rx-rings
318  */
319 static void igb_dump(struct igb_adapter *adapter)
320 {
321         struct net_device *netdev = adapter->netdev;
322         struct e1000_hw *hw = &adapter->hw;
323         struct igb_reg_info *reginfo;
324         int n = 0;
325         struct igb_ring *tx_ring;
326         union e1000_adv_tx_desc *tx_desc;
327         struct my_u0 { u64 a; u64 b; } *u0;
328         struct igb_buffer *buffer_info;
329         struct igb_ring *rx_ring;
330         union e1000_adv_rx_desc *rx_desc;
331         u32 staterr;
332         int i = 0;
333
334         if (!netif_msg_hw(adapter))
335                 return;
336
337         /* Print netdevice Info */
338         if (netdev) {
339                 dev_info(&adapter->pdev->dev, "Net device Info\n");
340                 printk(KERN_INFO "Device Name     state            "
341                         "trans_start      last_rx\n");
342                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
343                 netdev->name,
344                 netdev->state,
345                 netdev->trans_start,
346                 netdev->last_rx);
347         }
348
349         /* Print Registers */
350         dev_info(&adapter->pdev->dev, "Register Dump\n");
351         printk(KERN_INFO " Register Name   Value\n");
352         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
353              reginfo->name; reginfo++) {
354                 igb_regdump(hw, reginfo);
355         }
356
357         /* Print TX Ring Summary */
358         if (!netdev || !netif_running(netdev))
359                 goto exit;
360
361         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
362         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
363                 " leng ntw timestamp\n");
364         for (n = 0; n < adapter->num_tx_queues; n++) {
365                 tx_ring = adapter->tx_ring[n];
366                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
367                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
368                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
369                            (u64)buffer_info->dma,
370                            buffer_info->length,
371                            buffer_info->next_to_watch,
372                            (u64)buffer_info->time_stamp);
373         }
374
375         /* Print TX Rings */
376         if (!netif_msg_tx_done(adapter))
377                 goto rx_ring_summary;
378
379         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
380
381         /* Transmit Descriptor Formats
382          *
383          * Advanced Transmit Descriptor
384          *   +--------------------------------------------------------------+
385          * 0 |         Buffer Address [63:0]                                |
386          *   +--------------------------------------------------------------+
387          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
388          *   +--------------------------------------------------------------+
389          *   63      46 45    40 39 38 36 35 32 31   24             15       0
390          */
391
392         for (n = 0; n < adapter->num_tx_queues; n++) {
393                 tx_ring = adapter->tx_ring[n];
394                 printk(KERN_INFO "------------------------------------\n");
395                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
398                         "[PlPOCIStDDM Ln] [bi->dma       ] "
399                         "leng  ntw timestamp        bi->skb\n");
400
401                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
402                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
403                         buffer_info = &tx_ring->buffer_info[i];
404                         u0 = (struct my_u0 *)tx_desc;
405                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
406                                 " %04X  %3X %016llX %p", i,
407                                 le64_to_cpu(u0->a),
408                                 le64_to_cpu(u0->b),
409                                 (u64)buffer_info->dma,
410                                 buffer_info->length,
411                                 buffer_info->next_to_watch,
412                                 (u64)buffer_info->time_stamp,
413                                 buffer_info->skb);
414                         if (i == tx_ring->next_to_use &&
415                                 i == tx_ring->next_to_clean)
416                                 printk(KERN_CONT " NTC/U\n");
417                         else if (i == tx_ring->next_to_use)
418                                 printk(KERN_CONT " NTU\n");
419                         else if (i == tx_ring->next_to_clean)
420                                 printk(KERN_CONT " NTC\n");
421                         else
422                                 printk(KERN_CONT "\n");
423
424                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
425                                 print_hex_dump(KERN_INFO, "",
426                                         DUMP_PREFIX_ADDRESS,
427                                         16, 1, phys_to_virt(buffer_info->dma),
428                                         buffer_info->length, true);
429                 }
430         }
431
432         /* Print RX Rings Summary */
433 rx_ring_summary:
434         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
435         printk(KERN_INFO "Queue [NTU] [NTC]\n");
436         for (n = 0; n < adapter->num_rx_queues; n++) {
437                 rx_ring = adapter->rx_ring[n];
438                 printk(KERN_INFO " %5d %5X %5X\n", n,
439                            rx_ring->next_to_use, rx_ring->next_to_clean);
440         }
441
442         /* Print RX Rings */
443         if (!netif_msg_rx_status(adapter))
444                 goto exit;
445
446         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
447
448         /* Advanced Receive Descriptor (Read) Format
449          *    63                                           1        0
450          *    +-----------------------------------------------------+
451          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
452          *    +----------------------------------------------+------+
453          *  8 |       Header Buffer Address [63:1]           |  DD  |
454          *    +-----------------------------------------------------+
455          *
456          *
457          * Advanced Receive Descriptor (Write-Back) Format
458          *
459          *   63       48 47    32 31  30      21 20 17 16   4 3     0
460          *   +------------------------------------------------------+
461          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
462          *   | Checksum   Ident  |   |           |    | Type | Type |
463          *   +------------------------------------------------------+
464          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
465          *   +------------------------------------------------------+
466          *   63       48 47    32 31            20 19               0
467          */
468
469         for (n = 0; n < adapter->num_rx_queues; n++) {
470                 rx_ring = adapter->rx_ring[n];
471                 printk(KERN_INFO "------------------------------------\n");
472                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
475                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
476                         "<-- Adv Rx Read format\n");
477                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
478                         "[vl er S cks ln] ---------------- [bi->skb] "
479                         "<-- Adv Rx Write-Back format\n");
480
481                 for (i = 0; i < rx_ring->count; i++) {
482                         buffer_info = &rx_ring->buffer_info[i];
483                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
484                         u0 = (struct my_u0 *)rx_desc;
485                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
486                         if (staterr & E1000_RXD_STAT_DD) {
487                                 /* Descriptor Done */
488                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
489                                         "%016llX ---------------- %p", i,
490                                         le64_to_cpu(u0->a),
491                                         le64_to_cpu(u0->b),
492                                         buffer_info->skb);
493                         } else {
494                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
495                                         "%016llX %016llX %p", i,
496                                         le64_to_cpu(u0->a),
497                                         le64_to_cpu(u0->b),
498                                         (u64)buffer_info->dma,
499                                         buffer_info->skb);
500
501                                 if (netif_msg_pktdata(adapter)) {
502                                         print_hex_dump(KERN_INFO, "",
503                                                 DUMP_PREFIX_ADDRESS,
504                                                 16, 1,
505                                                 phys_to_virt(buffer_info->dma),
506                                                 rx_ring->rx_buffer_len, true);
507                                         if (rx_ring->rx_buffer_len
508                                                 < IGB_RXBUFFER_1024)
509                                                 print_hex_dump(KERN_INFO, "",
510                                                   DUMP_PREFIX_ADDRESS,
511                                                   16, 1,
512                                                   phys_to_virt(
513                                                     buffer_info->page_dma +
514                                                     buffer_info->page_offset),
515                                                   PAGE_SIZE/2, true);
516                                 }
517                         }
518
519                         if (i == rx_ring->next_to_use)
520                                 printk(KERN_CONT " NTU\n");
521                         else if (i == rx_ring->next_to_clean)
522                                 printk(KERN_CONT " NTC\n");
523                         else
524                                 printk(KERN_CONT "\n");
525
526                 }
527         }
528
529 exit:
530         return;
531 }
532
533
534 /**
535  * igb_read_clock - read raw cycle counter (to be used by time counter)
536  */
537 static cycle_t igb_read_clock(const struct cyclecounter *tc)
538 {
539         struct igb_adapter *adapter =
540                 container_of(tc, struct igb_adapter, cycles);
541         struct e1000_hw *hw = &adapter->hw;
542         u64 stamp = 0;
543         int shift = 0;
544
545         /*
546          * The timestamp latches on lowest register read. For the 82580
547          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
548          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
549          */
550         if (hw->mac.type == e1000_82580) {
551                 stamp = rd32(E1000_SYSTIMR) >> 8;
552                 shift = IGB_82580_TSYNC_SHIFT;
553         }
554
555         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
556         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
557         return stamp;
558 }
559
560 /**
561  * igb_get_hw_dev - return device
562  * used by hardware layer to print debugging information
563  **/
564 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
565 {
566         struct igb_adapter *adapter = hw->back;
567         return adapter->netdev;
568 }
569
570 /**
571  * igb_init_module - Driver Registration Routine
572  *
573  * igb_init_module is the first routine called when the driver is
574  * loaded. All it does is register with the PCI subsystem.
575  **/
576 static int __init igb_init_module(void)
577 {
578         int ret;
579         printk(KERN_INFO "%s - version %s\n",
580                igb_driver_string, igb_driver_version);
581
582         printk(KERN_INFO "%s\n", igb_copyright);
583
584 #ifdef CONFIG_IGB_DCA
585         dca_register_notify(&dca_notifier);
586 #endif
587         ret = pci_register_driver(&igb_driver);
588         return ret;
589 }
590
591 module_init(igb_init_module);
592
593 /**
594  * igb_exit_module - Driver Exit Cleanup Routine
595  *
596  * igb_exit_module is called just before the driver is removed
597  * from memory.
598  **/
599 static void __exit igb_exit_module(void)
600 {
601 #ifdef CONFIG_IGB_DCA
602         dca_unregister_notify(&dca_notifier);
603 #endif
604         pci_unregister_driver(&igb_driver);
605 }
606
607 module_exit(igb_exit_module);
608
/*
 * Map a sequential ring number onto the interleaved 82576 queue index:
 * even i -> i / 2, odd i -> 8 + i / 2 (0, 8, 1, 9, 2, 10, ...).
 * The argument is parenthesized so expressions such as (a + b) expand
 * correctly; it is still evaluated twice, so avoid side effects.
 */
#define Q_IDX_82576(i) ((((i) & 0x1) << 3) + ((i) >> 1))
610 /**
611  * igb_cache_ring_register - Descriptor ring to register mapping
612  * @adapter: board private structure to initialize
613  *
614  * Once we know the feature-set enabled for the device, we'll cache
615  * the register offset the descriptor ring is assigned to.
616  **/
617 static void igb_cache_ring_register(struct igb_adapter *adapter)
618 {
619         int i = 0, j = 0;
620         u32 rbase_offset = adapter->vfs_allocated_count;
621
622         switch (adapter->hw.mac.type) {
623         case e1000_82576:
624                 /* The queues are allocated for virtualization such that VF 0
625                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
626                  * In order to avoid collision we start at the first free queue
627                  * and continue consuming queues in the same sequence
628                  */
629                 if (adapter->vfs_allocated_count) {
630                         for (; i < adapter->rss_queues; i++)
631                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
632                                                                Q_IDX_82576(i);
633                 }
634         case e1000_82575:
635         case e1000_82580:
636         case e1000_i350:
637         default:
638                 for (; i < adapter->num_rx_queues; i++)
639                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
640                 for (; j < adapter->num_tx_queues; j++)
641                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
642                 break;
643         }
644 }
645
646 static void igb_free_queues(struct igb_adapter *adapter)
647 {
648         int i;
649
650         for (i = 0; i < adapter->num_tx_queues; i++) {
651                 kfree(adapter->tx_ring[i]);
652                 adapter->tx_ring[i] = NULL;
653         }
654         for (i = 0; i < adapter->num_rx_queues; i++) {
655                 kfree(adapter->rx_ring[i]);
656                 adapter->rx_ring[i] = NULL;
657         }
658         adapter->num_rx_queues = 0;
659         adapter->num_tx_queues = 0;
660 }
661
662 /**
663  * igb_alloc_queues - Allocate memory for all rings
664  * @adapter: board private structure to initialize
665  *
666  * We allocate one ring per queue at run-time since we don't know the
667  * number of queues at compile-time.
668  **/
669 static int igb_alloc_queues(struct igb_adapter *adapter)
670 {
671         struct igb_ring *ring;
672         int i;
673
674         for (i = 0; i < adapter->num_tx_queues; i++) {
675                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
676                 if (!ring)
677                         goto err;
678                 ring->count = adapter->tx_ring_count;
679                 ring->queue_index = i;
680                 ring->dev = &adapter->pdev->dev;
681                 ring->netdev = adapter->netdev;
682                 /* For 82575, context index must be unique per ring. */
683                 if (adapter->hw.mac.type == e1000_82575)
684                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
685                 adapter->tx_ring[i] = ring;
686         }
687
688         for (i = 0; i < adapter->num_rx_queues; i++) {
689                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
690                 if (!ring)
691                         goto err;
692                 ring->count = adapter->rx_ring_count;
693                 ring->queue_index = i;
694                 ring->dev = &adapter->pdev->dev;
695                 ring->netdev = adapter->netdev;
696                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
697                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
698                 /* set flag indicating ring supports SCTP checksum offload */
699                 if (adapter->hw.mac.type >= e1000_82576)
700                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
701                 adapter->rx_ring[i] = ring;
702         }
703
704         igb_cache_ring_register(adapter);
705
706         return 0;
707
708 err:
709         igb_free_queues(adapter);
710
711         return -ENOMEM;
712 }
713
714 #define IGB_N0_QUEUE -1
715 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
716 {
717         u32 msixbm = 0;
718         struct igb_adapter *adapter = q_vector->adapter;
719         struct e1000_hw *hw = &adapter->hw;
720         u32 ivar, index;
721         int rx_queue = IGB_N0_QUEUE;
722         int tx_queue = IGB_N0_QUEUE;
723
724         if (q_vector->rx_ring)
725                 rx_queue = q_vector->rx_ring->reg_idx;
726         if (q_vector->tx_ring)
727                 tx_queue = q_vector->tx_ring->reg_idx;
728
729         switch (hw->mac.type) {
730         case e1000_82575:
731                 /* The 82575 assigns vectors using a bitmask, which matches the
732                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
733                    or more queues to a vector, we write the appropriate bits
734                    into the MSIXBM register for that vector. */
735                 if (rx_queue > IGB_N0_QUEUE)
736                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
737                 if (tx_queue > IGB_N0_QUEUE)
738                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
739                 if (!adapter->msix_entries && msix_vector == 0)
740                         msixbm |= E1000_EIMS_OTHER;
741                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
742                 q_vector->eims_value = msixbm;
743                 break;
744         case e1000_82576:
745                 /* 82576 uses a table-based method for assigning vectors.
746                    Each queue has a single entry in the table to which we write
747                    a vector number along with a "valid" bit.  Sadly, the layout
748                    of the table is somewhat counterintuitive. */
749                 if (rx_queue > IGB_N0_QUEUE) {
750                         index = (rx_queue & 0x7);
751                         ivar = array_rd32(E1000_IVAR0, index);
752                         if (rx_queue < 8) {
753                                 /* vector goes into low byte of register */
754                                 ivar = ivar & 0xFFFFFF00;
755                                 ivar |= msix_vector | E1000_IVAR_VALID;
756                         } else {
757                                 /* vector goes into third byte of register */
758                                 ivar = ivar & 0xFF00FFFF;
759                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
760                         }
761                         array_wr32(E1000_IVAR0, index, ivar);
762                 }
763                 if (tx_queue > IGB_N0_QUEUE) {
764                         index = (tx_queue & 0x7);
765                         ivar = array_rd32(E1000_IVAR0, index);
766                         if (tx_queue < 8) {
767                                 /* vector goes into second byte of register */
768                                 ivar = ivar & 0xFFFF00FF;
769                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
770                         } else {
771                                 /* vector goes into high byte of register */
772                                 ivar = ivar & 0x00FFFFFF;
773                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
774                         }
775                         array_wr32(E1000_IVAR0, index, ivar);
776                 }
777                 q_vector->eims_value = 1 << msix_vector;
778                 break;
779         case e1000_82580:
780         case e1000_i350:
781                 /* 82580 uses the same table-based approach as 82576 but has fewer
782                    entries as a result we carry over for queues greater than 4. */
783                 if (rx_queue > IGB_N0_QUEUE) {
784                         index = (rx_queue >> 1);
785                         ivar = array_rd32(E1000_IVAR0, index);
786                         if (rx_queue & 0x1) {
787                                 /* vector goes into third byte of register */
788                                 ivar = ivar & 0xFF00FFFF;
789                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
790                         } else {
791                                 /* vector goes into low byte of register */
792                                 ivar = ivar & 0xFFFFFF00;
793                                 ivar |= msix_vector | E1000_IVAR_VALID;
794                         }
795                         array_wr32(E1000_IVAR0, index, ivar);
796                 }
797                 if (tx_queue > IGB_N0_QUEUE) {
798                         index = (tx_queue >> 1);
799                         ivar = array_rd32(E1000_IVAR0, index);
800                         if (tx_queue & 0x1) {
801                                 /* vector goes into high byte of register */
802                                 ivar = ivar & 0x00FFFFFF;
803                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
804                         } else {
805                                 /* vector goes into second byte of register */
806                                 ivar = ivar & 0xFFFF00FF;
807                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
808                         }
809                         array_wr32(E1000_IVAR0, index, ivar);
810                 }
811                 q_vector->eims_value = 1 << msix_vector;
812                 break;
813         default:
814                 BUG();
815                 break;
816         }
817
818         /* add q_vector eims value to global eims_enable_mask */
819         adapter->eims_enable_mask |= q_vector->eims_value;
820
821         /* configure q_vector to set itr on first interrupt */
822         q_vector->set_itr = 1;
823 }
824
825 /**
826  * igb_configure_msix - Configure MSI-X hardware
827  *
828  * igb_configure_msix sets up the hardware to properly
829  * generate MSI-X interrupts.
830  **/
831 static void igb_configure_msix(struct igb_adapter *adapter)
832 {
833         u32 tmp;
834         int i, vector = 0;
835         struct e1000_hw *hw = &adapter->hw;
836
837         adapter->eims_enable_mask = 0;
838
839         /* set vector for other causes, i.e. link changes */
840         switch (hw->mac.type) {
841         case e1000_82575:
842                 tmp = rd32(E1000_CTRL_EXT);
843                 /* enable MSI-X PBA support*/
844                 tmp |= E1000_CTRL_EXT_PBA_CLR;
845
846                 /* Auto-Mask interrupts upon ICR read. */
847                 tmp |= E1000_CTRL_EXT_EIAME;
848                 tmp |= E1000_CTRL_EXT_IRCA;
849
850                 wr32(E1000_CTRL_EXT, tmp);
851
852                 /* enable msix_other interrupt */
853                 array_wr32(E1000_MSIXBM(0), vector++,
854                                       E1000_EIMS_OTHER);
855                 adapter->eims_other = E1000_EIMS_OTHER;
856
857                 break;
858
859         case e1000_82576:
860         case e1000_82580:
861         case e1000_i350:
862                 /* Turn on MSI-X capability first, or our settings
863                  * won't stick.  And it will take days to debug. */
864                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
865                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
866                                 E1000_GPIE_NSICR);
867
868                 /* enable msix_other interrupt */
869                 adapter->eims_other = 1 << vector;
870                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
871
872                 wr32(E1000_IVAR_MISC, tmp);
873                 break;
874         default:
875                 /* do nothing, since nothing else supports MSI-X */
876                 break;
877         } /* switch (hw->mac.type) */
878
879         adapter->eims_enable_mask |= adapter->eims_other;
880
881         for (i = 0; i < adapter->num_q_vectors; i++)
882                 igb_assign_vector(adapter->q_vector[i], vector++);
883
884         wrfl();
885 }
886
887 /**
888  * igb_request_msix - Initialize MSI-X interrupts
889  *
890  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
891  * kernel.
892  **/
893 static int igb_request_msix(struct igb_adapter *adapter)
894 {
895         struct net_device *netdev = adapter->netdev;
896         struct e1000_hw *hw = &adapter->hw;
897         int i, err = 0, vector = 0;
898
899         err = request_irq(adapter->msix_entries[vector].vector,
900                           igb_msix_other, 0, netdev->name, adapter);
901         if (err)
902                 goto out;
903         vector++;
904
905         for (i = 0; i < adapter->num_q_vectors; i++) {
906                 struct igb_q_vector *q_vector = adapter->q_vector[i];
907
908                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
909
910                 if (q_vector->rx_ring && q_vector->tx_ring)
911                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
912                                 q_vector->rx_ring->queue_index);
913                 else if (q_vector->tx_ring)
914                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
915                                 q_vector->tx_ring->queue_index);
916                 else if (q_vector->rx_ring)
917                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
918                                 q_vector->rx_ring->queue_index);
919                 else
920                         sprintf(q_vector->name, "%s-unused", netdev->name);
921
922                 err = request_irq(adapter->msix_entries[vector].vector,
923                                   igb_msix_ring, 0, q_vector->name,
924                                   q_vector);
925                 if (err)
926                         goto out;
927                 vector++;
928         }
929
930         igb_configure_msix(adapter);
931         return 0;
932 out:
933         return err;
934 }
935
936 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
937 {
938         if (adapter->msix_entries) {
939                 pci_disable_msix(adapter->pdev);
940                 kfree(adapter->msix_entries);
941                 adapter->msix_entries = NULL;
942         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
943                 pci_disable_msi(adapter->pdev);
944         }
945 }
946
947 /**
948  * igb_free_q_vectors - Free memory allocated for interrupt vectors
949  * @adapter: board private structure to initialize
950  *
951  * This function frees the memory allocated to the q_vectors.  In addition if
952  * NAPI is enabled it will delete any references to the NAPI struct prior
953  * to freeing the q_vector.
954  **/
955 static void igb_free_q_vectors(struct igb_adapter *adapter)
956 {
957         int v_idx;
958
959         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
960                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
961                 adapter->q_vector[v_idx] = NULL;
962                 if (!q_vector)
963                         continue;
964                 netif_napi_del(&q_vector->napi);
965                 kfree(q_vector);
966         }
967         adapter->num_q_vectors = 0;
968 }
969
/**
 * igb_clear_interrupt_scheme - undo queue, q_vector and interrupt setup
 * @adapter: board private structure
 *
 * Releases the queues first, then the q_vectors, and finally switches
 * MSI-X/MSI off again, leaving the device with no rx queues, tx queues or
 * MSI-X interrupts allocated.
 */
static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
{
	/* rings */
	igb_free_queues(adapter);

	/* per-vector NAPI state */
	igb_free_q_vectors(adapter);

	/* MSI-X table or MSI */
	igb_reset_interrupt_capability(adapter);
}
982
983 /**
984  * igb_set_interrupt_capability - set MSI or MSI-X if supported
985  *
986  * Attempt to configure interrupts using the best available
987  * capabilities of the hardware and kernel.
988  **/
989 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
990 {
991         int err;
992         int numvecs, i;
993
994         /* Number of supported queues. */
995         adapter->num_rx_queues = adapter->rss_queues;
996         if (adapter->vfs_allocated_count)
997                 adapter->num_tx_queues = 1;
998         else
999                 adapter->num_tx_queues = adapter->rss_queues;
1000
1001         /* start with one vector for every rx queue */
1002         numvecs = adapter->num_rx_queues;
1003
1004         /* if tx handler is separate add 1 for every tx queue */
1005         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1006                 numvecs += adapter->num_tx_queues;
1007
1008         /* store the number of vectors reserved for queues */
1009         adapter->num_q_vectors = numvecs;
1010
1011         /* add 1 vector for link status interrupts */
1012         numvecs++;
1013         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1014                                         GFP_KERNEL);
1015         if (!adapter->msix_entries)
1016                 goto msi_only;
1017
1018         for (i = 0; i < numvecs; i++)
1019                 adapter->msix_entries[i].entry = i;
1020
1021         err = pci_enable_msix(adapter->pdev,
1022                               adapter->msix_entries,
1023                               numvecs);
1024         if (err == 0)
1025                 goto out;
1026
1027         igb_reset_interrupt_capability(adapter);
1028
1029         /* If we can't do MSI-X, try MSI */
1030 msi_only:
1031 #ifdef CONFIG_PCI_IOV
1032         /* disable SR-IOV for non MSI-X configurations */
1033         if (adapter->vf_data) {
1034                 struct e1000_hw *hw = &adapter->hw;
1035                 /* disable iov and allow time for transactions to clear */
1036                 pci_disable_sriov(adapter->pdev);
1037                 msleep(500);
1038
1039                 kfree(adapter->vf_data);
1040                 adapter->vf_data = NULL;
1041                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1042                 msleep(100);
1043                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1044         }
1045 #endif
1046         adapter->vfs_allocated_count = 0;
1047         adapter->rss_queues = 1;
1048         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1049         adapter->num_rx_queues = 1;
1050         adapter->num_tx_queues = 1;
1051         adapter->num_q_vectors = 1;
1052         if (!pci_enable_msi(adapter->pdev))
1053                 adapter->flags |= IGB_FLAG_HAS_MSI;
1054 out:
1055         /* Notify the stack of the (possibly) reduced Tx Queue count. */
1056         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
1057 }
1058
1059 /**
1060  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1061  * @adapter: board private structure to initialize
1062  *
1063  * We allocate one q_vector per queue interrupt.  If allocation fails we
1064  * return -ENOMEM.
1065  **/
1066 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1067 {
1068         struct igb_q_vector *q_vector;
1069         struct e1000_hw *hw = &adapter->hw;
1070         int v_idx;
1071
1072         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1073                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1074                 if (!q_vector)
1075                         goto err_out;
1076                 q_vector->adapter = adapter;
1077                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1078                 q_vector->itr_val = IGB_START_ITR;
1079                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1080                 adapter->q_vector[v_idx] = q_vector;
1081         }
1082         return 0;
1083
1084 err_out:
1085         igb_free_q_vectors(adapter);
1086         return -ENOMEM;
1087 }
1088
1089 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1090                                       int ring_idx, int v_idx)
1091 {
1092         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1093
1094         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1095         q_vector->rx_ring->q_vector = q_vector;
1096         q_vector->itr_val = adapter->rx_itr_setting;
1097         if (q_vector->itr_val && q_vector->itr_val <= 3)
1098                 q_vector->itr_val = IGB_START_ITR;
1099 }
1100
1101 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1102                                       int ring_idx, int v_idx)
1103 {
1104         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1105
1106         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1107         q_vector->tx_ring->q_vector = q_vector;
1108         q_vector->itr_val = adapter->tx_itr_setting;
1109         if (q_vector->itr_val && q_vector->itr_val <= 3)
1110                 q_vector->itr_val = IGB_START_ITR;
1111 }
1112
1113 /**
1114  * igb_map_ring_to_vector - maps allocated queues to vectors
1115  *
1116  * This function maps the recently allocated queues to vectors.
1117  **/
1118 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1119 {
1120         int i;
1121         int v_idx = 0;
1122
1123         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1124             (adapter->num_q_vectors < adapter->num_tx_queues))
1125                 return -ENOMEM;
1126
1127         if (adapter->num_q_vectors >=
1128             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1129                 for (i = 0; i < adapter->num_rx_queues; i++)
1130                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1131                 for (i = 0; i < adapter->num_tx_queues; i++)
1132                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1133         } else {
1134                 for (i = 0; i < adapter->num_rx_queues; i++) {
1135                         if (i < adapter->num_tx_queues)
1136                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1137                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1138                 }
1139                 for (; i < adapter->num_tx_queues; i++)
1140                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1141         }
1142         return 0;
1143 }
1144
1145 /**
1146  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1147  *
1148  * This function initializes the interrupts and allocates all of the queues.
1149  **/
1150 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1151 {
1152         struct pci_dev *pdev = adapter->pdev;
1153         int err;
1154
1155         igb_set_interrupt_capability(adapter);
1156
1157         err = igb_alloc_q_vectors(adapter);
1158         if (err) {
1159                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1160                 goto err_alloc_q_vectors;
1161         }
1162
1163         err = igb_alloc_queues(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1166                 goto err_alloc_queues;
1167         }
1168
1169         err = igb_map_ring_to_vector(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1172                 goto err_map_queues;
1173         }
1174
1175
1176         return 0;
1177 err_map_queues:
1178         igb_free_queues(adapter);
1179 err_alloc_queues:
1180         igb_free_q_vectors(adapter);
1181 err_alloc_q_vectors:
1182         igb_reset_interrupt_capability(adapter);
1183         return err;
1184 }
1185
1186 /**
1187  * igb_request_irq - initialize interrupts
1188  *
1189  * Attempts to configure interrupts using the best available
1190  * capabilities of the hardware and kernel.
1191  **/
1192 static int igb_request_irq(struct igb_adapter *adapter)
1193 {
1194         struct net_device *netdev = adapter->netdev;
1195         struct pci_dev *pdev = adapter->pdev;
1196         int err = 0;
1197
1198         if (adapter->msix_entries) {
1199                 err = igb_request_msix(adapter);
1200                 if (!err)
1201                         goto request_done;
1202                 /* fall back to MSI */
1203                 igb_clear_interrupt_scheme(adapter);
1204                 if (!pci_enable_msi(adapter->pdev))
1205                         adapter->flags |= IGB_FLAG_HAS_MSI;
1206                 igb_free_all_tx_resources(adapter);
1207                 igb_free_all_rx_resources(adapter);
1208                 adapter->num_tx_queues = 1;
1209                 adapter->num_rx_queues = 1;
1210                 adapter->num_q_vectors = 1;
1211                 err = igb_alloc_q_vectors(adapter);
1212                 if (err) {
1213                         dev_err(&pdev->dev,
1214                                 "Unable to allocate memory for vectors\n");
1215                         goto request_done;
1216                 }
1217                 err = igb_alloc_queues(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for queues\n");
1221                         igb_free_q_vectors(adapter);
1222                         goto request_done;
1223                 }
1224                 igb_setup_all_tx_resources(adapter);
1225                 igb_setup_all_rx_resources(adapter);
1226         } else {
1227                 igb_assign_vector(adapter->q_vector[0], 0);
1228         }
1229
1230         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1231                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1232                                   netdev->name, adapter);
1233                 if (!err)
1234                         goto request_done;
1235
1236                 /* fall back to legacy interrupts */
1237                 igb_reset_interrupt_capability(adapter);
1238                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1239         }
1240
1241         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1242                           netdev->name, adapter);
1243
1244         if (err)
1245                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1246                         err);
1247
1248 request_done:
1249         return err;
1250 }
1251
1252 static void igb_free_irq(struct igb_adapter *adapter)
1253 {
1254         if (adapter->msix_entries) {
1255                 int vector = 0, i;
1256
1257                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1258
1259                 for (i = 0; i < adapter->num_q_vectors; i++) {
1260                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1261                         free_irq(adapter->msix_entries[vector++].vector,
1262                                  q_vector);
1263                 }
1264         } else {
1265                 free_irq(adapter->pdev->irq, adapter);
1266         }
1267 }
1268
1269 /**
1270  * igb_irq_disable - Mask off interrupt generation on the NIC
1271  * @adapter: board private structure
1272  **/
1273 static void igb_irq_disable(struct igb_adapter *adapter)
1274 {
1275         struct e1000_hw *hw = &adapter->hw;
1276
1277         /*
1278          * we need to be careful when disabling interrupts.  The VFs are also
1279          * mapped into these registers and so clearing the bits can cause
1280          * issues on the VF drivers so we only need to clear what we set
1281          */
1282         if (adapter->msix_entries) {
1283                 u32 regval = rd32(E1000_EIAM);
1284                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1285                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1286                 regval = rd32(E1000_EIAC);
1287                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1288         }
1289
1290         wr32(E1000_IAM, 0);
1291         wr32(E1000_IMC, ~0);
1292         wrfl();
1293         if (adapter->msix_entries) {
1294                 int i;
1295                 for (i = 0; i < adapter->num_q_vectors; i++)
1296                         synchronize_irq(adapter->msix_entries[i].vector);
1297         } else {
1298                 synchronize_irq(adapter->pdev->irq);
1299         }
1300 }
1301
1302 /**
1303  * igb_irq_enable - Enable default interrupt generation settings
1304  * @adapter: board private structure
1305  **/
1306 static void igb_irq_enable(struct igb_adapter *adapter)
1307 {
1308         struct e1000_hw *hw = &adapter->hw;
1309
1310         if (adapter->msix_entries) {
1311                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1312                 u32 regval = rd32(E1000_EIAC);
1313                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1314                 regval = rd32(E1000_EIAM);
1315                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1316                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1317                 if (adapter->vfs_allocated_count) {
1318                         wr32(E1000_MBVFIMR, 0xFF);
1319                         ims |= E1000_IMS_VMMB;
1320                 }
1321                 if (adapter->hw.mac.type == e1000_82580)
1322                         ims |= E1000_IMS_DRSTA;
1323
1324                 wr32(E1000_IMS, ims);
1325         } else {
1326                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1327                                 E1000_IMS_DRSTA);
1328                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1329                                 E1000_IMS_DRSTA);
1330         }
1331 }
1332
1333 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1334 {
1335         struct e1000_hw *hw = &adapter->hw;
1336         u16 vid = adapter->hw.mng_cookie.vlan_id;
1337         u16 old_vid = adapter->mng_vlan_id;
1338
1339         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1340                 /* add VID to filter table */
1341                 igb_vfta_set(hw, vid, true);
1342                 adapter->mng_vlan_id = vid;
1343         } else {
1344                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1345         }
1346
1347         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1348             (vid != old_vid) &&
1349             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1350                 /* remove VID from filter table */
1351                 igb_vfta_set(hw, old_vid, false);
1352         }
1353 }
1354
1355 /**
1356  * igb_release_hw_control - release control of the h/w to f/w
1357  * @adapter: address of board private structure
1358  *
1359  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1360  * For ASF and Pass Through versions of f/w this means that the
1361  * driver is no longer loaded.
1362  *
1363  **/
1364 static void igb_release_hw_control(struct igb_adapter *adapter)
1365 {
1366         struct e1000_hw *hw = &adapter->hw;
1367         u32 ctrl_ext;
1368
1369         /* Let firmware take over control of h/w */
1370         ctrl_ext = rd32(E1000_CTRL_EXT);
1371         wr32(E1000_CTRL_EXT,
1372                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1373 }
1374
1375 /**
1376  * igb_get_hw_control - get control of the h/w from f/w
1377  * @adapter: address of board private structure
1378  *
1379  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1380  * For ASF and Pass Through versions of f/w this means that
1381  * the driver is loaded.
1382  *
1383  **/
1384 static void igb_get_hw_control(struct igb_adapter *adapter)
1385 {
1386         struct e1000_hw *hw = &adapter->hw;
1387         u32 ctrl_ext;
1388
1389         /* Let firmware know the driver has taken over */
1390         ctrl_ext = rd32(E1000_CTRL_EXT);
1391         wr32(E1000_CTRL_EXT,
1392                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1393 }
1394
1395 /**
1396  * igb_configure - configure the hardware for RX and TX
1397  * @adapter: private board structure
1398  **/
1399 static void igb_configure(struct igb_adapter *adapter)
1400 {
1401         struct net_device *netdev = adapter->netdev;
1402         int i;
1403
1404         igb_get_hw_control(adapter);
1405         igb_set_rx_mode(netdev);
1406
1407         igb_restore_vlan(adapter);
1408
1409         igb_setup_tctl(adapter);
1410         igb_setup_mrqc(adapter);
1411         igb_setup_rctl(adapter);
1412
1413         igb_configure_tx(adapter);
1414         igb_configure_rx(adapter);
1415
1416         igb_rx_fifo_flush_82575(&adapter->hw);
1417
1418         /* call igb_desc_unused which always leaves
1419          * at least 1 descriptor unused to make sure
1420          * next_to_use != next_to_clean */
1421         for (i = 0; i < adapter->num_rx_queues; i++) {
1422                 struct igb_ring *ring = adapter->rx_ring[i];
1423                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1424         }
1425 }
1426
1427 /**
1428  * igb_power_up_link - Power up the phy/serdes link
1429  * @adapter: address of board private structure
1430  **/
1431 void igb_power_up_link(struct igb_adapter *adapter)
1432 {
1433         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1434                 igb_power_up_phy_copper(&adapter->hw);
1435         else
1436                 igb_power_up_serdes_link_82575(&adapter->hw);
1437 }
1438
1439 /**
1440  * igb_power_down_link - Power down the phy/serdes link
1441  * @adapter: address of board private structure
1442  */
1443 static void igb_power_down_link(struct igb_adapter *adapter)
1444 {
1445         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1446                 igb_power_down_phy_copper_82575(&adapter->hw);
1447         else
1448                 igb_shutdown_serdes_link_82575(&adapter->hw);
1449 }
1450
1451 /**
1452  * igb_up - Open the interface and prepare it to handle traffic
1453  * @adapter: board private structure
1454  **/
1455 int igb_up(struct igb_adapter *adapter)
1456 {
1457         struct e1000_hw *hw = &adapter->hw;
1458         int i;
1459
1460         /* hardware has been reset, we need to reload some things */
1461         igb_configure(adapter);
1462
1463         clear_bit(__IGB_DOWN, &adapter->state);
1464
1465         for (i = 0; i < adapter->num_q_vectors; i++) {
1466                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1467                 napi_enable(&q_vector->napi);
1468         }
1469         if (adapter->msix_entries)
1470                 igb_configure_msix(adapter);
1471         else
1472                 igb_assign_vector(adapter->q_vector[0], 0);
1473
1474         /* Clear any pending interrupts. */
1475         rd32(E1000_ICR);
1476         igb_irq_enable(adapter);
1477
1478         /* notify VFs that reset has been completed */
1479         if (adapter->vfs_allocated_count) {
1480                 u32 reg_data = rd32(E1000_CTRL_EXT);
1481                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1482                 wr32(E1000_CTRL_EXT, reg_data);
1483         }
1484
1485         netif_tx_start_all_queues(adapter->netdev);
1486
1487         /* start the watchdog. */
1488         hw->mac.get_link_status = 1;
1489         schedule_work(&adapter->watchdog_task);
1490
1491         return 0;
1492 }
1493
1494 void igb_down(struct igb_adapter *adapter)
1495 {
1496         struct net_device *netdev = adapter->netdev;
1497         struct e1000_hw *hw = &adapter->hw;
1498         u32 tctl, rctl;
1499         int i;
1500
1501         /* signal that we're down so the interrupt handler does not
1502          * reschedule our watchdog timer */
1503         set_bit(__IGB_DOWN, &adapter->state);
1504
1505         /* disable receives in the hardware */
1506         rctl = rd32(E1000_RCTL);
1507         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1508         /* flush and sleep below */
1509
1510         netif_tx_stop_all_queues(netdev);
1511
1512         /* disable transmits in the hardware */
1513         tctl = rd32(E1000_TCTL);
1514         tctl &= ~E1000_TCTL_EN;
1515         wr32(E1000_TCTL, tctl);
1516         /* flush both disables and wait for them to finish */
1517         wrfl();
1518         msleep(10);
1519
1520         for (i = 0; i < adapter->num_q_vectors; i++) {
1521                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1522                 napi_disable(&q_vector->napi);
1523         }
1524
1525         igb_irq_disable(adapter);
1526
1527         del_timer_sync(&adapter->watchdog_timer);
1528         del_timer_sync(&adapter->phy_info_timer);
1529
1530         netif_carrier_off(netdev);
1531
1532         /* record the stats before reset*/
1533         igb_update_stats(adapter);
1534
1535         adapter->link_speed = 0;
1536         adapter->link_duplex = 0;
1537
1538         if (!pci_channel_offline(adapter->pdev))
1539                 igb_reset(adapter);
1540         igb_clean_all_tx_rings(adapter);
1541         igb_clean_all_rx_rings(adapter);
1542 #ifdef CONFIG_IGB_DCA
1543
1544         /* since we reset the hardware DCA settings were cleared */
1545         igb_setup_dca(adapter);
1546 #endif
1547 }
1548
1549 void igb_reinit_locked(struct igb_adapter *adapter)
1550 {
1551         WARN_ON(in_interrupt());
1552         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1553                 msleep(1);
1554         igb_down(adapter);
1555         igb_up(adapter);
1556         clear_bit(__IGB_RESETTING, &adapter->state);
1557 }
1558
1559 void igb_reset(struct igb_adapter *adapter)
1560 {
1561         struct pci_dev *pdev = adapter->pdev;
1562         struct e1000_hw *hw = &adapter->hw;
1563         struct e1000_mac_info *mac = &hw->mac;
1564         struct e1000_fc_info *fc = &hw->fc;
1565         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1566         u16 hwm;
1567
1568         /* Repartition Pba for greater than 9k mtu
1569          * To take effect CTRL.RST is required.
1570          */
1571         switch (mac->type) {
1572         case e1000_i350:
1573         case e1000_82580:
1574                 pba = rd32(E1000_RXPBS);
1575                 pba = igb_rxpbs_adjust_82580(pba);
1576                 break;
1577         case e1000_82576:
1578                 pba = rd32(E1000_RXPBS);
1579                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1580                 break;
1581         case e1000_82575:
1582         default:
1583                 pba = E1000_PBA_34K;
1584                 break;
1585         }
1586
1587         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1588             (mac->type < e1000_82576)) {
1589                 /* adjust PBA for jumbo frames */
1590                 wr32(E1000_PBA, pba);
1591
1592                 /* To maintain wire speed transmits, the Tx FIFO should be
1593                  * large enough to accommodate two full transmit packets,
1594                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1595                  * the Rx FIFO should be large enough to accommodate at least
1596                  * one full receive packet and is similarly rounded up and
1597                  * expressed in KB. */
1598                 pba = rd32(E1000_PBA);
1599                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1600                 tx_space = pba >> 16;
1601                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1602                 pba &= 0xffff;
1603                 /* the tx fifo also stores 16 bytes of information about the tx
1604                  * but don't include ethernet FCS because hardware appends it */
1605                 min_tx_space = (adapter->max_frame_size +
1606                                 sizeof(union e1000_adv_tx_desc) -
1607                                 ETH_FCS_LEN) * 2;
1608                 min_tx_space = ALIGN(min_tx_space, 1024);
1609                 min_tx_space >>= 10;
1610                 /* software strips receive CRC, so leave room for it */
1611                 min_rx_space = adapter->max_frame_size;
1612                 min_rx_space = ALIGN(min_rx_space, 1024);
1613                 min_rx_space >>= 10;
1614
1615                 /* If current Tx allocation is less than the min Tx FIFO size,
1616                  * and the min Tx FIFO size is less than the current Rx FIFO
1617                  * allocation, take space away from current Rx allocation */
1618                 if (tx_space < min_tx_space &&
1619                     ((min_tx_space - tx_space) < pba)) {
1620                         pba = pba - (min_tx_space - tx_space);
1621
1622                         /* if short on rx space, rx wins and must trump tx
1623                          * adjustment */
1624                         if (pba < min_rx_space)
1625                                 pba = min_rx_space;
1626                 }
1627                 wr32(E1000_PBA, pba);
1628         }
1629
1630         /* flow control settings */
1631         /* The high water mark must be low enough to fit one full frame
1632          * (or the size used for early receive) above it in the Rx FIFO.
1633          * Set it to the lower of:
1634          * - 90% of the Rx FIFO size, or
1635          * - the full Rx FIFO size minus one full frame */
1636         hwm = min(((pba << 10) * 9 / 10),
1637                         ((pba << 10) - 2 * adapter->max_frame_size));
1638
1639         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1640         fc->low_water = fc->high_water - 16;
1641         fc->pause_time = 0xFFFF;
1642         fc->send_xon = 1;
1643         fc->current_mode = fc->requested_mode;
1644
1645         /* disable receive for all VFs and wait one second */
1646         if (adapter->vfs_allocated_count) {
1647                 int i;
1648                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1649                         adapter->vf_data[i].flags = 0;
1650
1651                 /* ping all the active vfs to let them know we are going down */
1652                 igb_ping_all_vfs(adapter);
1653
1654                 /* disable transmits and receives */
1655                 wr32(E1000_VFRE, 0);
1656                 wr32(E1000_VFTE, 0);
1657         }
1658
1659         /* Allow time for pending master requests to run */
1660         hw->mac.ops.reset_hw(hw);
1661         wr32(E1000_WUC, 0);
1662
1663         if (hw->mac.ops.init_hw(hw))
1664                 dev_err(&pdev->dev, "Hardware Error\n");
1665
1666         if (hw->mac.type == e1000_82580) {
1667                 u32 reg = rd32(E1000_PCIEMISC);
1668                 wr32(E1000_PCIEMISC,
1669                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1670         }
1671         if (!netif_running(adapter->netdev))
1672                 igb_power_down_link(adapter);
1673
1674         igb_update_mng_vlan(adapter);
1675
1676         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1677         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1678
1679         igb_get_phy_info(hw);
1680 }
1681
1682 static const struct net_device_ops igb_netdev_ops = {
1683         .ndo_open               = igb_open,
1684         .ndo_stop               = igb_close,
1685         .ndo_start_xmit         = igb_xmit_frame_adv,
1686         .ndo_get_stats          = igb_get_stats,
1687         .ndo_set_rx_mode        = igb_set_rx_mode,
1688         .ndo_set_multicast_list = igb_set_rx_mode,
1689         .ndo_set_mac_address    = igb_set_mac,
1690         .ndo_change_mtu         = igb_change_mtu,
1691         .ndo_do_ioctl           = igb_ioctl,
1692         .ndo_tx_timeout         = igb_tx_timeout,
1693         .ndo_validate_addr      = eth_validate_addr,
1694         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1695         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1696         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1697         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1698         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1699         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1700         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1701 #ifdef CONFIG_NET_POLL_CONTROLLER
1702         .ndo_poll_controller    = igb_netpoll,
1703 #endif
1704 };
1705
1706 /**
1707  * igb_probe - Device Initialization Routine
1708  * @pdev: PCI device information struct
1709  * @ent: entry in igb_pci_tbl
1710  *
1711  * Returns 0 on success, negative on failure
1712  *
1713  * igb_probe initializes an adapter identified by a pci_dev structure.
1714  * The OS initialization, configuring of the adapter private structure,
1715  * and a hardware reset occur.
1716  **/
1717 static int __devinit igb_probe(struct pci_dev *pdev,
1718                                const struct pci_device_id *ent)
1719 {
1720         struct net_device *netdev;
1721         struct igb_adapter *adapter;
1722         struct e1000_hw *hw;
1723         u16 eeprom_data = 0;
1724         static int global_quad_port_a; /* global quad port a indication */
1725         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1726         unsigned long mmio_start, mmio_len;
1727         int err, pci_using_dac;
1728         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1729         u32 part_num;
1730
1731         /* Catch broken hardware that put the wrong VF device ID in
1732          * the PCIe SR-IOV capability.
1733          */
1734         if (pdev->is_virtfn) {
1735                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1736                      pci_name(pdev), pdev->vendor, pdev->device);
1737                 return -EINVAL;
1738         }
1739
1740         err = pci_enable_device_mem(pdev);
1741         if (err)
1742                 return err;
1743
1744         pci_using_dac = 0;
1745         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1746         if (!err) {
1747                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1748                 if (!err)
1749                         pci_using_dac = 1;
1750         } else {
1751                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1752                 if (err) {
1753                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1754                         if (err) {
1755                                 dev_err(&pdev->dev, "No usable DMA "
1756                                         "configuration, aborting\n");
1757                                 goto err_dma;
1758                         }
1759                 }
1760         }
1761
1762         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1763                                            IORESOURCE_MEM),
1764                                            igb_driver_name);
1765         if (err)
1766                 goto err_pci_reg;
1767
1768         pci_enable_pcie_error_reporting(pdev);
1769
1770         pci_set_master(pdev);
1771         pci_save_state(pdev);
1772
1773         err = -ENOMEM;
1774         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1775                                    IGB_ABS_MAX_TX_QUEUES);
1776         if (!netdev)
1777                 goto err_alloc_etherdev;
1778
1779         SET_NETDEV_DEV(netdev, &pdev->dev);
1780
1781         pci_set_drvdata(pdev, netdev);
1782         adapter = netdev_priv(netdev);
1783         adapter->netdev = netdev;
1784         adapter->pdev = pdev;
1785         hw = &adapter->hw;
1786         hw->back = adapter;
1787         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1788
1789         mmio_start = pci_resource_start(pdev, 0);
1790         mmio_len = pci_resource_len(pdev, 0);
1791
1792         err = -EIO;
1793         hw->hw_addr = ioremap(mmio_start, mmio_len);
1794         if (!hw->hw_addr)
1795                 goto err_ioremap;
1796
1797         netdev->netdev_ops = &igb_netdev_ops;
1798         igb_set_ethtool_ops(netdev);
1799         netdev->watchdog_timeo = 5 * HZ;
1800
1801         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1802
1803         netdev->mem_start = mmio_start;
1804         netdev->mem_end = mmio_start + mmio_len;
1805
1806         /* PCI config space info */
1807         hw->vendor_id = pdev->vendor;
1808         hw->device_id = pdev->device;
1809         hw->revision_id = pdev->revision;
1810         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1811         hw->subsystem_device_id = pdev->subsystem_device;
1812
1813         /* Copy the default MAC, PHY and NVM function pointers */
1814         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1815         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1816         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1817         /* Initialize skew-specific constants */
1818         err = ei->get_invariants(hw);
1819         if (err)
1820                 goto err_sw_init;
1821
1822         /* setup the private structure */
1823         err = igb_sw_init(adapter);
1824         if (err)
1825                 goto err_sw_init;
1826
1827         igb_get_bus_info_pcie(hw);
1828
1829         hw->phy.autoneg_wait_to_complete = false;
1830
1831         /* Copper options */
1832         if (hw->phy.media_type == e1000_media_type_copper) {
1833                 hw->phy.mdix = AUTO_ALL_MODES;
1834                 hw->phy.disable_polarity_correction = false;
1835                 hw->phy.ms_type = e1000_ms_hw_default;
1836         }
1837
1838         if (igb_check_reset_block(hw))
1839                 dev_info(&pdev->dev,
1840                         "PHY reset is blocked due to SOL/IDER session.\n");
1841
1842         netdev->features = NETIF_F_SG |
1843                            NETIF_F_IP_CSUM |
1844                            NETIF_F_HW_VLAN_TX |
1845                            NETIF_F_HW_VLAN_RX |
1846                            NETIF_F_HW_VLAN_FILTER;
1847
1848         netdev->features |= NETIF_F_IPV6_CSUM;
1849         netdev->features |= NETIF_F_TSO;
1850         netdev->features |= NETIF_F_TSO6;
1851         netdev->features |= NETIF_F_GRO;
1852
1853         netdev->vlan_features |= NETIF_F_TSO;
1854         netdev->vlan_features |= NETIF_F_TSO6;
1855         netdev->vlan_features |= NETIF_F_IP_CSUM;
1856         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1857         netdev->vlan_features |= NETIF_F_SG;
1858
1859         if (pci_using_dac)
1860                 netdev->features |= NETIF_F_HIGHDMA;
1861
1862         if (hw->mac.type >= e1000_82576)
1863                 netdev->features |= NETIF_F_SCTP_CSUM;
1864
1865         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1866
1867         /* before reading the NVM, reset the controller to put the device in a
1868          * known good starting state */
1869         hw->mac.ops.reset_hw(hw);
1870
1871         /* make sure the NVM is good */
1872         if (igb_validate_nvm_checksum(hw) < 0) {
1873                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1874                 err = -EIO;
1875                 goto err_eeprom;
1876         }
1877
1878         /* copy the MAC address out of the NVM */
1879         if (hw->mac.ops.read_mac_addr(hw))
1880                 dev_err(&pdev->dev, "NVM Read Error\n");
1881
1882         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1883         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1884
1885         if (!is_valid_ether_addr(netdev->perm_addr)) {
1886                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1887                 err = -EIO;
1888                 goto err_eeprom;
1889         }
1890
1891         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1892                     (unsigned long) adapter);
1893         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1894                     (unsigned long) adapter);
1895
1896         INIT_WORK(&adapter->reset_task, igb_reset_task);
1897         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1898
1899         /* Initialize link properties that are user-changeable */
1900         adapter->fc_autoneg = true;
1901         hw->mac.autoneg = true;
1902         hw->phy.autoneg_advertised = 0x2f;
1903
1904         hw->fc.requested_mode = e1000_fc_default;
1905         hw->fc.current_mode = e1000_fc_default;
1906
1907         igb_validate_mdi_setting(hw);
1908
1909         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1910          * enable the ACPI Magic Packet filter
1911          */
1912
1913         if (hw->bus.func == 0)
1914                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1915         else if (hw->mac.type == e1000_82580)
1916                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1917                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1918                                  &eeprom_data);
1919         else if (hw->bus.func == 1)
1920                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1921
1922         if (eeprom_data & eeprom_apme_mask)
1923                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1924
1925         /* now that we have the eeprom settings, apply the special cases where
1926          * the eeprom may be wrong or the board simply won't support wake on
1927          * lan on a particular port */
1928         switch (pdev->device) {
1929         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1930                 adapter->eeprom_wol = 0;
1931                 break;
1932         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1933         case E1000_DEV_ID_82576_FIBER:
1934         case E1000_DEV_ID_82576_SERDES:
1935                 /* Wake events only supported on port A for dual fiber
1936                  * regardless of eeprom setting */
1937                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1938                         adapter->eeprom_wol = 0;
1939                 break;
1940         case E1000_DEV_ID_82576_QUAD_COPPER:
1941         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1942                 /* if quad port adapter, disable WoL on all but port A */
1943                 if (global_quad_port_a != 0)
1944                         adapter->eeprom_wol = 0;
1945                 else
1946                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1947                 /* Reset for multiple quad port adapters */
1948                 if (++global_quad_port_a == 4)
1949                         global_quad_port_a = 0;
1950                 break;
1951         }
1952
1953         /* initialize the wol settings based on the eeprom settings */
1954         adapter->wol = adapter->eeprom_wol;
1955         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1956
1957         /* reset the hardware with the new settings */
1958         igb_reset(adapter);
1959
1960         /* let the f/w know that the h/w is now under the control of the
1961          * driver. */
1962         igb_get_hw_control(adapter);
1963
1964         strcpy(netdev->name, "eth%d");
1965         err = register_netdev(netdev);
1966         if (err)
1967                 goto err_register;
1968
1969         /* carrier off reporting is important to ethtool even BEFORE open */
1970         netif_carrier_off(netdev);
1971
1972 #ifdef CONFIG_IGB_DCA
1973         if (dca_add_requester(&pdev->dev) == 0) {
1974                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1975                 dev_info(&pdev->dev, "DCA enabled\n");
1976                 igb_setup_dca(adapter);
1977         }
1978
1979 #endif
1980         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1981         /* print bus type/speed/width info */
1982         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1983                  netdev->name,
1984                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1985                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1986                                                             "unknown"),
1987                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1988                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1989                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1990                    "unknown"),
1991                  netdev->dev_addr);
1992
1993         igb_read_part_num(hw, &part_num);
1994         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1995                 (part_num >> 8), (part_num & 0xff));
1996
1997         dev_info(&pdev->dev,
1998                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1999                 adapter->msix_entries ? "MSI-X" :
2000                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2001                 adapter->num_rx_queues, adapter->num_tx_queues);
2002
2003         return 0;
2004
2005 err_register:
2006         igb_release_hw_control(adapter);
2007 err_eeprom:
2008         if (!igb_check_reset_block(hw))
2009                 igb_reset_phy(hw);
2010
2011         if (hw->flash_address)
2012                 iounmap(hw->flash_address);
2013 err_sw_init:
2014         igb_clear_interrupt_scheme(adapter);
2015         iounmap(hw->hw_addr);
2016 err_ioremap:
2017         free_netdev(netdev);
2018 err_alloc_etherdev:
2019         pci_release_selected_regions(pdev,
2020                                      pci_select_bars(pdev, IORESOURCE_MEM));
2021 err_pci_reg:
2022 err_dma:
2023         pci_disable_device(pdev);
2024         return err;
2025 }
2026
2027 /**
2028  * igb_remove - Device Removal Routine
2029  * @pdev: PCI device information struct
2030  *
2031  * igb_remove is called by the PCI subsystem to alert the driver
2032  * that it should release a PCI device.  The could be caused by a
2033  * Hot-Plug event, or because the driver is going to be removed from
2034  * memory.
2035  **/
2036 static void __devexit igb_remove(struct pci_dev *pdev)
2037 {
2038         struct net_device *netdev = pci_get_drvdata(pdev);
2039         struct igb_adapter *adapter = netdev_priv(netdev);
2040         struct e1000_hw *hw = &adapter->hw;
2041
2042         /* flush_scheduled work may reschedule our watchdog task, so
2043          * explicitly disable watchdog tasks from being rescheduled  */
2044         set_bit(__IGB_DOWN, &adapter->state);
2045         del_timer_sync(&adapter->watchdog_timer);
2046         del_timer_sync(&adapter->phy_info_timer);
2047
2048         flush_scheduled_work();
2049
2050 #ifdef CONFIG_IGB_DCA
2051         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2052                 dev_info(&pdev->dev, "DCA disabled\n");
2053                 dca_remove_requester(&pdev->dev);
2054                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2055                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2056         }
2057 #endif
2058
2059         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2060          * would have already happened in close and is redundant. */
2061         igb_release_hw_control(adapter);
2062
2063         unregister_netdev(netdev);
2064
2065         igb_clear_interrupt_scheme(adapter);
2066
2067 #ifdef CONFIG_PCI_IOV
2068         /* reclaim resources allocated to VFs */
2069         if (adapter->vf_data) {
2070                 /* disable iov and allow time for transactions to clear */
2071                 pci_disable_sriov(pdev);
2072                 msleep(500);
2073
2074                 kfree(adapter->vf_data);
2075                 adapter->vf_data = NULL;
2076                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2077                 msleep(100);
2078                 dev_info(&pdev->dev, "IOV Disabled\n");
2079         }
2080 #endif
2081
2082         iounmap(hw->hw_addr);
2083         if (hw->flash_address)
2084                 iounmap(hw->flash_address);
2085         pci_release_selected_regions(pdev,
2086                                      pci_select_bars(pdev, IORESOURCE_MEM));
2087
2088         free_netdev(netdev);
2089
2090         pci_disable_pcie_error_reporting(pdev);
2091
2092         pci_disable_device(pdev);
2093 }
2094
2095 /**
2096  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2097  * @adapter: board private structure to initialize
2098  *
2099  * This function initializes the vf specific data storage and then attempts to
2100  * allocate the VFs.  The reason for ordering it this way is because it is much
2101  * mor expensive time wise to disable SR-IOV than it is to allocate and free
2102  * the memory for the VFs.
2103  **/
2104 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2105 {
2106 #ifdef CONFIG_PCI_IOV
2107         struct pci_dev *pdev = adapter->pdev;
2108
2109         if (adapter->vfs_allocated_count) {
2110                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2111                                            sizeof(struct vf_data_storage),
2112                                            GFP_KERNEL);
2113                 /* if allocation failed then we do not support SR-IOV */
2114                 if (!adapter->vf_data) {
2115                         adapter->vfs_allocated_count = 0;
2116                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2117                                 "Data Storage\n");
2118                 }
2119         }
2120
2121         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2122                 kfree(adapter->vf_data);
2123                 adapter->vf_data = NULL;
2124 #endif /* CONFIG_PCI_IOV */
2125                 adapter->vfs_allocated_count = 0;
2126 #ifdef CONFIG_PCI_IOV
2127         } else {
2128                 unsigned char mac_addr[ETH_ALEN];
2129                 int i;
2130                 dev_info(&pdev->dev, "%d vfs allocated\n",
2131                          adapter->vfs_allocated_count);
2132                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2133                         random_ether_addr(mac_addr);
2134                         igb_set_vf_mac(adapter, i, mac_addr);
2135                 }
2136         }
2137 #endif /* CONFIG_PCI_IOV */
2138 }
2139
2140
2141 /**
2142  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2143  * @adapter: board private structure to initialize
2144  *
2145  * igb_init_hw_timer initializes the function pointer and values for the hw
2146  * timer found in hardware.
2147  **/
2148 static void igb_init_hw_timer(struct igb_adapter *adapter)
2149 {
2150         struct e1000_hw *hw = &adapter->hw;
2151
2152         switch (hw->mac.type) {
2153         case e1000_i350:
2154         case e1000_82580:
2155                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2156                 adapter->cycles.read = igb_read_clock;
2157                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2158                 adapter->cycles.mult = 1;
2159                 /*
2160                  * The 82580 timesync updates the system timer every 8ns by 8ns
2161                  * and the value cannot be shifted.  Instead we need to shift
2162                  * the registers to generate a 64bit timer value.  As a result
2163                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2164                  * 24 in order to generate a larger value for synchronization.
2165                  */
2166                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2167                 /* disable system timer temporarily by setting bit 31 */
2168                 wr32(E1000_TSAUXC, 0x80000000);
2169                 wrfl();
2170
2171                 /* Set registers so that rollover occurs soon to test this. */
2172                 wr32(E1000_SYSTIMR, 0x00000000);
2173                 wr32(E1000_SYSTIML, 0x80000000);
2174                 wr32(E1000_SYSTIMH, 0x000000FF);
2175                 wrfl();
2176
2177                 /* enable system timer by clearing bit 31 */
2178                 wr32(E1000_TSAUXC, 0x0);
2179                 wrfl();
2180
2181                 timecounter_init(&adapter->clock,
2182                                  &adapter->cycles,
2183                                  ktime_to_ns(ktime_get_real()));
2184                 /*
2185                  * Synchronize our NIC clock against system wall clock. NIC
2186                  * time stamp reading requires ~3us per sample, each sample
2187                  * was pretty stable even under load => only require 10
2188                  * samples for each offset comparison.
2189                  */
2190                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2191                 adapter->compare.source = &adapter->clock;
2192                 adapter->compare.target = ktime_get_real;
2193                 adapter->compare.num_samples = 10;
2194                 timecompare_update(&adapter->compare, 0);
2195                 break;
2196         case e1000_82576:
2197                 /*
2198                  * Initialize hardware timer: we keep it running just in case
2199                  * that some program needs it later on.
2200                  */
2201                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2202                 adapter->cycles.read = igb_read_clock;
2203                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2204                 adapter->cycles.mult = 1;
2205                 /**
2206                  * Scale the NIC clock cycle by a large factor so that
2207                  * relatively small clock corrections can be added or
2208                  * substracted at each clock tick. The drawbacks of a large
2209                  * factor are a) that the clock register overflows more quickly
2210                  * (not such a big deal) and b) that the increment per tick has
2211                  * to fit into 24 bits.  As a result we need to use a shift of
2212                  * 19 so we can fit a value of 16 into the TIMINCA register.
2213                  */
2214                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2215                 wr32(E1000_TIMINCA,
2216                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2217                                 (16 << IGB_82576_TSYNC_SHIFT));
2218
2219                 /* Set registers so that rollover occurs soon to test this. */
2220                 wr32(E1000_SYSTIML, 0x00000000);
2221                 wr32(E1000_SYSTIMH, 0xFF800000);
2222                 wrfl();
2223
2224                 timecounter_init(&adapter->clock,
2225                                  &adapter->cycles,
2226                                  ktime_to_ns(ktime_get_real()));
2227                 /*
2228                  * Synchronize our NIC clock against system wall clock. NIC
2229                  * time stamp reading requires ~3us per sample, each sample
2230                  * was pretty stable even under load => only require 10
2231                  * samples for each offset comparison.
2232                  */
2233                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2234                 adapter->compare.source = &adapter->clock;
2235                 adapter->compare.target = ktime_get_real;
2236                 adapter->compare.num_samples = 10;
2237                 timecompare_update(&adapter->compare, 0);
2238                 break;
2239         case e1000_82575:
2240                 /* 82575 does not support timesync */
2241         default:
2242                 break;
2243         }
2244
2245 }
2246
2247 /**
2248  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2249  * @adapter: board private structure to initialize
2250  *
2251  * igb_sw_init initializes the Adapter private data structure.
2252  * Fields are initialized based on PCI device information and
2253  * OS network device settings (MTU size).
2254  **/
2255 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2256 {
2257         struct e1000_hw *hw = &adapter->hw;
2258         struct net_device *netdev = adapter->netdev;
2259         struct pci_dev *pdev = adapter->pdev;
2260
2261         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2262
2263         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2264         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2265         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2266         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2267
2268         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2269         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2270
2271 #ifdef CONFIG_PCI_IOV
2272         if (hw->mac.type == e1000_82576)
2273                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2274
2275 #endif /* CONFIG_PCI_IOV */
2276         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2277
2278         /*
2279          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2280          * then we should combine the queues into a queue pair in order to
2281          * conserve interrupts due to limited supply
2282          */
2283         if ((adapter->rss_queues > 4) ||
2284             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2285                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2286
2287         /* This call may decrease the number of queues */
2288         if (igb_init_interrupt_scheme(adapter)) {
2289                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2290                 return -ENOMEM;
2291         }
2292
2293         igb_init_hw_timer(adapter);
2294         igb_probe_vfs(adapter);
2295
2296         /* Explicitly disable IRQ since the NIC can be in any state. */
2297         igb_irq_disable(adapter);
2298
2299         set_bit(__IGB_DOWN, &adapter->state);
2300         return 0;
2301 }
2302
2303 /**
2304  * igb_open - Called when a network interface is made active
2305  * @netdev: network interface device structure
2306  *
2307  * Returns 0 on success, negative value on failure
2308  *
2309  * The open entry point is called when a network interface is made
2310  * active by the system (IFF_UP).  At this point all resources needed
2311  * for transmit and receive operations are allocated, the interrupt
2312  * handler is registered with the OS, the watchdog timer is started,
2313  * and the stack is notified that the interface is ready.
2314  **/
2315 static int igb_open(struct net_device *netdev)
2316 {
2317         struct igb_adapter *adapter = netdev_priv(netdev);
2318         struct e1000_hw *hw = &adapter->hw;
2319         int err;
2320         int i;
2321
2322         /* disallow open during test */
2323         if (test_bit(__IGB_TESTING, &adapter->state))
2324                 return -EBUSY;
2325
2326         netif_carrier_off(netdev);
2327
2328         /* allocate transmit descriptors */
2329         err = igb_setup_all_tx_resources(adapter);
2330         if (err)
2331                 goto err_setup_tx;
2332
2333         /* allocate receive descriptors */
2334         err = igb_setup_all_rx_resources(adapter);
2335         if (err)
2336                 goto err_setup_rx;
2337
2338         igb_power_up_link(adapter);
2339
2340         /* before we allocate an interrupt, we must be ready to handle it.
2341          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2342          * as soon as we call pci_request_irq, so we have to setup our
2343          * clean_rx handler before we do so.  */
2344         igb_configure(adapter);
2345
2346         err = igb_request_irq(adapter);
2347         if (err)
2348                 goto err_req_irq;
2349
2350         /* From here on the code is the same as igb_up() */
2351         clear_bit(__IGB_DOWN, &adapter->state);
2352
2353         for (i = 0; i < adapter->num_q_vectors; i++) {
2354                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2355                 napi_enable(&q_vector->napi);
2356         }
2357
2358         /* Clear any pending interrupts. */
2359         rd32(E1000_ICR);
2360
2361         igb_irq_enable(adapter);
2362
2363         /* notify VFs that reset has been completed */
2364         if (adapter->vfs_allocated_count) {
2365                 u32 reg_data = rd32(E1000_CTRL_EXT);
2366                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2367                 wr32(E1000_CTRL_EXT, reg_data);
2368         }
2369
2370         netif_tx_start_all_queues(netdev);
2371
2372         /* start the watchdog. */
2373         hw->mac.get_link_status = 1;
2374         schedule_work(&adapter->watchdog_task);
2375
2376         return 0;
2377
2378 err_req_irq:
2379         igb_release_hw_control(adapter);
2380         igb_power_down_link(adapter);
2381         igb_free_all_rx_resources(adapter);
2382 err_setup_rx:
2383         igb_free_all_tx_resources(adapter);
2384 err_setup_tx:
2385         igb_reset(adapter);
2386
2387         return err;
2388 }
2389
2390 /**
2391  * igb_close - Disables a network interface
2392  * @netdev: network interface device structure
2393  *
2394  * Returns 0, this is not allowed to fail
2395  *
2396  * The close entry point is called when an interface is de-activated
2397  * by the OS.  The hardware is still under the driver's control, but
2398  * needs to be disabled.  A global MAC reset is issued to stop the
2399  * hardware, and all transmit and receive resources are freed.
2400  **/
2401 static int igb_close(struct net_device *netdev)
2402 {
2403         struct igb_adapter *adapter = netdev_priv(netdev);
2404
2405         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2406         igb_down(adapter);
2407
2408         igb_free_irq(adapter);
2409
2410         igb_free_all_tx_resources(adapter);
2411         igb_free_all_rx_resources(adapter);
2412
2413         return 0;
2414 }
2415
2416 /**
2417  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2418  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2419  *
2420  * Return 0 on success, negative on failure
2421  **/
2422 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2423 {
2424         struct device *dev = tx_ring->dev;
2425         int size;
2426
2427         size = sizeof(struct igb_buffer) * tx_ring->count;
2428         tx_ring->buffer_info = vmalloc(size);
2429         if (!tx_ring->buffer_info)
2430                 goto err;
2431         memset(tx_ring->buffer_info, 0, size);
2432
2433         /* round up to nearest 4K */
2434         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2435         tx_ring->size = ALIGN(tx_ring->size, 4096);
2436
2437         tx_ring->desc = dma_alloc_coherent(dev,
2438                                            tx_ring->size,
2439                                            &tx_ring->dma,
2440                                            GFP_KERNEL);
2441
2442         if (!tx_ring->desc)
2443                 goto err;
2444
2445         tx_ring->next_to_use = 0;
2446         tx_ring->next_to_clean = 0;
2447         return 0;
2448
2449 err:
2450         vfree(tx_ring->buffer_info);
2451         dev_err(dev,
2452                 "Unable to allocate memory for the transmit descriptor ring\n");
2453         return -ENOMEM;
2454 }
2455
2456 /**
2457  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2458  *                                (Descriptors) for all queues
2459  * @adapter: board private structure
2460  *
2461  * Return 0 on success, negative on failure
2462  **/
2463 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2464 {
2465         struct pci_dev *pdev = adapter->pdev;
2466         int i, err = 0;
2467
2468         for (i = 0; i < adapter->num_tx_queues; i++) {
2469                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2470                 if (err) {
2471                         dev_err(&pdev->dev,
2472                                 "Allocation for Tx Queue %u failed\n", i);
2473                         for (i--; i >= 0; i--)
2474                                 igb_free_tx_resources(adapter->tx_ring[i]);
2475                         break;
2476                 }
2477         }
2478
2479         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2480                 int r_idx = i % adapter->num_tx_queues;
2481                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2482         }
2483         return err;
2484 }
2485
2486 /**
2487  * igb_setup_tctl - configure the transmit control registers
2488  * @adapter: Board private structure
2489  **/
2490 void igb_setup_tctl(struct igb_adapter *adapter)
2491 {
2492         struct e1000_hw *hw = &adapter->hw;
2493         u32 tctl;
2494
2495         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2496         wr32(E1000_TXDCTL(0), 0);
2497
2498         /* Program the Transmit Control Register */
2499         tctl = rd32(E1000_TCTL);
2500         tctl &= ~E1000_TCTL_CT;
2501         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2502                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2503
2504         igb_config_collision_dist(hw);
2505
2506         /* Enable transmits */
2507         tctl |= E1000_TCTL_EN;
2508
2509         wr32(E1000_TCTL, tctl);
2510 }
2511
2512 /**
2513  * igb_configure_tx_ring - Configure transmit ring after Reset
2514  * @adapter: board private structure
2515  * @ring: tx ring to configure
2516  *
2517  * Configure a transmit ring after a reset.
2518  **/
2519 void igb_configure_tx_ring(struct igb_adapter *adapter,
2520                            struct igb_ring *ring)
2521 {
2522         struct e1000_hw *hw = &adapter->hw;
2523         u32 txdctl;
2524         u64 tdba = ring->dma;
2525         int reg_idx = ring->reg_idx;
2526
2527         /* disable the queue */
2528         txdctl = rd32(E1000_TXDCTL(reg_idx));
2529         wr32(E1000_TXDCTL(reg_idx),
2530                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2531         wrfl();
2532         mdelay(10);
2533
2534         wr32(E1000_TDLEN(reg_idx),
2535                         ring->count * sizeof(union e1000_adv_tx_desc));
2536         wr32(E1000_TDBAL(reg_idx),
2537                         tdba & 0x00000000ffffffffULL);
2538         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2539
2540         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2541         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2542         writel(0, ring->head);
2543         writel(0, ring->tail);
2544
2545         txdctl |= IGB_TX_PTHRESH;
2546         txdctl |= IGB_TX_HTHRESH << 8;
2547         txdctl |= IGB_TX_WTHRESH << 16;
2548
2549         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2550         wr32(E1000_TXDCTL(reg_idx), txdctl);
2551 }
2552
2553 /**
2554  * igb_configure_tx - Configure transmit Unit after Reset
2555  * @adapter: board private structure
2556  *
2557  * Configure the Tx unit of the MAC after a reset.
2558  **/
2559 static void igb_configure_tx(struct igb_adapter *adapter)
2560 {
2561         int i;
2562
2563         for (i = 0; i < adapter->num_tx_queues; i++)
2564                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2565 }
2566
2567 /**
2568  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2569  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2570  *
2571  * Returns 0 on success, negative on failure
2572  **/
2573 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2574 {
2575         struct device *dev = rx_ring->dev;
2576         int size, desc_len;
2577
2578         size = sizeof(struct igb_buffer) * rx_ring->count;
2579         rx_ring->buffer_info = vmalloc(size);
2580         if (!rx_ring->buffer_info)
2581                 goto err;
2582         memset(rx_ring->buffer_info, 0, size);
2583
2584         desc_len = sizeof(union e1000_adv_rx_desc);
2585
2586         /* Round up to nearest 4K */
2587         rx_ring->size = rx_ring->count * desc_len;
2588         rx_ring->size = ALIGN(rx_ring->size, 4096);
2589
2590         rx_ring->desc = dma_alloc_coherent(dev,
2591                                            rx_ring->size,
2592                                            &rx_ring->dma,
2593                                            GFP_KERNEL);
2594
2595         if (!rx_ring->desc)
2596                 goto err;
2597
2598         rx_ring->next_to_clean = 0;
2599         rx_ring->next_to_use = 0;
2600
2601         return 0;
2602
2603 err:
2604         vfree(rx_ring->buffer_info);
2605         rx_ring->buffer_info = NULL;
2606         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2607                 " ring\n");
2608         return -ENOMEM;
2609 }
2610
2611 /**
2612  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2613  *                                (Descriptors) for all queues
2614  * @adapter: board private structure
2615  *
2616  * Return 0 on success, negative on failure
2617  **/
2618 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2619 {
2620         struct pci_dev *pdev = adapter->pdev;
2621         int i, err = 0;
2622
2623         for (i = 0; i < adapter->num_rx_queues; i++) {
2624                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2625                 if (err) {
2626                         dev_err(&pdev->dev,
2627                                 "Allocation for Rx Queue %u failed\n", i);
2628                         for (i--; i >= 0; i--)
2629                                 igb_free_rx_resources(adapter->rx_ring[i]);
2630                         break;
2631                 }
2632         }
2633
2634         return err;
2635 }
2636
2637 /**
2638  * igb_setup_mrqc - configure the multiple receive queue control registers
2639  * @adapter: Board private structure
2640  **/
2641 static void igb_setup_mrqc(struct igb_adapter *adapter)
2642 {
2643         struct e1000_hw *hw = &adapter->hw;
2644         u32 mrqc, rxcsum;
2645         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2646         union e1000_reta {
2647                 u32 dword;
2648                 u8  bytes[4];
2649         } reta;
2650         static const u8 rsshash[40] = {
2651                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2652                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2653                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2654                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2655
2656         /* Fill out hash function seeds */
2657         for (j = 0; j < 10; j++) {
2658                 u32 rsskey = rsshash[(j * 4)];
2659                 rsskey |= rsshash[(j * 4) + 1] << 8;
2660                 rsskey |= rsshash[(j * 4) + 2] << 16;
2661                 rsskey |= rsshash[(j * 4) + 3] << 24;
2662                 array_wr32(E1000_RSSRK(0), j, rsskey);
2663         }
2664
2665         num_rx_queues = adapter->rss_queues;
2666
2667         if (adapter->vfs_allocated_count) {
2668                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2669                 switch (hw->mac.type) {
2670                 case e1000_i350:
2671                 case e1000_82580:
2672                         num_rx_queues = 1;
2673                         shift = 0;
2674                         break;
2675                 case e1000_82576:
2676                         shift = 3;
2677                         num_rx_queues = 2;
2678                         break;
2679                 case e1000_82575:
2680                         shift = 2;
2681                         shift2 = 6;
2682                 default:
2683                         break;
2684                 }
2685         } else {
2686                 if (hw->mac.type == e1000_82575)
2687                         shift = 6;
2688         }
2689
2690         for (j = 0; j < (32 * 4); j++) {
2691                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2692                 if (shift2)
2693                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2694                 if ((j & 3) == 3)
2695                         wr32(E1000_RETA(j >> 2), reta.dword);
2696         }
2697
2698         /*
2699          * Disable raw packet checksumming so that RSS hash is placed in
2700          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2701          * offloads as they are enabled by default
2702          */
2703         rxcsum = rd32(E1000_RXCSUM);
2704         rxcsum |= E1000_RXCSUM_PCSD;
2705
2706         if (adapter->hw.mac.type >= e1000_82576)
2707                 /* Enable Receive Checksum Offload for SCTP */
2708                 rxcsum |= E1000_RXCSUM_CRCOFL;
2709
2710         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2711         wr32(E1000_RXCSUM, rxcsum);
2712
2713         /* If VMDq is enabled then we set the appropriate mode for that, else
2714          * we default to RSS so that an RSS hash is calculated per packet even
2715          * if we are only using one queue */
2716         if (adapter->vfs_allocated_count) {
2717                 if (hw->mac.type > e1000_82575) {
2718                         /* Set the default pool for the PF's first queue */
2719                         u32 vtctl = rd32(E1000_VT_CTL);
2720                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2721                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2722                         vtctl |= adapter->vfs_allocated_count <<
2723                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2724                         wr32(E1000_VT_CTL, vtctl);
2725                 }
2726                 if (adapter->rss_queues > 1)
2727                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2728                 else
2729                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2730         } else {
2731                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2732         }
2733         igb_vmm_control(adapter);
2734
2735         /*
2736          * Generate RSS hash based on TCP port numbers and/or
2737          * IPv4/v6 src and dst addresses since UDP cannot be
2738          * hashed reliably due to IP fragmentation
2739          */
2740         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2741                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2742                 E1000_MRQC_RSS_FIELD_IPV6 |
2743                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2744                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2745
2746         wr32(E1000_MRQC, mrqc);
2747 }
2748
2749 /**
2750  * igb_setup_rctl - configure the receive control registers
2751  * @adapter: Board private structure
2752  **/
2753 void igb_setup_rctl(struct igb_adapter *adapter)
2754 {
2755         struct e1000_hw *hw = &adapter->hw;
2756         u32 rctl;
2757
2758         rctl = rd32(E1000_RCTL);
2759
2760         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2761         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2762
2763         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2764                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2765
2766         /*
2767          * enable stripping of CRC. It's unlikely this will break BMC
2768          * redirection as it did with e1000. Newer features require
2769          * that the HW strips the CRC.
2770          */
2771         rctl |= E1000_RCTL_SECRC;
2772
2773         /* disable store bad packets and clear size bits. */
2774         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2775
2776         /* enable LPE to prevent packets larger than max_frame_size */
2777         rctl |= E1000_RCTL_LPE;
2778
2779         /* disable queue 0 to prevent tail write w/o re-config */
2780         wr32(E1000_RXDCTL(0), 0);
2781
2782         /* Attention!!!  For SR-IOV PF driver operations you must enable
2783          * queue drop for all VF and PF queues to prevent head of line blocking
2784          * if an un-trusted VF does not provide descriptors to hardware.
2785          */
2786         if (adapter->vfs_allocated_count) {
2787                 /* set all queue drop enable bits */
2788                 wr32(E1000_QDE, ALL_QUEUES);
2789         }
2790
2791         wr32(E1000_RCTL, rctl);
2792 }
2793
2794 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2795                                    int vfn)
2796 {
2797         struct e1000_hw *hw = &adapter->hw;
2798         u32 vmolr;
2799
2800         /* if it isn't the PF check to see if VFs are enabled and
2801          * increase the size to support vlan tags */
2802         if (vfn < adapter->vfs_allocated_count &&
2803             adapter->vf_data[vfn].vlans_enabled)
2804                 size += VLAN_TAG_SIZE;
2805
2806         vmolr = rd32(E1000_VMOLR(vfn));
2807         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2808         vmolr |= size | E1000_VMOLR_LPE;
2809         wr32(E1000_VMOLR(vfn), vmolr);
2810
2811         return 0;
2812 }
2813
2814 /**
2815  * igb_rlpml_set - set maximum receive packet size
2816  * @adapter: board private structure
2817  *
2818  * Configure maximum receivable packet size.
2819  **/
2820 static void igb_rlpml_set(struct igb_adapter *adapter)
2821 {
2822         u32 max_frame_size = adapter->max_frame_size;
2823         struct e1000_hw *hw = &adapter->hw;
2824         u16 pf_id = adapter->vfs_allocated_count;
2825
2826         if (adapter->vlgrp)
2827                 max_frame_size += VLAN_TAG_SIZE;
2828
2829         /* if vfs are enabled we set RLPML to the largest possible request
2830          * size and set the VMOLR RLPML to the size we need */
2831         if (pf_id) {
2832                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2833                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2834         }
2835
2836         wr32(E1000_RLPML, max_frame_size);
2837 }
2838
2839 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2840                                  int vfn, bool aupe)
2841 {
2842         struct e1000_hw *hw = &adapter->hw;
2843         u32 vmolr;
2844
2845         /*
2846          * This register exists only on 82576 and newer so if we are older then
2847          * we should exit and do nothing
2848          */
2849         if (hw->mac.type < e1000_82576)
2850                 return;
2851
2852         vmolr = rd32(E1000_VMOLR(vfn));
2853         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2854         if (aupe)
2855                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2856         else
2857                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2858
2859         /* clear all bits that might not be set */
2860         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2861
2862         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2863                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2864         /*
2865          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2866          * multicast packets
2867          */
2868         if (vfn <= adapter->vfs_allocated_count)
2869                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2870
2871         wr32(E1000_VMOLR(vfn), vmolr);
2872 }
2873
2874 /**
2875  * igb_configure_rx_ring - Configure a receive ring after Reset
2876  * @adapter: board private structure
2877  * @ring: receive ring to be configured
2878  *
2879  * Configure the Rx unit of the MAC after a reset.
2880  **/
2881 void igb_configure_rx_ring(struct igb_adapter *adapter,
2882                            struct igb_ring *ring)
2883 {
2884         struct e1000_hw *hw = &adapter->hw;
2885         u64 rdba = ring->dma;
2886         int reg_idx = ring->reg_idx;
2887         u32 srrctl, rxdctl;
2888
2889         /* disable the queue */
2890         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2891         wr32(E1000_RXDCTL(reg_idx),
2892                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2893
2894         /* Set DMA base address registers */
2895         wr32(E1000_RDBAL(reg_idx),
2896              rdba & 0x00000000ffffffffULL);
2897         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2898         wr32(E1000_RDLEN(reg_idx),
2899                        ring->count * sizeof(union e1000_adv_rx_desc));
2900
2901         /* initialize head and tail */
2902         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2903         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2904         writel(0, ring->head);
2905         writel(0, ring->tail);
2906
2907         /* set descriptor configuration */
2908         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2909                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2910                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2911 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2912                 srrctl |= IGB_RXBUFFER_16384 >>
2913                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2914 #else
2915                 srrctl |= (PAGE_SIZE / 2) >>
2916                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2917 #endif
2918                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2919         } else {
2920                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2921                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2922                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2923         }
2924         if (hw->mac.type == e1000_82580)
2925                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2926         /* Only set Drop Enable if we are supporting multiple queues */
2927         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2928                 srrctl |= E1000_SRRCTL_DROP_EN;
2929
2930         wr32(E1000_SRRCTL(reg_idx), srrctl);
2931
2932         /* set filtering for VMDQ pools */
2933         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2934
2935         /* enable receive descriptor fetching */
2936         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2937         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2938         rxdctl &= 0xFFF00000;
2939         rxdctl |= IGB_RX_PTHRESH;
2940         rxdctl |= IGB_RX_HTHRESH << 8;
2941         rxdctl |= IGB_RX_WTHRESH << 16;
2942         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2943 }
2944
2945 /**
2946  * igb_configure_rx - Configure receive Unit after Reset
2947  * @adapter: board private structure
2948  *
2949  * Configure the Rx unit of the MAC after a reset.
2950  **/
2951 static void igb_configure_rx(struct igb_adapter *adapter)
2952 {
2953         int i;
2954
2955         /* set UTA to appropriate mode */
2956         igb_set_uta(adapter);
2957
2958         /* set the correct pool for the PF default MAC address in entry 0 */
2959         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2960                          adapter->vfs_allocated_count);
2961
2962         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2963          * the Base and Length of the Rx Descriptor Ring */
2964         for (i = 0; i < adapter->num_rx_queues; i++)
2965                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2966 }
2967
2968 /**
2969  * igb_free_tx_resources - Free Tx Resources per Queue
2970  * @tx_ring: Tx descriptor ring for a specific queue
2971  *
2972  * Free all transmit software resources
2973  **/
2974 void igb_free_tx_resources(struct igb_ring *tx_ring)
2975 {
2976         igb_clean_tx_ring(tx_ring);
2977
2978         vfree(tx_ring->buffer_info);
2979         tx_ring->buffer_info = NULL;
2980
2981         /* if not set, then don't free */
2982         if (!tx_ring->desc)
2983                 return;
2984
2985         dma_free_coherent(tx_ring->dev, tx_ring->size,
2986                           tx_ring->desc, tx_ring->dma);
2987
2988         tx_ring->desc = NULL;
2989 }
2990
2991 /**
2992  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2993  * @adapter: board private structure
2994  *
2995  * Free all transmit software resources
2996  **/
2997 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2998 {
2999         int i;
3000
3001         for (i = 0; i < adapter->num_tx_queues; i++)
3002                 igb_free_tx_resources(adapter->tx_ring[i]);
3003 }
3004
3005 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3006                                     struct igb_buffer *buffer_info)
3007 {
3008         if (buffer_info->dma) {
3009                 if (buffer_info->mapped_as_page)
3010                         dma_unmap_page(tx_ring->dev,
3011                                         buffer_info->dma,
3012                                         buffer_info->length,
3013                                         DMA_TO_DEVICE);
3014                 else
3015                         dma_unmap_single(tx_ring->dev,
3016                                         buffer_info->dma,
3017                                         buffer_info->length,
3018                                         DMA_TO_DEVICE);
3019                 buffer_info->dma = 0;
3020         }
3021         if (buffer_info->skb) {
3022                 dev_kfree_skb_any(buffer_info->skb);
3023                 buffer_info->skb = NULL;
3024         }
3025         buffer_info->time_stamp = 0;
3026         buffer_info->length = 0;
3027         buffer_info->next_to_watch = 0;
3028         buffer_info->mapped_as_page = false;
3029 }
3030
3031 /**
3032  * igb_clean_tx_ring - Free Tx Buffers
3033  * @tx_ring: ring to be cleaned
3034  **/
3035 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3036 {
3037         struct igb_buffer *buffer_info;
3038         unsigned long size;
3039         unsigned int i;
3040
3041         if (!tx_ring->buffer_info)
3042                 return;
3043         /* Free all the Tx ring sk_buffs */
3044
3045         for (i = 0; i < tx_ring->count; i++) {
3046                 buffer_info = &tx_ring->buffer_info[i];
3047                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3048         }
3049
3050         size = sizeof(struct igb_buffer) * tx_ring->count;
3051         memset(tx_ring->buffer_info, 0, size);
3052
3053         /* Zero out the descriptor ring */
3054         memset(tx_ring->desc, 0, tx_ring->size);
3055
3056         tx_ring->next_to_use = 0;
3057         tx_ring->next_to_clean = 0;
3058 }
3059
3060 /**
3061  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3062  * @adapter: board private structure
3063  **/
3064 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3065 {
3066         int i;
3067
3068         for (i = 0; i < adapter->num_tx_queues; i++)
3069                 igb_clean_tx_ring(adapter->tx_ring[i]);
3070 }
3071
3072 /**
3073  * igb_free_rx_resources - Free Rx Resources
3074  * @rx_ring: ring to clean the resources from
3075  *
3076  * Free all receive software resources
3077  **/
3078 void igb_free_rx_resources(struct igb_ring *rx_ring)
3079 {
3080         igb_clean_rx_ring(rx_ring);
3081
3082         vfree(rx_ring->buffer_info);
3083         rx_ring->buffer_info = NULL;
3084
3085         /* if not set, then don't free */
3086         if (!rx_ring->desc)
3087                 return;
3088
3089         dma_free_coherent(rx_ring->dev, rx_ring->size,
3090                           rx_ring->desc, rx_ring->dma);
3091
3092         rx_ring->desc = NULL;
3093 }
3094
3095 /**
3096  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3097  * @adapter: board private structure
3098  *
3099  * Free all receive software resources
3100  **/
3101 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3102 {
3103         int i;
3104
3105         for (i = 0; i < adapter->num_rx_queues; i++)
3106                 igb_free_rx_resources(adapter->rx_ring[i]);
3107 }
3108
3109 /**
3110  * igb_clean_rx_ring - Free Rx Buffers per Queue
3111  * @rx_ring: ring to free buffers from
3112  **/
3113 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3114 {
3115         struct igb_buffer *buffer_info;
3116         unsigned long size;
3117         unsigned int i;
3118
3119         if (!rx_ring->buffer_info)
3120                 return;
3121
3122         /* Free all the Rx ring sk_buffs */
3123         for (i = 0; i < rx_ring->count; i++) {
3124                 buffer_info = &rx_ring->buffer_info[i];
3125                 if (buffer_info->dma) {
3126                         dma_unmap_single(rx_ring->dev,
3127                                          buffer_info->dma,
3128                                          rx_ring->rx_buffer_len,
3129                                          DMA_FROM_DEVICE);
3130                         buffer_info->dma = 0;
3131                 }
3132
3133                 if (buffer_info->skb) {
3134                         dev_kfree_skb(buffer_info->skb);
3135                         buffer_info->skb = NULL;
3136                 }
3137                 if (buffer_info->page_dma) {
3138                         dma_unmap_page(rx_ring->dev,
3139                                        buffer_info->page_dma,
3140                                        PAGE_SIZE / 2,
3141                                        DMA_FROM_DEVICE);
3142                         buffer_info->page_dma = 0;
3143                 }
3144                 if (buffer_info->page) {
3145                         put_page(buffer_info->page);
3146                         buffer_info->page = NULL;
3147                         buffer_info->page_offset = 0;
3148                 }
3149         }
3150
3151         size = sizeof(struct igb_buffer) * rx_ring->count;
3152         memset(rx_ring->buffer_info, 0, size);
3153
3154         /* Zero out the descriptor ring */
3155         memset(rx_ring->desc, 0, rx_ring->size);
3156
3157         rx_ring->next_to_clean = 0;
3158         rx_ring->next_to_use = 0;
3159 }
3160
3161 /**
3162  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3163  * @adapter: board private structure
3164  **/
3165 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3166 {
3167         int i;
3168
3169         for (i = 0; i < adapter->num_rx_queues; i++)
3170                 igb_clean_rx_ring(adapter->rx_ring[i]);
3171 }
3172
3173 /**
3174  * igb_set_mac - Change the Ethernet Address of the NIC
3175  * @netdev: network interface device structure
3176  * @p: pointer to an address structure
3177  *
3178  * Returns 0 on success, negative on failure
3179  **/
3180 static int igb_set_mac(struct net_device *netdev, void *p)
3181 {
3182         struct igb_adapter *adapter = netdev_priv(netdev);
3183         struct e1000_hw *hw = &adapter->hw;
3184         struct sockaddr *addr = p;
3185
3186         if (!is_valid_ether_addr(addr->sa_data))
3187                 return -EADDRNOTAVAIL;
3188
3189         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3190         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3191
3192         /* set the correct pool for the new PF MAC address in entry 0 */
3193         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3194                          adapter->vfs_allocated_count);
3195
3196         return 0;
3197 }
3198
3199 /**
3200  * igb_write_mc_addr_list - write multicast addresses to MTA
3201  * @netdev: network interface device structure
3202  *
3203  * Writes multicast address list to the MTA hash table.
3204  * Returns: -ENOMEM on failure
3205  *                0 on no addresses written
3206  *                X on writing X addresses to MTA
3207  **/
3208 static int igb_write_mc_addr_list(struct net_device *netdev)
3209 {
3210         struct igb_adapter *adapter = netdev_priv(netdev);
3211         struct e1000_hw *hw = &adapter->hw;
3212         struct netdev_hw_addr *ha;
3213         u8  *mta_list;
3214         int i;
3215
3216         if (netdev_mc_empty(netdev)) {
3217                 /* nothing to program, so clear mc list */
3218                 igb_update_mc_addr_list(hw, NULL, 0);
3219                 igb_restore_vf_multicasts(adapter);
3220                 return 0;
3221         }
3222
3223         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
3224         if (!mta_list)
3225                 return -ENOMEM;
3226
3227         /* The shared function expects a packed array of only addresses. */
3228         i = 0;
3229         netdev_for_each_mc_addr(ha, netdev)
3230                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3231
3232         igb_update_mc_addr_list(hw, mta_list, i);
3233         kfree(mta_list);
3234
3235         return netdev_mc_count(netdev);
3236 }
3237
3238 /**
3239  * igb_write_uc_addr_list - write unicast addresses to RAR table
3240  * @netdev: network interface device structure
3241  *
3242  * Writes unicast address list to the RAR table.
3243  * Returns: -ENOMEM on failure/insufficient address space
3244  *                0 on no addresses written
3245  *                X on writing X addresses to the RAR table
3246  **/
3247 static int igb_write_uc_addr_list(struct net_device *netdev)
3248 {
3249         struct igb_adapter *adapter = netdev_priv(netdev);
3250         struct e1000_hw *hw = &adapter->hw;
3251         unsigned int vfn = adapter->vfs_allocated_count;
3252         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
3253         int count = 0;
3254
3255         /* return ENOMEM indicating insufficient memory for addresses */
3256         if (netdev_uc_count(netdev) > rar_entries)
3257                 return -ENOMEM;
3258
3259         if (!netdev_uc_empty(netdev) && rar_entries) {
3260                 struct netdev_hw_addr *ha;
3261
3262                 netdev_for_each_uc_addr(ha, netdev) {
3263                         if (!rar_entries)
3264                                 break;
3265                         igb_rar_set_qsel(adapter, ha->addr,
3266                                          rar_entries--,
3267                                          vfn);
3268                         count++;
3269                 }
3270         }
3271         /* write the addresses in reverse order to avoid write combining */
3272         for (; rar_entries > 0 ; rar_entries--) {
3273                 wr32(E1000_RAH(rar_entries), 0);
3274                 wr32(E1000_RAL(rar_entries), 0);
3275         }
3276         wrfl();
3277
3278         return count;
3279 }
3280
3281 /**
3282  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3283  * @netdev: network interface device structure
3284  *
3285  * The set_rx_mode entry point is called whenever the unicast or multicast
3286  * address lists or the network interface flags are updated.  This routine is
3287  * responsible for configuring the hardware for proper unicast, multicast,
3288  * promiscuous mode, and all-multi behavior.
3289  **/
3290 static void igb_set_rx_mode(struct net_device *netdev)
3291 {
3292         struct igb_adapter *adapter = netdev_priv(netdev);
3293         struct e1000_hw *hw = &adapter->hw;
3294         unsigned int vfn = adapter->vfs_allocated_count;
3295         u32 rctl, vmolr = 0;
3296         int count;
3297
3298         /* Check for Promiscuous and All Multicast modes */
3299         rctl = rd32(E1000_RCTL);
3300
3301         /* clear the effected bits */
3302         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3303
3304         if (netdev->flags & IFF_PROMISC) {
3305                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3306                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3307         } else {
3308                 if (netdev->flags & IFF_ALLMULTI) {
3309                         rctl |= E1000_RCTL_MPE;
3310                         vmolr |= E1000_VMOLR_MPME;
3311                 } else {
3312                         /*
3313                          * Write addresses to the MTA, if the attempt fails
3314                          * then we should just turn on promiscous mode so
3315                          * that we can at least receive multicast traffic
3316                          */
3317                         count = igb_write_mc_addr_list(netdev);
3318                         if (count < 0) {
3319                                 rctl |= E1000_RCTL_MPE;
3320                                 vmolr |= E1000_VMOLR_MPME;
3321                         } else if (count) {
3322                                 vmolr |= E1000_VMOLR_ROMPE;
3323                         }
3324                 }
3325                 /*
3326                  * Write addresses to available RAR registers, if there is not
3327                  * sufficient space to store all the addresses then enable
3328                  * unicast promiscous mode
3329                  */
3330                 count = igb_write_uc_addr_list(netdev);
3331                 if (count < 0) {
3332                         rctl |= E1000_RCTL_UPE;
3333                         vmolr |= E1000_VMOLR_ROPE;
3334                 }
3335                 rctl |= E1000_RCTL_VFE;
3336         }
3337         wr32(E1000_RCTL, rctl);
3338
3339         /*
3340          * In order to support SR-IOV and eventually VMDq it is necessary to set
3341          * the VMOLR to enable the appropriate modes.  Without this workaround
3342          * we will have issues with VLAN tag stripping not being done for frames
3343          * that are only arriving because we are the default pool
3344          */
3345         if (hw->mac.type < e1000_82576)
3346                 return;
3347
3348         vmolr |= rd32(E1000_VMOLR(vfn)) &
3349                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3350         wr32(E1000_VMOLR(vfn), vmolr);
3351         igb_restore_vf_multicasts(adapter);
3352 }
3353
3354 /* Need to wait a few seconds after link up to get diagnostic information from
3355  * the phy */
3356 static void igb_update_phy_info(unsigned long data)
3357 {
3358         struct igb_adapter *adapter = (struct igb_adapter *) data;
3359         igb_get_phy_info(&adapter->hw);
3360 }
3361
3362 /**
3363  * igb_has_link - check shared code for link and determine up/down
3364  * @adapter: pointer to driver private info
3365  **/
3366 bool igb_has_link(struct igb_adapter *adapter)
3367 {
3368         struct e1000_hw *hw = &adapter->hw;
3369         bool link_active = false;
3370         s32 ret_val = 0;
3371
3372         /* get_link_status is set on LSC (link status) interrupt or
3373          * rx sequence error interrupt.  get_link_status will stay
3374          * false until the e1000_check_for_link establishes link
3375          * for copper adapters ONLY
3376          */
3377         switch (hw->phy.media_type) {
3378         case e1000_media_type_copper:
3379                 if (hw->mac.get_link_status) {
3380                         ret_val = hw->mac.ops.check_for_link(hw);
3381                         link_active = !hw->mac.get_link_status;
3382                 } else {
3383                         link_active = true;
3384                 }
3385                 break;
3386         case e1000_media_type_internal_serdes:
3387                 ret_val = hw->mac.ops.check_for_link(hw);
3388                 link_active = hw->mac.serdes_has_link;
3389                 break;
3390         default:
3391         case e1000_media_type_unknown:
3392                 break;
3393         }
3394
3395         return link_active;
3396 }
3397
3398 /**
3399  * igb_watchdog - Timer Call-back
3400  * @data: pointer to adapter cast into an unsigned long
3401  **/
3402 static void igb_watchdog(unsigned long data)
3403 {
3404         struct igb_adapter *adapter = (struct igb_adapter *)data;
3405         /* Do the rest outside of interrupt context */
3406         schedule_work(&adapter->watchdog_task);
3407 }
3408
3409 static void igb_watchdog_task(struct work_struct *work)
3410 {
3411         struct igb_adapter *adapter = container_of(work,
3412                                                    struct igb_adapter,
3413                                                    watchdog_task);
3414         struct e1000_hw *hw = &adapter->hw;
3415         struct net_device *netdev = adapter->netdev;
3416         u32 link;
3417         int i;
3418
3419         link = igb_has_link(adapter);
3420         if (link) {
3421                 if (!netif_carrier_ok(netdev)) {
3422                         u32 ctrl;
3423                         hw->mac.ops.get_speed_and_duplex(hw,
3424                                                          &adapter->link_speed,
3425                                                          &adapter->link_duplex);
3426
3427                         ctrl = rd32(E1000_CTRL);
3428                         /* Links status message must follow this format */
3429                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3430                                  "Flow Control: %s\n",
3431                                netdev->name,
3432                                adapter->link_speed,
3433                                adapter->link_duplex == FULL_DUPLEX ?
3434                                  "Full Duplex" : "Half Duplex",
3435                                ((ctrl & E1000_CTRL_TFCE) &&
3436                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3437                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3438                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3439
3440                         /* adjust timeout factor according to speed/duplex */
3441                         adapter->tx_timeout_factor = 1;
3442                         switch (adapter->link_speed) {
3443                         case SPEED_10:
3444                                 adapter->tx_timeout_factor = 14;
3445                                 break;
3446                         case SPEED_100:
3447                                 /* maybe add some timeout factor ? */
3448                                 break;
3449                         }
3450
3451                         netif_carrier_on(netdev);
3452
3453                         igb_ping_all_vfs(adapter);
3454
3455                         /* link state has changed, schedule phy info update */
3456                         if (!test_bit(__IGB_DOWN, &adapter->state))
3457                                 mod_timer(&adapter->phy_info_timer,
3458                                           round_jiffies(jiffies + 2 * HZ));
3459                 }
3460         } else {
3461                 if (netif_carrier_ok(netdev)) {
3462                         adapter->link_speed = 0;
3463                         adapter->link_duplex = 0;
3464                         /* Links status message must follow this format */
3465                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3466                                netdev->name);
3467                         netif_carrier_off(netdev);
3468
3469                         igb_ping_all_vfs(adapter);
3470
3471                         /* link state has changed, schedule phy info update */
3472                         if (!test_bit(__IGB_DOWN, &adapter->state))
3473                                 mod_timer(&adapter->phy_info_timer,
3474                                           round_jiffies(jiffies + 2 * HZ));
3475                 }
3476         }
3477
3478         igb_update_stats(adapter);
3479
3480         for (i = 0; i < adapter->num_tx_queues; i++) {
3481                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3482                 if (!netif_carrier_ok(netdev)) {
3483                         /* We've lost link, so the controller stops DMA,
3484                          * but we've got queued Tx work that's never going
3485                          * to get done, so reset controller to flush Tx.
3486                          * (Do the reset outside of interrupt context). */
3487                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3488                                 adapter->tx_timeout_count++;
3489                                 schedule_work(&adapter->reset_task);
3490                                 /* return immediately since reset is imminent */
3491                                 return;
3492                         }
3493                 }
3494
3495                 /* Force detection of hung controller every watchdog period */
3496                 tx_ring->detect_tx_hung = true;
3497         }
3498
3499         /* Cause software interrupt to ensure rx ring is cleaned */
3500         if (adapter->msix_entries) {
3501                 u32 eics = 0;
3502                 for (i = 0; i < adapter->num_q_vectors; i++) {
3503                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3504                         eics |= q_vector->eims_value;
3505                 }
3506                 wr32(E1000_EICS, eics);
3507         } else {
3508                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3509         }
3510
3511         /* Reset the timer */
3512         if (!test_bit(__IGB_DOWN, &adapter->state))
3513                 mod_timer(&adapter->watchdog_timer,
3514                           round_jiffies(jiffies + 2 * HZ));
3515 }
3516
/*
 * Latency classes used by the dynamic interrupt throttling code; the
 * interrupt rates quoted are the ones igb_set_itr() selects per class.
 */
enum latency_range {
        lowest_latency = 0,     /* ~70,000 ints/sec */
        low_latency,            /* 1: ~20,000 ints/sec */
        bulk_latency,           /* 2: ~4,000 ints/sec */
        latency_invalid = 255   /* presumably an out-of-range marker */
};
3523
3524 /**
3525  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3526  *
3527  *      Stores a new ITR value based on strictly on packet size.  This
3528  *      algorithm is less sophisticated than that used in igb_update_itr,
3529  *      due to the difficulty of synchronizing statistics across multiple
3530  *      receive rings.  The divisors and thresholds used by this fuction
3531  *      were determined based on theoretical maximum wire speed and testing
3532  *      data, in order to minimize response time while increasing bulk
3533  *      throughput.
3534  *      This functionality is controlled by the InterruptThrottleRate module
3535  *      parameter (see igb_param.c)
3536  *      NOTE:  This function is called only when operating in a multiqueue
3537  *             receive environment.
3538  * @q_vector: pointer to q_vector
3539  **/
3540 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3541 {
3542         int new_val = q_vector->itr_val;
3543         int avg_wire_size = 0;
3544         struct igb_adapter *adapter = q_vector->adapter;
3545
3546         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3547          * ints/sec - ITR timer value of 120 ticks.
3548          */
3549         if (adapter->link_speed != SPEED_1000) {
3550                 new_val = 976;
3551                 goto set_itr_val;
3552         }
3553
3554         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3555                 struct igb_ring *ring = q_vector->rx_ring;
3556                 avg_wire_size = ring->total_bytes / ring->total_packets;
3557         }
3558
3559         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3560                 struct igb_ring *ring = q_vector->tx_ring;
3561                 avg_wire_size = max_t(u32, avg_wire_size,
3562                                       (ring->total_bytes /
3563                                        ring->total_packets));
3564         }
3565
3566         /* if avg_wire_size isn't set no work was done */
3567         if (!avg_wire_size)
3568                 goto clear_counts;
3569
3570         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3571         avg_wire_size += 24;
3572
3573         /* Don't starve jumbo frames */
3574         avg_wire_size = min(avg_wire_size, 3000);
3575
3576         /* Give a little boost to mid-size frames */
3577         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3578                 new_val = avg_wire_size / 3;
3579         else
3580                 new_val = avg_wire_size / 2;
3581
3582         /* when in itr mode 3 do not exceed 20K ints/sec */
3583         if (adapter->rx_itr_setting == 3 && new_val < 196)
3584                 new_val = 196;
3585
3586 set_itr_val:
3587         if (new_val != q_vector->itr_val) {
3588                 q_vector->itr_val = new_val;
3589                 q_vector->set_itr = 1;
3590         }
3591 clear_counts:
3592         if (q_vector->rx_ring) {
3593                 q_vector->rx_ring->total_bytes = 0;
3594                 q_vector->rx_ring->total_packets = 0;
3595         }
3596         if (q_vector->tx_ring) {
3597                 q_vector->tx_ring->total_bytes = 0;
3598                 q_vector->tx_ring->total_packets = 0;
3599         }
3600 }
3601
3602 /**
3603  * igb_update_itr - update the dynamic ITR value based on statistics
3604  *      Stores a new ITR value based on packets and byte
3605  *      counts during the last interrupt.  The advantage of per interrupt
3606  *      computation is faster updates and more accurate ITR for the current
3607  *      traffic pattern.  Constants in this function were computed
3608  *      based on theoretical maximum wire speed and thresholds were set based
3609  *      on testing data as well as attempting to minimize response time
3610  *      while increasing bulk throughput.
3611  *      this functionality is controlled by the InterruptThrottleRate module
3612  *      parameter (see igb_param.c)
3613  *      NOTE:  These calculations are only valid when operating in a single-
3614  *             queue environment.
3615  * @adapter: pointer to adapter
3616  * @itr_setting: current q_vector->itr_val
3617  * @packets: the number of packets during this measurement interval
3618  * @bytes: the number of bytes during this measurement interval
3619  **/
3620 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3621                                    int packets, int bytes)
3622 {
3623         unsigned int retval = itr_setting;
3624
3625         if (packets == 0)
3626                 goto update_itr_done;
3627
3628         switch (itr_setting) {
3629         case lowest_latency:
3630                 /* handle TSO and jumbo frames */
3631                 if (bytes/packets > 8000)
3632                         retval = bulk_latency;
3633                 else if ((packets < 5) && (bytes > 512))
3634                         retval = low_latency;
3635                 break;
3636         case low_latency:  /* 50 usec aka 20000 ints/s */
3637                 if (bytes > 10000) {
3638                         /* this if handles the TSO accounting */
3639                         if (bytes/packets > 8000) {
3640                                 retval = bulk_latency;
3641                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3642                                 retval = bulk_latency;
3643                         } else if ((packets > 35)) {
3644                                 retval = lowest_latency;
3645                         }
3646                 } else if (bytes/packets > 2000) {
3647                         retval = bulk_latency;
3648                 } else if (packets <= 2 && bytes < 512) {
3649                         retval = lowest_latency;
3650                 }
3651                 break;
3652         case bulk_latency: /* 250 usec aka 4000 ints/s */
3653                 if (bytes > 25000) {
3654                         if (packets > 35)
3655                                 retval = low_latency;
3656                 } else if (bytes < 1500) {
3657                         retval = low_latency;
3658                 }
3659                 break;
3660         }
3661
3662 update_itr_done:
3663         return retval;
3664 }
3665
3666 static void igb_set_itr(struct igb_adapter *adapter)
3667 {
3668         struct igb_q_vector *q_vector = adapter->q_vector[0];
3669         u16 current_itr;
3670         u32 new_itr = q_vector->itr_val;
3671
3672         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3673         if (adapter->link_speed != SPEED_1000) {
3674                 current_itr = 0;
3675                 new_itr = 4000;
3676                 goto set_itr_now;
3677         }
3678
3679         adapter->rx_itr = igb_update_itr(adapter,
3680                                     adapter->rx_itr,
3681                                     q_vector->rx_ring->total_packets,
3682                                     q_vector->rx_ring->total_bytes);
3683
3684         adapter->tx_itr = igb_update_itr(adapter,
3685                                     adapter->tx_itr,
3686                                     q_vector->tx_ring->total_packets,
3687                                     q_vector->tx_ring->total_bytes);
3688         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3689
3690         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3691         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3692                 current_itr = low_latency;
3693
3694         switch (current_itr) {
3695         /* counts and packets in update_itr are dependent on these numbers */
3696         case lowest_latency:
3697                 new_itr = 56;  /* aka 70,000 ints/sec */
3698                 break;
3699         case low_latency:
3700                 new_itr = 196; /* aka 20,000 ints/sec */
3701                 break;
3702         case bulk_latency:
3703                 new_itr = 980; /* aka 4,000 ints/sec */
3704                 break;
3705         default:
3706                 break;
3707         }
3708
3709 set_itr_now:
3710         q_vector->rx_ring->total_bytes = 0;
3711         q_vector->rx_ring->total_packets = 0;
3712         q_vector->tx_ring->total_bytes = 0;
3713         q_vector->tx_ring->total_packets = 0;
3714
3715         if (new_itr != q_vector->itr_val) {
3716                 /* this attempts to bias the interrupt rate towards Bulk
3717                  * by adding intermediate steps when interrupt rate is
3718                  * increasing */
3719                 new_itr = new_itr > q_vector->itr_val ?
3720                              max((new_itr * q_vector->itr_val) /
3721                                  (new_itr + (q_vector->itr_val >> 2)),
3722                                  new_itr) :
3723                              new_itr;
3724                 /* Don't write the value here; it resets the adapter's
3725                  * internal timer, and causes us to delay far longer than
3726                  * we should between interrupts.  Instead, we write the ITR
3727                  * value at the beginning of the next interrupt so the timing
3728                  * ends up being correct.
3729                  */
3730                 q_vector->itr_val = new_itr;
3731                 q_vector->set_itr = 1;
3732         }
3733 }
3734
/*
 * Per-packet transmit flags.  The low bits are individual flag bits; the
 * upper 16 bits (IGB_TX_FLAGS_VLAN_MASK) hold the VLAN field, positioned
 * with IGB_TX_FLAGS_VLAN_SHIFT.
 */
#define IGB_TX_FLAGS_CSUM       0x00000001
#define IGB_TX_FLAGS_VLAN       0x00000002
#define IGB_TX_FLAGS_TSO        0x00000004
#define IGB_TX_FLAGS_IPV4       0x00000008
#define IGB_TX_FLAGS_TSTAMP     0x00000010
#define IGB_TX_FLAGS_VLAN_MASK  0xffff0000
#define IGB_TX_FLAGS_VLAN_SHIFT 16
3742
3743 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3744                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3745 {
3746         struct e1000_adv_tx_context_desc *context_desc;
3747         unsigned int i;
3748         int err;
3749         struct igb_buffer *buffer_info;
3750         u32 info = 0, tu_cmd = 0;
3751         u32 mss_l4len_idx;
3752         u8 l4len;
3753
3754         if (skb_header_cloned(skb)) {
3755                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3756                 if (err)
3757                         return err;
3758         }
3759
3760         l4len = tcp_hdrlen(skb);
3761         *hdr_len += l4len;
3762
3763         if (skb->protocol == htons(ETH_P_IP)) {
3764                 struct iphdr *iph = ip_hdr(skb);
3765                 iph->tot_len = 0;
3766                 iph->check = 0;
3767                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3768                                                          iph->daddr, 0,
3769                                                          IPPROTO_TCP,
3770                                                          0);
3771         } else if (skb_is_gso_v6(skb)) {
3772                 ipv6_hdr(skb)->payload_len = 0;
3773                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3774                                                        &ipv6_hdr(skb)->daddr,
3775                                                        0, IPPROTO_TCP, 0);
3776         }
3777
3778         i = tx_ring->next_to_use;
3779
3780         buffer_info = &tx_ring->buffer_info[i];
3781         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3782         /* VLAN MACLEN IPLEN */
3783         if (tx_flags & IGB_TX_FLAGS_VLAN)
3784                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3785         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3786         *hdr_len += skb_network_offset(skb);
3787         info |= skb_network_header_len(skb);
3788         *hdr_len += skb_network_header_len(skb);
3789         context_desc->vlan_macip_lens = cpu_to_le32(info);
3790
3791         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3792         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3793
3794         if (skb->protocol == htons(ETH_P_IP))
3795                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3796         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3797
3798         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3799
3800         /* MSS L4LEN IDX */
3801         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3802         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3803
3804         /* For 82575, context index must be unique per ring. */
3805         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3806                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3807
3808         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3809         context_desc->seqnum_seed = 0;
3810
3811         buffer_info->time_stamp = jiffies;
3812         buffer_info->next_to_watch = i;
3813         buffer_info->dma = 0;
3814         i++;
3815         if (i == tx_ring->count)
3816                 i = 0;
3817
3818         tx_ring->next_to_use = i;
3819
3820         return true;
3821 }
3822
3823 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3824                                    struct sk_buff *skb, u32 tx_flags)
3825 {
3826         struct e1000_adv_tx_context_desc *context_desc;
3827         struct device *dev = tx_ring->dev;
3828         struct igb_buffer *buffer_info;
3829         u32 info = 0, tu_cmd = 0;
3830         unsigned int i;
3831
3832         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3833             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3834                 i = tx_ring->next_to_use;
3835                 buffer_info = &tx_ring->buffer_info[i];
3836                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3837
3838                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3839                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3840
3841                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3842                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3843                         info |= skb_network_header_len(skb);
3844
3845                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3846
3847                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3848
3849                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3850                         __be16 protocol;
3851
3852                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3853                                 const struct vlan_ethhdr *vhdr =
3854                                           (const struct vlan_ethhdr*)skb->data;
3855
3856                                 protocol = vhdr->h_vlan_encapsulated_proto;
3857                         } else {
3858                                 protocol = skb->protocol;
3859                         }
3860
3861                         switch (protocol) {
3862                         case cpu_to_be16(ETH_P_IP):
3863                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3864                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3865                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3866                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3867                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3868                                 break;
3869                         case cpu_to_be16(ETH_P_IPV6):
3870                                 /* XXX what about other V6 headers?? */
3871                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3872                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3873                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3874                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3875                                 break;
3876                         default:
3877                                 if (unlikely(net_ratelimit()))
3878                                         dev_warn(dev,
3879                                             "partial checksum but proto=%x!\n",
3880                                             skb->protocol);
3881                                 break;
3882                         }
3883                 }
3884
3885                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3886                 context_desc->seqnum_seed = 0;
3887                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3888                         context_desc->mss_l4len_idx =
3889                                 cpu_to_le32(tx_ring->reg_idx << 4);
3890
3891                 buffer_info->time_stamp = jiffies;
3892                 buffer_info->next_to_watch = i;
3893                 buffer_info->dma = 0;
3894
3895                 i++;
3896                 if (i == tx_ring->count)
3897                         i = 0;
3898                 tx_ring->next_to_use = i;
3899
3900                 return true;
3901         }
3902         return false;
3903 }
3904
3905 #define IGB_MAX_TXD_PWR 16
3906 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3907
3908 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3909                                  unsigned int first)
3910 {
3911         struct igb_buffer *buffer_info;
3912         struct device *dev = tx_ring->dev;
3913         unsigned int hlen = skb_headlen(skb);
3914         unsigned int count = 0, i;
3915         unsigned int f;
3916         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3917
3918         i = tx_ring->next_to_use;
3919
3920         buffer_info = &tx_ring->buffer_info[i];
3921         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3922         buffer_info->length = hlen;
3923         /* set time_stamp *before* dma to help avoid a possible race */
3924         buffer_info->time_stamp = jiffies;
3925         buffer_info->next_to_watch = i;
3926         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3927                                           DMA_TO_DEVICE);
3928         if (dma_mapping_error(dev, buffer_info->dma))
3929                 goto dma_error;
3930
3931         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3932                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3933                 unsigned int len = frag->size;
3934
3935                 count++;
3936                 i++;
3937                 if (i == tx_ring->count)
3938                         i = 0;
3939
3940                 buffer_info = &tx_ring->buffer_info[i];
3941                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3942                 buffer_info->length = len;
3943                 buffer_info->time_stamp = jiffies;
3944                 buffer_info->next_to_watch = i;
3945                 buffer_info->mapped_as_page = true;
3946                 buffer_info->dma = dma_map_page(dev,
3947                                                 frag->page,
3948                                                 frag->page_offset,
3949                                                 len,
3950                                                 DMA_TO_DEVICE);
3951                 if (dma_mapping_error(dev, buffer_info->dma))
3952                         goto dma_error;
3953
3954         }
3955
3956         tx_ring->buffer_info[i].skb = skb;
3957         tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags;
3958         /* multiply data chunks by size of headers */
3959         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3960         tx_ring->buffer_info[i].gso_segs = gso_segs;
3961         tx_ring->buffer_info[first].next_to_watch = i;
3962
3963         return ++count;
3964
3965 dma_error:
3966         dev_err(dev, "TX DMA map failed\n");
3967
3968         /* clear timestamp and dma mappings for failed buffer_info mapping */
3969         buffer_info->dma = 0;
3970         buffer_info->time_stamp = 0;
3971         buffer_info->length = 0;
3972         buffer_info->next_to_watch = 0;
3973         buffer_info->mapped_as_page = false;
3974
3975         /* clear timestamp and dma mappings for remaining portion of packet */
3976         while (count--) {
3977                 if (i == 0)
3978                         i = tx_ring->count;
3979                 i--;
3980                 buffer_info = &tx_ring->buffer_info[i];
3981                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3982         }
3983
3984         return 0;
3985 }
3986
3987 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3988                                     u32 tx_flags, int count, u32 paylen,
3989                                     u8 hdr_len)
3990 {
3991         union e1000_adv_tx_desc *tx_desc;
3992         struct igb_buffer *buffer_info;
3993         u32 olinfo_status = 0, cmd_type_len;
3994         unsigned int i = tx_ring->next_to_use;
3995
3996         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3997                         E1000_ADVTXD_DCMD_DEXT);
3998
3999         if (tx_flags & IGB_TX_FLAGS_VLAN)
4000                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4001
4002         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4003                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4004
4005         if (tx_flags & IGB_TX_FLAGS_TSO) {
4006                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4007
4008                 /* insert tcp checksum */
4009                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4010
4011                 /* insert ip checksum */
4012                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4013                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4014
4015         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4016                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4017         }
4018
4019         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4020             (tx_flags & (IGB_TX_FLAGS_CSUM |
4021                          IGB_TX_FLAGS_TSO |
4022                          IGB_TX_FLAGS_VLAN)))
4023                 olinfo_status |= tx_ring->reg_idx << 4;
4024
4025         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4026
4027         do {
4028                 buffer_info = &tx_ring->buffer_info[i];
4029                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4030                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4031                 tx_desc->read.cmd_type_len =
4032                         cpu_to_le32(cmd_type_len | buffer_info->length);
4033                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4034                 count--;
4035                 i++;
4036                 if (i == tx_ring->count)
4037                         i = 0;
4038         } while (count > 0);
4039
4040         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4041         /* Force memory writes to complete before letting h/w
4042          * know there are new descriptors to fetch.  (Only
4043          * applicable for weak-ordered memory model archs,
4044          * such as IA-64). */
4045         wmb();
4046
4047         tx_ring->next_to_use = i;
4048         writel(i, tx_ring->tail);
4049         /* we need this if more than one processor can write to our tail
4050          * at a time, it syncronizes IO on IA64/Altix systems */
4051         mmiowb();
4052 }
4053
4054 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4055 {
4056         struct net_device *netdev = tx_ring->netdev;
4057
4058         netif_stop_subqueue(netdev, tx_ring->queue_index);
4059
4060         /* Herbert's original patch had:
4061          *  smp_mb__after_netif_stop_queue();
4062          * but since that doesn't exist yet, just open code it. */
4063         smp_mb();
4064
4065         /* We need to check again in a case another CPU has just
4066          * made room available. */
4067         if (igb_desc_unused(tx_ring) < size)
4068                 return -EBUSY;
4069
4070         /* A reprieve! */
4071         netif_wake_subqueue(netdev, tx_ring->queue_index);
4072         tx_ring->tx_stats.restart_queue++;
4073         return 0;
4074 }
4075
/*
 * Fast-path check for Tx ring space: returns 0 immediately when at least
 * @size descriptors are unused, otherwise defers to __igb_maybe_stop_tx().
 */
static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
{
	if (igb_desc_unused(tx_ring) < size)
		return __igb_maybe_stop_tx(tx_ring, size);

	return 0;
}
4082
4083 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4084                                     struct igb_ring *tx_ring)
4085 {
4086         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4087         int tso = 0, count;
4088         u32 tx_flags = 0;
4089         u16 first;
4090         u8 hdr_len = 0;
4091         union skb_shared_tx *shtx = skb_tx(skb);
4092
4093         /* need: 1 descriptor per page,
4094          *       + 2 desc gap to keep tail from touching head,
4095          *       + 1 desc for skb->data,
4096          *       + 1 desc for context descriptor,
4097          * otherwise try next time */
4098         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4099                 /* this is a hard error */
4100                 return NETDEV_TX_BUSY;
4101         }
4102
4103         if (unlikely(shtx->hardware)) {
4104                 shtx->in_progress = 1;
4105                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4106         }
4107
4108         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
4109                 tx_flags |= IGB_TX_FLAGS_VLAN;
4110                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4111         }
4112
4113         if (skb->protocol == htons(ETH_P_IP))
4114                 tx_flags |= IGB_TX_FLAGS_IPV4;
4115
4116         first = tx_ring->next_to_use;
4117         if (skb_is_gso(skb)) {
4118                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4119
4120                 if (tso < 0) {
4121                         dev_kfree_skb_any(skb);
4122                         return NETDEV_TX_OK;
4123                 }
4124         }
4125
4126         if (tso)
4127                 tx_flags |= IGB_TX_FLAGS_TSO;
4128         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4129                  (skb->ip_summed == CHECKSUM_PARTIAL))
4130                 tx_flags |= IGB_TX_FLAGS_CSUM;
4131
4132         /*
4133          * count reflects descriptors mapped, if 0 or less then mapping error
4134          * has occured and we need to rewind the descriptor queue
4135          */
4136         count = igb_tx_map_adv(tx_ring, skb, first);
4137         if (!count) {
4138                 dev_kfree_skb_any(skb);
4139                 tx_ring->buffer_info[first].time_stamp = 0;
4140                 tx_ring->next_to_use = first;
4141                 return NETDEV_TX_OK;
4142         }
4143
4144         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4145
4146         /* Make sure there is space in the ring for the next send. */
4147         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4148
4149         return NETDEV_TX_OK;
4150 }
4151
4152 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4153                                       struct net_device *netdev)
4154 {
4155         struct igb_adapter *adapter = netdev_priv(netdev);
4156         struct igb_ring *tx_ring;
4157         int r_idx = 0;
4158
4159         if (test_bit(__IGB_DOWN, &adapter->state)) {
4160                 dev_kfree_skb_any(skb);
4161                 return NETDEV_TX_OK;
4162         }
4163
4164         if (skb->len <= 0) {
4165                 dev_kfree_skb_any(skb);
4166                 return NETDEV_TX_OK;
4167         }
4168
4169         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4170         tx_ring = adapter->multi_tx_table[r_idx];
4171
4172         /* This goes back to the question of how to logically map a tx queue
4173          * to a flow.  Right now, performance is impacted slightly negatively
4174          * if using multiple tx queues.  If the stack breaks away from a
4175          * single qdisc implementation, we can look at this again. */
4176         return igb_xmit_frame_ring_adv(skb, tx_ring);
4177 }
4178
4179 /**
4180  * igb_tx_timeout - Respond to a Tx Hang
4181  * @netdev: network interface device structure
4182  **/
4183 static void igb_tx_timeout(struct net_device *netdev)
4184 {
4185         struct igb_adapter *adapter = netdev_priv(netdev);
4186         struct e1000_hw *hw = &adapter->hw;
4187
4188         /* Do the reset outside of interrupt context */
4189         adapter->tx_timeout_count++;
4190
4191         if (hw->mac.type == e1000_82580)
4192                 hw->dev_spec._82575.global_device_reset = true;
4193
4194         schedule_work(&adapter->reset_task);
4195         wr32(E1000_EICS,
4196              (adapter->eims_enable_mask & ~adapter->eims_other));
4197 }
4198
4199 static void igb_reset_task(struct work_struct *work)
4200 {
4201         struct igb_adapter *adapter;
4202         adapter = container_of(work, struct igb_adapter, reset_task);
4203
4204         igb_dump(adapter);
4205         netdev_err(adapter->netdev, "Reset adapter\n");
4206         igb_reinit_locked(adapter);
4207 }
4208
4209 /**
4210  * igb_get_stats - Get System Network Statistics
4211  * @netdev: network interface device structure
4212  *
4213  * Returns the address of the device statistics structure.
4214  * The statistics are actually updated from the timer callback.
4215  **/
4216 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4217 {
4218         /* only return the current stats */
4219         return &netdev->stats;
4220 }
4221
4222 /**
4223  * igb_change_mtu - Change the Maximum Transfer Unit
4224  * @netdev: network interface device structure
4225  * @new_mtu: new value for maximum frame size
4226  *
4227  * Returns 0 on success, negative on failure
4228  **/
4229 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4230 {
4231         struct igb_adapter *adapter = netdev_priv(netdev);
4232         struct pci_dev *pdev = adapter->pdev;
4233         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4234         u32 rx_buffer_len, i;
4235
4236         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4237                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4238                 return -EINVAL;
4239         }
4240
4241         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4242                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4243                 return -EINVAL;
4244         }
4245
4246         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4247                 msleep(1);
4248
4249         /* igb_down has a dependency on max_frame_size */
4250         adapter->max_frame_size = max_frame;
4251
4252         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4253          * means we reserve 2 more, this pushes us to allocate from the next
4254          * larger slab size.
4255          * i.e. RXBUFFER_2048 --> size-4096 slab
4256          */
4257
4258         if (adapter->hw.mac.type == e1000_82580)
4259                 max_frame += IGB_TS_HDR_LEN;
4260
4261         if (max_frame <= IGB_RXBUFFER_1024)
4262                 rx_buffer_len = IGB_RXBUFFER_1024;
4263         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4264                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4265         else
4266                 rx_buffer_len = IGB_RXBUFFER_128;
4267
4268         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4269              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4270                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4271
4272         if ((adapter->hw.mac.type == e1000_82580) &&
4273             (rx_buffer_len == IGB_RXBUFFER_128))
4274                 rx_buffer_len += IGB_RXBUFFER_64;
4275
4276         if (netif_running(netdev))
4277                 igb_down(adapter);
4278
4279         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4280                  netdev->mtu, new_mtu);
4281         netdev->mtu = new_mtu;
4282
4283         for (i = 0; i < adapter->num_rx_queues; i++)
4284                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4285
4286         if (netif_running(netdev))
4287                 igb_up(adapter);
4288         else
4289                 igb_reset(adapter);
4290
4291         clear_bit(__IGB_RESETTING, &adapter->state);
4292
4293         return 0;
4294 }
4295
4296 /**
4297  * igb_update_stats - Update the board statistics counters
4298  * @adapter: board private structure
4299  **/
4300
4301 void igb_update_stats(struct igb_adapter *adapter)
4302 {
4303         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
4304         struct e1000_hw *hw = &adapter->hw;
4305         struct pci_dev *pdev = adapter->pdev;
4306         u32 reg, mpc;
4307         u16 phy_tmp;
4308         int i;
4309         u64 bytes, packets;
4310
4311 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4312
4313         /*
4314          * Prevent stats update while adapter is being reset, or if the pci
4315          * connection is down.
4316          */
4317         if (adapter->link_speed == 0)
4318                 return;
4319         if (pci_channel_offline(pdev))
4320                 return;
4321
4322         bytes = 0;
4323         packets = 0;
4324         for (i = 0; i < adapter->num_rx_queues; i++) {
4325                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4326                 struct igb_ring *ring = adapter->rx_ring[i];
4327                 ring->rx_stats.drops += rqdpc_tmp;
4328                 net_stats->rx_fifo_errors += rqdpc_tmp;
4329                 bytes += ring->rx_stats.bytes;
4330                 packets += ring->rx_stats.packets;
4331         }
4332
4333         net_stats->rx_bytes = bytes;
4334         net_stats->rx_packets = packets;
4335
4336         bytes = 0;
4337         packets = 0;
4338         for (i = 0; i < adapter->num_tx_queues; i++) {
4339                 struct igb_ring *ring = adapter->tx_ring[i];
4340                 bytes += ring->tx_stats.bytes;
4341                 packets += ring->tx_stats.packets;
4342         }
4343         net_stats->tx_bytes = bytes;
4344         net_stats->tx_packets = packets;
4345
4346         /* read stats registers */
4347         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4348         adapter->stats.gprc += rd32(E1000_GPRC);
4349         adapter->stats.gorc += rd32(E1000_GORCL);
4350         rd32(E1000_GORCH); /* clear GORCL */
4351         adapter->stats.bprc += rd32(E1000_BPRC);
4352         adapter->stats.mprc += rd32(E1000_MPRC);
4353         adapter->stats.roc += rd32(E1000_ROC);
4354
4355         adapter->stats.prc64 += rd32(E1000_PRC64);
4356         adapter->stats.prc127 += rd32(E1000_PRC127);
4357         adapter->stats.prc255 += rd32(E1000_PRC255);
4358         adapter->stats.prc511 += rd32(E1000_PRC511);
4359         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4360         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4361         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4362         adapter->stats.sec += rd32(E1000_SEC);
4363
4364         mpc = rd32(E1000_MPC);
4365         adapter->stats.mpc += mpc;
4366         net_stats->rx_fifo_errors += mpc;
4367         adapter->stats.scc += rd32(E1000_SCC);
4368         adapter->stats.ecol += rd32(E1000_ECOL);
4369         adapter->stats.mcc += rd32(E1000_MCC);
4370         adapter->stats.latecol += rd32(E1000_LATECOL);
4371         adapter->stats.dc += rd32(E1000_DC);
4372         adapter->stats.rlec += rd32(E1000_RLEC);
4373         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4374         adapter->stats.xontxc += rd32(E1000_XONTXC);
4375         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4376         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4377         adapter->stats.fcruc += rd32(E1000_FCRUC);
4378         adapter->stats.gptc += rd32(E1000_GPTC);
4379         adapter->stats.gotc += rd32(E1000_GOTCL);
4380         rd32(E1000_GOTCH); /* clear GOTCL */
4381         adapter->stats.rnbc += rd32(E1000_RNBC);
4382         adapter->stats.ruc += rd32(E1000_RUC);
4383         adapter->stats.rfc += rd32(E1000_RFC);
4384         adapter->stats.rjc += rd32(E1000_RJC);
4385         adapter->stats.tor += rd32(E1000_TORH);
4386         adapter->stats.tot += rd32(E1000_TOTH);
4387         adapter->stats.tpr += rd32(E1000_TPR);
4388
4389         adapter->stats.ptc64 += rd32(E1000_PTC64);
4390         adapter->stats.ptc127 += rd32(E1000_PTC127);
4391         adapter->stats.ptc255 += rd32(E1000_PTC255);
4392         adapter->stats.ptc511 += rd32(E1000_PTC511);
4393         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4394         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4395
4396         adapter->stats.mptc += rd32(E1000_MPTC);
4397         adapter->stats.bptc += rd32(E1000_BPTC);
4398
4399         adapter->stats.tpt += rd32(E1000_TPT);
4400         adapter->stats.colc += rd32(E1000_COLC);
4401
4402         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4403         /* read internal phy specific stats */
4404         reg = rd32(E1000_CTRL_EXT);
4405         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4406                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4407                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4408         }
4409
4410         adapter->stats.tsctc += rd32(E1000_TSCTC);
4411         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4412
4413         adapter->stats.iac += rd32(E1000_IAC);
4414         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4415         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4416         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4417         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4418         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4419         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4420         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4421         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4422
4423         /* Fill out the OS statistics structure */
4424         net_stats->multicast = adapter->stats.mprc;
4425         net_stats->collisions = adapter->stats.colc;
4426
4427         /* Rx Errors */
4428
4429         /* RLEC on some newer hardware can be incorrect so build
4430          * our own version based on RUC and ROC */
4431         net_stats->rx_errors = adapter->stats.rxerrc +
4432                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4433                 adapter->stats.ruc + adapter->stats.roc +
4434                 adapter->stats.cexterr;
4435         net_stats->rx_length_errors = adapter->stats.ruc +
4436                                       adapter->stats.roc;
4437         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4438         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4439         net_stats->rx_missed_errors = adapter->stats.mpc;
4440
4441         /* Tx Errors */
4442         net_stats->tx_errors = adapter->stats.ecol +
4443                                adapter->stats.latecol;
4444         net_stats->tx_aborted_errors = adapter->stats.ecol;
4445         net_stats->tx_window_errors = adapter->stats.latecol;
4446         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4447
4448         /* Tx Dropped needs to be maintained elsewhere */
4449
4450         /* Phy Stats */
4451         if (hw->phy.media_type == e1000_media_type_copper) {
4452                 if ((adapter->link_speed == SPEED_1000) &&
4453                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4454                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4455                         adapter->phy_stats.idle_errors += phy_tmp;
4456                 }
4457         }
4458
4459         /* Management Stats */
4460         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4461         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4462         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4463 }
4464
4465 static irqreturn_t igb_msix_other(int irq, void *data)
4466 {
4467         struct igb_adapter *adapter = data;
4468         struct e1000_hw *hw = &adapter->hw;
4469         u32 icr = rd32(E1000_ICR);
4470         /* reading ICR causes bit 31 of EICR to be cleared */
4471
4472         if (icr & E1000_ICR_DRSTA)
4473                 schedule_work(&adapter->reset_task);
4474
4475         if (icr & E1000_ICR_DOUTSYNC) {
4476                 /* HW is reporting DMA is out of sync */
4477                 adapter->stats.doosync++;
4478         }
4479
4480         /* Check for a mailbox event */
4481         if (icr & E1000_ICR_VMMB)
4482                 igb_msg_task(adapter);
4483
4484         if (icr & E1000_ICR_LSC) {
4485                 hw->mac.get_link_status = 1;
4486                 /* guard against interrupt when we're going down */
4487                 if (!test_bit(__IGB_DOWN, &adapter->state))
4488                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4489         }
4490
4491         if (adapter->vfs_allocated_count)
4492                 wr32(E1000_IMS, E1000_IMS_LSC |
4493                                 E1000_IMS_VMMB |
4494                                 E1000_IMS_DOUTSYNC);
4495         else
4496                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4497         wr32(E1000_EIMS, adapter->eims_other);
4498
4499         return IRQ_HANDLED;
4500 }
4501
4502 static void igb_write_itr(struct igb_q_vector *q_vector)
4503 {
4504         struct igb_adapter *adapter = q_vector->adapter;
4505         u32 itr_val = q_vector->itr_val & 0x7FFC;
4506
4507         if (!q_vector->set_itr)
4508                 return;
4509
4510         if (!itr_val)
4511                 itr_val = 0x4;
4512
4513         if (adapter->hw.mac.type == e1000_82575)
4514                 itr_val |= itr_val << 16;
4515         else
4516                 itr_val |= 0x8000000;
4517
4518         writel(itr_val, q_vector->itr_register);
4519         q_vector->set_itr = 0;
4520 }
4521
4522 static irqreturn_t igb_msix_ring(int irq, void *data)
4523 {
4524         struct igb_q_vector *q_vector = data;
4525
4526         /* Write the ITR value calculated from the previous interrupt. */
4527         igb_write_itr(q_vector);
4528
4529         napi_schedule(&q_vector->napi);
4530
4531         return IRQ_HANDLED;
4532 }
4533
4534 #ifdef CONFIG_IGB_DCA
4535 static void igb_update_dca(struct igb_q_vector *q_vector)
4536 {
4537         struct igb_adapter *adapter = q_vector->adapter;
4538         struct e1000_hw *hw = &adapter->hw;
4539         int cpu = get_cpu();
4540
4541         if (q_vector->cpu == cpu)
4542                 goto out_no_update;
4543
4544         if (q_vector->tx_ring) {
4545                 int q = q_vector->tx_ring->reg_idx;
4546                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4547                 if (hw->mac.type == e1000_82575) {
4548                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4549                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4550                 } else {
4551                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4552                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4553                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4554                 }
4555                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4556                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4557         }
4558         if (q_vector->rx_ring) {
4559                 int q = q_vector->rx_ring->reg_idx;
4560                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4561                 if (hw->mac.type == e1000_82575) {
4562                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4563                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4564                 } else {
4565                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4566                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4567                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4568                 }
4569                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4570                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4571                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4572                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4573         }
4574         q_vector->cpu = cpu;
4575 out_no_update:
4576         put_cpu();
4577 }
4578
4579 static void igb_setup_dca(struct igb_adapter *adapter)
4580 {
4581         struct e1000_hw *hw = &adapter->hw;
4582         int i;
4583
4584         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4585                 return;
4586
4587         /* Always use CB2 mode, difference is masked in the CB driver. */
4588         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4589
4590         for (i = 0; i < adapter->num_q_vectors; i++) {
4591                 adapter->q_vector[i]->cpu = -1;
4592                 igb_update_dca(adapter->q_vector[i]);
4593         }
4594 }
4595
4596 static int __igb_notify_dca(struct device *dev, void *data)
4597 {
4598         struct net_device *netdev = dev_get_drvdata(dev);
4599         struct igb_adapter *adapter = netdev_priv(netdev);
4600         struct pci_dev *pdev = adapter->pdev;
4601         struct e1000_hw *hw = &adapter->hw;
4602         unsigned long event = *(unsigned long *)data;
4603
4604         switch (event) {
4605         case DCA_PROVIDER_ADD:
4606                 /* if already enabled, don't do it again */
4607                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4608                         break;
4609                 if (dca_add_requester(dev) == 0) {
4610                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4611                         dev_info(&pdev->dev, "DCA enabled\n");
4612                         igb_setup_dca(adapter);
4613                         break;
4614                 }
4615                 /* Fall Through since DCA is disabled. */
4616         case DCA_PROVIDER_REMOVE:
4617                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4618                         /* without this a class_device is left
4619                          * hanging around in the sysfs model */
4620                         dca_remove_requester(dev);
4621                         dev_info(&pdev->dev, "DCA disabled\n");
4622                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4623                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4624                 }
4625                 break;
4626         }
4627
4628         return 0;
4629 }
4630
4631 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4632                           void *p)
4633 {
4634         int ret_val;
4635
4636         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4637                                          __igb_notify_dca);
4638
4639         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4640 }
4641 #endif /* CONFIG_IGB_DCA */
4642
4643 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4644 {
4645         struct e1000_hw *hw = &adapter->hw;
4646         u32 ping;
4647         int i;
4648
4649         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4650                 ping = E1000_PF_CONTROL_MSG;
4651                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4652                         ping |= E1000_VT_MSGTYPE_CTS;
4653                 igb_write_mbx(hw, &ping, 1, i);
4654         }
4655 }
4656
4657 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4658 {
4659         struct e1000_hw *hw = &adapter->hw;
4660         u32 vmolr = rd32(E1000_VMOLR(vf));
4661         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4662
4663         vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4664                             IGB_VF_FLAG_MULTI_PROMISC);
4665         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4666
4667         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4668                 vmolr |= E1000_VMOLR_MPME;
4669                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4670         } else {
4671                 /*
4672                  * if we have hashes and we are clearing a multicast promisc
4673                  * flag we need to write the hashes to the MTA as this step
4674                  * was previously skipped
4675                  */
4676                 if (vf_data->num_vf_mc_hashes > 30) {
4677                         vmolr |= E1000_VMOLR_MPME;
4678                 } else if (vf_data->num_vf_mc_hashes) {
4679                         int j;
4680                         vmolr |= E1000_VMOLR_ROMPE;
4681                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4682                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4683                 }
4684         }
4685
4686         wr32(E1000_VMOLR(vf), vmolr);
4687
4688         /* there are flags left unprocessed, likely not supported */
4689         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4690                 return -EINVAL;
4691
4692         return 0;
4693
4694 }
4695
4696 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4697                                   u32 *msgbuf, u32 vf)
4698 {
4699         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4700         u16 *hash_list = (u16 *)&msgbuf[1];
4701         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4702         int i;
4703
4704         /* salt away the number of multicast addresses assigned
4705          * to this VF for later use to restore when the PF multi cast
4706          * list changes
4707          */
4708         vf_data->num_vf_mc_hashes = n;
4709
4710         /* only up to 30 hash values supported */
4711         if (n > 30)
4712                 n = 30;
4713
4714         /* store the hashes for later use */
4715         for (i = 0; i < n; i++)
4716                 vf_data->vf_mc_hashes[i] = hash_list[i];
4717
4718         /* Flush and reset the mta with the new values */
4719         igb_set_rx_mode(adapter->netdev);
4720
4721         return 0;
4722 }
4723
4724 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4725 {
4726         struct e1000_hw *hw = &adapter->hw;
4727         struct vf_data_storage *vf_data;
4728         int i, j;
4729
4730         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4731                 u32 vmolr = rd32(E1000_VMOLR(i));
4732                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4733
4734                 vf_data = &adapter->vf_data[i];
4735
4736                 if ((vf_data->num_vf_mc_hashes > 30) ||
4737                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4738                         vmolr |= E1000_VMOLR_MPME;
4739                 } else if (vf_data->num_vf_mc_hashes) {
4740                         vmolr |= E1000_VMOLR_ROMPE;
4741                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4742                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4743                 }
4744                 wr32(E1000_VMOLR(i), vmolr);
4745         }
4746 }
4747
4748 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4749 {
4750         struct e1000_hw *hw = &adapter->hw;
4751         u32 pool_mask, reg, vid;
4752         int i;
4753
4754         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4755
4756         /* Find the vlan filter for this id */
4757         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4758                 reg = rd32(E1000_VLVF(i));
4759
4760                 /* remove the vf from the pool */
4761                 reg &= ~pool_mask;
4762
4763                 /* if pool is empty then remove entry from vfta */
4764                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4765                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4766                         reg = 0;
4767                         vid = reg & E1000_VLVF_VLANID_MASK;
4768                         igb_vfta_set(hw, vid, false);
4769                 }
4770
4771                 wr32(E1000_VLVF(i), reg);
4772         }
4773
4774         adapter->vf_data[vf].vlans_enabled = 0;
4775 }
4776
4777 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4778 {
4779         struct e1000_hw *hw = &adapter->hw;
4780         u32 reg, i;
4781
4782         /* The vlvf table only exists on 82576 hardware and newer */
4783         if (hw->mac.type < e1000_82576)
4784                 return -1;
4785
4786         /* we only need to do this if VMDq is enabled */
4787         if (!adapter->vfs_allocated_count)
4788                 return -1;
4789
4790         /* Find the vlan filter for this id */
4791         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4792                 reg = rd32(E1000_VLVF(i));
4793                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4794                     vid == (reg & E1000_VLVF_VLANID_MASK))
4795                         break;
4796         }
4797
4798         if (add) {
4799                 if (i == E1000_VLVF_ARRAY_SIZE) {
4800                         /* Did not find a matching VLAN ID entry that was
4801                          * enabled.  Search for a free filter entry, i.e.
4802                          * one without the enable bit set
4803                          */
4804                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4805                                 reg = rd32(E1000_VLVF(i));
4806                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4807                                         break;
4808                         }
4809                 }
4810                 if (i < E1000_VLVF_ARRAY_SIZE) {
4811                         /* Found an enabled/available entry */
4812                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4813
4814                         /* if !enabled we need to set this up in vfta */
4815                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4816                                 /* add VID to filter table */
4817                                 igb_vfta_set(hw, vid, true);
4818                                 reg |= E1000_VLVF_VLANID_ENABLE;
4819                         }
4820                         reg &= ~E1000_VLVF_VLANID_MASK;
4821                         reg |= vid;
4822                         wr32(E1000_VLVF(i), reg);
4823
4824                         /* do not modify RLPML for PF devices */
4825                         if (vf >= adapter->vfs_allocated_count)
4826                                 return 0;
4827
4828                         if (!adapter->vf_data[vf].vlans_enabled) {
4829                                 u32 size;
4830                                 reg = rd32(E1000_VMOLR(vf));
4831                                 size = reg & E1000_VMOLR_RLPML_MASK;
4832                                 size += 4;
4833                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4834                                 reg |= size;
4835                                 wr32(E1000_VMOLR(vf), reg);
4836                         }
4837
4838                         adapter->vf_data[vf].vlans_enabled++;
4839                         return 0;
4840                 }
4841         } else {
4842                 if (i < E1000_VLVF_ARRAY_SIZE) {
4843                         /* remove vf from the pool */
4844                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4845                         /* if pool is empty then remove entry from vfta */
4846                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4847                                 reg = 0;
4848                                 igb_vfta_set(hw, vid, false);
4849                         }
4850                         wr32(E1000_VLVF(i), reg);
4851
4852                         /* do not modify RLPML for PF devices */
4853                         if (vf >= adapter->vfs_allocated_count)
4854                                 return 0;
4855
4856                         adapter->vf_data[vf].vlans_enabled--;
4857                         if (!adapter->vf_data[vf].vlans_enabled) {
4858                                 u32 size;
4859                                 reg = rd32(E1000_VMOLR(vf));
4860                                 size = reg & E1000_VMOLR_RLPML_MASK;
4861                                 size -= 4;
4862                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4863                                 reg |= size;
4864                                 wr32(E1000_VMOLR(vf), reg);
4865                         }
4866                 }
4867         }
4868         return 0;
4869 }
4870
4871 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4872 {
4873         struct e1000_hw *hw = &adapter->hw;
4874
4875         if (vid)
4876                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4877         else
4878                 wr32(E1000_VMVIR(vf), 0);
4879 }
4880
4881 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4882                                int vf, u16 vlan, u8 qos)
4883 {
4884         int err = 0;
4885         struct igb_adapter *adapter = netdev_priv(netdev);
4886
4887         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4888                 return -EINVAL;
4889         if (vlan || qos) {
4890                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4891                 if (err)
4892                         goto out;
4893                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4894                 igb_set_vmolr(adapter, vf, !vlan);
4895                 adapter->vf_data[vf].pf_vlan = vlan;
4896                 adapter->vf_data[vf].pf_qos = qos;
4897                 dev_info(&adapter->pdev->dev,
4898                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4899                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4900                         dev_warn(&adapter->pdev->dev,
4901                                  "The VF VLAN has been set,"
4902                                  " but the PF device is not up.\n");
4903                         dev_warn(&adapter->pdev->dev,
4904                                  "Bring the PF device up before"
4905                                  " attempting to use the VF device.\n");
4906                 }
4907         } else {
4908                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4909                                    false, vf);
4910                 igb_set_vmvir(adapter, vlan, vf);
4911                 igb_set_vmolr(adapter, vf, true);
4912                 adapter->vf_data[vf].pf_vlan = 0;
4913                 adapter->vf_data[vf].pf_qos = 0;
4914        }
4915 out:
4916        return err;
4917 }
4918
4919 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4920 {
4921         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4922         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4923
4924         return igb_vlvf_set(adapter, vid, add, vf);
4925 }
4926
4927 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4928 {
4929         /* clear flags */
4930         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4931         adapter->vf_data[vf].last_nack = jiffies;
4932
4933         /* reset offloads to defaults */
4934         igb_set_vmolr(adapter, vf, true);
4935
4936         /* reset vlans for device */
4937         igb_clear_vf_vfta(adapter, vf);
4938         if (adapter->vf_data[vf].pf_vlan)
4939                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4940                                     adapter->vf_data[vf].pf_vlan,
4941                                     adapter->vf_data[vf].pf_qos);
4942         else
4943                 igb_clear_vf_vfta(adapter, vf);
4944
4945         /* reset multicast table array for vf */
4946         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4947
4948         /* Flush and reset the mta with the new values */
4949         igb_set_rx_mode(adapter->netdev);
4950 }
4951
4952 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4953 {
4954         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4955
4956         /* generate a new mac address as we were hotplug removed/added */
4957         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4958                 random_ether_addr(vf_mac);
4959
4960         /* process remaining reset events */
4961         igb_vf_reset(adapter, vf);
4962 }
4963
4964 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4965 {
4966         struct e1000_hw *hw = &adapter->hw;
4967         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4968         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4969         u32 reg, msgbuf[3];
4970         u8 *addr = (u8 *)(&msgbuf[1]);
4971
4972         /* process all the same items cleared in a function level reset */
4973         igb_vf_reset(adapter, vf);
4974
4975         /* set vf mac address */
4976         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4977
4978         /* enable transmit and receive for vf */
4979         reg = rd32(E1000_VFTE);
4980         wr32(E1000_VFTE, reg | (1 << vf));
4981         reg = rd32(E1000_VFRE);
4982         wr32(E1000_VFRE, reg | (1 << vf));
4983
4984         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4985
4986         /* reply to reset with ack and vf mac address */
4987         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4988         memcpy(addr, vf_mac, 6);
4989         igb_write_mbx(hw, msgbuf, 3, vf);
4990 }
4991
4992 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4993 {
4994         /*
4995          * The VF MAC Address is stored in a packed array of bytes
4996          * starting at the second 32 bit word of the msg array
4997          */
4998         unsigned char *addr = (char *)&msg[1];
4999         int err = -1;
5000
5001         if (is_valid_ether_addr(addr))
5002                 err = igb_set_vf_mac(adapter, vf, addr);
5003
5004         return err;
5005 }
5006
5007 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5008 {
5009         struct e1000_hw *hw = &adapter->hw;
5010         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5011         u32 msg = E1000_VT_MSGTYPE_NACK;
5012
5013         /* if device isn't clear to send it shouldn't be reading either */
5014         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5015             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5016                 igb_write_mbx(hw, &msg, 1, vf);
5017                 vf_data->last_nack = jiffies;
5018         }
5019 }
5020
5021 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5022 {
5023         struct pci_dev *pdev = adapter->pdev;
5024         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5025         struct e1000_hw *hw = &adapter->hw;
5026         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5027         s32 retval;
5028
5029         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5030
5031         if (retval) {
5032                 /* if receive failed revoke VF CTS stats and restart init */
5033                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5034                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5035                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5036                         return;
5037                 goto out;
5038         }
5039
5040         /* this is a message we already processed, do nothing */
5041         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5042                 return;
5043
5044         /*
5045          * until the vf completes a reset it should not be
5046          * allowed to start any configuration.
5047          */
5048
5049         if (msgbuf[0] == E1000_VF_RESET) {
5050                 igb_vf_reset_msg(adapter, vf);
5051                 return;
5052         }
5053
5054         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5055                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5056                         return;
5057                 retval = -1;
5058                 goto out;
5059         }
5060
5061         switch ((msgbuf[0] & 0xFFFF)) {
5062         case E1000_VF_SET_MAC_ADDR:
5063                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5064                 break;
5065         case E1000_VF_SET_PROMISC:
5066                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5067                 break;
5068         case E1000_VF_SET_MULTICAST:
5069                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5070                 break;
5071         case E1000_VF_SET_LPE:
5072                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5073                 break;
5074         case E1000_VF_SET_VLAN:
5075                 if (adapter->vf_data[vf].pf_vlan)
5076                         retval = -1;
5077                 else
5078                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5079                 break;
5080         default:
5081                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5082                 retval = -1;
5083                 break;
5084         }
5085
5086         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5087 out:
5088         /* notify the VF of the results of what it sent us */
5089         if (retval)
5090                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5091         else
5092                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5093
5094         igb_write_mbx(hw, msgbuf, 1, vf);
5095 }
5096
5097 static void igb_msg_task(struct igb_adapter *adapter)
5098 {
5099         struct e1000_hw *hw = &adapter->hw;
5100         u32 vf;
5101
5102         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5103                 /* process any reset requests */
5104                 if (!igb_check_for_rst(hw, vf))
5105                         igb_vf_reset_event(adapter, vf);
5106
5107                 /* process any messages pending */
5108                 if (!igb_check_for_msg(hw, vf))
5109                         igb_rcv_msg_from_vf(adapter, vf);
5110
5111                 /* process any acks */
5112                 if (!igb_check_for_ack(hw, vf))
5113                         igb_rcv_ack_from_vf(adapter, vf);
5114         }
5115 }
5116
5117 /**
5118  *  igb_set_uta - Set unicast filter table address
5119  *  @adapter: board private structure
5120  *
5121  *  The unicast table address is a register array of 32-bit registers.
5122  *  The table is meant to be used in a way similar to how the MTA is used
5123  *  however due to certain limitations in the hardware it is necessary to
5124  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
5125  *  enable bit to allow vlan tag stripping when promiscous mode is enabled
5126  **/
5127 static void igb_set_uta(struct igb_adapter *adapter)
5128 {
5129         struct e1000_hw *hw = &adapter->hw;
5130         int i;
5131
5132         /* The UTA table only exists on 82576 hardware and newer */
5133         if (hw->mac.type < e1000_82576)
5134                 return;
5135
5136         /* we only need to do this if VMDq is enabled */
5137         if (!adapter->vfs_allocated_count)
5138                 return;
5139
5140         for (i = 0; i < hw->mac.uta_reg_count; i++)
5141                 array_wr32(E1000_UTA, i, ~0);
5142 }
5143
5144 /**
5145  * igb_intr_msi - Interrupt Handler
5146  * @irq: interrupt number
5147  * @data: pointer to a network interface device structure
5148  **/
5149 static irqreturn_t igb_intr_msi(int irq, void *data)
5150 {
5151         struct igb_adapter *adapter = data;
5152         struct igb_q_vector *q_vector = adapter->q_vector[0];
5153         struct e1000_hw *hw = &adapter->hw;
5154         /* read ICR disables interrupts using IAM */
5155         u32 icr = rd32(E1000_ICR);
5156
5157         igb_write_itr(q_vector);
5158
5159         if (icr & E1000_ICR_DRSTA)
5160                 schedule_work(&adapter->reset_task);
5161
5162         if (icr & E1000_ICR_DOUTSYNC) {
5163                 /* HW is reporting DMA is out of sync */
5164                 adapter->stats.doosync++;
5165         }
5166
5167         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5168                 hw->mac.get_link_status = 1;
5169                 if (!test_bit(__IGB_DOWN, &adapter->state))
5170                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5171         }
5172
5173         napi_schedule(&q_vector->napi);
5174
5175         return IRQ_HANDLED;
5176 }
5177
5178 /**
5179  * igb_intr - Legacy Interrupt Handler
5180  * @irq: interrupt number
5181  * @data: pointer to a network interface device structure
5182  **/
5183 static irqreturn_t igb_intr(int irq, void *data)
5184 {
5185         struct igb_adapter *adapter = data;
5186         struct igb_q_vector *q_vector = adapter->q_vector[0];
5187         struct e1000_hw *hw = &adapter->hw;
5188         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5189          * need for the IMC write */
5190         u32 icr = rd32(E1000_ICR);
5191         if (!icr)
5192                 return IRQ_NONE;  /* Not our interrupt */
5193
5194         igb_write_itr(q_vector);
5195
5196         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5197          * not set, then the adapter didn't send an interrupt */
5198         if (!(icr & E1000_ICR_INT_ASSERTED))
5199                 return IRQ_NONE;
5200
5201         if (icr & E1000_ICR_DRSTA)
5202                 schedule_work(&adapter->reset_task);
5203
5204         if (icr & E1000_ICR_DOUTSYNC) {
5205                 /* HW is reporting DMA is out of sync */
5206                 adapter->stats.doosync++;
5207         }
5208
5209         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5210                 hw->mac.get_link_status = 1;
5211                 /* guard against interrupt when we're going down */
5212                 if (!test_bit(__IGB_DOWN, &adapter->state))
5213                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5214         }
5215
5216         napi_schedule(&q_vector->napi);
5217
5218         return IRQ_HANDLED;
5219 }
5220
5221 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5222 {
5223         struct igb_adapter *adapter = q_vector->adapter;
5224         struct e1000_hw *hw = &adapter->hw;
5225
5226         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5227             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5228                 if (!adapter->msix_entries)
5229                         igb_set_itr(adapter);
5230                 else
5231                         igb_update_ring_itr(q_vector);
5232         }
5233
5234         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5235                 if (adapter->msix_entries)
5236                         wr32(E1000_EIMS, q_vector->eims_value);
5237                 else
5238                         igb_irq_enable(adapter);
5239         }
5240 }
5241
5242 /**
5243  * igb_poll - NAPI Rx polling callback
5244  * @napi: napi polling structure
5245  * @budget: count of how many packets we should handle
5246  **/
5247 static int igb_poll(struct napi_struct *napi, int budget)
5248 {
5249         struct igb_q_vector *q_vector = container_of(napi,
5250                                                      struct igb_q_vector,
5251                                                      napi);
5252         int tx_clean_complete = 1, work_done = 0;
5253
5254 #ifdef CONFIG_IGB_DCA
5255         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5256                 igb_update_dca(q_vector);
5257 #endif
5258         if (q_vector->tx_ring)
5259                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5260
5261         if (q_vector->rx_ring)
5262                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5263
5264         if (!tx_clean_complete)
5265                 work_done = budget;
5266
5267         /* If not enough Rx work done, exit the polling mode */
5268         if (work_done < budget) {
5269                 napi_complete(napi);
5270                 igb_ring_irq_enable(q_vector);
5271         }
5272
5273         return work_done;
5274 }
5275
5276 /**
5277  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5278  * @adapter: board private structure
5279  * @shhwtstamps: timestamp structure to update
5280  * @regval: unsigned 64bit system time value.
5281  *
5282  * We need to convert the system time value stored in the RX/TXSTMP registers
5283  * into a hwtstamp which can be used by the upper level timestamping functions
5284  */
5285 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5286                                    struct skb_shared_hwtstamps *shhwtstamps,
5287                                    u64 regval)
5288 {
5289         u64 ns;
5290
5291         /*
5292          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
5293          * 24 to match clock shift we setup earlier.
5294          */
5295         if (adapter->hw.mac.type == e1000_82580)
5296                 regval <<= IGB_82580_TSYNC_SHIFT;
5297
5298         ns = timecounter_cyc2time(&adapter->clock, regval);
5299         timecompare_update(&adapter->compare, ns);
5300         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5301         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5302         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5303 }
5304
5305 /**
5306  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5307  * @q_vector: pointer to q_vector containing needed info
5308  * @buffer: pointer to igb_buffer structure
5309  *
5310  * If we were asked to do hardware stamping and such a time stamp is
5311  * available, then it must have been for this skb here because we only
5312  * allow only one such packet into the queue.
5313  */
5314 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5315 {
5316         struct igb_adapter *adapter = q_vector->adapter;
5317         struct e1000_hw *hw = &adapter->hw;
5318         struct skb_shared_hwtstamps shhwtstamps;
5319         u64 regval;
5320
5321         /* if skb does not support hw timestamp or TX stamp not valid exit */
5322         if (likely(!buffer_info->shtx.hardware) ||
5323             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5324                 return;
5325
5326         regval = rd32(E1000_TXSTMPL);
5327         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5328
5329         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5330         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5331 }
5332
5333 /**
5334  * igb_clean_tx_irq - Reclaim resources after transmit completes
5335  * @q_vector: pointer to q_vector containing needed info
5336  * returns true if ring is completely cleaned
5337  **/
5338 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5339 {
5340         struct igb_adapter *adapter = q_vector->adapter;
5341         struct igb_ring *tx_ring = q_vector->tx_ring;
5342         struct net_device *netdev = tx_ring->netdev;
5343         struct e1000_hw *hw = &adapter->hw;
5344         struct igb_buffer *buffer_info;
5345         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5346         unsigned int total_bytes = 0, total_packets = 0;
5347         unsigned int i, eop, count = 0;
5348         bool cleaned = false;
5349
5350         i = tx_ring->next_to_clean;
5351         eop = tx_ring->buffer_info[i].next_to_watch;
5352         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5353
5354         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5355                (count < tx_ring->count)) {
5356                 for (cleaned = false; !cleaned; count++) {
5357                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5358                         buffer_info = &tx_ring->buffer_info[i];
5359                         cleaned = (i == eop);
5360
5361                         if (buffer_info->skb) {
5362                                 total_bytes += buffer_info->bytecount;
5363                                 /* gso_segs is currently only valid for tcp */
5364                                 total_packets += buffer_info->gso_segs;
5365                                 igb_tx_hwtstamp(q_vector, buffer_info);
5366                         }
5367
5368                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5369                         tx_desc->wb.status = 0;
5370
5371                         i++;
5372                         if (i == tx_ring->count)
5373                                 i = 0;
5374                 }
5375                 eop = tx_ring->buffer_info[i].next_to_watch;
5376                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5377         }
5378
5379         tx_ring->next_to_clean = i;
5380
5381         if (unlikely(count &&
5382                      netif_carrier_ok(netdev) &&
5383                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5384                 /* Make sure that anybody stopping the queue after this
5385                  * sees the new next_to_clean.
5386                  */
5387                 smp_mb();
5388                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5389                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5390                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5391                         tx_ring->tx_stats.restart_queue++;
5392                 }
5393         }
5394
5395         if (tx_ring->detect_tx_hung) {
5396                 /* Detect a transmit hang in hardware, this serializes the
5397                  * check with the clearing of time_stamp and movement of i */
5398                 tx_ring->detect_tx_hung = false;
5399                 if (tx_ring->buffer_info[i].time_stamp &&
5400                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5401                                (adapter->tx_timeout_factor * HZ)) &&
5402                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5403
5404                         /* detected Tx unit hang */
5405                         dev_err(tx_ring->dev,
5406                                 "Detected Tx Unit Hang\n"
5407                                 "  Tx Queue             <%d>\n"
5408                                 "  TDH                  <%x>\n"
5409                                 "  TDT                  <%x>\n"
5410                                 "  next_to_use          <%x>\n"
5411                                 "  next_to_clean        <%x>\n"
5412                                 "buffer_info[next_to_clean]\n"
5413                                 "  time_stamp           <%lx>\n"
5414                                 "  next_to_watch        <%x>\n"
5415                                 "  jiffies              <%lx>\n"
5416                                 "  desc.status          <%x>\n",
5417                                 tx_ring->queue_index,
5418                                 readl(tx_ring->head),
5419                                 readl(tx_ring->tail),
5420                                 tx_ring->next_to_use,
5421                                 tx_ring->next_to_clean,
5422                                 tx_ring->buffer_info[eop].time_stamp,
5423                                 eop,
5424                                 jiffies,
5425                                 eop_desc->wb.status);
5426                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5427                 }
5428         }
5429         tx_ring->total_bytes += total_bytes;
5430         tx_ring->total_packets += total_packets;
5431         tx_ring->tx_stats.bytes += total_bytes;
5432         tx_ring->tx_stats.packets += total_packets;
5433         return (count < tx_ring->count);
5434 }
5435
5436 /**
5437  * igb_receive_skb - helper function to handle rx indications
5438  * @q_vector: structure containing interrupt and ring information
5439  * @skb: packet to send up
5440  * @vlan_tag: vlan tag for packet
5441  **/
5442 static void igb_receive_skb(struct igb_q_vector *q_vector,
5443                             struct sk_buff *skb,
5444                             u16 vlan_tag)
5445 {
5446         struct igb_adapter *adapter = q_vector->adapter;
5447
5448         if (vlan_tag && adapter->vlgrp)
5449                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5450                                  vlan_tag, skb);
5451         else
5452                 napi_gro_receive(&q_vector->napi, skb);
5453 }
5454
5455 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5456                                        u32 status_err, struct sk_buff *skb)
5457 {
5458         skb->ip_summed = CHECKSUM_NONE;
5459
5460         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5461         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5462              (status_err & E1000_RXD_STAT_IXSM))
5463                 return;
5464
5465         /* TCP/UDP checksum error bit is set */
5466         if (status_err &
5467             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5468                 /*
5469                  * work around errata with sctp packets where the TCPE aka
5470                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5471                  * packets, (aka let the stack check the crc32c)
5472                  */
5473                 if ((skb->len == 60) &&
5474                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5475                         ring->rx_stats.csum_err++;
5476
5477                 /* let the stack verify checksum errors */
5478                 return;
5479         }
5480         /* It must be a TCP or UDP packet with a valid checksum */
5481         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5482                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5483
5484         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5485 }
5486
5487 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5488                                    struct sk_buff *skb)
5489 {
5490         struct igb_adapter *adapter = q_vector->adapter;
5491         struct e1000_hw *hw = &adapter->hw;
5492         u64 regval;
5493
5494         /*
5495          * If this bit is set, then the RX registers contain the time stamp. No
5496          * other packet will be time stamped until we read these registers, so
5497          * read the registers to make them available again. Because only one
5498          * packet can be time stamped at a time, we know that the register
5499          * values must belong to this one here and therefore we don't need to
5500          * compare any of the additional attributes stored for it.
5501          *
5502          * If nothing went wrong, then it should have a skb_shared_tx that we
5503          * can turn into a skb_shared_hwtstamps.
5504          */
5505         if (staterr & E1000_RXDADV_STAT_TSIP) {
5506                 u32 *stamp = (u32 *)skb->data;
5507                 regval = le32_to_cpu(*(stamp + 2));
5508                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5509                 skb_pull(skb, IGB_TS_HDR_LEN);
5510         } else {
5511                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5512                         return;
5513
5514                 regval = rd32(E1000_RXSTMPL);
5515                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5516         }
5517
5518         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5519 }
5520 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5521                                union e1000_adv_rx_desc *rx_desc)
5522 {
5523         /* HW will not DMA in data larger than the given buffer, even if it
5524          * parses the (NFS, of course) header to be larger.  In that case, it
5525          * fills the header buffer and spills the rest into the page.
5526          */
5527         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5528                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5529         if (hlen > rx_ring->rx_buffer_len)
5530                 hlen = rx_ring->rx_buffer_len;
5531         return hlen;
5532 }
5533
5534 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5535                                  int *work_done, int budget)
5536 {
5537         struct igb_ring *rx_ring = q_vector->rx_ring;
5538         struct net_device *netdev = rx_ring->netdev;
5539         struct device *dev = rx_ring->dev;
5540         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5541         struct igb_buffer *buffer_info , *next_buffer;
5542         struct sk_buff *skb;
5543         bool cleaned = false;
5544         int cleaned_count = 0;
5545         int current_node = numa_node_id();
5546         unsigned int total_bytes = 0, total_packets = 0;
5547         unsigned int i;
5548         u32 staterr;
5549         u16 length;
5550         u16 vlan_tag;
5551
5552         i = rx_ring->next_to_clean;
5553         buffer_info = &rx_ring->buffer_info[i];
5554         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5555         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5556
5557         while (staterr & E1000_RXD_STAT_DD) {
5558                 if (*work_done >= budget)
5559                         break;
5560                 (*work_done)++;
5561
5562                 skb = buffer_info->skb;
5563                 prefetch(skb->data - NET_IP_ALIGN);
5564                 buffer_info->skb = NULL;
5565
5566                 i++;
5567                 if (i == rx_ring->count)
5568                         i = 0;
5569
5570                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5571                 prefetch(next_rxd);
5572                 next_buffer = &rx_ring->buffer_info[i];
5573
5574                 length = le16_to_cpu(rx_desc->wb.upper.length);
5575                 cleaned = true;
5576                 cleaned_count++;
5577
5578                 if (buffer_info->dma) {
5579                         dma_unmap_single(dev, buffer_info->dma,
5580                                          rx_ring->rx_buffer_len,
5581                                          DMA_FROM_DEVICE);
5582                         buffer_info->dma = 0;
5583                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5584                                 skb_put(skb, length);
5585                                 goto send_up;
5586                         }
5587                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5588                 }
5589
5590                 if (length) {
5591                         dma_unmap_page(dev, buffer_info->page_dma,
5592                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5593                         buffer_info->page_dma = 0;
5594
5595                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5596                                                 buffer_info->page,
5597                                                 buffer_info->page_offset,
5598                                                 length);
5599
5600                         if ((page_count(buffer_info->page) != 1) ||
5601                             (page_to_nid(buffer_info->page) != current_node))
5602                                 buffer_info->page = NULL;
5603                         else
5604                                 get_page(buffer_info->page);
5605
5606                         skb->len += length;
5607                         skb->data_len += length;
5608                         skb->truesize += length;
5609                 }
5610
5611                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5612                         buffer_info->skb = next_buffer->skb;
5613                         buffer_info->dma = next_buffer->dma;
5614                         next_buffer->skb = skb;
5615                         next_buffer->dma = 0;
5616                         goto next_desc;
5617                 }
5618 send_up:
5619                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5620                         dev_kfree_skb_irq(skb);
5621                         goto next_desc;
5622                 }
5623
5624                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5625                         igb_rx_hwtstamp(q_vector, staterr, skb);
5626                 total_bytes += skb->len;
5627                 total_packets++;
5628
5629                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5630
5631                 skb->protocol = eth_type_trans(skb, netdev);
5632                 skb_record_rx_queue(skb, rx_ring->queue_index);
5633
5634                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5635                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5636
5637                 igb_receive_skb(q_vector, skb, vlan_tag);
5638
5639 next_desc:
5640                 rx_desc->wb.upper.status_error = 0;
5641
5642                 /* return some buffers to hardware, one at a time is too slow */
5643                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5644                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5645                         cleaned_count = 0;
5646                 }
5647
5648                 /* use prefetched values */
5649                 rx_desc = next_rxd;
5650                 buffer_info = next_buffer;
5651                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5652         }
5653
5654         rx_ring->next_to_clean = i;
5655         cleaned_count = igb_desc_unused(rx_ring);
5656
5657         if (cleaned_count)
5658                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5659
5660         rx_ring->total_packets += total_packets;
5661         rx_ring->total_bytes += total_bytes;
5662         rx_ring->rx_stats.packets += total_packets;
5663         rx_ring->rx_stats.bytes += total_bytes;
5664         return cleaned;
5665 }
5666
5667 /**
5668  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5669  * @adapter: address of board private structure
5670  **/
5671 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5672 {
5673         struct net_device *netdev = rx_ring->netdev;
5674         union e1000_adv_rx_desc *rx_desc;
5675         struct igb_buffer *buffer_info;
5676         struct sk_buff *skb;
5677         unsigned int i;
5678         int bufsz;
5679
5680         i = rx_ring->next_to_use;
5681         buffer_info = &rx_ring->buffer_info[i];
5682
5683         bufsz = rx_ring->rx_buffer_len;
5684
5685         while (cleaned_count--) {
5686                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5687
5688                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5689                         if (!buffer_info->page) {
5690                                 buffer_info->page = netdev_alloc_page(netdev);
5691                                 if (!buffer_info->page) {
5692                                         rx_ring->rx_stats.alloc_failed++;
5693                                         goto no_buffers;
5694                                 }
5695                                 buffer_info->page_offset = 0;
5696                         } else {
5697                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5698                         }
5699                         buffer_info->page_dma =
5700                                 dma_map_page(rx_ring->dev, buffer_info->page,
5701                                              buffer_info->page_offset,
5702                                              PAGE_SIZE / 2,
5703                                              DMA_FROM_DEVICE);
5704                         if (dma_mapping_error(rx_ring->dev,
5705                                               buffer_info->page_dma)) {
5706                                 buffer_info->page_dma = 0;
5707                                 rx_ring->rx_stats.alloc_failed++;
5708                                 goto no_buffers;
5709                         }
5710                 }
5711
5712                 skb = buffer_info->skb;
5713                 if (!skb) {
5714                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5715                         if (!skb) {
5716                                 rx_ring->rx_stats.alloc_failed++;
5717                                 goto no_buffers;
5718                         }
5719
5720                         buffer_info->skb = skb;
5721                 }
5722                 if (!buffer_info->dma) {
5723                         buffer_info->dma = dma_map_single(rx_ring->dev,
5724                                                           skb->data,
5725                                                           bufsz,
5726                                                           DMA_FROM_DEVICE);
5727                         if (dma_mapping_error(rx_ring->dev,
5728                                               buffer_info->dma)) {
5729                                 buffer_info->dma = 0;
5730                                 rx_ring->rx_stats.alloc_failed++;
5731                                 goto no_buffers;
5732                         }
5733                 }
5734                 /* Refresh the desc even if buffer_addrs didn't change because
5735                  * each write-back erases this info. */
5736                 if (bufsz < IGB_RXBUFFER_1024) {
5737                         rx_desc->read.pkt_addr =
5738                              cpu_to_le64(buffer_info->page_dma);
5739                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5740                 } else {
5741                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5742                         rx_desc->read.hdr_addr = 0;
5743                 }
5744
5745                 i++;
5746                 if (i == rx_ring->count)
5747                         i = 0;
5748                 buffer_info = &rx_ring->buffer_info[i];
5749         }
5750
5751 no_buffers:
5752         if (rx_ring->next_to_use != i) {
5753                 rx_ring->next_to_use = i;
5754                 if (i == 0)
5755                         i = (rx_ring->count - 1);
5756                 else
5757                         i--;
5758
5759                 /* Force memory writes to complete before letting h/w
5760                  * know there are new descriptors to fetch.  (Only
5761                  * applicable for weak-ordered memory model archs,
5762                  * such as IA-64). */
5763                 wmb();
5764                 writel(i, rx_ring->tail);
5765         }
5766 }
5767
5768 /**
5769  * igb_mii_ioctl -
5770  * @netdev:
5771  * @ifreq:
5772  * @cmd:
5773  **/
5774 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5775 {
5776         struct igb_adapter *adapter = netdev_priv(netdev);
5777         struct mii_ioctl_data *data = if_mii(ifr);
5778
5779         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5780                 return -EOPNOTSUPP;
5781
5782         switch (cmd) {
5783         case SIOCGMIIPHY:
5784                 data->phy_id = adapter->hw.phy.addr;
5785                 break;
5786         case SIOCGMIIREG:
5787                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5788                                      &data->val_out))
5789                         return -EIO;
5790                 break;
5791         case SIOCSMIIREG:
5792         default:
5793                 return -EOPNOTSUPP;
5794         }
5795         return 0;
5796 }
5797
5798 /**
5799  * igb_hwtstamp_ioctl - control hardware time stamping
5800  * @netdev:
5801  * @ifreq:
5802  * @cmd:
5803  *
5804  * Outgoing time stamping can be enabled and disabled. Play nice and
5805  * disable it when requested, although it shouldn't case any overhead
5806  * when no packet needs it. At most one packet in the queue may be
5807  * marked for time stamping, otherwise it would be impossible to tell
5808  * for sure to which packet the hardware time stamp belongs.
5809  *
5810  * Incoming time stamping has to be configured via the hardware
5811  * filters. Not all combinations are supported, in particular event
5812  * type has to be specified. Matching the kind of event packet is
5813  * not supported, with the exception of "all V2 events regardless of
5814  * level 2 or 4".
5815  *
5816  **/
5817 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5818                               struct ifreq *ifr, int cmd)
5819 {
5820         struct igb_adapter *adapter = netdev_priv(netdev);
5821         struct e1000_hw *hw = &adapter->hw;
5822         struct hwtstamp_config config;
5823         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5824         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5825         u32 tsync_rx_cfg = 0;
5826         bool is_l4 = false;
5827         bool is_l2 = false;
5828         u32 regval;
5829
5830         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5831                 return -EFAULT;
5832
5833         /* reserved for future extensions */
5834         if (config.flags)
5835                 return -EINVAL;
5836
5837         switch (config.tx_type) {
5838         case HWTSTAMP_TX_OFF:
5839                 tsync_tx_ctl = 0;
5840         case HWTSTAMP_TX_ON:
5841                 break;
5842         default:
5843                 return -ERANGE;
5844         }
5845
5846         switch (config.rx_filter) {
5847         case HWTSTAMP_FILTER_NONE:
5848                 tsync_rx_ctl = 0;
5849                 break;
5850         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5851         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5852         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5853         case HWTSTAMP_FILTER_ALL:
5854                 /*
5855                  * register TSYNCRXCFG must be set, therefore it is not
5856                  * possible to time stamp both Sync and Delay_Req messages
5857                  * => fall back to time stamping all packets
5858                  */
5859                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5860                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5861                 break;
5862         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5863                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5864                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5865                 is_l4 = true;
5866                 break;
5867         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5868                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5869                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5870                 is_l4 = true;
5871                 break;
5872         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5873         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5874                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5875                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5876                 is_l2 = true;
5877                 is_l4 = true;
5878                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5879                 break;
5880         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5881         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5882                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5883                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5884                 is_l2 = true;
5885                 is_l4 = true;
5886                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5887                 break;
5888         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5889         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5890         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5891                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5892                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5893                 is_l2 = true;
5894                 break;
5895         default:
5896                 return -ERANGE;
5897         }
5898
5899         if (hw->mac.type == e1000_82575) {
5900                 if (tsync_rx_ctl | tsync_tx_ctl)
5901                         return -EINVAL;
5902                 return 0;
5903         }
5904
5905         /*
5906          * Per-packet timestamping only works if all packets are
5907          * timestamped, so enable timestamping in all packets as
5908          * long as one rx filter was configured.
5909          */
5910         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5911                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5912                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5913         }
5914
5915         /* enable/disable TX */
5916         regval = rd32(E1000_TSYNCTXCTL);
5917         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5918         regval |= tsync_tx_ctl;
5919         wr32(E1000_TSYNCTXCTL, regval);
5920
5921         /* enable/disable RX */
5922         regval = rd32(E1000_TSYNCRXCTL);
5923         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5924         regval |= tsync_rx_ctl;
5925         wr32(E1000_TSYNCRXCTL, regval);
5926
5927         /* define which PTP packets are time stamped */
5928         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5929
5930         /* define ethertype filter for timestamped packets */
5931         if (is_l2)
5932                 wr32(E1000_ETQF(3),
5933                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5934                                  E1000_ETQF_1588 | /* enable timestamping */
5935                                  ETH_P_1588));     /* 1588 eth protocol type */
5936         else
5937                 wr32(E1000_ETQF(3), 0);
5938
5939 #define PTP_PORT 319
5940         /* L4 Queue Filter[3]: filter by destination port and protocol */
5941         if (is_l4) {
5942                 u32 ftqf = (IPPROTO_UDP /* UDP */
5943                         | E1000_FTQF_VF_BP /* VF not compared */
5944                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5945                         | E1000_FTQF_MASK); /* mask all inputs */
5946                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5947
5948                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5949                 wr32(E1000_IMIREXT(3),
5950                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5951                 if (hw->mac.type == e1000_82576) {
5952                         /* enable source port check */
5953                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5954                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5955                 }
5956                 wr32(E1000_FTQF(3), ftqf);
5957         } else {
5958                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5959         }
5960         wrfl();
5961
5962         adapter->hwtstamp_config = config;
5963
5964         /* clear TX/RX time stamp registers, just to be sure */
5965         regval = rd32(E1000_TXSTMPH);
5966         regval = rd32(E1000_RXSTMPH);
5967
5968         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5969                 -EFAULT : 0;
5970 }
5971
5972 /**
5973  * igb_ioctl -
5974  * @netdev:
5975  * @ifreq:
5976  * @cmd:
5977  **/
5978 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5979 {
5980         switch (cmd) {
5981         case SIOCGMIIPHY:
5982         case SIOCGMIIREG:
5983         case SIOCSMIIREG:
5984                 return igb_mii_ioctl(netdev, ifr, cmd);
5985         case SIOCSHWTSTAMP:
5986                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5987         default:
5988                 return -EOPNOTSUPP;
5989         }
5990 }
5991
5992 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5993 {
5994         struct igb_adapter *adapter = hw->back;
5995         u16 cap_offset;
5996
5997         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5998         if (!cap_offset)
5999                 return -E1000_ERR_CONFIG;
6000
6001         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6002
6003         return 0;
6004 }
6005
6006 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6007 {
6008         struct igb_adapter *adapter = hw->back;
6009         u16 cap_offset;
6010
6011         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6012         if (!cap_offset)
6013                 return -E1000_ERR_CONFIG;
6014
6015         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6016
6017         return 0;
6018 }
6019
6020 static void igb_vlan_rx_register(struct net_device *netdev,
6021                                  struct vlan_group *grp)
6022 {
6023         struct igb_adapter *adapter = netdev_priv(netdev);
6024         struct e1000_hw *hw = &adapter->hw;
6025         u32 ctrl, rctl;
6026
6027         igb_irq_disable(adapter);
6028         adapter->vlgrp = grp;
6029
6030         if (grp) {
6031                 /* enable VLAN tag insert/strip */
6032                 ctrl = rd32(E1000_CTRL);
6033                 ctrl |= E1000_CTRL_VME;
6034                 wr32(E1000_CTRL, ctrl);
6035
6036                 /* Disable CFI check */
6037                 rctl = rd32(E1000_RCTL);
6038                 rctl &= ~E1000_RCTL_CFIEN;
6039                 wr32(E1000_RCTL, rctl);
6040         } else {
6041                 /* disable VLAN tag insert/strip */
6042                 ctrl = rd32(E1000_CTRL);
6043                 ctrl &= ~E1000_CTRL_VME;
6044                 wr32(E1000_CTRL, ctrl);
6045         }
6046
6047         igb_rlpml_set(adapter);
6048
6049         if (!test_bit(__IGB_DOWN, &adapter->state))
6050                 igb_irq_enable(adapter);
6051 }
6052
6053 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6054 {
6055         struct igb_adapter *adapter = netdev_priv(netdev);
6056         struct e1000_hw *hw = &adapter->hw;
6057         int pf_id = adapter->vfs_allocated_count;
6058
6059         /* attempt to add filter to vlvf array */
6060         igb_vlvf_set(adapter, vid, true, pf_id);
6061
6062         /* add the filter since PF can receive vlans w/o entry in vlvf */
6063         igb_vfta_set(hw, vid, true);
6064 }
6065
6066 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6067 {
6068         struct igb_adapter *adapter = netdev_priv(netdev);
6069         struct e1000_hw *hw = &adapter->hw;
6070         int pf_id = adapter->vfs_allocated_count;
6071         s32 err;
6072
6073         igb_irq_disable(adapter);
6074         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6075
6076         if (!test_bit(__IGB_DOWN, &adapter->state))
6077                 igb_irq_enable(adapter);
6078
6079         /* remove vlan from VLVF table array */
6080         err = igb_vlvf_set(adapter, vid, false, pf_id);
6081
6082         /* if vid was not present in VLVF just remove it from table */
6083         if (err)
6084                 igb_vfta_set(hw, vid, false);
6085 }
6086
6087 static void igb_restore_vlan(struct igb_adapter *adapter)
6088 {
6089         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6090
6091         if (adapter->vlgrp) {
6092                 u16 vid;
6093                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
6094                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6095                                 continue;
6096                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6097                 }
6098         }
6099 }
6100
6101 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6102 {
6103         struct pci_dev *pdev = adapter->pdev;
6104         struct e1000_mac_info *mac = &adapter->hw.mac;
6105
6106         mac->autoneg = 0;
6107
6108         switch (spddplx) {
6109         case SPEED_10 + DUPLEX_HALF:
6110                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6111                 break;
6112         case SPEED_10 + DUPLEX_FULL:
6113                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6114                 break;
6115         case SPEED_100 + DUPLEX_HALF:
6116                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6117                 break;
6118         case SPEED_100 + DUPLEX_FULL:
6119                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6120                 break;
6121         case SPEED_1000 + DUPLEX_FULL:
6122                 mac->autoneg = 1;
6123                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6124                 break;
6125         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6126         default:
6127                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6128                 return -EINVAL;
6129         }
6130         return 0;
6131 }
6132
6133 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6134 {
6135         struct net_device *netdev = pci_get_drvdata(pdev);
6136         struct igb_adapter *adapter = netdev_priv(netdev);
6137         struct e1000_hw *hw = &adapter->hw;
6138         u32 ctrl, rctl, status;
6139         u32 wufc = adapter->wol;
6140 #ifdef CONFIG_PM
6141         int retval = 0;
6142 #endif
6143
6144         netif_device_detach(netdev);
6145
6146         if (netif_running(netdev))
6147                 igb_close(netdev);
6148
6149         igb_clear_interrupt_scheme(adapter);
6150
6151 #ifdef CONFIG_PM
6152         retval = pci_save_state(pdev);
6153         if (retval)
6154                 return retval;
6155 #endif
6156
6157         status = rd32(E1000_STATUS);
6158         if (status & E1000_STATUS_LU)
6159                 wufc &= ~E1000_WUFC_LNKC;
6160
6161         if (wufc) {
6162                 igb_setup_rctl(adapter);
6163                 igb_set_rx_mode(netdev);
6164
6165                 /* turn on all-multi mode if wake on multicast is enabled */
6166                 if (wufc & E1000_WUFC_MC) {
6167                         rctl = rd32(E1000_RCTL);
6168                         rctl |= E1000_RCTL_MPE;
6169                         wr32(E1000_RCTL, rctl);
6170                 }
6171
6172                 ctrl = rd32(E1000_CTRL);
6173                 /* advertise wake from D3Cold */
6174                 #define E1000_CTRL_ADVD3WUC 0x00100000
6175                 /* phy power management enable */
6176                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6177                 ctrl |= E1000_CTRL_ADVD3WUC;
6178                 wr32(E1000_CTRL, ctrl);
6179
6180                 /* Allow time for pending master requests to run */
6181                 igb_disable_pcie_master(hw);
6182
6183                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6184                 wr32(E1000_WUFC, wufc);
6185         } else {
6186                 wr32(E1000_WUC, 0);
6187                 wr32(E1000_WUFC, 0);
6188         }
6189
6190         *enable_wake = wufc || adapter->en_mng_pt;
6191         if (!*enable_wake)
6192                 igb_power_down_link(adapter);
6193         else
6194                 igb_power_up_link(adapter);
6195
6196         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6197          * would have already happened in close and is redundant. */
6198         igb_release_hw_control(adapter);
6199
6200         pci_disable_device(pdev);
6201
6202         return 0;
6203 }
6204
6205 #ifdef CONFIG_PM
6206 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6207 {
6208         int retval;
6209         bool wake;
6210
6211         retval = __igb_shutdown(pdev, &wake);
6212         if (retval)
6213                 return retval;
6214
6215         if (wake) {
6216                 pci_prepare_to_sleep(pdev);
6217         } else {
6218                 pci_wake_from_d3(pdev, false);
6219                 pci_set_power_state(pdev, PCI_D3hot);
6220         }
6221
6222         return 0;
6223 }
6224
6225 static int igb_resume(struct pci_dev *pdev)
6226 {
6227         struct net_device *netdev = pci_get_drvdata(pdev);
6228         struct igb_adapter *adapter = netdev_priv(netdev);
6229         struct e1000_hw *hw = &adapter->hw;
6230         u32 err;
6231
6232         pci_set_power_state(pdev, PCI_D0);
6233         pci_restore_state(pdev);
6234         pci_save_state(pdev);
6235
6236         err = pci_enable_device_mem(pdev);
6237         if (err) {
6238                 dev_err(&pdev->dev,
6239                         "igb: Cannot enable PCI device from suspend\n");
6240                 return err;
6241         }
6242         pci_set_master(pdev);
6243
6244         pci_enable_wake(pdev, PCI_D3hot, 0);
6245         pci_enable_wake(pdev, PCI_D3cold, 0);
6246
6247         if (igb_init_interrupt_scheme(adapter)) {
6248                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6249                 return -ENOMEM;
6250         }
6251
6252         igb_reset(adapter);
6253
6254         /* let the f/w know that the h/w is now under the control of the
6255          * driver. */
6256         igb_get_hw_control(adapter);
6257
6258         wr32(E1000_WUS, ~0);
6259
6260         if (netif_running(netdev)) {
6261                 err = igb_open(netdev);
6262                 if (err)
6263                         return err;
6264         }
6265
6266         netif_device_attach(netdev);
6267
6268         return 0;
6269 }
6270 #endif
6271
6272 static void igb_shutdown(struct pci_dev *pdev)
6273 {
6274         bool wake;
6275
6276         __igb_shutdown(pdev, &wake);
6277
6278         if (system_state == SYSTEM_POWER_OFF) {
6279                 pci_wake_from_d3(pdev, wake);
6280                 pci_set_power_state(pdev, PCI_D3hot);
6281         }
6282 }
6283
6284 #ifdef CONFIG_NET_POLL_CONTROLLER
6285 /*
6286  * Polling 'interrupt' - used by things like netconsole to send skbs
6287  * without having to re-enable interrupts. It's not called while
6288  * the interrupt routine is executing.
6289  */
6290 static void igb_netpoll(struct net_device *netdev)
6291 {
6292         struct igb_adapter *adapter = netdev_priv(netdev);
6293         struct e1000_hw *hw = &adapter->hw;
6294         int i;
6295
6296         if (!adapter->msix_entries) {
6297                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6298                 igb_irq_disable(adapter);
6299                 napi_schedule(&q_vector->napi);
6300                 return;
6301         }
6302
6303         for (i = 0; i < adapter->num_q_vectors; i++) {
6304                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6305                 wr32(E1000_EIMC, q_vector->eims_value);
6306                 napi_schedule(&q_vector->napi);
6307         }
6308 }
6309 #endif /* CONFIG_NET_POLL_CONTROLLER */
6310
6311 /**
6312  * igb_io_error_detected - called when PCI error is detected
6313  * @pdev: Pointer to PCI device
6314  * @state: The current pci connection state
6315  *
6316  * This function is called after a PCI bus error affecting
6317  * this device has been detected.
6318  */
6319 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6320                                               pci_channel_state_t state)
6321 {
6322         struct net_device *netdev = pci_get_drvdata(pdev);
6323         struct igb_adapter *adapter = netdev_priv(netdev);
6324
6325         netif_device_detach(netdev);
6326
6327         if (state == pci_channel_io_perm_failure)
6328                 return PCI_ERS_RESULT_DISCONNECT;
6329
6330         if (netif_running(netdev))
6331                 igb_down(adapter);
6332         pci_disable_device(pdev);
6333
6334         /* Request a slot slot reset. */
6335         return PCI_ERS_RESULT_NEED_RESET;
6336 }
6337
6338 /**
6339  * igb_io_slot_reset - called after the pci bus has been reset.
6340  * @pdev: Pointer to PCI device
6341  *
6342  * Restart the card from scratch, as if from a cold-boot. Implementation
6343  * resembles the first-half of the igb_resume routine.
6344  */
6345 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6346 {
6347         struct net_device *netdev = pci_get_drvdata(pdev);
6348         struct igb_adapter *adapter = netdev_priv(netdev);
6349         struct e1000_hw *hw = &adapter->hw;
6350         pci_ers_result_t result;
6351         int err;
6352
6353         if (pci_enable_device_mem(pdev)) {
6354                 dev_err(&pdev->dev,
6355                         "Cannot re-enable PCI device after reset.\n");
6356                 result = PCI_ERS_RESULT_DISCONNECT;
6357         } else {
6358                 pci_set_master(pdev);
6359                 pci_restore_state(pdev);
6360                 pci_save_state(pdev);
6361
6362                 pci_enable_wake(pdev, PCI_D3hot, 0);
6363                 pci_enable_wake(pdev, PCI_D3cold, 0);
6364
6365                 igb_reset(adapter);
6366                 wr32(E1000_WUS, ~0);
6367                 result = PCI_ERS_RESULT_RECOVERED;
6368         }
6369
6370         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6371         if (err) {
6372                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6373                         "failed 0x%0x\n", err);
6374                 /* non-fatal, continue */
6375         }
6376
6377         return result;
6378 }
6379
6380 /**
6381  * igb_io_resume - called when traffic can start flowing again.
6382  * @pdev: Pointer to PCI device
6383  *
6384  * This callback is called when the error recovery driver tells us that
6385  * its OK to resume normal operation. Implementation resembles the
6386  * second-half of the igb_resume routine.
6387  */
6388 static void igb_io_resume(struct pci_dev *pdev)
6389 {
6390         struct net_device *netdev = pci_get_drvdata(pdev);
6391         struct igb_adapter *adapter = netdev_priv(netdev);
6392
6393         if (netif_running(netdev)) {
6394                 if (igb_up(adapter)) {
6395                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6396                         return;
6397                 }
6398         }
6399
6400         netif_device_attach(netdev);
6401
6402         /* let the f/w know that the h/w is now under the control of the
6403          * driver. */
6404         igb_get_hw_control(adapter);
6405 }
6406
6407 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6408                              u8 qsel)
6409 {
6410         u32 rar_low, rar_high;
6411         struct e1000_hw *hw = &adapter->hw;
6412
6413         /* HW expects these in little endian so we reverse the byte order
6414          * from network order (big endian) to little endian
6415          */
6416         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6417                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6418         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6419
6420         /* Indicate to hardware the Address is Valid. */
6421         rar_high |= E1000_RAH_AV;
6422
6423         if (hw->mac.type == e1000_82575)
6424                 rar_high |= E1000_RAH_POOL_1 * qsel;
6425         else
6426                 rar_high |= E1000_RAH_POOL_1 << qsel;
6427
6428         wr32(E1000_RAL(index), rar_low);
6429         wrfl();
6430         wr32(E1000_RAH(index), rar_high);
6431         wrfl();
6432 }
6433
6434 static int igb_set_vf_mac(struct igb_adapter *adapter,
6435                           int vf, unsigned char *mac_addr)
6436 {
6437         struct e1000_hw *hw = &adapter->hw;
6438         /* VF MAC addresses start at end of receive addresses and moves
6439          * torwards the first, as a result a collision should not be possible */
6440         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6441
6442         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6443
6444         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6445
6446         return 0;
6447 }
6448
6449 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6450 {
6451         struct igb_adapter *adapter = netdev_priv(netdev);
6452         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6453                 return -EINVAL;
6454         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6455         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6456         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6457                                       " change effective.");
6458         if (test_bit(__IGB_DOWN, &adapter->state)) {
6459                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6460                          " but the PF device is not up.\n");
6461                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6462                          " attempting to use the VF device.\n");
6463         }
6464         return igb_set_vf_mac(adapter, vf, mac);
6465 }
6466
/**
 * igb_ndo_set_vf_bw - VF transmit rate hook (not implemented)
 * @netdev: PF network device (unused)
 * @vf: VF index (unused)
 * @tx_rate: requested transmit rate (unused)
 *
 * Always returns -EOPNOTSUPP.
 */
static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
{
        /* every request is rejected, whatever the arguments */
        return -EOPNOTSUPP;
}
6471
6472 static int igb_ndo_get_vf_config(struct net_device *netdev,
6473                                  int vf, struct ifla_vf_info *ivi)
6474 {
6475         struct igb_adapter *adapter = netdev_priv(netdev);
6476         if (vf >= adapter->vfs_allocated_count)
6477                 return -EINVAL;
6478         ivi->vf = vf;
6479         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6480         ivi->tx_rate = 0;
6481         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6482         ivi->qos = adapter->vf_data[vf].pf_qos;
6483         return 0;
6484 }
6485
6486 static void igb_vmm_control(struct igb_adapter *adapter)
6487 {
6488         struct e1000_hw *hw = &adapter->hw;
6489         u32 reg;
6490
6491         switch (hw->mac.type) {
6492         case e1000_82575:
6493         default:
6494                 /* replication is not supported for 82575 */
6495                 return;
6496         case e1000_82576:
6497                 /* notify HW that the MAC is adding vlan tags */
6498                 reg = rd32(E1000_DTXCTL);
6499                 reg |= E1000_DTXCTL_VLAN_ADDED;
6500                 wr32(E1000_DTXCTL, reg);
6501         case e1000_82580:
6502                 /* enable replication vlan tag stripping */
6503                 reg = rd32(E1000_RPLOLR);
6504                 reg |= E1000_RPLOLR_STRVLAN;
6505                 wr32(E1000_RPLOLR, reg);
6506         case e1000_i350:
6507                 /* none of the above registers are supported by i350 */
6508                 break;
6509         }
6510
6511         if (adapter->vfs_allocated_count) {
6512                 igb_vmdq_set_loopback_pf(hw, true);
6513                 igb_vmdq_set_replication_pf(hw, true);
6514         } else {
6515                 igb_vmdq_set_loopback_pf(hw, false);
6516                 igb_vmdq_set_replication_pf(hw, false);
6517         }
6518 }
6519
6520 /* igb_main.c */