1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c)  2018 Intel Corporation */
3
4 #include <linux/module.h>
5 #include <linux/types.h>
6 #include <linux/if_vlan.h>
7 #include <linux/aer.h>
8
9 #include "igc.h"
10 #include "igc_hw.h"
11
12 #define DRV_VERSION     "0.0.1-k"
13 #define DRV_SUMMARY     "Intel(R) 2.5G Ethernet Linux Driver"
14
15 static int debug = -1;
16
17 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
18 MODULE_DESCRIPTION(DRV_SUMMARY);
19 MODULE_LICENSE("GPL v2");
20 MODULE_VERSION(DRV_VERSION);
21 module_param(debug, int, 0);
22 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
23
24 char igc_driver_name[] = "igc";
25 char igc_driver_version[] = DRV_VERSION;
26 static const char igc_driver_string[] = DRV_SUMMARY;
27 static const char igc_copyright[] =
28         "Copyright(c) 2018 Intel Corporation.";
29
30 static const struct igc_info *igc_info_tbl[] = {
31         [board_base] = &igc_base_info,
32 };
33
34 static const struct pci_device_id igc_pci_tbl[] = {
35         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
36         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
37         /* required last entry */
38         {0, }
39 };
40
41 MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
42
43 /* forward declarations */
44 static void igc_clean_tx_ring(struct igc_ring *tx_ring);
45 static int igc_sw_init(struct igc_adapter *);
46 static void igc_configure(struct igc_adapter *adapter);
47 static void igc_power_down_link(struct igc_adapter *adapter);
48 static void igc_set_default_mac_filter(struct igc_adapter *adapter);
49 static void igc_set_rx_mode(struct net_device *netdev);
50 static void igc_write_itr(struct igc_q_vector *q_vector);
51 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector);
52 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx);
53 static void igc_set_interrupt_capability(struct igc_adapter *adapter,
54                                          bool msix);
55 static void igc_free_q_vectors(struct igc_adapter *adapter);
56 static void igc_irq_disable(struct igc_adapter *adapter);
57 static void igc_irq_enable(struct igc_adapter *adapter);
58 static void igc_configure_msix(struct igc_adapter *adapter);
59 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
60                                   struct igc_rx_buffer *bi);
61
62 enum latency_range {
63         lowest_latency = 0,
64         low_latency = 1,
65         bulk_latency = 2,
66         latency_invalid = 255
67 };
68
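/**
 * igc_reset - reset and reinitialize the hardware
 * @adapter: board private structure
 *
 * Reset the MAC and bring it back up.  If the interface is not running,
 * the PHY/serdes link is powered down, and the PHY information is
 * refreshed afterwards.
 */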
69 static void igc_reset(struct igc_adapter *adapter)
70 {
71         struct pci_dev *pdev = adapter->pdev;
72         struct igc_hw *hw = &adapter->hw;
73
74         hw->mac.ops.reset_hw(hw);
75
76         if (hw->mac.ops.init_hw(hw))
77                 dev_err(&pdev->dev, "Hardware Error\n");
78
79         if (!netif_running(adapter->netdev))
80                 igc_power_down_link(adapter);
81
82         igc_get_phy_info(hw);
83 }
84
85 /**
86  * igc_power_up_link - Power up the phy/serdes link
87  * @adapter: address of board private structure
88  */
89 static void igc_power_up_link(struct igc_adapter *adapter)
90 {
91         igc_reset_phy(&adapter->hw);
92
93         if (adapter->hw.phy.media_type == igc_media_type_copper)
94                 igc_power_up_phy_copper(&adapter->hw);
95
96         igc_setup_link(&adapter->hw);
97 }
98
99 /**
100  * igc_power_down_link - Power down the phy/serdes link
101  * @adapter: address of board private structure
102  */
103 static void igc_power_down_link(struct igc_adapter *adapter)
104 {
105         if (adapter->hw.phy.media_type == igc_media_type_copper)
106                 igc_power_down_phy_copper_base(&adapter->hw);
107 }
108
109 /**
110  * igc_release_hw_control - release control of the h/w to f/w
111  * @adapter: address of board private structure
112  *
113  * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
114  * For ASF and Pass Through versions of f/w this means that the
115  * driver is no longer loaded.
116  */
117 static void igc_release_hw_control(struct igc_adapter *adapter)
118 {
119         struct igc_hw *hw = &adapter->hw;
120         u32 ctrl_ext;
121
122         /* Let firmware take over control of h/w */
123         ctrl_ext = rd32(IGC_CTRL_EXT);
124         wr32(IGC_CTRL_EXT,
125              ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
126 }
127
128 /**
129  * igc_get_hw_control - get control of the h/w from f/w
130  * @adapter: address of board private structure
131  *
132  * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
133  * For ASF and Pass Through versions of f/w this means that
134  * the driver is loaded.
135  */
136 static void igc_get_hw_control(struct igc_adapter *adapter)
137 {
138         struct igc_hw *hw = &adapter->hw;
139         u32 ctrl_ext;
140
141         /* Let firmware know the driver has taken over */
142         ctrl_ext = rd32(IGC_CTRL_EXT);
143         wr32(IGC_CTRL_EXT,
144              ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
145 }
146
147 /**
148  * igc_free_tx_resources - Free Tx Resources per Queue
149  * @tx_ring: Tx descriptor ring for a specific queue
150  *
151  * Free all transmit software resources
152  */
153 static void igc_free_tx_resources(struct igc_ring *tx_ring)
154 {
155         igc_clean_tx_ring(tx_ring);
156
157         vfree(tx_ring->tx_buffer_info);
158         tx_ring->tx_buffer_info = NULL;
159
160         /* if not set, then don't free */
161         if (!tx_ring->desc)
162                 return;
163
164         dma_free_coherent(tx_ring->dev, tx_ring->size,
165                           tx_ring->desc, tx_ring->dma);
166
167         tx_ring->desc = NULL;
168 }
169
170 /**
171  * igc_free_all_tx_resources - Free Tx Resources for All Queues
172  * @adapter: board private structure
173  *
174  * Free all transmit software resources
175  */
176 static void igc_free_all_tx_resources(struct igc_adapter *adapter)
177 {
178         int i;
179
180         for (i = 0; i < adapter->num_tx_queues; i++)
181                 igc_free_tx_resources(adapter->tx_ring[i]);
182 }
183
184 /**
185  * igc_clean_tx_ring - Free Tx Buffers
186  * @tx_ring: ring to be cleaned
187  */
188 static void igc_clean_tx_ring(struct igc_ring *tx_ring)
189 {
190         u16 i = tx_ring->next_to_clean;
191         struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
192
193         while (i != tx_ring->next_to_use) {
194                 union igc_adv_tx_desc *eop_desc, *tx_desc;
195
196                 /* Free all the Tx ring sk_buffs */
197                 dev_kfree_skb_any(tx_buffer->skb);
198
199                 /* unmap skb header data */
200                 dma_unmap_single(tx_ring->dev,
201                                  dma_unmap_addr(tx_buffer, dma),
202                                  dma_unmap_len(tx_buffer, len),
203                                  DMA_TO_DEVICE);
204
205                 /* check for eop_desc to determine the end of the packet */
206                 eop_desc = tx_buffer->next_to_watch;
207                 tx_desc = IGC_TX_DESC(tx_ring, i);
208
209                 /* unmap remaining buffers */
210                 while (tx_desc != eop_desc) {
211                         tx_buffer++;
212                         tx_desc++;
213                         i++;
214                         if (unlikely(i == tx_ring->count)) {
215                                 i = 0;
216                                 tx_buffer = tx_ring->tx_buffer_info;
217                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
218                         }
219
220                         /* unmap any remaining paged data */
221                         if (dma_unmap_len(tx_buffer, len))
222                                 dma_unmap_page(tx_ring->dev,
223                                                dma_unmap_addr(tx_buffer, dma),
224                                                dma_unmap_len(tx_buffer, len),
225                                                DMA_TO_DEVICE);
226                 }
227
228                 /* move us one more past the eop_desc for start of next pkt */
229                 tx_buffer++;
230                 i++;
231                 if (unlikely(i == tx_ring->count)) {
232                         i = 0;
233                         tx_buffer = tx_ring->tx_buffer_info;
234                 }
235         }
236
237         /* reset BQL for queue */
238         netdev_tx_reset_queue(txring_txq(tx_ring));
239
240         /* reset next_to_use and next_to_clean */
241         tx_ring->next_to_use = 0;
242         tx_ring->next_to_clean = 0;
243 }
244
245 /**
246  * igc_clean_all_tx_rings - Free Tx Buffers for all queues
247  * @adapter: board private structure
248  */
249 static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
250 {
251         int i;
252
253         for (i = 0; i < adapter->num_tx_queues; i++)
254                 if (adapter->tx_ring[i])
255                         igc_clean_tx_ring(adapter->tx_ring[i]);
256 }
257
258 /**
259  * igc_setup_tx_resources - allocate Tx resources (Descriptors)
260  * @tx_ring: tx descriptor ring (for a specific queue) to setup
261  *
262  * Return 0 on success, negative on failure
263  */
264 static int igc_setup_tx_resources(struct igc_ring *tx_ring)
265 {
266         struct device *dev = tx_ring->dev;
267         int size = 0;
268
269         size = sizeof(struct igc_tx_buffer) * tx_ring->count;
270         tx_ring->tx_buffer_info = vzalloc(size);
271         if (!tx_ring->tx_buffer_info)
272                 goto err;
273
274         /* round up to nearest 4K */
275         tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
276         tx_ring->size = ALIGN(tx_ring->size, 4096);
277
278         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
279                                            &tx_ring->dma, GFP_KERNEL);
280
281         if (!tx_ring->desc)
282                 goto err;
283
284         tx_ring->next_to_use = 0;
285         tx_ring->next_to_clean = 0;
286
287         return 0;
288
289 err:
290         vfree(tx_ring->tx_buffer_info);
291         dev_err(dev,
292                 "Unable to allocate memory for the transmit descriptor ring\n");
293         return -ENOMEM;
294 }
295
296 /**
297  * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
298  * @adapter: board private structure
299  *
300  * Return 0 on success, negative on failure
301  */
302 static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
303 {
304         struct pci_dev *pdev = adapter->pdev;
305         int i, err = 0;
306
307         for (i = 0; i < adapter->num_tx_queues; i++) {
308                 err = igc_setup_tx_resources(adapter->tx_ring[i]);
309                 if (err) {
310                         dev_err(&pdev->dev,
311                                 "Allocation for Tx Queue %u failed\n", i);
312                         for (i--; i >= 0; i--)
313                                 igc_free_tx_resources(adapter->tx_ring[i]);
314                         break;
315                 }
316         }
317
318         return err;
319 }
320
321 /**
322  * igc_clean_rx_ring - Free Rx Buffers per Queue
323  * @rx_ring: ring to free buffers from
324  */
325 static void igc_clean_rx_ring(struct igc_ring *rx_ring)
326 {
327         u16 i = rx_ring->next_to_clean;
328
329         if (rx_ring->skb)
330                 dev_kfree_skb(rx_ring->skb);
331         rx_ring->skb = NULL;
332
333         /* Free all the Rx ring sk_buffs */
334         while (i != rx_ring->next_to_alloc) {
335                 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
336
337                 /* Invalidate cache lines that may have been written to by
338                  * device so that we avoid corrupting memory.
339                  */
340                 dma_sync_single_range_for_cpu(rx_ring->dev,
341                                               buffer_info->dma,
342                                               buffer_info->page_offset,
343                                               igc_rx_bufsz(rx_ring),
344                                               DMA_FROM_DEVICE);
345
346                 /* free resources associated with mapping */
347                 dma_unmap_page_attrs(rx_ring->dev,
348                                      buffer_info->dma,
349                                      igc_rx_pg_size(rx_ring),
350                                      DMA_FROM_DEVICE,
351                                      IGC_RX_DMA_ATTR);
352                 __page_frag_cache_drain(buffer_info->page,
353                                         buffer_info->pagecnt_bias);
354
355                 i++;
356                 if (i == rx_ring->count)
357                         i = 0;
358         }
359
360         rx_ring->next_to_alloc = 0;
361         rx_ring->next_to_clean = 0;
362         rx_ring->next_to_use = 0;
363 }
364
365 /**
366  * igc_clean_all_rx_rings - Free Rx Buffers for all queues
367  * @adapter: board private structure
368  */
369 static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
370 {
371         int i;
372
373         for (i = 0; i < adapter->num_rx_queues; i++)
374                 if (adapter->rx_ring[i])
375                         igc_clean_rx_ring(adapter->rx_ring[i]);
376 }
377
378 /**
379  * igc_free_rx_resources - Free Rx Resources
380  * @rx_ring: ring to clean the resources from
381  *
382  * Free all receive software resources
383  */
384 static void igc_free_rx_resources(struct igc_ring *rx_ring)
385 {
386         igc_clean_rx_ring(rx_ring);
387
388         vfree(rx_ring->rx_buffer_info);
389         rx_ring->rx_buffer_info = NULL;
390
391         /* if not set, then don't free */
392         if (!rx_ring->desc)
393                 return;
394
395         dma_free_coherent(rx_ring->dev, rx_ring->size,
396                           rx_ring->desc, rx_ring->dma);
397
398         rx_ring->desc = NULL;
399 }
400
401 /**
402  * igc_free_all_rx_resources - Free Rx Resources for All Queues
403  * @adapter: board private structure
404  *
405  * Free all receive software resources
406  */
407 static void igc_free_all_rx_resources(struct igc_adapter *adapter)
408 {
409         int i;
410
411         for (i = 0; i < adapter->num_rx_queues; i++)
412                 igc_free_rx_resources(adapter->rx_ring[i]);
413 }
414
415 /**
416  * igc_setup_rx_resources - allocate Rx resources (Descriptors)
417  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
418  *
419  * Returns 0 on success, negative on failure
420  */
421 static int igc_setup_rx_resources(struct igc_ring *rx_ring)
422 {
423         struct device *dev = rx_ring->dev;
424         int size, desc_len;
425
426         size = sizeof(struct igc_rx_buffer) * rx_ring->count;
427         rx_ring->rx_buffer_info = vzalloc(size);
428         if (!rx_ring->rx_buffer_info)
429                 goto err;
430
431         desc_len = sizeof(union igc_adv_rx_desc);
432
433         /* Round up to nearest 4K */
434         rx_ring->size = rx_ring->count * desc_len;
435         rx_ring->size = ALIGN(rx_ring->size, 4096);
436
437         rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
438                                            &rx_ring->dma, GFP_KERNEL);
439
440         if (!rx_ring->desc)
441                 goto err;
442
443         rx_ring->next_to_alloc = 0;
444         rx_ring->next_to_clean = 0;
445         rx_ring->next_to_use = 0;
446
447         return 0;
448
449 err:
450         vfree(rx_ring->rx_buffer_info);
451         rx_ring->rx_buffer_info = NULL;
452         dev_err(dev,
453                 "Unable to allocate memory for the receive descriptor ring\n");
454         return -ENOMEM;
455 }
456
457 /**
458  * igc_setup_all_rx_resources - wrapper to allocate Rx resources
459  *                                (Descriptors) for all queues
460  * @adapter: board private structure
461  *
462  * Return 0 on success, negative on failure
463  */
464 static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
465 {
466         struct pci_dev *pdev = adapter->pdev;
467         int i, err = 0;
468
469         for (i = 0; i < adapter->num_rx_queues; i++) {
470                 err = igc_setup_rx_resources(adapter->rx_ring[i]);
471                 if (err) {
472                         dev_err(&pdev->dev,
473                                 "Allocation for Rx Queue %u failed\n", i);
474                         for (i--; i >= 0; i--)
475                                 igc_free_rx_resources(adapter->rx_ring[i]);
476                         break;
477                 }
478         }
479
480         return err;
481 }
482
483 /**
484  * igc_configure_rx_ring - Configure a receive ring after Reset
485  * @adapter: board private structure
486  * @ring: receive ring to be configured
487  *
488  * Configure the Rx unit of the MAC after a reset.
489  */
490 static void igc_configure_rx_ring(struct igc_adapter *adapter,
491                                   struct igc_ring *ring)
492 {
493         struct igc_hw *hw = &adapter->hw;
494         union igc_adv_rx_desc *rx_desc;
495         int reg_idx = ring->reg_idx;
496         u32 srrctl = 0, rxdctl = 0;
497         u64 rdba = ring->dma;
498
499         /* disable the queue */
500         wr32(IGC_RXDCTL(reg_idx), 0);
501
502         /* Set DMA base address registers */
503         wr32(IGC_RDBAL(reg_idx),
504              rdba & 0x00000000ffffffffULL);
505         wr32(IGC_RDBAH(reg_idx), rdba >> 32);
506         wr32(IGC_RDLEN(reg_idx),
507              ring->count * sizeof(union igc_adv_rx_desc));
508
509         /* initialize head and tail */
510         ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
511         wr32(IGC_RDH(reg_idx), 0);
512         writel(0, ring->tail);
513
514                 /* reset next-to-use/clean to place SW in sync with hardware */
515         ring->next_to_clean = 0;
516         ring->next_to_use = 0;
517
518         /* set descriptor configuration */
519         srrctl = IGC_RX_HDR_LEN << IGC_SRRCTL_BSIZEHDRSIZE_SHIFT;
520         if (ring_uses_large_buffer(ring))
521                 srrctl |= IGC_RXBUFFER_3072 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
522         else
523                 srrctl |= IGC_RXBUFFER_2048 >> IGC_SRRCTL_BSIZEPKT_SHIFT;
524         srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
525
526         wr32(IGC_SRRCTL(reg_idx), srrctl);
527
528         rxdctl |= IGC_RX_PTHRESH;
529         rxdctl |= IGC_RX_HTHRESH << 8;
530         rxdctl |= IGC_RX_WTHRESH << 16;
531
532         /* initialize rx_buffer_info */
533         memset(ring->rx_buffer_info, 0,
534                sizeof(struct igc_rx_buffer) * ring->count);
535
536         /* initialize Rx descriptor 0 */
537         rx_desc = IGC_RX_DESC(ring, 0);
538         rx_desc->wb.upper.length = 0;
539
540         /* enable receive descriptor fetching */
541         rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
542
543         wr32(IGC_RXDCTL(reg_idx), rxdctl);
544 }
545
546 /**
547  * igc_configure_rx - Configure receive Unit after Reset
548  * @adapter: board private structure
549  *
550  * Configure the Rx unit of the MAC after a reset.
551  */
552 static void igc_configure_rx(struct igc_adapter *adapter)
553 {
554         int i;
555
556         /* Setup the HW Rx Head and Tail Descriptor Pointers and
557          * the Base and Length of the Rx Descriptor Ring
558          */
559         for (i = 0; i < adapter->num_rx_queues; i++)
560                 igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
561 }
562
563 /**
564  * igc_configure_tx_ring - Configure transmit ring after Reset
565  * @adapter: board private structure
566  * @ring: tx ring to configure
567  *
568  * Configure a transmit ring after a reset.
569  */
570 static void igc_configure_tx_ring(struct igc_adapter *adapter,
571                                   struct igc_ring *ring)
572 {
573         struct igc_hw *hw = &adapter->hw;
574         int reg_idx = ring->reg_idx;
575         u64 tdba = ring->dma;
576         u32 txdctl = 0;
577
578         /* disable the queue */
579         wr32(IGC_TXDCTL(reg_idx), 0);
580         wrfl();
581         mdelay(10);
582
583         wr32(IGC_TDLEN(reg_idx),
584              ring->count * sizeof(union igc_adv_tx_desc));
585         wr32(IGC_TDBAL(reg_idx),
586              tdba & 0x00000000ffffffffULL);
587         wr32(IGC_TDBAH(reg_idx), tdba >> 32);
588
589         ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
590         wr32(IGC_TDH(reg_idx), 0);
591         writel(0, ring->tail);
592
593         txdctl |= IGC_TX_PTHRESH;
594         txdctl |= IGC_TX_HTHRESH << 8;
595         txdctl |= IGC_TX_WTHRESH << 16;
596
597         txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
598         wr32(IGC_TXDCTL(reg_idx), txdctl);
599 }
600
601 /**
602  * igc_configure_tx - Configure transmit Unit after Reset
603  * @adapter: board private structure
604  *
605  * Configure the Tx unit of the MAC after a reset.
606  */
607 static void igc_configure_tx(struct igc_adapter *adapter)
608 {
609         int i;
610
611         for (i = 0; i < adapter->num_tx_queues; i++)
612                 igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
613 }
614
615 /**
616  * igc_setup_mrqc - configure the multiple receive queue control registers
617  * @adapter: Board private structure
618  */
619 static void igc_setup_mrqc(struct igc_adapter *adapter)
620 {
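        /* Multiple receive queue (RSS) configuration is not implemented yet. */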
621 }
622
623 /**
624  * igc_setup_rctl - configure the receive control registers
625  * @adapter: Board private structure
626  */
627 static void igc_setup_rctl(struct igc_adapter *adapter)
628 {
629         struct igc_hw *hw = &adapter->hw;
630         u32 rctl;
631
632         rctl = rd32(IGC_RCTL);
633
634         rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
635         rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
636
637         rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
638                 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
639
640         /* enable stripping of CRC. Newer features require
641          * that the HW strips the CRC.
642          */
643         rctl |= IGC_RCTL_SECRC;
644
645         /* disable store bad packets and clear size bits. */
646         rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
647
648         /* enable LPE to allow for reception of jumbo frames */
649         rctl |= IGC_RCTL_LPE;
650
651         /* disable queue 0 to prevent tail write w/o re-config */
652         wr32(IGC_RXDCTL(0), 0);
653
654         /* This is useful for sniffing bad packets. */
655         if (adapter->netdev->features & NETIF_F_RXALL) {
656                 /* UPE and MPE will be handled by normal PROMISC logic
657                  * in set_rx_mode
658                  */
659                 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
660                          IGC_RCTL_BAM | /* RX All Bcast Pkts */
661                          IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
662
663                 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
664                           IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
665         }
666
667         wr32(IGC_RCTL, rctl);
668 }
669
670 /**
671  * igc_setup_tctl - configure the transmit control registers
672  * @adapter: Board private structure
673  */
674 static void igc_setup_tctl(struct igc_adapter *adapter)
675 {
676         struct igc_hw *hw = &adapter->hw;
677         u32 tctl;
678
679         /* disable queue 0 which could be enabled by default */
680         wr32(IGC_TXDCTL(0), 0);
681
682         /* Program the Transmit Control Register */
683         tctl = rd32(IGC_TCTL);
684         tctl &= ~IGC_TCTL_CT;
685         tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
686                 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
687
688         /* Enable transmits */
689         tctl |= IGC_TCTL_EN;
690
691         wr32(IGC_TCTL, tctl);
692 }
693
694 /**
695  * igc_set_mac - Change the Ethernet Address of the NIC
696  * @netdev: network interface device structure
697  * @p: pointer to an address structure
698  *
699  * Returns 0 on success, negative on failure
700  */
701 static int igc_set_mac(struct net_device *netdev, void *p)
702 {
703         struct igc_adapter *adapter = netdev_priv(netdev);
704         struct igc_hw *hw = &adapter->hw;
705         struct sockaddr *addr = p;
706
707         if (!is_valid_ether_addr(addr->sa_data))
708                 return -EADDRNOTAVAIL;
709
710         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
711         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
712
713         /* set the correct pool for the new PF MAC address in entry 0 */
714         igc_set_default_mac_filter(adapter);
715
716         return 0;
717 }
718
719 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first)
720 {
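        /* Tx checksum offload is not implemented yet. */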
721 }
722
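/**
 * __igc_maybe_stop_tx - stop the Tx subqueue when the ring is full
 * @tx_ring: ring to check
 * @size: number of descriptors needed
 *
 * Stop the subqueue, then re-check the free descriptor count after a
 * memory barrier in case the cleanup path made room in the meantime.
 * Returns -EBUSY if the queue stays stopped, 0 if it was restarted.
 */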
723 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
724 {
725         struct net_device *netdev = tx_ring->netdev;
726
727         netif_stop_subqueue(netdev, tx_ring->queue_index);
728
729         /* memory barrier: make the queue stop visible before re-checking */
730         smp_mb();
731
732         /* We need to check again in case another CPU has just
733          * made room available.
734          */
735         if (igc_desc_unused(tx_ring) < size)
736                 return -EBUSY;
737
738         /* A reprieve! */
739         netif_wake_subqueue(netdev, tx_ring->queue_index);
740
741         u64_stats_update_begin(&tx_ring->tx_syncp2);
742         tx_ring->tx_stats.restart_queue2++;
743         u64_stats_update_end(&tx_ring->tx_syncp2);
744
745         return 0;
746 }
747
748 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
749 {
750         if (igc_desc_unused(tx_ring) >= size)
751                 return 0;
752         return __igc_maybe_stop_tx(tx_ring, size);
753 }
754
755 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
756 {
757         /* set type for advanced descriptor with frame checksum insertion */
758         u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
759                        IGC_ADVTXD_DCMD_DEXT |
760                        IGC_ADVTXD_DCMD_IFCS;
761
762         return cmd_type;
763 }
764
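/**
 * igc_tx_olinfo_status - build the olinfo_status field of a Tx descriptor
 * @tx_ring: ring the descriptor will be placed on
 * @tx_desc: descriptor being populated
 * @tx_flags: flags describing the requested offloads
 * @paylen: payload length in bytes
 *
 * Encode the payload length and, when requested by @tx_flags, the L4 and
 * IPv4 checksum insertion bits into olinfo_status.
 */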
765 static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
766                                  union igc_adv_tx_desc *tx_desc,
767                                  u32 tx_flags, unsigned int paylen)
768 {
769         u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
770
771         /* insert L4 checksum */
772         olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
773                           ((IGC_TXD_POPTS_TXSM << 8) /
774                           IGC_TX_FLAGS_CSUM);
775
776         /* insert IPv4 checksum */
777         olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
778                           (((IGC_TXD_POPTS_IXSM << 8)) /
779                           IGC_TX_FLAGS_IPV4);
780
781         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
782 }
783
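/**
 * igc_tx_map - map an skb for DMA and write the Tx descriptors
 * @tx_ring: ring the packet is placed on
 * @first: first tx_buffer of the packet, already holding the skb
 * @hdr_len: header length used for the payload length calculation
 *
 * Map the skb head and page fragments for DMA, fill one or more advanced
 * Tx descriptors per buffer, mark the final descriptor with the RS and EOP
 * bits and, unless further frames are expected (xmit_more), write the ring
 * tail.  On a DMA mapping error all mappings taken so far are unwound, the
 * skb is freed and -1 is returned; 0 on success.
 */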
784 static int igc_tx_map(struct igc_ring *tx_ring,
785                       struct igc_tx_buffer *first,
786                       const u8 hdr_len)
787 {
788         struct sk_buff *skb = first->skb;
789         struct igc_tx_buffer *tx_buffer;
790         union igc_adv_tx_desc *tx_desc;
791         u32 tx_flags = first->tx_flags;
792         struct skb_frag_struct *frag;
793         u16 i = tx_ring->next_to_use;
794         unsigned int data_len, size;
795         dma_addr_t dma;
796         u32 cmd_type = igc_tx_cmd_type(skb, tx_flags);
797
798         tx_desc = IGC_TX_DESC(tx_ring, i);
799
800         igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
801
802         size = skb_headlen(skb);
803         data_len = skb->data_len;
804
805         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
806
807         tx_buffer = first;
808
809         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
810                 if (dma_mapping_error(tx_ring->dev, dma))
811                         goto dma_error;
812
813                 /* record length, and DMA address */
814                 dma_unmap_len_set(tx_buffer, len, size);
815                 dma_unmap_addr_set(tx_buffer, dma, dma);
816
817                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
818
819                 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
820                         tx_desc->read.cmd_type_len =
821                                 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
822
823                         i++;
824                         tx_desc++;
825                         if (i == tx_ring->count) {
826                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
827                                 i = 0;
828                         }
829                         tx_desc->read.olinfo_status = 0;
830
831                         dma += IGC_MAX_DATA_PER_TXD;
832                         size -= IGC_MAX_DATA_PER_TXD;
833
834                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
835                 }
836
837                 if (likely(!data_len))
838                         break;
839
840                 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
841
842                 i++;
843                 tx_desc++;
844                 if (i == tx_ring->count) {
845                         tx_desc = IGC_TX_DESC(tx_ring, 0);
846                         i = 0;
847                 }
848                 tx_desc->read.olinfo_status = 0;
849
850                 size = skb_frag_size(frag);
851                 data_len -= size;
852
853                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
854                                        size, DMA_TO_DEVICE);
855
856                 tx_buffer = &tx_ring->tx_buffer_info[i];
857         }
858
859         /* write last descriptor with RS and EOP bits */
860         cmd_type |= size | IGC_TXD_DCMD;
861         tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
862
863         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
864
865         /* set the timestamp */
866         first->time_stamp = jiffies;
867
868         /* Force memory writes to complete before letting h/w know there
869          * are new descriptors to fetch.  (Only applicable for weak-ordered
870          * memory model archs, such as IA-64).
871          *
872          * We also need this memory barrier to make certain all of the
873          * status bits have been updated before next_to_watch is written.
874          */
875         wmb();
876
877         /* set next_to_watch value indicating a packet is present */
878         first->next_to_watch = tx_desc;
879
880         i++;
881         if (i == tx_ring->count)
882                 i = 0;
883
884         tx_ring->next_to_use = i;
885
886         /* Make sure there is space in the ring for the next send. */
887         igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
888
889         if (netif_xmit_stopped(txring_txq(tx_ring)) || !skb->xmit_more) {
890                 writel(i, tx_ring->tail);
891
892                 /* we need this if more than one processor can write to our tail
893                  * at a time, it synchronizes IO on IA64/Altix systems
894                  */
895                 mmiowb();
896         }
897
898         return 0;
899 dma_error:
900         dev_err(tx_ring->dev, "TX DMA map failed\n");
901         tx_buffer = &tx_ring->tx_buffer_info[i];
902
903         /* clear dma mappings for failed tx_buffer_info map */
904         while (tx_buffer != first) {
905                 if (dma_unmap_len(tx_buffer, len))
906                         dma_unmap_page(tx_ring->dev,
907                                        dma_unmap_addr(tx_buffer, dma),
908                                        dma_unmap_len(tx_buffer, len),
909                                        DMA_TO_DEVICE);
910                 dma_unmap_len_set(tx_buffer, len, 0);
911
912                 if (i-- == 0)
913                         i += tx_ring->count;
914                 tx_buffer = &tx_ring->tx_buffer_info[i];
915         }
916
917         if (dma_unmap_len(tx_buffer, len))
918                 dma_unmap_single(tx_ring->dev,
919                                  dma_unmap_addr(tx_buffer, dma),
920                                  dma_unmap_len(tx_buffer, len),
921                                  DMA_TO_DEVICE);
922         dma_unmap_len_set(tx_buffer, len, 0);
923
924         dev_kfree_skb_any(tx_buffer->skb);
925         tx_buffer->skb = NULL;
926
927         tx_ring->next_to_use = i;
928
929         return -1;
930 }
931
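/**
 * igc_xmit_frame_ring - transmit an skb on a specific Tx ring
 * @skb: packet to be sent
 * @tx_ring: ring the packet is placed on
 *
 * Reserve enough descriptors for the frame (returning NETDEV_TX_BUSY when
 * the ring is too full), record the first tx_buffer, take the software
 * timestamp and hand the packet to igc_tx_map() for descriptor setup.
 */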
932 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
933                                        struct igc_ring *tx_ring)
934 {
935         u16 count = TXD_USE_COUNT(skb_headlen(skb));
936         __be16 protocol = vlan_get_protocol(skb);
937         struct igc_tx_buffer *first;
938         u32 tx_flags = 0;
939         unsigned short f;
940         u8 hdr_len = 0;
941
942         /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
943          *      + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
944          *      + 2 desc gap to keep tail from touching head,
945          *      + 1 desc for context descriptor,
946          * otherwise try next time
947          */
948         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
949                 count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size);
950
951         if (igc_maybe_stop_tx(tx_ring, count + 3)) {
952                 /* this is a hard error */
953                 return NETDEV_TX_BUSY;
954         }
955
956         /* record the location of the first descriptor for this packet */
957         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
958         first->skb = skb;
959         first->bytecount = skb->len;
960         first->gso_segs = 1;
961
962         skb_tx_timestamp(skb);
963
964         /* record initial flags and protocol */
965         first->tx_flags = tx_flags;
966         first->protocol = protocol;
967
968         igc_tx_csum(tx_ring, first);
969
970         igc_tx_map(tx_ring, first, hdr_len);
971
972         return NETDEV_TX_OK;
973 }
974
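/**
 * igc_tx_queue_mapping - map an skb to one of the adapter's Tx rings
 * @adapter: board private structure
 * @skb: packet being transmitted
 *
 * Use the skb queue mapping directly when it is in range, otherwise wrap
 * it around the number of available Tx queues.
 */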
975 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
976                                                     struct sk_buff *skb)
977 {
978         unsigned int r_idx = skb->queue_mapping;
979
980         if (r_idx >= adapter->num_tx_queues)
981                 r_idx = r_idx % adapter->num_tx_queues;
982
983         return adapter->tx_ring[r_idx];
984 }
985
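/**
 * igc_xmit_frame - main transmit entry point
 * @skb: packet to be sent
 * @netdev: network interface device structure
 *
 * Pad frames shorter than 17 bytes (the minimum with TCTL.PSP set) and
 * queue the packet on the ring selected by igc_tx_queue_mapping().
 */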
986 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
987                                   struct net_device *netdev)
988 {
989         struct igc_adapter *adapter = netdev_priv(netdev);
990
991         /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
992          * in order to meet this minimum size requirement.
993          */
994         if (skb->len < 17) {
995                 if (skb_padto(skb, 17))
996                         return NETDEV_TX_OK;
997                 skb->len = 17;
998         }
999
1000         return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1001 }
1002
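/**
 * igc_rx_hash - copy the RSS hash from the Rx descriptor into the skb
 * @ring: ring the packet was received on
 * @rx_desc: Rx descriptor carrying the hardware-computed hash
 * @skb: skb being populated
 *
 * Only done when the netdev has NETIF_F_RXHASH enabled.
 */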
1003 static inline void igc_rx_hash(struct igc_ring *ring,
1004                                union igc_adv_rx_desc *rx_desc,
1005                                struct sk_buff *skb)
1006 {
1007         if (ring->netdev->features & NETIF_F_RXHASH)
1008                 skb_set_hash(skb,
1009                              le32_to_cpu(rx_desc->wb.lower.hi_dword.rss),
1010                              PKT_HASH_TYPE_L3);
1011 }
1012
1013 /**
1014  * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1015  * @rx_ring: rx descriptor ring packet is being transacted on
1016  * @rx_desc: pointer to the EOP Rx descriptor
1017  * @skb: pointer to current skb being populated
1018  *
1019  * This function checks the ring, descriptor, and packet information in
1020  * order to populate the hash, checksum, VLAN, timestamp, protocol, and
1021  * other fields within the skb.
1022  */
1023 static void igc_process_skb_fields(struct igc_ring *rx_ring,
1024                                    union igc_adv_rx_desc *rx_desc,
1025                                    struct sk_buff *skb)
1026 {
1027         igc_rx_hash(rx_ring, rx_desc, skb);
1028
1029         skb_record_rx_queue(skb, rx_ring->queue_index);
1030
1031         skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1032 }
1033
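/**
 * igc_get_rx_buffer - fetch the Rx buffer for the next descriptor to clean
 * @rx_ring: ring the buffer lives on
 * @size: number of bytes written by the hardware
 *
 * Sync the received portion of the page for CPU access and take a
 * reference on the buffer by decrementing its pagecnt_bias.
 */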
1034 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1035                                                const unsigned int size)
1036 {
1037         struct igc_rx_buffer *rx_buffer;
1038
1039         rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1040         prefetchw(rx_buffer->page);
1041
1042         /* we are reusing so sync this buffer for CPU use */
1043         dma_sync_single_range_for_cpu(rx_ring->dev,
1044                                       rx_buffer->dma,
1045                                       rx_buffer->page_offset,
1046                                       size,
1047                                       DMA_FROM_DEVICE);
1048
1049         rx_buffer->pagecnt_bias--;
1050
1051         return rx_buffer;
1052 }
1053
1054 /**
1055  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1056  * @rx_ring: rx descriptor ring to transact packets on
1057  * @rx_buffer: buffer containing page to add
1058  * @skb: sk_buff to place the data into
1059  * @size: size of buffer to be added
1060  *
1061  * This function will add the data contained in rx_buffer->page to the skb.
1062  */
1063 static void igc_add_rx_frag(struct igc_ring *rx_ring,
1064                             struct igc_rx_buffer *rx_buffer,
1065                             struct sk_buff *skb,
1066                             unsigned int size)
1067 {
1068 #if (PAGE_SIZE < 8192)
1069         unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1070
1071         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1072                         rx_buffer->page_offset, size, truesize);
1073         rx_buffer->page_offset ^= truesize;
1074 #else
1075         unsigned int truesize = ring_uses_build_skb(rx_ring) ?
1076                                 SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1077                                 SKB_DATA_ALIGN(size);
1078         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1079                         rx_buffer->page_offset, size, truesize);
1080         rx_buffer->page_offset += truesize;
1081 #endif
1082 }
1083
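/**
 * igc_build_skb - build an skb around an existing Rx page buffer
 * @rx_ring: ring the buffer was received on
 * @rx_buffer: buffer containing the page with the received data
 * @rx_desc: descriptor describing the received data
 * @size: number of bytes written by the hardware
 *
 * Avoid copying by building the skb directly around the page data, then
 * flip or advance the page offset so the rest of the page can be reused.
 */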
1084 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1085                                      struct igc_rx_buffer *rx_buffer,
1086                                      union igc_adv_rx_desc *rx_desc,
1087                                      unsigned int size)
1088 {
1089         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1090 #if (PAGE_SIZE < 8192)
1091         unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1092 #else
1093         unsigned int truesize = SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
1094                                 SKB_DATA_ALIGN(IGC_SKB_PAD + size);
1095 #endif
1096         struct sk_buff *skb;
1097
1098         /* prefetch first cache line of first page */
1099         prefetch(va);
1100 #if L1_CACHE_BYTES < 128
1101         prefetch(va + L1_CACHE_BYTES);
1102 #endif
1103
1104         /* build an skb around the page buffer */
1105         skb = build_skb(va - IGC_SKB_PAD, truesize);
1106         if (unlikely(!skb))
1107                 return NULL;
1108
1109         /* update pointers within the skb to store the data */
1110         skb_reserve(skb, IGC_SKB_PAD);
1111         __skb_put(skb, size);
1112
1113         /* update buffer offset */
1114 #if (PAGE_SIZE < 8192)
1115         rx_buffer->page_offset ^= truesize;
1116 #else
1117         rx_buffer->page_offset += truesize;
1118 #endif
1119
1120         return skb;
1121 }
1122
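/**
 * igc_construct_skb - allocate an skb and copy the packet headers into it
 * @rx_ring: ring the buffer was received on
 * @rx_buffer: buffer containing the page with the received data
 * @rx_desc: descriptor describing the received data
 * @size: number of bytes written by the hardware
 *
 * Allocate a small skb, copy up to IGC_RX_HDR_LEN bytes of headers into
 * its linear area and attach any remaining data as a page fragment.
 */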
1123 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1124                                          struct igc_rx_buffer *rx_buffer,
1125                                          union igc_adv_rx_desc *rx_desc,
1126                                          unsigned int size)
1127 {
1128         void *va = page_address(rx_buffer->page) + rx_buffer->page_offset;
1129 #if (PAGE_SIZE < 8192)
1130         unsigned int truesize = igc_rx_pg_size(rx_ring) / 2;
1131 #else
1132         unsigned int truesize = SKB_DATA_ALIGN(size);
1133 #endif
1134         unsigned int headlen;
1135         struct sk_buff *skb;
1136
1137         /* prefetch first cache line of first page */
1138         prefetch(va);
1139 #if L1_CACHE_BYTES < 128
1140         prefetch(va + L1_CACHE_BYTES);
1141 #endif
1142
1143         /* allocate a skb to store the frags */
1144         skb = napi_alloc_skb(&rx_ring->q_vector->napi, IGC_RX_HDR_LEN);
1145         if (unlikely(!skb))
1146                 return NULL;
1147
1148         /* Determine available headroom for copy */
1149         headlen = size;
1150         if (headlen > IGC_RX_HDR_LEN)
1151                 headlen = eth_get_headlen(va, IGC_RX_HDR_LEN);
1152
1153         /* align pull length to size of long to optimize memcpy performance */
1154         memcpy(__skb_put(skb, headlen), va, ALIGN(headlen, sizeof(long)));
1155
1156         /* update all of the pointers */
1157         size -= headlen;
1158         if (size) {
1159                 skb_add_rx_frag(skb, 0, rx_buffer->page,
1160                                 (va + headlen) - page_address(rx_buffer->page),
1161                                 size, truesize);
1162 #if (PAGE_SIZE < 8192)
1163         rx_buffer->page_offset ^= truesize;
1164 #else
1165         rx_buffer->page_offset += truesize;
1166 #endif
1167         } else {
1168                 rx_buffer->pagecnt_bias++;
1169         }
1170
1171         return skb;
1172 }
1173
1174 /**
1175  * igc_reuse_rx_page - page flip buffer and store it back on the ring
1176  * @rx_ring: rx descriptor ring to store buffers on
1177  * @old_buff: donor buffer to have page reused
1178  *
1179  * Synchronizes page for reuse by the adapter
1180  */
1181 static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1182                               struct igc_rx_buffer *old_buff)
1183 {
1184         u16 nta = rx_ring->next_to_alloc;
1185         struct igc_rx_buffer *new_buff;
1186
1187         new_buff = &rx_ring->rx_buffer_info[nta];
1188
1189         /* update, and store next to alloc */
1190         nta++;
1191         rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1192
1193         /* Transfer page from old buffer to new buffer.
1194          * Move each member individually to avoid possible store
1195          * forwarding stalls.
1196          */
1197         new_buff->dma           = old_buff->dma;
1198         new_buff->page          = old_buff->page;
1199         new_buff->page_offset   = old_buff->page_offset;
1200         new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1201 }
1202
1203 static inline bool igc_page_is_reserved(struct page *page)
1204 {
1205         return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page);
1206 }
1207
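/**
 * igc_can_reuse_rx_page - check whether an Rx page can be recycled
 * @rx_buffer: buffer containing the page
 *
 * A page can be reused when it is local to this node, is not pfmemalloc
 * backed and the reference/offset checks show the driver still owns enough
 * of it.  The page reference count is replenished once the bias is drained.
 */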
1208 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer)
1209 {
1210         unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1211         struct page *page = rx_buffer->page;
1212
1213         /* avoid re-using remote pages */
1214         if (unlikely(igc_page_is_reserved(page)))
1215                 return false;
1216
1217 #if (PAGE_SIZE < 8192)
1218         /* if we are only owner of page we can reuse it */
1219         if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
1220                 return false;
1221 #else
1222 #define IGC_LAST_OFFSET \
1223         (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
1224
1225         if (rx_buffer->page_offset > IGC_LAST_OFFSET)
1226                 return false;
1227 #endif
1228
1229         /* If we have drained the page fragment pool we need to update
1230          * the pagecnt_bias and page count so that we fully restock the
1231          * number of references the driver holds.
1232          */
1233         if (unlikely(!pagecnt_bias)) {
1234                 page_ref_add(page, USHRT_MAX);
1235                 rx_buffer->pagecnt_bias = USHRT_MAX;
1236         }
1237
1238         return true;
1239 }
1240
1241 /**
1242  * igc_is_non_eop - process handling of non-EOP buffers
1243  * @rx_ring: Rx ring being processed
1244  * @rx_desc: Rx descriptor for current buffer
 1245  *
 1246  * This function updates next to clean and prefetches the next descriptor.
 1247  * If the buffer is an EOP buffer this function exits returning false,
 1248  * otherwise it returns true indicating that this is in fact a non-EOP buffer.
1251  */
1252 static bool igc_is_non_eop(struct igc_ring *rx_ring,
1253                            union igc_adv_rx_desc *rx_desc)
1254 {
1255         u32 ntc = rx_ring->next_to_clean + 1;
1256
1257         /* fetch, update, and store next to clean */
1258         ntc = (ntc < rx_ring->count) ? ntc : 0;
1259         rx_ring->next_to_clean = ntc;
1260
1261         prefetch(IGC_RX_DESC(rx_ring, ntc));
1262
1263         if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
1264                 return false;
1265
1266         return true;
1267 }
1268
1269 /**
1270  * igc_cleanup_headers - Correct corrupted or empty headers
1271  * @rx_ring: rx descriptor ring packet is being transacted on
1272  * @rx_desc: pointer to the EOP Rx descriptor
1273  * @skb: pointer to current skb being fixed
1274  *
1275  * Address the case where we are pulling data in on pages only
1276  * and as such no data is present in the skb header.
1277  *
1278  * In addition if skb is not at least 60 bytes we need to pad it so that
1279  * it is large enough to qualify as a valid Ethernet frame.
1280  *
1281  * Returns true if an error was encountered and skb was freed.
1282  */
1283 static bool igc_cleanup_headers(struct igc_ring *rx_ring,
1284                                 union igc_adv_rx_desc *rx_desc,
1285                                 struct sk_buff *skb)
1286 {
1287         if (unlikely((igc_test_staterr(rx_desc,
1288                                        IGC_RXDEXT_ERR_FRAME_ERR_MASK)))) {
1289                 struct net_device *netdev = rx_ring->netdev;
1290
1291                 if (!(netdev->features & NETIF_F_RXALL)) {
1292                         dev_kfree_skb_any(skb);
1293                         return true;
1294                 }
1295         }
1296
1297         /* if eth_skb_pad returns an error the skb was freed */
1298         if (eth_skb_pad(skb))
1299                 return true;
1300
1301         return false;
1302 }
1303
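/**
 * igc_put_rx_buffer - recycle or release an Rx buffer after use
 * @rx_ring: ring the buffer came from
 * @rx_buffer: buffer whose data has been consumed
 *
 * Hand the page back to the ring for reuse when possible, otherwise unmap
 * it and drop the references still held on it.
 */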
1304 static void igc_put_rx_buffer(struct igc_ring *rx_ring,
1305                               struct igc_rx_buffer *rx_buffer)
1306 {
1307         if (igc_can_reuse_rx_page(rx_buffer)) {
1308                 /* hand second half of page back to the ring */
1309                 igc_reuse_rx_page(rx_ring, rx_buffer);
1310         } else {
1311                 /* We are not reusing the buffer so unmap it and free
1312                  * any references we are holding to it
1313                  */
1314                 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
1315                                      igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
1316                                      IGC_RX_DMA_ATTR);
1317                 __page_frag_cache_drain(rx_buffer->page,
1318                                         rx_buffer->pagecnt_bias);
1319         }
1320
1321         /* clear contents of rx_buffer */
1322         rx_buffer->page = NULL;
1323 }
1324
1325 /**
1326  * igc_alloc_rx_buffers - Replace used receive buffers; packet split
 1327  * @rx_ring: rx descriptor ring to refill
 1328  * @cleaned_count: number of buffers to replace
1328  */
1329 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
1330 {
1331         union igc_adv_rx_desc *rx_desc;
1332         u16 i = rx_ring->next_to_use;
1333         struct igc_rx_buffer *bi;
1334         u16 bufsz;
1335
1336         /* nothing to do */
1337         if (!cleaned_count)
1338                 return;
1339
1340         rx_desc = IGC_RX_DESC(rx_ring, i);
1341         bi = &rx_ring->rx_buffer_info[i];
1342         i -= rx_ring->count;
1343
1344         bufsz = igc_rx_bufsz(rx_ring);
1345
1346         do {
1347                 if (!igc_alloc_mapped_page(rx_ring, bi))
1348                         break;
1349
1350                 /* sync the buffer for use by the device */
1351                 dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
1352                                                  bi->page_offset, bufsz,
1353                                                  DMA_FROM_DEVICE);
1354
1355                 /* Refresh the desc even if buffer_addrs didn't change
1356                  * because each write-back erases this info.
1357                  */
1358                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
1359
1360                 rx_desc++;
1361                 bi++;
1362                 i++;
1363                 if (unlikely(!i)) {
1364                         rx_desc = IGC_RX_DESC(rx_ring, 0);
1365                         bi = rx_ring->rx_buffer_info;
1366                         i -= rx_ring->count;
1367                 }
1368
1369                 /* clear the length for the next_to_use descriptor */
1370                 rx_desc->wb.upper.length = 0;
1371
1372                 cleaned_count--;
1373         } while (cleaned_count);
1374
1375         i += rx_ring->count;
1376
1377         if (rx_ring->next_to_use != i) {
1378                 /* record the next descriptor to use */
1379                 rx_ring->next_to_use = i;
1380
1381                 /* update next to alloc since we have filled the ring */
1382                 rx_ring->next_to_alloc = i;
1383
1384                 /* Force memory writes to complete before letting h/w
1385                  * know there are new descriptors to fetch.  (Only
1386                  * applicable for weak-ordered memory model archs,
1387                  * such as IA-64).
1388                  */
1389                 wmb();
1390                 writel(i, rx_ring->tail);
1391         }
1392 }
1393
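/**
 * igc_clean_rx_irq - clean completed Rx descriptors from a ring
 * @q_vector: q_vector the Rx ring belongs to
 * @budget: NAPI budget, the maximum number of packets to process
 *
 * Walk the ring, build skbs from completed buffers, populate their fields,
 * hand them to the stack through GRO and refill the ring with fresh
 * buffers.  Returns the number of packets processed.
 */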
1394 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
1395 {
1396         unsigned int total_bytes = 0, total_packets = 0;
1397         struct igc_ring *rx_ring = q_vector->rx.ring;
1398         struct sk_buff *skb = rx_ring->skb;
1399         u16 cleaned_count = igc_desc_unused(rx_ring);
1400
1401         while (likely(total_packets < budget)) {
1402                 union igc_adv_rx_desc *rx_desc;
1403                 struct igc_rx_buffer *rx_buffer;
1404                 unsigned int size;
1405
1406                 /* return some buffers to hardware, one at a time is too slow */
1407                 if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
1408                         igc_alloc_rx_buffers(rx_ring, cleaned_count);
1409                         cleaned_count = 0;
1410                 }
1411
1412                 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
1413                 size = le16_to_cpu(rx_desc->wb.upper.length);
1414                 if (!size)
1415                         break;
1416
1417                 /* This memory barrier is needed to keep us from reading
1418                  * any other fields out of the rx_desc until we know the
1419                  * descriptor has been written back
1420                  */
1421                 dma_rmb();
1422
1423                 rx_buffer = igc_get_rx_buffer(rx_ring, size);
1424
1425                 /* retrieve a buffer from the ring */
1426                 if (skb)
1427                         igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
1428                 else if (ring_uses_build_skb(rx_ring))
1429                         skb = igc_build_skb(rx_ring, rx_buffer, rx_desc, size);
1430                 else
1431                         skb = igc_construct_skb(rx_ring, rx_buffer,
1432                                                 rx_desc, size);
1433
1434                 /* exit if we failed to retrieve a buffer */
1435                 if (!skb) {
1436                         rx_ring->rx_stats.alloc_failed++;
1437                         rx_buffer->pagecnt_bias++;
1438                         break;
1439                 }
1440
1441                 igc_put_rx_buffer(rx_ring, rx_buffer);
1442                 cleaned_count++;
1443
1444                 /* fetch next buffer in frame if non-eop */
1445                 if (igc_is_non_eop(rx_ring, rx_desc))
1446                         continue;
1447
1448                 /* verify the packet layout is correct */
1449                 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
1450                         skb = NULL;
1451                         continue;
1452                 }
1453
1454                 /* probably a little skewed due to removing CRC */
1455                 total_bytes += skb->len;
1456
1457                 /* populate checksum, timestamp, VLAN, and protocol */
1458                 igc_process_skb_fields(rx_ring, rx_desc, skb);
1459
1460                 napi_gro_receive(&q_vector->napi, skb);
1461
1462                 /* reset skb pointer */
1463                 skb = NULL;
1464
1465                 /* update budget accounting */
1466                 total_packets++;
1467         }
1468
1469         /* place incomplete frames back on ring for completion */
1470         rx_ring->skb = skb;
1471
1472         u64_stats_update_begin(&rx_ring->rx_syncp);
1473         rx_ring->rx_stats.packets += total_packets;
1474         rx_ring->rx_stats.bytes += total_bytes;
1475         u64_stats_update_end(&rx_ring->rx_syncp);
1476         q_vector->rx.total_packets += total_packets;
1477         q_vector->rx.total_bytes += total_bytes;
1478
1479         if (cleaned_count)
1480                 igc_alloc_rx_buffers(rx_ring, cleaned_count);
1481
1482         return total_packets;
1483 }
1484
1485 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
1486 {
1487         return ring_uses_build_skb(rx_ring) ? IGC_SKB_PAD : 0;
1488 }
1489
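/**
 * igc_alloc_mapped_page - allocate and DMA-map a page for an Rx buffer
 * @rx_ring: ring the buffer belongs to
 * @bi: Rx buffer to populate
 *
 * Reuse the page already attached to the buffer when there is one,
 * otherwise allocate a fresh page and map it for device access.  Returns
 * false (and bumps the alloc_failed counter) when allocation or mapping
 * fails.
 */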
1490 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
1491                                   struct igc_rx_buffer *bi)
1492 {
1493         struct page *page = bi->page;
1494         dma_addr_t dma;
1495
1496         /* since we are recycling buffers we should seldom need to alloc */
1497         if (likely(page))
1498                 return true;
1499
1500         /* alloc new page for storage */
1501         page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
1502         if (unlikely(!page)) {
1503                 rx_ring->rx_stats.alloc_failed++;
1504                 return false;
1505         }
1506
1507         /* map page for use */
1508         dma = dma_map_page_attrs(rx_ring->dev, page, 0,
1509                                  igc_rx_pg_size(rx_ring),
1510                                  DMA_FROM_DEVICE,
1511                                  IGC_RX_DMA_ATTR);
1512
1513         /* if mapping failed free memory back to system since
1514          * there isn't much point in holding memory we can't use
1515          */
1516         if (dma_mapping_error(rx_ring->dev, dma)) {
1517                 __free_page(page);
1518
1519                 rx_ring->rx_stats.alloc_failed++;
1520                 return false;
1521         }
1522
1523         bi->dma = dma;
1524         bi->page = page;
1525         bi->page_offset = igc_rx_offset(rx_ring);
1526         bi->pagecnt_bias = 1;
1527
1528         return true;
1529 }
1530
1531 /**
1532  * igc_clean_tx_irq - Reclaim resources after transmit completes
1533  * @q_vector: pointer to q_vector containing needed info
1534  * @napi_budget: Used to determine if we are in netpoll
1535  *
1536  * returns true if ring is completely cleaned
1537  */
1538 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
1539 {
1540         struct igc_adapter *adapter = q_vector->adapter;
1541         unsigned int total_bytes = 0, total_packets = 0;
1542         unsigned int budget = q_vector->tx.work_limit;
1543         struct igc_ring *tx_ring = q_vector->tx.ring;
1544         unsigned int i = tx_ring->next_to_clean;
1545         struct igc_tx_buffer *tx_buffer;
1546         union igc_adv_tx_desc *tx_desc;
1547
1548         if (test_bit(__IGC_DOWN, &adapter->state))
1549                 return true;
1550
1551         tx_buffer = &tx_ring->tx_buffer_info[i];
1552         tx_desc = IGC_TX_DESC(tx_ring, i);
1553         i -= tx_ring->count;
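        /* i is biased by -count so the wrap checks below only have to test
         * for i reaching zero; the bias is re-applied after each wrap and
         * removed again before next_to_clean is written back
         */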
1554
1555         do {
1556                 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
1557
1558                 /* if next_to_watch is not set then there is no work pending */
1559                 if (!eop_desc)
1560                         break;
1561
1562                 /* prevent any other reads prior to eop_desc */
1563                 smp_rmb();
1564
1565                 /* if DD is not set pending work has not been completed */
1566                 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
1567                         break;
1568
1569                 /* clear next_to_watch to prevent false hangs */
1570                 tx_buffer->next_to_watch = NULL;
1571
1572                 /* update the statistics for this packet */
1573                 total_bytes += tx_buffer->bytecount;
1574                 total_packets += tx_buffer->gso_segs;
1575
1576                 /* free the skb */
1577                 napi_consume_skb(tx_buffer->skb, napi_budget);
1578
1579                 /* unmap skb header data */
1580                 dma_unmap_single(tx_ring->dev,
1581                                  dma_unmap_addr(tx_buffer, dma),
1582                                  dma_unmap_len(tx_buffer, len),
1583                                  DMA_TO_DEVICE);
1584
1585                 /* clear tx_buffer data */
1586                 dma_unmap_len_set(tx_buffer, len, 0);
1587
1588                 /* clear last DMA location and unmap remaining buffers */
1589                 while (tx_desc != eop_desc) {
1590                         tx_buffer++;
1591                         tx_desc++;
1592                         i++;
1593                         if (unlikely(!i)) {
1594                                 i -= tx_ring->count;
1595                                 tx_buffer = tx_ring->tx_buffer_info;
1596                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
1597                         }
1598
1599                         /* unmap any remaining paged data */
1600                         if (dma_unmap_len(tx_buffer, len)) {
1601                                 dma_unmap_page(tx_ring->dev,
1602                                                dma_unmap_addr(tx_buffer, dma),
1603                                                dma_unmap_len(tx_buffer, len),
1604                                                DMA_TO_DEVICE);
1605                                 dma_unmap_len_set(tx_buffer, len, 0);
1606                         }
1607                 }
1608
1609                 /* move us one more past the eop_desc for start of next pkt */
1610                 tx_buffer++;
1611                 tx_desc++;
1612                 i++;
1613                 if (unlikely(!i)) {
1614                         i -= tx_ring->count;
1615                         tx_buffer = tx_ring->tx_buffer_info;
1616                         tx_desc = IGC_TX_DESC(tx_ring, 0);
1617                 }
1618
1619                 /* issue prefetch for next Tx descriptor */
1620                 prefetch(tx_desc);
1621
1622                 /* update budget accounting */
1623                 budget--;
1624         } while (likely(budget));
1625
1626         netdev_tx_completed_queue(txring_txq(tx_ring),
1627                                   total_packets, total_bytes);
1628
1629         i += tx_ring->count;
1630         tx_ring->next_to_clean = i;
1631         u64_stats_update_begin(&tx_ring->tx_syncp);
1632         tx_ring->tx_stats.bytes += total_bytes;
1633         tx_ring->tx_stats.packets += total_packets;
1634         u64_stats_update_end(&tx_ring->tx_syncp);
1635         q_vector->tx.total_bytes += total_bytes;
1636         q_vector->tx.total_packets += total_packets;
1637
1638         if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
1639                 struct igc_hw *hw = &adapter->hw;
1640
1641                 /* Detect a transmit hang in hardware; this serializes the
1642                  * check with the clearing of time_stamp and the movement of i
1643                  */
1644                 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
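                /* a hang is only reported when a descriptor is still
                 * pending (next_to_watch set), it is older than the
                 * speed-scaled timeout, and the MAC is not paused by
                 * flow control (TXOFF clear)
                 */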
1645                 if (tx_buffer->next_to_watch &&
1646                     time_after(jiffies, tx_buffer->time_stamp +
1647                     (adapter->tx_timeout_factor * HZ)) &&
1648                     !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
1649                         /* detected Tx unit hang */
1650                         dev_err(tx_ring->dev,
1651                                 "Detected Tx Unit Hang\n"
1652                                 "  Tx Queue             <%d>\n"
1653                                 "  TDH                  <%x>\n"
1654                                 "  TDT                  <%x>\n"
1655                                 "  next_to_use          <%x>\n"
1656                                 "  next_to_clean        <%x>\n"
1657                                 "buffer_info[next_to_clean]\n"
1658                                 "  time_stamp           <%lx>\n"
1659                                 "  next_to_watch        <%p>\n"
1660                                 "  jiffies              <%lx>\n"
1661                                 "  desc.status          <%x>\n",
1662                                 tx_ring->queue_index,
1663                                 rd32(IGC_TDH(tx_ring->reg_idx)),
1664                                 readl(tx_ring->tail),
1665                                 tx_ring->next_to_use,
1666                                 tx_ring->next_to_clean,
1667                                 tx_buffer->time_stamp,
1668                                 tx_buffer->next_to_watch,
1669                                 jiffies,
1670                                 tx_buffer->next_to_watch->wb.status);
1671                         netif_stop_subqueue(tx_ring->netdev,
1672                                             tx_ring->queue_index);
1673
1674                         /* we are about to reset, no point in enabling stuff */
1675                         return true;
1676                 }
1677         }
1678
1679 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
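        /* only wake the queue once at least TX_WAKE_THRESHOLD descriptors
         * are free again; the gap gives some hysteresis so the queue is not
         * stopped and restarted for every completed packet
         */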
1680         if (unlikely(total_packets &&
1681                      netif_carrier_ok(tx_ring->netdev) &&
1682                      igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
1683                 /* Make sure that anybody stopping the queue after this
1684                  * sees the new next_to_clean.
1685                  */
1686                 smp_mb();
1687                 if (__netif_subqueue_stopped(tx_ring->netdev,
1688                                              tx_ring->queue_index) &&
1689                     !(test_bit(__IGC_DOWN, &adapter->state))) {
1690                         netif_wake_subqueue(tx_ring->netdev,
1691                                             tx_ring->queue_index);
1692
1693                         u64_stats_update_begin(&tx_ring->tx_syncp);
1694                         tx_ring->tx_stats.restart_queue++;
1695                         u64_stats_update_end(&tx_ring->tx_syncp);
1696                 }
1697         }
1698
1699         return !!budget;
1700 }
1701
1702 /**
1703  * igc_ioctl - I/O control method
1704  * @netdev: network interface device structure
1705  * @ifr: interface request structure
1706  * @cmd: command
1707  */
1708 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
1709 {
1710         switch (cmd) {
1711         default:
1712                 return -EOPNOTSUPP;
1713         }
1714 }
1715
1716 /**
1717  * igc_up - Open the interface and prepare it to handle traffic
1718  * @adapter: board private structure
1719  */
1720 static void igc_up(struct igc_adapter *adapter)
1721 {
1722         struct igc_hw *hw = &adapter->hw;
1723         int i = 0;
1724
1725         /* hardware has been reset, we need to reload some things */
1726         igc_configure(adapter);
1727
1728         clear_bit(__IGC_DOWN, &adapter->state);
1729
1730         for (i = 0; i < adapter->num_q_vectors; i++)
1731                 napi_enable(&adapter->q_vector[i]->napi);
1732
1733         if (adapter->msix_entries)
1734                 igc_configure_msix(adapter);
1735         else
1736                 igc_assign_vector(adapter->q_vector[0], 0);
1737
1738         /* Clear any pending interrupts. */
1739         rd32(IGC_ICR);
1740         igc_irq_enable(adapter);
1741
1742         netif_tx_start_all_queues(adapter->netdev);
1743
1744         /* start the watchdog. */
1745         hw->mac.get_link_status = 1;
1746         schedule_work(&adapter->watchdog_task);
1747 }
1748
1749 /**
1750  * igc_update_stats - Update the board statistics counters
1751  * @adapter: board private structure
1752  */
1753 static void igc_update_stats(struct igc_adapter *adapter)
1754 {
1755 }
1756
1757 static void igc_nfc_filter_exit(struct igc_adapter *adapter)
1758 {
1759 }
1760
1761 /**
1762  * igc_down - Close the interface
1763  * @adapter: board private structure
1764  */
1765 static void igc_down(struct igc_adapter *adapter)
1766 {
1767         struct net_device *netdev = adapter->netdev;
1768         struct igc_hw *hw = &adapter->hw;
1769         u32 tctl, rctl;
1770         int i = 0;
1771
1772         set_bit(__IGC_DOWN, &adapter->state);
1773
1774         /* disable receives in the hardware */
1775         rctl = rd32(IGC_RCTL);
1776         wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
1777         /* flush and sleep below */
1778
1779         igc_nfc_filter_exit(adapter);
1780
1781         /* set trans_start so we don't get spurious watchdogs during reset */
1782         netif_trans_update(netdev);
1783
1784         netif_carrier_off(netdev);
1785         netif_tx_stop_all_queues(netdev);
1786
1787         /* disable transmits in the hardware */
1788         tctl = rd32(IGC_TCTL);
1789         tctl &= ~IGC_TCTL_EN;
1790         wr32(IGC_TCTL, tctl);
1791         /* flush both disables and wait for them to finish */
1792         wrfl();
1793         usleep_range(10000, 20000);
1794
1795         igc_irq_disable(adapter);
1796
1797         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
1798
1799         for (i = 0; i < adapter->num_q_vectors; i++) {
1800                 if (adapter->q_vector[i]) {
1801                         napi_synchronize(&adapter->q_vector[i]->napi);
1802                         napi_disable(&adapter->q_vector[i]->napi);
1803                 }
1804         }
1805
1806         del_timer_sync(&adapter->watchdog_timer);
1807         del_timer_sync(&adapter->phy_info_timer);
1808
1809         /* record the stats before reset */
1810         spin_lock(&adapter->stats64_lock);
1811         igc_update_stats(adapter);
1812         spin_unlock(&adapter->stats64_lock);
1813
1814         adapter->link_speed = 0;
1815         adapter->link_duplex = 0;
1816
1817         if (!pci_channel_offline(adapter->pdev))
1818                 igc_reset(adapter);
1819
1820         /* clear VLAN promisc flag so VFTA will be updated if necessary */
1821         adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
1822
1823         igc_clean_all_tx_rings(adapter);
1824         igc_clean_all_rx_rings(adapter);
1825 }
1826
1827 static void igc_reinit_locked(struct igc_adapter *adapter)
1828 {
1829         WARN_ON(in_interrupt());
1830         while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
1831                 usleep_range(1000, 2000);
1832         igc_down(adapter);
1833         igc_up(adapter);
1834         clear_bit(__IGC_RESETTING, &adapter->state);
1835 }
1836
1837 static void igc_reset_task(struct work_struct *work)
1838 {
1839         struct igc_adapter *adapter;
1840
1841         adapter = container_of(work, struct igc_adapter, reset_task);
1842
1843         netdev_err(adapter->netdev, "Reset adapter\n");
1844         igc_reinit_locked(adapter);
1845 }
1846
1847 /**
1848  * igc_change_mtu - Change the Maximum Transfer Unit
1849  * @netdev: network interface device structure
1850  * @new_mtu: new value for maximum frame size
1851  *
1852  * Returns 0 on success, negative on failure
1853  */
1854 static int igc_change_mtu(struct net_device *netdev, int new_mtu)
1855 {
1856         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
1857         struct igc_adapter *adapter = netdev_priv(netdev);
1858         struct pci_dev *pdev = adapter->pdev;
1859
1860         /* adjust max frame to be at least the size of a standard frame */
1861         if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
1862                 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
1863
1864         while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
1865                 usleep_range(1000, 2000);
1866
1867         /* igc_down has a dependency on max_frame_size */
1868         adapter->max_frame_size = max_frame;
1869
1870         if (netif_running(netdev))
1871                 igc_down(adapter);
1872
1873         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
1874                  netdev->mtu, new_mtu);
1875         netdev->mtu = new_mtu;
1876
1877         if (netif_running(netdev))
1878                 igc_up(adapter);
1879         else
1880                 igc_reset(adapter);
1881
1882         clear_bit(__IGC_RESETTING, &adapter->state);
1883
1884         return 0;
1885 }
1886
1887 /**
1888  * igc_get_stats - Get System Network Statistics
1889  * @netdev: network interface device structure
1890  *
1891  * Returns the address of the device statistics structure.
1892  * The statistics are updated here and also from the timer callback.
1893  */
1894 static struct net_device_stats *igc_get_stats(struct net_device *netdev)
1895 {
1896         struct igc_adapter *adapter = netdev_priv(netdev);
1897
1898         if (!test_bit(__IGC_RESETTING, &adapter->state))
1899                 igc_update_stats(adapter);
1900
1901         /* only return the current stats */
1902         return &netdev->stats;
1903 }
1904
1905 /**
1906  * igc_configure - configure the hardware for RX and TX
1907  * @adapter: private board structure
1908  */
1909 static void igc_configure(struct igc_adapter *adapter)
1910 {
1911         struct net_device *netdev = adapter->netdev;
1912         int i = 0;
1913
1914         igc_get_hw_control(adapter);
1915         igc_set_rx_mode(netdev);
1916
1917         igc_setup_tctl(adapter);
1918         igc_setup_mrqc(adapter);
1919         igc_setup_rctl(adapter);
1920
1921         igc_configure_tx(adapter);
1922         igc_configure_rx(adapter);
1923
1924         igc_rx_fifo_flush_base(&adapter->hw);
1925
1926         /* call igc_desc_unused which always leaves
1927          * at least 1 descriptor unused to make sure
1928          * next_to_use != next_to_clean
1929          */
1930         for (i = 0; i < adapter->num_rx_queues; i++) {
1931                 struct igc_ring *ring = adapter->rx_ring[i];
1932
1933                 igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
1934         }
1935 }
1936
1937 /**
1938  * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
1939  * @adapter: Pointer to adapter structure
1940  * @index: Index of the RAR entry which need to be synced with MAC table
1941  */
1942 static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
1943 {
1944         u8 *addr = adapter->mac_table[index].addr;
1945         struct igc_hw *hw = &adapter->hw;
1946         u32 rar_low, rar_high;
1947
1948         /* HW expects these to be in network order when they are plugged
1949          * into the registers which are little endian.  In order to guarantee
1950          * that ordering we need to do an leXX_to_cpup here in order to be
1951          * ready for the byteswap that occurs with writel
1952          */
1953         rar_low = le32_to_cpup((__le32 *)(addr));
1954         rar_high = le16_to_cpup((__le16 *)(addr + 4));
1955
1956         /* Indicate to hardware the Address is Valid. */
1957         if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
1958                 if (is_valid_ether_addr(addr))
1959                         rar_high |= IGC_RAH_AV;
1960
1961                 rar_high |= IGC_RAH_POOL_1 <<
1962                         adapter->mac_table[index].queue;
1963         }
1964
1965         wr32(IGC_RAL(index), rar_low);
1966         wrfl();
1967         wr32(IGC_RAH(index), rar_high);
1968         wrfl();
1969 }
1970
1971 /* Set default MAC address for the PF in the first RAR entry */
1972 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
1973 {
1974         struct igc_mac_addr *mac_table = &adapter->mac_table[0];
1975
1976         ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
1977         mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
1978
1979         igc_rar_set_index(adapter, 0);
1980 }
1981
1982 /**
1983  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
1984  * @netdev: network interface device structure
1985  *
1986  * The set_rx_mode entry point is called whenever the unicast or multicast
1987  * address lists or the network interface flags are updated.  This routine is
1988  * responsible for configuring the hardware for proper unicast, multicast,
1989  * promiscuous mode, and all-multi behavior.
1990  */
1991 static void igc_set_rx_mode(struct net_device *netdev)
1992 {
1993 }
1994
1995 /**
1996  * igc_msix_other - msix other interrupt handler
1997  * @irq: interrupt number
1998  * @data: pointer to the adapter (board private structure)
1999  */
2000 static irqreturn_t igc_msix_other(int irq, void *data)
2001 {
2002         struct igc_adapter *adapter = data;
2003         struct igc_hw *hw = &adapter->hw;
2004         u32 icr = rd32(IGC_ICR);
2005
2006         /* reading ICR causes bit 31 of EICR to be cleared */
2007         if (icr & IGC_ICR_DRSTA)
2008                 schedule_work(&adapter->reset_task);
2009
2010         if (icr & IGC_ICR_DOUTSYNC) {
2011                 /* HW is reporting DMA is out of sync */
2012                 adapter->stats.doosync++;
2013         }
2014
2015         if (icr & IGC_ICR_LSC) {
2016                 hw->mac.get_link_status = 1;
2017                 /* guard against interrupt when we're going down */
2018                 if (!test_bit(__IGC_DOWN, &adapter->state))
2019                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
2020         }
2021
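        /* re-enable the "other" cause vector; it is auto-masked when its
         * interrupt fires (see IGC_GPIE_EIAME in igc_configure_msix)
         */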
2022         wr32(IGC_EIMS, adapter->eims_other);
2023
2024         return IRQ_HANDLED;
2025 }
2026
2027 /**
2028  * igc_write_ivar - configure ivar for given MSI-X vector
2029  * @hw: pointer to the HW structure
2030  * @msix_vector: vector number we are allocating to a given ring
2031  * @index: row index of IVAR register to write within IVAR table
2032  * @offset: column offset within IVAR, should be a multiple of 8
2033  *
2034  * The IVAR table consists of 2 columns,
2035  * each containing a cause allocation for an Rx and Tx ring, and a
2036  * variable number of rows depending on the number of queues supported.
2037  */
2038 static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
2039                            int index, int offset)
2040 {
2041         u32 ivar = array_rd32(IGC_IVAR0, index);
2042
2043         /* clear any bits that are currently set */
2044         ivar &= ~((u32)0xFF << offset);
2045
2046         /* write vector and valid bit */
2047         ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
2048
2049         array_wr32(IGC_IVAR0, index, ivar);
2050 }
2051
2052 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
2053 {
2054         struct igc_adapter *adapter = q_vector->adapter;
2055         struct igc_hw *hw = &adapter->hw;
2056         int rx_queue = IGC_N0_QUEUE;
2057         int tx_queue = IGC_N0_QUEUE;
2058
2059         if (q_vector->rx.ring)
2060                 rx_queue = q_vector->rx.ring->reg_idx;
2061         if (q_vector->tx.ring)
2062                 tx_queue = q_vector->tx.ring->reg_idx;
2063
2064         switch (hw->mac.type) {
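                /* each 32-bit IVAR register maps two queues: bytes 0 and 1
                 * carry the Rx and Tx causes of the even queue, bytes 2 and
                 * 3 those of the odd queue, hence the >> 1 row index and
                 * the (queue & 0x1) << 4 column offset used below
                 */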
2065         case igc_i225:
2066                 if (rx_queue > IGC_N0_QUEUE)
2067                         igc_write_ivar(hw, msix_vector,
2068                                        rx_queue >> 1,
2069                                        (rx_queue & 0x1) << 4);
2070                 if (tx_queue > IGC_N0_QUEUE)
2071                         igc_write_ivar(hw, msix_vector,
2072                                        tx_queue >> 1,
2073                                        ((tx_queue & 0x1) << 4) + 8);
2074                 q_vector->eims_value = BIT(msix_vector);
2075                 break;
2076         default:
2077                 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
2078                 break;
2079         }
2080
2081         /* add q_vector eims value to global eims_enable_mask */
2082         adapter->eims_enable_mask |= q_vector->eims_value;
2083
2084         /* configure q_vector to set itr on first interrupt */
2085         q_vector->set_itr = 1;
2086 }
2087
2088 /**
2089  * igc_configure_msix - Configure MSI-X hardware
2090  * @adapter: Pointer to adapter structure
2091  *
2092  * igc_configure_msix sets up the hardware to properly
2093  * generate MSI-X interrupts.
2094  */
2095 static void igc_configure_msix(struct igc_adapter *adapter)
2096 {
2097         struct igc_hw *hw = &adapter->hw;
2098         int i, vector = 0;
2099         u32 tmp;
2100
2101         adapter->eims_enable_mask = 0;
2102
2103         /* set vector for other causes, i.e. link changes */
2104         switch (hw->mac.type) {
2105         case igc_i225:
2106                 /* Turn on MSI-X capability first, or our settings
2107                  * won't stick.  And it will take days to debug.
2108                  */
2109                 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
2110                      IGC_GPIE_PBA | IGC_GPIE_EIAME |
2111                      IGC_GPIE_NSICR);
2112
2113                 /* enable msix_other interrupt */
2114                 adapter->eims_other = BIT(vector);
2115                 tmp = (vector++ | IGC_IVAR_VALID) << 8;
2116
2117                 wr32(IGC_IVAR_MISC, tmp);
2118                 break;
2119         default:
2120                 /* do nothing, since nothing else supports MSI-X */
2121                 break;
2122         } /* switch (hw->mac.type) */
2123
2124         adapter->eims_enable_mask |= adapter->eims_other;
2125
2126         for (i = 0; i < adapter->num_q_vectors; i++)
2127                 igc_assign_vector(adapter->q_vector[i], vector++);
2128
2129         wrfl();
2130 }
2131
2132 static irqreturn_t igc_msix_ring(int irq, void *data)
2133 {
2134         struct igc_q_vector *q_vector = data;
2135
2136         /* Write the ITR value calculated from the previous interrupt. */
2137         igc_write_itr(q_vector);
2138
2139         napi_schedule(&q_vector->napi);
2140
2141         return IRQ_HANDLED;
2142 }
2143
2144 /**
2145  * igc_request_msix - Initialize MSI-X interrupts
2146  * @adapter: Pointer to adapter structure
2147  *
2148  * igc_request_msix allocates MSI-X vectors and requests interrupts from the
2149  * kernel.
2150  */
2151 static int igc_request_msix(struct igc_adapter *adapter)
2152 {
2153         int i = 0, err = 0, vector = 0, free_vector = 0;
2154         struct net_device *netdev = adapter->netdev;
2155
2156         err = request_irq(adapter->msix_entries[vector].vector,
2157                           &igc_msix_other, 0, netdev->name, adapter);
2158         if (err)
2159                 goto err_out;
2160
2161         for (i = 0; i < adapter->num_q_vectors; i++) {
2162                 struct igc_q_vector *q_vector = adapter->q_vector[i];
2163
2164                 vector++;
2165
2166                 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
2167
2168                 if (q_vector->rx.ring && q_vector->tx.ring)
2169                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
2170                                 q_vector->rx.ring->queue_index);
2171                 else if (q_vector->tx.ring)
2172                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
2173                                 q_vector->tx.ring->queue_index);
2174                 else if (q_vector->rx.ring)
2175                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
2176                                 q_vector->rx.ring->queue_index);
2177                 else
2178                         sprintf(q_vector->name, "%s-unused", netdev->name);
2179
2180                 err = request_irq(adapter->msix_entries[vector].vector,
2181                                   igc_msix_ring, 0, q_vector->name,
2182                                   q_vector);
2183                 if (err)
2184                         goto err_free;
2185         }
2186
2187         igc_configure_msix(adapter);
2188         return 0;
2189
2190 err_free:
2191         /* free already assigned IRQs */
2192         free_irq(adapter->msix_entries[free_vector++].vector, adapter);
2193
2194         vector--;
2195         for (i = 0; i < vector; i++) {
2196                 free_irq(adapter->msix_entries[free_vector++].vector,
2197                          adapter->q_vector[i]);
2198         }
2199 err_out:
2200         return err;
2201 }
2202
2203 /**
2204  * igc_reset_q_vector - Reset config for interrupt vector
2205  * @adapter: board private structure to initialize
2206  * @v_idx: Index of vector to be reset
2207  *
2208  * If NAPI is enabled it will delete any references to the
2209  * NAPI struct. This is preparation for igc_free_q_vector.
2210  */
2211 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
2212 {
2213         struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
2214
2215         /* if we're coming from igc_set_interrupt_capability, the vectors are
2216          * not yet allocated
2217          */
2218         if (!q_vector)
2219                 return;
2220
2221         if (q_vector->tx.ring)
2222                 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
2223
2224         if (q_vector->rx.ring)
2225                 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
2226
2227         netif_napi_del(&q_vector->napi);
2228 }
2229
2230 static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
2231 {
2232         int v_idx = adapter->num_q_vectors;
2233
2234         if (adapter->msix_entries) {
2235                 pci_disable_msix(adapter->pdev);
2236                 kfree(adapter->msix_entries);
2237                 adapter->msix_entries = NULL;
2238         } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
2239                 pci_disable_msi(adapter->pdev);
2240         }
2241
2242         while (v_idx--)
2243                 igc_reset_q_vector(adapter, v_idx);
2244 }
2245
2246 /**
2247  * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
2248  * @adapter: Pointer to adapter structure
2249  *
2250  * This function resets the device so that it has 0 rx queues, tx queues, and
2251  * MSI-X interrupts allocated.
2252  */
2253 static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
2254 {
2255         igc_free_q_vectors(adapter);
2256         igc_reset_interrupt_capability(adapter);
2257 }
2258
2259 /**
2260  * igc_free_q_vectors - Free memory allocated for interrupt vectors
2261  * @adapter: board private structure to initialize
2262  *
2263  * This function frees the memory allocated to the q_vectors.  In addition if
2264  * NAPI is enabled it will delete any references to the NAPI struct prior
2265  * to freeing the q_vector.
2266  */
2267 static void igc_free_q_vectors(struct igc_adapter *adapter)
2268 {
2269         int v_idx = adapter->num_q_vectors;
2270
2271         adapter->num_tx_queues = 0;
2272         adapter->num_rx_queues = 0;
2273         adapter->num_q_vectors = 0;
2274
2275         while (v_idx--) {
2276                 igc_reset_q_vector(adapter, v_idx);
2277                 igc_free_q_vector(adapter, v_idx);
2278         }
2279 }
2280
2281 /**
2282  * igc_free_q_vector - Free memory allocated for specific interrupt vector
2283  * @adapter: board private structure to initialize
2284  * @v_idx: Index of vector to be freed
2285  *
2286  * This function frees the memory allocated to the q_vector.
2287  */
2288 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
2289 {
2290         struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
2291
2292         adapter->q_vector[v_idx] = NULL;
2293
2294         /* igc_get_stats64() might access the rings on this vector,
2295          * we must wait a grace period before freeing it.
2296          */
2297         if (q_vector)
2298                 kfree_rcu(q_vector, rcu);
2299 }
2300
2301 /* Need to wait a few seconds after link up to get diagnostic information from
2302  * the phy
2303  */
2304 static void igc_update_phy_info(struct timer_list *t)
2305 {
2306         struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
2307
2308         igc_get_phy_info(&adapter->hw);
2309 }
2310
2311 /**
2312  * igc_has_link - check shared code for link and determine up/down
2313  * @adapter: pointer to driver private info
2314  */
2315 static bool igc_has_link(struct igc_adapter *adapter)
2316 {
2317         struct igc_hw *hw = &adapter->hw;
2318         bool link_active = false;
2319
2320         /* get_link_status is set on LSC (link status) interrupt or
2321          * rx sequence error interrupt.  get_link_status will stay
2322          * true until the igc_check_for_link establishes link
2323          * for copper adapters ONLY
2324          */
2325         switch (hw->phy.media_type) {
2326         case igc_media_type_copper:
2327                 if (!hw->mac.get_link_status)
2328                         return true;
2329                 hw->mac.ops.check_for_link(hw);
2330                 link_active = !hw->mac.get_link_status;
2331                 break;
2332         default:
2333         case igc_media_type_unknown:
2334                 break;
2335         }
2336
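        /* quirk handling for the internal I225 PHY: once carrier is up the
         * flag below is set and link_check_timeout stamped, after which
         * igc_watchdog_task keeps reporting the link as down until one
         * second has elapsed; the flag is cleared again on carrier loss
         */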
2337         if (hw->mac.type == igc_i225 &&
2338             hw->phy.id == I225_I_PHY_ID) {
2339                 if (!netif_carrier_ok(adapter->netdev)) {
2340                         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
2341                 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
2342                         adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
2343                         adapter->link_check_timeout = jiffies;
2344                 }
2345         }
2346
2347         return link_active;
2348 }
2349
2350 /**
2351  * igc_watchdog - Timer Call-back
2352  * @t: pointer to the watchdog timer_list
2353  */
2354 static void igc_watchdog(struct timer_list *t)
2355 {
2356         struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
2357         /* Do the rest outside of interrupt context */
2358         schedule_work(&adapter->watchdog_task);
2359 }
2360
2361 static void igc_watchdog_task(struct work_struct *work)
2362 {
2363         struct igc_adapter *adapter = container_of(work,
2364                                                    struct igc_adapter,
2365                                                    watchdog_task);
2366         struct net_device *netdev = adapter->netdev;
2367         struct igc_hw *hw = &adapter->hw;
2368         struct igc_phy_info *phy = &hw->phy;
2369         u16 phy_data, retry_count = 20;
2370         u32 connsw;
2371         u32 link;
2372         int i;
2373
2374         link = igc_has_link(adapter);
2375
2376         if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
2377                 if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
2378                         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
2379                 else
2380                         link = false;
2381         }
2382
2383         /* Force link down if we have fiber to swap to */
2384         if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
2385                 if (hw->phy.media_type == igc_media_type_copper) {
2386                         connsw = rd32(IGC_CONNSW);
2387                         if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
2388                                 link = 0;
2389                 }
2390         }
2391         if (link) {
2392                 if (!netif_carrier_ok(netdev)) {
2393                         u32 ctrl;
2394
2395                         hw->mac.ops.get_speed_and_duplex(hw,
2396                                                          &adapter->link_speed,
2397                                                          &adapter->link_duplex);
2398
2399                         ctrl = rd32(IGC_CTRL);
2400                         /* Link status message must follow this format */
2401                         netdev_info(netdev,
2402                                     "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
2403                                     netdev->name,
2404                                     adapter->link_speed,
2405                                     adapter->link_duplex == FULL_DUPLEX ?
2406                                     "Full" : "Half",
2407                                     (ctrl & IGC_CTRL_TFCE) &&
2408                                     (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
2409                                     (ctrl & IGC_CTRL_RFCE) ?  "RX" :
2410                                     (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
2411
2412                         /* check if SmartSpeed worked */
2413                         igc_check_downshift(hw);
2414                         if (phy->speed_downgraded)
2415                                 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
2416
2417                         /* adjust timeout factor according to speed/duplex */
2418                         adapter->tx_timeout_factor = 1;
2419                         switch (adapter->link_speed) {
2420                         case SPEED_10:
2421                                 adapter->tx_timeout_factor = 14;
2422                                 break;
2423                         case SPEED_100:
2424                                 /* maybe add some timeout factor ? */
2425                                 break;
2426                         }
2427
2428                         if (adapter->link_speed != SPEED_1000)
2429                                 goto no_wait;
2430
2431                         /* wait for Remote receiver status OK */
2432 retry_read_status:
2433                         if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
2434                                               &phy_data)) {
2435                                 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
2436                                     retry_count) {
2437                                         msleep(100);
2438                                         retry_count--;
2439                                         goto retry_read_status;
2440                                 } else if (!retry_count) {
2441                                         dev_err(&adapter->pdev->dev, "exceeded max 2 second wait\n");
2442                                 }
2443                         } else {
2444                                 dev_err(&adapter->pdev->dev, "failed to read 1000Base-T Status Reg\n");
2445                         }
2446 no_wait:
2447                         netif_carrier_on(netdev);
2448
2449                         /* link state has changed, schedule phy info update */
2450                         if (!test_bit(__IGC_DOWN, &adapter->state))
2451                                 mod_timer(&adapter->phy_info_timer,
2452                                           round_jiffies(jiffies + 2 * HZ));
2453                 }
2454         } else {
2455                 if (netif_carrier_ok(netdev)) {
2456                         adapter->link_speed = 0;
2457                         adapter->link_duplex = 0;
2458
2459                         /* Link status message must follow this format */
2460                         netdev_info(netdev, "igc: %s NIC Link is Down\n",
2461                                     netdev->name);
2462                         netif_carrier_off(netdev);
2463
2464                         /* link state has changed, schedule phy info update */
2465                         if (!test_bit(__IGC_DOWN, &adapter->state))
2466                                 mod_timer(&adapter->phy_info_timer,
2467                                           round_jiffies(jiffies + 2 * HZ));
2468
2469                         /* link is down, time to check for alternate media */
2470                         if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
2471                                 if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
2472                                         schedule_work(&adapter->reset_task);
2473                                         /* return immediately */
2474                                         return;
2475                                 }
2476                         }
2477
2478                 /* also check for alternate media here */
2479                 } else if (!netif_carrier_ok(netdev) &&
2480                            (adapter->flags & IGC_FLAG_MAS_ENABLE)) {
2481                         if (adapter->flags & IGC_FLAG_MEDIA_RESET) {
2482                                 schedule_work(&adapter->reset_task);
2483                                 /* return immediately */
2484                                 return;
2485                         }
2486                 }
2487         }
2488
2489         spin_lock(&adapter->stats64_lock);
2490         igc_update_stats(adapter);
2491         spin_unlock(&adapter->stats64_lock);
2492
2493         for (i = 0; i < adapter->num_tx_queues; i++) {
2494                 struct igc_ring *tx_ring = adapter->tx_ring[i];
2495
2496                 if (!netif_carrier_ok(netdev)) {
2497                         /* We've lost link, so the controller stops DMA,
2498                          * but we've got queued Tx work that's never going
2499                          * to get done, so reset controller to flush Tx.
2500                          * (Do the reset outside of interrupt context).
2501                          */
2502                         if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
2503                                 adapter->tx_timeout_count++;
2504                                 schedule_work(&adapter->reset_task);
2505                                 /* return immediately since reset is imminent */
2506                                 return;
2507                         }
2508                 }
2509
2510                 /* Force detection of hung controller every watchdog period */
2511                 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
2512         }
2513
2514         /* Cause software interrupt to ensure Rx ring is cleaned */
2515         if (adapter->flags & IGC_FLAG_HAS_MSIX) {
2516                 u32 eics = 0;
2517
2518                 for (i = 0; i < adapter->num_q_vectors; i++)
2519                         eics |= adapter->q_vector[i]->eims_value;
2520                 wr32(IGC_EICS, eics);
2521         } else {
2522                 wr32(IGC_ICS, IGC_ICS_RXDMT0);
2523         }
2524
2525         /* Reset the timer */
2526         if (!test_bit(__IGC_DOWN, &adapter->state)) {
2527                 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
2528                         mod_timer(&adapter->watchdog_timer,
2529                                   round_jiffies(jiffies + HZ));
2530                 else
2531                         mod_timer(&adapter->watchdog_timer,
2532                                   round_jiffies(jiffies + 2 * HZ));
2533         }
2534 }
2535
2536 /**
2537  * igc_update_ring_itr - update the dynamic ITR value based on packet size
2538  * @q_vector: pointer to q_vector
2539  *
2540  * Stores a new ITR value based strictly on packet size.  This
2541  * algorithm is less sophisticated than that used in igc_update_itr,
2542  * due to the difficulty of synchronizing statistics across multiple
2543  * receive rings.  The divisors and thresholds used by this function
2544  * were determined based on theoretical maximum wire speed and testing
2545  * data, in order to minimize response time while increasing bulk
2546  * throughput.
2547  * NOTE: This function is called only when operating in a multiqueue
2548  * receive environment.
2549  */
2550 static void igc_update_ring_itr(struct igc_q_vector *q_vector)
2551 {
2552         struct igc_adapter *adapter = q_vector->adapter;
2553         int new_val = q_vector->itr_val;
2554         int avg_wire_size = 0;
2555         unsigned int packets;
2556
2557         /* For non-gigabit speeds, just fix the interrupt rate at 4000
2558          * ints/sec - ITR timer value of 120 ticks.
2559          */
2560         switch (adapter->link_speed) {
2561         case SPEED_10:
2562         case SPEED_100:
2563                 new_val = IGC_4K_ITR;
2564                 goto set_itr_val;
2565         default:
2566                 break;
2567         }
2568
2569         packets = q_vector->rx.total_packets;
2570         if (packets)
2571                 avg_wire_size = q_vector->rx.total_bytes / packets;
2572
2573         packets = q_vector->tx.total_packets;
2574         if (packets)
2575                 avg_wire_size = max_t(u32, avg_wire_size,
2576                                       q_vector->tx.total_bytes / packets);
2577
2578         /* if avg_wire_size isn't set no work was done */
2579         if (!avg_wire_size)
2580                 goto clear_counts;
2581
2582         /* Add 24 bytes to size to account for CRC, preamble, and gap */
2583         avg_wire_size += 24;
2584
2585         /* Don't starve jumbo frames */
2586         avg_wire_size = min(avg_wire_size, 3000);
2587
2588         /* Give a little boost to mid-size frames */
2589         if (avg_wire_size > 300 && avg_wire_size < 1200)
2590                 new_val = avg_wire_size / 3;
2591         else
2592                 new_val = avg_wire_size / 2;
2593
2594         /* conservative mode (itr 3) eliminates the lowest_latency setting */
2595         if (new_val < IGC_20K_ITR &&
2596             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
2597             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
2598                 new_val = IGC_20K_ITR;
2599
2600 set_itr_val:
2601         if (new_val != q_vector->itr_val) {
2602                 q_vector->itr_val = new_val;
2603                 q_vector->set_itr = 1;
2604         }
2605 clear_counts:
2606         q_vector->rx.total_bytes = 0;
2607         q_vector->rx.total_packets = 0;
2608         q_vector->tx.total_bytes = 0;
2609         q_vector->tx.total_packets = 0;
2610 }
2611
2612 /**
2613  * igc_update_itr - update the dynamic ITR value based on statistics
2614  * @q_vector: pointer to q_vector
2615  * @ring_container: ring info to update the itr for
2616  *
2617  * Stores a new ITR value based on packets and byte
2618  * counts during the last interrupt.  The advantage of per interrupt
2619  * computation is faster updates and more accurate ITR for the current
2620  * traffic pattern.  Constants in this function were computed
2621  * based on theoretical maximum wire speed and thresholds were set based
2622  * on testing data as well as attempting to minimize response time
2623  * while increasing bulk throughput.
2624  * NOTE: These calculations are only valid when operating in a single-
2625  * queue environment.
2626  */
2627 static void igc_update_itr(struct igc_q_vector *q_vector,
2628                            struct igc_ring_container *ring_container)
2629 {
2630         unsigned int packets = ring_container->total_packets;
2631         unsigned int bytes = ring_container->total_bytes;
2632         u8 itrval = ring_container->itr;
2633
2634         /* no packets, exit with status unchanged */
2635         if (packets == 0)
2636                 return;
2637
2638         switch (itrval) {
2639         case lowest_latency:
2640                 /* handle TSO and jumbo frames */
2641                 if (bytes / packets > 8000)
2642                         itrval = bulk_latency;
2643                 else if ((packets < 5) && (bytes > 512))
2644                         itrval = low_latency;
2645                 break;
2646         case low_latency:  /* 50 usec aka 20000 ints/s */
2647                 if (bytes > 10000) {
2648                         /* this if handles the TSO accounting */
2649                         if (bytes / packets > 8000)
2650                                 itrval = bulk_latency;
2651                         else if ((packets < 10) || ((bytes / packets) > 1200))
2652                                 itrval = bulk_latency;
2653                         else if ((packets > 35))
2654                                 itrval = lowest_latency;
2655                 } else if (bytes / packets > 2000) {
2656                         itrval = bulk_latency;
2657                 } else if (packets <= 2 && bytes < 512) {
2658                         itrval = lowest_latency;
2659                 }
2660                 break;
2661         case bulk_latency: /* 250 usec aka 4000 ints/s */
2662                 if (bytes > 25000) {
2663                         if (packets > 35)
2664                                 itrval = low_latency;
2665                 } else if (bytes < 1500) {
2666                         itrval = low_latency;
2667                 }
2668                 break;
2669         }
2670
2671         /* clear work counters since we have the values we need */
2672         ring_container->total_bytes = 0;
2673         ring_container->total_packets = 0;
2674
2675         /* write updated itr to ring container */
2676         ring_container->itr = itrval;
2677 }
2678
2679 /**
2680  * igc_intr_msi - Interrupt Handler
2681  * @irq: interrupt number
2682  * @data: pointer to the adapter (board private structure)
2683  */
2684 static irqreturn_t igc_intr_msi(int irq, void *data)
2685 {
2686         struct igc_adapter *adapter = data;
2687         struct igc_q_vector *q_vector = adapter->q_vector[0];
2688         struct igc_hw *hw = &adapter->hw;
2689         /* read ICR disables interrupts using IAM */
2690         u32 icr = rd32(IGC_ICR);
2691
2692         igc_write_itr(q_vector);
2693
2694         if (icr & IGC_ICR_DRSTA)
2695                 schedule_work(&adapter->reset_task);
2696
2697         if (icr & IGC_ICR_DOUTSYNC) {
2698                 /* HW is reporting DMA is out of sync */
2699                 adapter->stats.doosync++;
2700         }
2701
2702         if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
2703                 hw->mac.get_link_status = 1;
2704                 if (!test_bit(__IGC_DOWN, &adapter->state))
2705                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
2706         }
2707
2708         napi_schedule(&q_vector->napi);
2709
2710         return IRQ_HANDLED;
2711 }
2712
2713 /**
2714  * igc_intr - Legacy Interrupt Handler
2715  * @irq: interrupt number
2716  * @data: pointer to the adapter (board private structure)
2717  */
2718 static irqreturn_t igc_intr(int irq, void *data)
2719 {
2720         struct igc_adapter *adapter = data;
2721         struct igc_q_vector *q_vector = adapter->q_vector[0];
2722         struct igc_hw *hw = &adapter->hw;
2723         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
2724          * need for the IMC write
2725          */
2726         u32 icr = rd32(IGC_ICR);
2727
2728         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
2729          * not set, then the adapter didn't send an interrupt
2730          */
2731         if (!(icr & IGC_ICR_INT_ASSERTED))
2732                 return IRQ_NONE;
2733
2734         igc_write_itr(q_vector);
2735
2736         if (icr & IGC_ICR_DRSTA)
2737                 schedule_work(&adapter->reset_task);
2738
2739         if (icr & IGC_ICR_DOUTSYNC) {
2740                 /* HW is reporting DMA is out of sync */
2741                 adapter->stats.doosync++;
2742         }
2743
2744         if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
2745                 hw->mac.get_link_status = 1;
2746                 /* guard against interrupt when we're going down */
2747                 if (!test_bit(__IGC_DOWN, &adapter->state))
2748                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
2749         }
2750
2751         napi_schedule(&q_vector->napi);
2752
2753         return IRQ_HANDLED;
2754 }
2755
2756 static void igc_set_itr(struct igc_q_vector *q_vector)
2757 {
2758         struct igc_adapter *adapter = q_vector->adapter;
2759         u32 new_itr = q_vector->itr_val;
2760         u8 current_itr = 0;
2761
2762         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
2763         switch (adapter->link_speed) {
2764         case SPEED_10:
2765         case SPEED_100:
2766                 current_itr = 0;
2767                 new_itr = IGC_4K_ITR;
2768                 goto set_itr_now;
2769         default:
2770                 break;
2771         }
2772
2773         igc_update_itr(q_vector, &q_vector->tx);
2774         igc_update_itr(q_vector, &q_vector->rx);
2775
2776         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
2777
2778         /* conservative mode (itr 3) eliminates the lowest_latency setting */
2779         if (current_itr == lowest_latency &&
2780             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
2781             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
2782                 current_itr = low_latency;
2783
2784         switch (current_itr) {
2785         /* counts and packets in update_itr are dependent on these numbers */
2786         case lowest_latency:
2787                 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
2788                 break;
2789         case low_latency:
2790                 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
2791                 break;
2792         case bulk_latency:
2793                 new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
2794                 break;
2795         default:
2796                 break;
2797         }
2798
2799 set_itr_now:
2800         if (new_itr != q_vector->itr_val) {
2801                 /* this attempts to bias the interrupt rate towards Bulk
2802                  * by adding intermediate steps when interrupt rate is
2803                  * increasing
2804                  */
2805                 new_itr = new_itr > q_vector->itr_val ?
2806                           max((new_itr * q_vector->itr_val) /
2807                           (new_itr + (q_vector->itr_val >> 2)),
2808                           new_itr) : new_itr;
2809                 /* Don't write the value here; it resets the adapter's
2810                  * internal timer, and causes us to delay far longer than
2811                  * we should between interrupts.  Instead, we write the ITR
2812                  * value at the beginning of the next interrupt so the timing
2813                  * ends up being correct.
2814                  */
2815                 q_vector->itr_val = new_itr;
2816                 q_vector->set_itr = 1;
2817         }
2818 }
2819
2820 static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
2821 {
2822         struct igc_adapter *adapter = q_vector->adapter;
2823         struct igc_hw *hw = &adapter->hw;
2824
2825         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
2826             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
2827                 if (adapter->num_q_vectors == 1)
2828                         igc_set_itr(q_vector);
2829                 else
2830                         igc_update_ring_itr(q_vector);
2831         }
2832
2833         if (!test_bit(__IGC_DOWN, &adapter->state)) {
2834                 if (adapter->msix_entries)
2835                         wr32(IGC_EIMS, q_vector->eims_value);
2836                 else
2837                         igc_irq_enable(adapter);
2838         }
2839 }
2840
2841 /**
2842  * igc_poll - NAPI Rx polling callback
2843  * @napi: napi polling structure
2844  * @budget: count of how many packets we should handle
2845  */
2846 static int igc_poll(struct napi_struct *napi, int budget)
2847 {
2848         struct igc_q_vector *q_vector = container_of(napi,
2849                                                      struct igc_q_vector,
2850                                                      napi);
2851         bool clean_complete = true;
2852         int work_done = 0;
2853
2854         if (q_vector->tx.ring)
2855                 clean_complete = igc_clean_tx_irq(q_vector, budget);
2856
2857         if (q_vector->rx.ring) {
2858                 int cleaned = igc_clean_rx_irq(q_vector, budget);
2859
2860                 work_done += cleaned;
2861                 if (cleaned >= budget)
2862                         clean_complete = false;
2863         }
2864
2865         /* If all work not completed, return budget and keep polling */
2866         if (!clean_complete)
2867                 return budget;
2868
2869         /* If not enough Rx work done, exit the polling mode */
2870         napi_complete_done(napi, work_done);
2871         igc_ring_irq_enable(q_vector);
2872
2873         return 0;
2874 }
2875
2876 /**
2877  * igc_set_interrupt_capability - set MSI or MSI-X if supported
2878  * @adapter: Pointer to adapter structure
2879  *
2880  * Attempt to configure interrupts using the best available
2881  * capabilities of the hardware and kernel.
2882  */
2883 static void igc_set_interrupt_capability(struct igc_adapter *adapter,
2884                                          bool msix)
2885 {
2886         int numvecs, i;
2887         int err;
2888
2889         if (!msix)
2890                 goto msi_only;
2891         adapter->flags |= IGC_FLAG_HAS_MSIX;
2892
2893         /* Number of supported queues. */
2894         adapter->num_rx_queues = adapter->rss_queues;
2895
2896         adapter->num_tx_queues = adapter->rss_queues;
2897
2898         /* start with one vector for every Rx queue */
2899         numvecs = adapter->num_rx_queues;
2900
2901         /* if Tx handler is separate add 1 for every Tx queue */
2902         if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
2903                 numvecs += adapter->num_tx_queues;
2904
2905         /* store the number of vectors reserved for queues */
2906         adapter->num_q_vectors = numvecs;
2907
2908         /* add 1 vector for link status interrupts */
2909         numvecs++;
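        /* e.g. with 4 RSS queues and queue pairing enabled this works out
         * to 4 queue vectors plus 1 link vector, i.e. numvecs == 5
         */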
2910
2911         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
2912                                         GFP_KERNEL);
2913
2914         if (!adapter->msix_entries)
2915                 return;
2916
2917         /* populate entry values */
2918         for (i = 0; i < numvecs; i++)
2919                 adapter->msix_entries[i].entry = i;
2920
2921         err = pci_enable_msix_range(adapter->pdev,
2922                                     adapter->msix_entries,
2923                                     numvecs,
2924                                     numvecs);
2925         if (err > 0)
2926                 return;
2927
2928         kfree(adapter->msix_entries);
2929         adapter->msix_entries = NULL;
2930
2931         igc_reset_interrupt_capability(adapter);
2932
2933 msi_only:
2934         adapter->flags &= ~IGC_FLAG_HAS_MSIX;
2935
2936         adapter->rss_queues = 1;
2937         adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
2938         adapter->num_rx_queues = 1;
2939         adapter->num_tx_queues = 1;
2940         adapter->num_q_vectors = 1;
2941         if (!pci_enable_msi(adapter->pdev))
2942                 adapter->flags |= IGC_FLAG_HAS_MSI;
2943 }
2944
2945 static void igc_add_ring(struct igc_ring *ring,
2946                          struct igc_ring_container *head)
2947 {
2948         head->ring = ring;
2949         head->count++;
2950 }
2951
2952 /**
2953  * igc_alloc_q_vector - Allocate memory for a single interrupt vector
2954  * @adapter: board private structure to initialize
2955  * @v_count: q_vectors allocated on adapter, used for ring interleaving
2956  * @v_idx: index of vector in adapter struct
2957  * @txr_count: total number of Tx rings to allocate
2958  * @txr_idx: index of first Tx ring to allocate
2959  * @rxr_count: total number of Rx rings to allocate
2960  * @rxr_idx: index of first Rx ring to allocate
2961  *
2962  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
2963  */
2964 static int igc_alloc_q_vector(struct igc_adapter *adapter,
2965                               unsigned int v_count, unsigned int v_idx,
2966                               unsigned int txr_count, unsigned int txr_idx,
2967                               unsigned int rxr_count, unsigned int rxr_idx)
2968 {
2969         struct igc_q_vector *q_vector;
2970         struct igc_ring *ring;
2971         int ring_count, size;
2972
2973         /* igc only supports 1 Tx and/or 1 Rx queue per vector */
2974         if (txr_count > 1 || rxr_count > 1)
2975                 return -ENOMEM;
2976
2977         ring_count = txr_count + rxr_count;
2978         size = sizeof(struct igc_q_vector) +
2979                 (sizeof(struct igc_ring) * ring_count);
2980
2981         /* allocate q_vector and rings in one block, reusing any prior allocation */
2982         q_vector = adapter->q_vector[v_idx];
2983         if (!q_vector)
2984                 q_vector = kzalloc(size, GFP_KERNEL);
2985         else
2986                 memset(q_vector, 0, size);
2987         if (!q_vector)
2988                 return -ENOMEM;
2989
2990         /* initialize NAPI */
2991         netif_napi_add(adapter->netdev, &q_vector->napi,
2992                        igc_poll, 64);
2993
2994         /* tie q_vector and adapter together */
2995         adapter->q_vector[v_idx] = q_vector;
2996         q_vector->adapter = adapter;
2997
2998         /* initialize work limits */
2999         q_vector->tx.work_limit = adapter->tx_work_limit;
3000
3001         /* initialize ITR configuration */
3002         q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
3003         q_vector->itr_val = IGC_START_ITR;
3004
3005         /* initialize pointer to rings */
3006         ring = q_vector->ring;
3007
3008         /* initialize ITR */
3009         if (rxr_count) {
3010                 /* rx or rx/tx vector */
3011                 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
3012                         q_vector->itr_val = adapter->rx_itr_setting;
3013         } else {
3014                 /* tx only vector */
3015                 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
3016                         q_vector->itr_val = adapter->tx_itr_setting;
3017         }
3018
3019         if (txr_count) {
3020                 /* assign generic ring traits */
3021                 ring->dev = &adapter->pdev->dev;
3022                 ring->netdev = adapter->netdev;
3023
3024                 /* configure backlink on ring */
3025                 ring->q_vector = q_vector;
3026
3027                 /* update q_vector Tx values */
3028                 igc_add_ring(ring, &q_vector->tx);
3029
3030                 /* apply Tx specific ring traits */
3031                 ring->count = adapter->tx_ring_count;
3032                 ring->queue_index = txr_idx;
3033
3034                 /* assign ring to adapter */
3035                 adapter->tx_ring[txr_idx] = ring;
3036
3037                 /* push pointer to next ring */
3038                 ring++;
3039         }
3040
3041         if (rxr_count) {
3042                 /* assign generic ring traits */
3043                 ring->dev = &adapter->pdev->dev;
3044                 ring->netdev = adapter->netdev;
3045
3046                 /* configure backlink on ring */
3047                 ring->q_vector = q_vector;
3048
3049                 /* update q_vector Rx values */
3050                 igc_add_ring(ring, &q_vector->rx);
3051
3052                 /* apply Rx specific ring traits */
3053                 ring->count = adapter->rx_ring_count;
3054                 ring->queue_index = rxr_idx;
3055
3056                 /* assign ring to adapter */
3057                 adapter->rx_ring[rxr_idx] = ring;
3058         }
3059
3060         return 0;
3061 }
3062
3063 /**
3064  * igc_alloc_q_vectors - Allocate memory for interrupt vectors
3065  * @adapter: board private structure to initialize
3066  *
3067  * We allocate one q_vector per queue interrupt.  If allocation fails we
3068  * return -ENOMEM.
3069  */
3070 static int igc_alloc_q_vectors(struct igc_adapter *adapter)
3071 {
3072         int rxr_remaining = adapter->num_rx_queues;
3073         int txr_remaining = adapter->num_tx_queues;
3074         int rxr_idx = 0, txr_idx = 0, v_idx = 0;
3075         int q_vectors = adapter->num_q_vectors;
3076         int err;
3077
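             /* if there are enough vectors, give every Rx queue its own
              * vector first; any remaining vectors then pick up the Tx
              * (or combined Tx/Rx) work in the loop below
              */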
3078         if (q_vectors >= (rxr_remaining + txr_remaining)) {
3079                 for (; rxr_remaining; v_idx++) {
3080                         err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
3081                                                  0, 0, 1, rxr_idx);
3082
3083                         if (err)
3084                                 goto err_out;
3085
3086                         /* update counts and index */
3087                         rxr_remaining--;
3088                         rxr_idx++;
3089                 }
3090         }
3091
3092         for (; v_idx < q_vectors; v_idx++) {
3093                 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
3094                 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
3095
3096                 err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
3097                                          tqpv, txr_idx, rqpv, rxr_idx);
3098
3099                 if (err)
3100                         goto err_out;
3101
3102                 /* update counts and index */
3103                 rxr_remaining -= rqpv;
3104                 txr_remaining -= tqpv;
3105                 rxr_idx++;
3106                 txr_idx++;
3107         }
3108
3109         return 0;
3110
3111 err_out:
3112         adapter->num_tx_queues = 0;
3113         adapter->num_rx_queues = 0;
3114         adapter->num_q_vectors = 0;
3115
3116         while (v_idx--)
3117                 igc_free_q_vector(adapter, v_idx);
3118
3119         return -ENOMEM;
3120 }
3121
3122 /**
3123  * igc_cache_ring_register - Descriptor ring to register mapping
3124  * @adapter: board private structure to initialize
3125  *
3126  * Once we know the feature-set enabled for the device, we'll cache
3127  * the register offset the descriptor ring is assigned to.
3128  */
3129 static void igc_cache_ring_register(struct igc_adapter *adapter)
3130 {
3131         int i = 0, j = 0;
3132
3133         switch (adapter->hw.mac.type) {
3134         case igc_i225:
3135         /* Fall through */
3136         default:
3137                 for (; i < adapter->num_rx_queues; i++)
3138                         adapter->rx_ring[i]->reg_idx = i;
3139                 for (; j < adapter->num_tx_queues; j++)
3140                         adapter->tx_ring[j]->reg_idx = j;
3141                 break;
3142         }
3143 }
3144
3145 /**
3146  * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
3147  * @adapter: Pointer to adapter structure
      * @msix: boolean value for MSI-X capability
3148  *
3149  * This function initializes the interrupts and allocates all of the queues.
3150  */
3151 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
3152 {
3153         struct pci_dev *pdev = adapter->pdev;
3154         int err = 0;
3155
3156         igc_set_interrupt_capability(adapter, msix);
3157
3158         err = igc_alloc_q_vectors(adapter);
3159         if (err) {
3160                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
3161                 goto err_alloc_q_vectors;
3162         }
3163
3164         igc_cache_ring_register(adapter);
3165
3166         return 0;
3167
3168 err_alloc_q_vectors:
3169         igc_reset_interrupt_capability(adapter);
3170         return err;
3171 }
3172
3173 static void igc_free_irq(struct igc_adapter *adapter)
3174 {
3175         if (adapter->msix_entries) {
3176                 int vector = 0, i;
3177
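                     /* entry 0 is the "other" (link status) interrupt
                      * requested with the adapter as its context; the
                      * per-queue vectors follow it
                      */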
3178                 free_irq(adapter->msix_entries[vector++].vector, adapter);
3179
3180                 for (i = 0; i < adapter->num_q_vectors; i++)
3181                         free_irq(adapter->msix_entries[vector++].vector,
3182                                  adapter->q_vector[i]);
3183         } else {
3184                 free_irq(adapter->pdev->irq, adapter);
3185         }
3186 }
3187
3188 /**
3189  * igc_irq_disable - Mask off interrupt generation on the NIC
3190  * @adapter: board private structure
3191  */
3192 static void igc_irq_disable(struct igc_adapter *adapter)
3193 {
3194         struct igc_hw *hw = &adapter->hw;
3195
3196         if (adapter->msix_entries) {
3197                 u32 regval = rd32(IGC_EIAM);
3198
3199                 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
3200                 wr32(IGC_EIMC, adapter->eims_enable_mask);
3201                 regval = rd32(IGC_EIAC);
3202                 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
3203         }
3204
3205         wr32(IGC_IAM, 0);
3206         wr32(IGC_IMC, ~0);
3207         wrfl();
3208
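             /* make sure any in-flight interrupt handlers have finished */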
3209         if (adapter->msix_entries) {
3210                 int vector = 0, i;
3211
3212                 synchronize_irq(adapter->msix_entries[vector++].vector);
3213
3214                 for (i = 0; i < adapter->num_q_vectors; i++)
3215                         synchronize_irq(adapter->msix_entries[vector++].vector);
3216         } else {
3217                 synchronize_irq(adapter->pdev->irq);
3218         }
3219 }
3220
3221 /**
3222  * igc_irq_enable - Enable default interrupt generation settings
3223  * @adapter: board private structure
3224  */
3225 static void igc_irq_enable(struct igc_adapter *adapter)
3226 {
3227         struct igc_hw *hw = &adapter->hw;
3228
3229         if (adapter->msix_entries) {
3230                 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
3231                 u32 regval = rd32(IGC_EIAC);
3232
3233                 wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
3234                 regval = rd32(IGC_EIAM);
3235                 wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
3236                 wr32(IGC_EIMS, adapter->eims_enable_mask);
3237                 wr32(IGC_IMS, ims);
3238         } else {
3239                 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3240                 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
3241         }
3242 }
3243
3244 /**
3245  * igc_request_irq - initialize interrupts
3246  * @adapter: Pointer to adapter structure
3247  *
3248  * Attempts to configure interrupts using the best available
3249  * capabilities of the hardware and kernel.
3250  */
3251 static int igc_request_irq(struct igc_adapter *adapter)
3252 {
3253         struct net_device *netdev = adapter->netdev;
3254         struct pci_dev *pdev = adapter->pdev;
3255         int err = 0;
3256
3257         if (adapter->flags & IGC_FLAG_HAS_MSIX) {
3258                 err = igc_request_msix(adapter);
3259                 if (!err)
3260                         goto request_done;
3261                 /* fall back to MSI */
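                     /* the queue count may shrink when falling back, so
                      * drop the ring resources and rebuild the interrupt
                      * scheme before reallocating them
                      */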
3262                 igc_free_all_tx_resources(adapter);
3263                 igc_free_all_rx_resources(adapter);
3264
3265                 igc_clear_interrupt_scheme(adapter);
3266                 err = igc_init_interrupt_scheme(adapter, false);
3267                 if (err)
3268                         goto request_done;
3269                 igc_setup_all_tx_resources(adapter);
3270                 igc_setup_all_rx_resources(adapter);
3271                 igc_configure(adapter);
3272         }
3273
3274         igc_assign_vector(adapter->q_vector[0], 0);
3275
3276         if (adapter->flags & IGC_FLAG_HAS_MSI) {
3277                 err = request_irq(pdev->irq, &igc_intr_msi, 0,
3278                                   netdev->name, adapter);
3279                 if (!err)
3280                         goto request_done;
3281
3282                 /* fall back to legacy interrupts */
3283                 igc_reset_interrupt_capability(adapter);
3284                 adapter->flags &= ~IGC_FLAG_HAS_MSI;
3285         }
3286
3287         err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
3288                           netdev->name, adapter);
3289
3290         if (err)
3291                 dev_err(&pdev->dev, "Error %d getting interrupt\n",
3292                         err);
3293
3294 request_done:
3295         return err;
3296 }
3297
3298 static void igc_write_itr(struct igc_q_vector *q_vector)
3299 {
3300         u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
3301
3302         if (!q_vector->set_itr)
3303                 return;
3304
3305         if (!itr_val)
3306                 itr_val = IGC_ITR_VAL_MASK;
3307
3308         itr_val |= IGC_EITR_CNT_IGNR;
3309
3310         writel(itr_val, q_vector->itr_register);
3311         q_vector->set_itr = 0;
3312 }
3313
3314 /**
3315  * __igc_open - Called when a network interface is made active
3316  * @netdev: network interface device structure
      * @resuming: boolean indicating whether the device is being resumed
3317  *
3318  * Returns 0 on success, negative value on failure
3319  *
3320  * The open entry point is called when a network interface is made
3321  * active by the system (IFF_UP).  At this point all resources needed
3322  * for transmit and receive operations are allocated, the interrupt
3323  * handler is registered with the OS, the watchdog timer is started,
3324  * and the stack is notified that the interface is ready.
3325  */
3326 static int __igc_open(struct net_device *netdev, bool resuming)
3327 {
3328         struct igc_adapter *adapter = netdev_priv(netdev);
3329         struct igc_hw *hw = &adapter->hw;
3330         int err = 0;
3331         int i = 0;
3332
3333         /* disallow open during test */
3335         if (test_bit(__IGC_TESTING, &adapter->state)) {
3336                 WARN_ON(resuming);
3337                 return -EBUSY;
3338         }
3339
3340         netif_carrier_off(netdev);
3341
3342         /* allocate transmit descriptors */
3343         err = igc_setup_all_tx_resources(adapter);
3344         if (err)
3345                 goto err_setup_tx;
3346
3347         /* allocate receive descriptors */
3348         err = igc_setup_all_rx_resources(adapter);
3349         if (err)
3350                 goto err_setup_rx;
3351
3352         igc_power_up_link(adapter);
3353
3354         igc_configure(adapter);
3355
3356         err = igc_request_irq(adapter);
3357         if (err)
3358                 goto err_req_irq;
3359
3360         /* Notify the stack of the actual queue counts. */
3361         err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
3362         if (err)
3363                 goto err_set_queues;
3364
3365         err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
3366         if (err)
3367                 goto err_set_queues;
3368
3369         clear_bit(__IGC_DOWN, &adapter->state);
3370
3371         for (i = 0; i < adapter->num_q_vectors; i++)
3372                 napi_enable(&adapter->q_vector[i]->napi);
3373
3374         /* Clear any pending interrupts. */
3375         rd32(IGC_ICR);
3376         igc_irq_enable(adapter);
3377
3378         netif_tx_start_all_queues(netdev);
3379
3380         /* start the watchdog. */
3381         hw->mac.get_link_status = 1;
3382         schedule_work(&adapter->watchdog_task);
3383
3384         return IGC_SUCCESS;
3385
3386 err_set_queues:
3387         igc_free_irq(adapter);
3388 err_req_irq:
3389         igc_release_hw_control(adapter);
3390         igc_power_down_link(adapter);
3391         igc_free_all_rx_resources(adapter);
3392 err_setup_rx:
3393         igc_free_all_tx_resources(adapter);
3394 err_setup_tx:
3395         igc_reset(adapter);
3396
3397         return err;
3398 }
3399
3400 static int igc_open(struct net_device *netdev)
3401 {
3402         return __igc_open(netdev, false);
3403 }
3404
3405 /**
3406  * __igc_close - Disables a network interface
3407  * @netdev: network interface device structure
      * @suspending: boolean indicating whether the device is being suspended
3408  *
3409  * Returns 0, this is not allowed to fail
3410  *
3411  * The close entry point is called when an interface is de-activated
3412  * by the OS.  The hardware is still under the driver's control, but
3413  * needs to be disabled.  A global MAC reset is issued to stop the
3414  * hardware, and all transmit and receive resources are freed.
3415  */
3416 static int __igc_close(struct net_device *netdev, bool suspending)
3417 {
3418         struct igc_adapter *adapter = netdev_priv(netdev);
3419
3420         WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
3421
3422         igc_down(adapter);
3423
3424         igc_release_hw_control(adapter);
3425
3426         igc_free_irq(adapter);
3427
3428         igc_free_all_tx_resources(adapter);
3429         igc_free_all_rx_resources(adapter);
3430
3431         return 0;
3432 }
3433
3434 static int igc_close(struct net_device *netdev)
3435 {
3436         if (netif_device_present(netdev) || netdev->dismantle)
3437                 return __igc_close(netdev, false);
3438         return 0;
3439 }
3440
3441 static const struct net_device_ops igc_netdev_ops = {
3442         .ndo_open               = igc_open,
3443         .ndo_stop               = igc_close,
3444         .ndo_start_xmit         = igc_xmit_frame,
3445         .ndo_set_mac_address    = igc_set_mac,
3446         .ndo_change_mtu         = igc_change_mtu,
3447         .ndo_get_stats          = igc_get_stats,
3448         .ndo_do_ioctl           = igc_ioctl,
3449 };
3450
3451 /* PCIe configuration access */
3452 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
3453 {
3454         struct igc_adapter *adapter = hw->back;
3455
3456         pci_read_config_word(adapter->pdev, reg, value);
3457 }
3458
3459 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
3460 {
3461         struct igc_adapter *adapter = hw->back;
3462
3463         pci_write_config_word(adapter->pdev, reg, *value);
3464 }
3465
3466 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
3467 {
3468         struct igc_adapter *adapter = hw->back;
3469         u16 cap_offset;
3470
3471         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
3472         if (!cap_offset)
3473                 return -IGC_ERR_CONFIG;
3474
3475         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
3476
3477         return IGC_SUCCESS;
3478 }
3479
3480 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
3481 {
3482         struct igc_adapter *adapter = hw->back;
3483         u16 cap_offset;
3484
3485         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
3486         if (!cap_offset)
3487                 return -IGC_ERR_CONFIG;
3488
3489         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
3490
3491         return IGC_SUCCESS;
3492 }
3493
3494 u32 igc_rd32(struct igc_hw *hw, u32 reg)
3495 {
3496         struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
3497         u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
3498         u32 value = 0;
3499
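             /* hw_addr is cleared below on surprise removal, so the single
              * READ_ONCE() snapshot above keeps this access consistent
              */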
3500         if (IGC_REMOVED(hw_addr))
3501                 return ~value;
3502
3503         value = readl(&hw_addr[reg]);
3504
3505         /* reads should not return all F's */
3506         if (!(~value) && (!reg || !(~readl(hw_addr)))) {
3507                 struct net_device *netdev = igc->netdev;
3508
3509                 hw->hw_addr = NULL;
3510                 netif_device_detach(netdev);
3511                 netdev_err(netdev, "PCIe link lost, device now detached\n");
3512         }
3513
3514         return value;
3515 }
3516
3517 /**
3518  * igc_probe - Device Initialization Routine
3519  * @pdev: PCI device information struct
3520  * @ent: entry in igc_pci_tbl
3521  *
3522  * Returns 0 on success, negative on failure
3523  *
3524  * igc_probe initializes an adapter identified by a pci_dev structure.
3525  * The OS initialization, configuring the adapter private structure,
3526  * and a hardware reset occur.
3527  */
3528 static int igc_probe(struct pci_dev *pdev,
3529                      const struct pci_device_id *ent)
3530 {
3531         struct igc_adapter *adapter;
3532         struct net_device *netdev;
3533         struct igc_hw *hw;
3534         const struct igc_info *ei = igc_info_tbl[ent->driver_data];
3535         int err, pci_using_dac;
3536
3537         err = pci_enable_device_mem(pdev);
3538         if (err)
3539                 return err;
3540
3541         pci_using_dac = 0;
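             /* prefer 64-bit DMA and fall back to a 32-bit mask if the
              * platform cannot support it
              */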
3542         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
3543         if (!err) {
3544                 err = dma_set_coherent_mask(&pdev->dev,
3545                                             DMA_BIT_MASK(64));
3546                 if (!err)
3547                         pci_using_dac = 1;
3548         } else {
3549                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
3550                 if (err) {
3551                         err = dma_set_coherent_mask(&pdev->dev,
3552                                                     DMA_BIT_MASK(32));
3553                         if (err) {
3554                                 IGC_ERR("Wrong DMA configuration, aborting\n");
3555                                 goto err_dma;
3556                         }
3557                 }
3558         }
3559
3560         err = pci_request_selected_regions(pdev,
3561                                            pci_select_bars(pdev,
3562                                                            IORESOURCE_MEM),
3563                                            igc_driver_name);
3564         if (err)
3565                 goto err_pci_reg;
3566
3567         pci_enable_pcie_error_reporting(pdev);
3568
3569         pci_set_master(pdev);
3570
3571         err = -ENOMEM;
3572         netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
3573                                    IGC_MAX_TX_QUEUES);
3574
3575         if (!netdev)
3576                 goto err_alloc_etherdev;
3577
3578         SET_NETDEV_DEV(netdev, &pdev->dev);
3579
3580         pci_set_drvdata(pdev, netdev);
3581         adapter = netdev_priv(netdev);
3582         adapter->netdev = netdev;
3583         adapter->pdev = pdev;
3584         hw = &adapter->hw;
3585         hw->back = adapter;
3586         adapter->port_num = hw->bus.func;
3587         adapter->msg_enable = GENMASK(debug - 1, 0);
3588
3589         err = pci_save_state(pdev);
3590         if (err)
3591                 goto err_ioremap;
3592
3593         err = -EIO;
3594         adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
3595                                    pci_resource_len(pdev, 0));
3596         if (!adapter->io_addr)
3597                 goto err_ioremap;
3598
3599         /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
3600         hw->hw_addr = adapter->io_addr;
3601
3602         netdev->netdev_ops = &igc_netdev_ops;
3603
3604         netdev->watchdog_timeo = 5 * HZ;
3605
3606         netdev->mem_start = pci_resource_start(pdev, 0);
3607         netdev->mem_end = pci_resource_end(pdev, 0);
3608
3609         /* PCI config space info */
3610         hw->vendor_id = pdev->vendor;
3611         hw->device_id = pdev->device;
3612         hw->revision_id = pdev->revision;
3613         hw->subsystem_vendor_id = pdev->subsystem_vendor;
3614         hw->subsystem_device_id = pdev->subsystem_device;
3615
3616         /* Copy the default MAC and PHY function pointers */
3617         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
3618         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
3619
3620         /* Initialize skew-specific constants */
3621         err = ei->get_invariants(hw);
3622         if (err)
3623                 goto err_sw_init;
3624
3625         /* setup the private structure */
3626         err = igc_sw_init(adapter);
3627         if (err)
3628                 goto err_sw_init;
3629
3630         /* MTU range: 68 - 9216 */
3631         netdev->min_mtu = ETH_MIN_MTU;
3632         netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
3633
3634         /* before reading the NVM, reset the controller to put the device in a
3635          * known good starting state
3636          */
3637         hw->mac.ops.reset_hw(hw);
3638
3639         if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
3640                 /* copy the MAC address out of the NVM */
3641                 if (hw->mac.ops.read_mac_addr(hw))
3642                         dev_err(&pdev->dev, "NVM Read Error\n");
3643         }
3644
3645         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
3646
3647         if (!is_valid_ether_addr(netdev->dev_addr)) {
3648                 dev_err(&pdev->dev, "Invalid MAC Address\n");
3649                 err = -EIO;
3650                 goto err_eeprom;
3651         }
3652
3653         /* configure RXPBSIZE and TXPBSIZE */
3654         wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
3655         wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
3656
3657         timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
3658         timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
3659
3660         INIT_WORK(&adapter->reset_task, igc_reset_task);
3661         INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
3662
3663         /* Initialize link properties that are user-changeable */
3664         adapter->fc_autoneg = true;
3665         hw->mac.autoneg = true;
3666         hw->phy.autoneg_advertised = 0xaf;
3667
3668         hw->fc.requested_mode = igc_fc_default;
3669         hw->fc.current_mode = igc_fc_default;
3670
3671         /* reset the hardware with the new settings */
3672         igc_reset(adapter);
3673
3674         /* let the f/w know that the h/w is now under the control of the
3675          * driver.
3676          */
3677         igc_get_hw_control(adapter);
3678
3679         strncpy(netdev->name, "eth%d", IFNAMSIZ);
3680         err = register_netdev(netdev);
3681         if (err)
3682                 goto err_register;
3683
3684          /* carrier off reporting is important to ethtool even BEFORE open */
3685         netif_carrier_off(netdev);
3686
3687         /* keep a copy of the device-specific board info */
3688         adapter->ei = *ei;
3689
3690         /* print pcie link status and MAC address */
3691         pcie_print_link_status(pdev);
3692         netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
3693
3694         return 0;
3695
3696 err_register:
3697         igc_release_hw_control(adapter);
3698 err_eeprom:
3699         if (!igc_check_reset_block(hw))
3700                 igc_reset_phy(hw);
3701 err_sw_init:
3702         igc_clear_interrupt_scheme(adapter);
3703         iounmap(adapter->io_addr);
3704 err_ioremap:
3705         free_netdev(netdev);
3706 err_alloc_etherdev:
3707         pci_release_selected_regions(pdev,
3708                                      pci_select_bars(pdev, IORESOURCE_MEM));
3709 err_pci_reg:
3710 err_dma:
3711         pci_disable_device(pdev);
3712         return err;
3713 }
3714
3715 /**
3716  * igc_remove - Device Removal Routine
3717  * @pdev: PCI device information struct
3718  *
3719  * igc_remove is called by the PCI subsystem to alert the driver
3720  * that it should release a PCI device.  This could be caused by a
3721  * Hot-Plug event, or because the driver is going to be removed from
3722  * memory.
3723  */
3724 static void igc_remove(struct pci_dev *pdev)
3725 {
3726         struct net_device *netdev = pci_get_drvdata(pdev);
3727         struct igc_adapter *adapter = netdev_priv(netdev);
3728
3729         set_bit(__IGC_DOWN, &adapter->state);
3730
3731         del_timer_sync(&adapter->watchdog_timer);
3732         del_timer_sync(&adapter->phy_info_timer);
3733
3734         cancel_work_sync(&adapter->reset_task);
3735         cancel_work_sync(&adapter->watchdog_task);
3736
3737         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
3738          * would have already happened in close and is redundant.
3739          */
3740         igc_release_hw_control(adapter);
3741         unregister_netdev(netdev);
3742
3743         igc_clear_interrupt_scheme(adapter);
3744         pci_iounmap(pdev, adapter->io_addr);
3745         pci_release_mem_regions(pdev);
3746
3747         kfree(adapter->mac_table);
3748         kfree(adapter->shadow_vfta);
3749         free_netdev(netdev);
3750
3751         pci_disable_pcie_error_reporting(pdev);
3752
3753         pci_disable_device(pdev);
3754 }
3755
3756 static struct pci_driver igc_driver = {
3757         .name     = igc_driver_name,
3758         .id_table = igc_pci_tbl,
3759         .probe    = igc_probe,
3760         .remove   = igc_remove,
3761 };
3762
3763 static void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
3764                                      const u32 max_rss_queues)
3765 {
3766         /* Determine if we need to pair queues. */
3767         /* If rss_queues > half of max_rss_queues, pair the queues in
3768          * order to conserve interrupts due to limited supply.
3769          */
3770         if (adapter->rss_queues > (max_rss_queues / 2))
3771                 adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
3772         else
3773                 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
3774 }
3775
3776 static unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
3777 {
3778         unsigned int max_rss_queues;
3779
3780         /* Determine the maximum number of RSS queues supported. */
3781         max_rss_queues = IGC_MAX_RX_QUEUES;
3782
3783         return max_rss_queues;
3784 }
3785
3786 static void igc_init_queue_configuration(struct igc_adapter *adapter)
3787 {
3788         u32 max_rss_queues;
3789
3790         max_rss_queues = igc_get_max_rss_queues(adapter);
3791         adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
3792
3793         igc_set_flag_queue_pairs(adapter, max_rss_queues);
3794 }
3795
3796 /**
3797  * igc_sw_init - Initialize general software structures (struct igc_adapter)
3798  * @adapter: board private structure to initialize
3799  *
3800  * igc_sw_init initializes the Adapter private data structure.
3801  * Fields are initialized based on PCI device information and
3802  * OS network device settings (MTU size).
3803  */
3804 static int igc_sw_init(struct igc_adapter *adapter)
3805 {
3806         struct net_device *netdev = adapter->netdev;
3807         struct pci_dev *pdev = adapter->pdev;
3808         struct igc_hw *hw = &adapter->hw;
3809
3810         int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
3811
3812         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
3813
3814         /* set default ring sizes */
3815         adapter->tx_ring_count = IGC_DEFAULT_TXD;
3816         adapter->rx_ring_count = IGC_DEFAULT_RXD;
3817
3818         /* set default ITR values */
3819         adapter->rx_itr_setting = IGC_DEFAULT_ITR;
3820         adapter->tx_itr_setting = IGC_DEFAULT_ITR;
3821
3822         /* set default work limits */
3823         adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
3824
3825         /* adjust max frame to be at least the size of a standard frame */
3826         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
3827                                 VLAN_HLEN;
3828         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
3829
3830         spin_lock_init(&adapter->nfc_lock);
3831         spin_lock_init(&adapter->stats64_lock);
3832         /* Assume MSI-X interrupts, will be checked during IRQ allocation */
3833         adapter->flags |= IGC_FLAG_HAS_MSIX;
3834
3835         adapter->mac_table = kzalloc(size, GFP_ATOMIC);
3836         if (!adapter->mac_table)
3837                 return -ENOMEM;
3838
3839         igc_init_queue_configuration(adapter);
3840
3841         /* This call may decrease the number of queues */
3842         if (igc_init_interrupt_scheme(adapter, true)) {
3843                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
3844                 return -ENOMEM;
3845         }
3846
3847         /* Explicitly disable IRQ since the NIC can be in any state. */
3848         igc_irq_disable(adapter);
3849
3850         set_bit(__IGC_DOWN, &adapter->state);
3851
3852         return 0;
3853 }
3854
3855 /**
3856  * igc_get_hw_dev - return device
3857  * @hw: pointer to hardware structure
3858  *
3859  * used by hardware layer to print debugging information
3860  */
3861 struct net_device *igc_get_hw_dev(struct igc_hw *hw)
3862 {
3863         struct igc_adapter *adapter = hw->back;
3864
3865         return adapter->netdev;
3866 }
3867
3868 /**
3869  * igc_init_module - Driver Registration Routine
3870  *
3871  * igc_init_module is the first routine called when the driver is
3872  * loaded. All it does is register with the PCI subsystem.
3873  */
3874 static int __init igc_init_module(void)
3875 {
3876         int ret;
3877
3878         pr_info("%s - version %s\n",
3879                 igc_driver_string, igc_driver_version);
3880
3881         pr_info("%s\n", igc_copyright);
3882
3883         ret = pci_register_driver(&igc_driver);
3884         return ret;
3885 }
3886
3887 module_init(igc_init_module);
3888
3889 /**
3890  * igc_exit_module - Driver Exit Cleanup Routine
3891  *
3892  * igc_exit_module is called just before the driver is removed
3893  * from memory.
3894  */
3895 static void __exit igc_exit_module(void)
3896 {
3897         pci_unregister_driver(&igc_driver);
3898 }
3899
3900 module_exit(igc_exit_module);
3901 /* igc_main.c */