dt-bindings: serial: Remove obsolete nxp,lpc1850-uart.txt
[sfrench/cifs-2.6.git] / drivers / net / ethernet / intel / igc / igc_main.c
1 // SPDX-License-Identifier: GPL-2.0
2 /* Copyright (c)  2018 Intel Corporation */
3
4 #include <linux/module.h>
5 #include <linux/types.h>
6 #include <linux/if_vlan.h>
7 #include <linux/tcp.h>
8 #include <linux/udp.h>
9 #include <linux/ip.h>
10 #include <linux/pm_runtime.h>
11 #include <net/pkt_sched.h>
12 #include <linux/bpf_trace.h>
13 #include <net/xdp_sock_drv.h>
14 #include <linux/pci.h>
15
16 #include <net/ipv6.h>
17
18 #include "igc.h"
19 #include "igc_hw.h"
20 #include "igc_tsn.h"
21 #include "igc_xdp.h"
22
/* Human-readable driver description; passed to MODULE_DESCRIPTION() and
 * used to initialize igc_driver_string further down in this file.
 */
23 #define DRV_SUMMARY     "Intel(R) 2.5G Ethernet Linux Driver"
24
/* Bitmask of the driver, probe and link NETIF_MSG_* message classes.
 * NOTE(review): presumably the default message level when the "debug"
 * module parameter is left untouched - confirm at the point of use.
 */
25 #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_LINK)
26
/* XDP result codes. IGC_XDP_PASS is zero; the remaining codes are distinct
 * single-bit values.
 * NOTE(review): the single-bit encoding suggests results may be OR-ed
 * together by the Rx path - confirm against the users of these macros.
 */
27 #define IGC_XDP_PASS            0
28 #define IGC_XDP_CONSUMED        BIT(0)
29 #define IGC_XDP_TX              BIT(1)
30 #define IGC_XDP_REDIRECT        BIT(2)
31
/* Backing storage for the "debug" module parameter; defaults to -1.
 * NOTE(review): -1 presumably means "use the driver default message
 * level" - confirm where this value is consumed.
 */
32 static int debug = -1;
33
/* Module metadata; the description reuses DRV_SUMMARY. */
34 MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>");
35 MODULE_DESCRIPTION(DRV_SUMMARY);
36 MODULE_LICENSE("GPL v2");
/* Registered with permission mask 0 */
37 module_param(debug, int, 0);
38 MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)");
39
/* Driver name; not static.
 * NOTE(review): presumably referenced from other igc source files.
 */
40 char igc_driver_name[] = "igc";
/* File-local description and copyright strings */
41 static const char igc_driver_string[] = DRV_SUMMARY;
42 static const char igc_copyright[] =
43         "Copyright(c) 2018 Intel Corporation.";
44
/* Board description table, indexed by board type. Only board_base is
 * populated here.
 */
45 static const struct igc_info *igc_info_tbl[] = {
46         [board_base] = &igc_base_info,
47 };
48
/* PCI IDs claimed by this driver. Every entry carries board_base as its
 * driver data, i.e. the index into igc_info_tbl above. The all-zero entry
 * terminates the table.
 */
49 static const struct pci_device_id igc_pci_tbl[] = {
50         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LM), board_base },
51         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_V), board_base },
52         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
53         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
54         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
55         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
56         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base },
57         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
58         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base },
59         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
60         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base },
61         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base },
62         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_IT), board_base },
63         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I221_V), board_base },
64         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_BLANK_NVM), board_base },
65         { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
66         /* required last entry */
67         {0, }
68 };
69
/* Export the ID table in the module's device table */
70 MODULE_DEVICE_TABLE(pci, igc_pci_tbl);
71
/* Latency classes, ordered from lowest (0) to bulk (2), plus an explicit
 * "invalid" marker of 255.
 * NOTE(review): presumably consumed by the interrupt moderation code;
 * no user is visible in this part of the file.
 */
72 enum latency_range {
73         lowest_latency = 0,
74         low_latency = 1,
75         bulk_latency = 2,
76         latency_invalid = 255
77 };
78
79 void igc_reset(struct igc_adapter *adapter)
80 {
81         struct net_device *dev = adapter->netdev;
82         struct igc_hw *hw = &adapter->hw;
83         struct igc_fc_info *fc = &hw->fc;
84         u32 pba, hwm;
85
86         /* Repartition PBA for greater than 9k MTU if required */
87         pba = IGC_PBA_34K;
88
89         /* flow control settings
90          * The high water mark must be low enough to fit one full frame
91          * after transmitting the pause frame.  As such we must have enough
92          * space to allow for us to complete our current transmit and then
93          * receive the frame that is in progress from the link partner.
94          * Set it to:
95          * - the full Rx FIFO size minus one full Tx plus one full Rx frame
96          */
97         hwm = (pba << 10) - (adapter->max_frame_size + MAX_JUMBO_FRAME_SIZE);
98
99         fc->high_water = hwm & 0xFFFFFFF0;      /* 16-byte granularity */
100         fc->low_water = fc->high_water - 16;
101         fc->pause_time = 0xFFFF;
102         fc->send_xon = 1;
103         fc->current_mode = fc->requested_mode;
104
105         hw->mac.ops.reset_hw(hw);
106
107         if (hw->mac.ops.init_hw(hw))
108                 netdev_err(dev, "Error on hardware initialization\n");
109
110         /* Re-establish EEE setting */
111         igc_set_eee_i225(hw, true, true, true);
112
113         if (!netif_running(adapter->netdev))
114                 igc_power_down_phy_copper_base(&adapter->hw);
115
116         /* Enable HW to recognize an 802.1Q VLAN Ethernet packet */
117         wr32(IGC_VET, ETH_P_8021Q);
118
119         /* Re-enable PTP, where applicable. */
120         igc_ptp_reset(adapter);
121
122         /* Re-enable TSN offloading, where applicable. */
123         igc_tsn_reset(adapter);
124
125         igc_get_phy_info(hw);
126 }
127
128 /**
129  * igc_power_up_link - Power up the phy link
130  * @adapter: address of board private structure
131  */
132 static void igc_power_up_link(struct igc_adapter *adapter)
133 {
134         igc_reset_phy(&adapter->hw);
135
136         igc_power_up_phy_copper(&adapter->hw);
137
138         igc_setup_link(&adapter->hw);
139 }
140
141 /**
142  * igc_release_hw_control - release control of the h/w to f/w
143  * @adapter: address of board private structure
144  *
145  * igc_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
146  * For ASF and Pass Through versions of f/w this means that the
147  * driver is no longer loaded.
148  */
149 static void igc_release_hw_control(struct igc_adapter *adapter)
150 {
151         struct igc_hw *hw = &adapter->hw;
152         u32 ctrl_ext;
153
154         if (!pci_device_is_present(adapter->pdev))
155                 return;
156
157         /* Let firmware take over control of h/w */
158         ctrl_ext = rd32(IGC_CTRL_EXT);
159         wr32(IGC_CTRL_EXT,
160              ctrl_ext & ~IGC_CTRL_EXT_DRV_LOAD);
161 }
162
163 /**
164  * igc_get_hw_control - get control of the h/w from f/w
165  * @adapter: address of board private structure
166  *
167  * igc_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
168  * For ASF and Pass Through versions of f/w this means that
169  * the driver is loaded.
170  */
171 static void igc_get_hw_control(struct igc_adapter *adapter)
172 {
173         struct igc_hw *hw = &adapter->hw;
174         u32 ctrl_ext;
175
176         /* Let firmware know the driver has taken over */
177         ctrl_ext = rd32(IGC_CTRL_EXT);
178         wr32(IGC_CTRL_EXT,
179              ctrl_ext | IGC_CTRL_EXT_DRV_LOAD);
180 }
181
182 static void igc_unmap_tx_buffer(struct device *dev, struct igc_tx_buffer *buf)
183 {
184         dma_unmap_single(dev, dma_unmap_addr(buf, dma),
185                          dma_unmap_len(buf, len), DMA_TO_DEVICE);
186
187         dma_unmap_len_set(buf, len, 0);
188 }
189
190 /**
191  * igc_clean_tx_ring - Free Tx Buffers
192  * @tx_ring: ring to be cleaned
193  */
194 static void igc_clean_tx_ring(struct igc_ring *tx_ring)
195 {
196         u16 i = tx_ring->next_to_clean;
197         struct igc_tx_buffer *tx_buffer = &tx_ring->tx_buffer_info[i];
198         u32 xsk_frames = 0;
199
200         while (i != tx_ring->next_to_use) {
201                 union igc_adv_tx_desc *eop_desc, *tx_desc;
202
203                 switch (tx_buffer->type) {
204                 case IGC_TX_BUFFER_TYPE_XSK:
205                         xsk_frames++;
206                         break;
207                 case IGC_TX_BUFFER_TYPE_XDP:
208                         xdp_return_frame(tx_buffer->xdpf);
209                         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
210                         break;
211                 case IGC_TX_BUFFER_TYPE_SKB:
212                         dev_kfree_skb_any(tx_buffer->skb);
213                         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
214                         break;
215                 default:
216                         netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
217                         break;
218                 }
219
220                 /* check for eop_desc to determine the end of the packet */
221                 eop_desc = tx_buffer->next_to_watch;
222                 tx_desc = IGC_TX_DESC(tx_ring, i);
223
224                 /* unmap remaining buffers */
225                 while (tx_desc != eop_desc) {
226                         tx_buffer++;
227                         tx_desc++;
228                         i++;
229                         if (unlikely(i == tx_ring->count)) {
230                                 i = 0;
231                                 tx_buffer = tx_ring->tx_buffer_info;
232                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
233                         }
234
235                         /* unmap any remaining paged data */
236                         if (dma_unmap_len(tx_buffer, len))
237                                 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
238                 }
239
240                 tx_buffer->next_to_watch = NULL;
241
242                 /* move us one more past the eop_desc for start of next pkt */
243                 tx_buffer++;
244                 i++;
245                 if (unlikely(i == tx_ring->count)) {
246                         i = 0;
247                         tx_buffer = tx_ring->tx_buffer_info;
248                 }
249         }
250
251         if (tx_ring->xsk_pool && xsk_frames)
252                 xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
253
254         /* reset BQL for queue */
255         netdev_tx_reset_queue(txring_txq(tx_ring));
256
257         /* Zero out the buffer ring */
258         memset(tx_ring->tx_buffer_info, 0,
259                sizeof(*tx_ring->tx_buffer_info) * tx_ring->count);
260
261         /* Zero out the descriptor ring */
262         memset(tx_ring->desc, 0, tx_ring->size);
263
264         /* reset next_to_use and next_to_clean */
265         tx_ring->next_to_use = 0;
266         tx_ring->next_to_clean = 0;
267 }
268
269 /**
270  * igc_free_tx_resources - Free Tx Resources per Queue
271  * @tx_ring: Tx descriptor ring for a specific queue
272  *
273  * Free all transmit software resources
274  */
275 void igc_free_tx_resources(struct igc_ring *tx_ring)
276 {
277         igc_disable_tx_ring(tx_ring);
278
279         vfree(tx_ring->tx_buffer_info);
280         tx_ring->tx_buffer_info = NULL;
281
282         /* if not set, then don't free */
283         if (!tx_ring->desc)
284                 return;
285
286         dma_free_coherent(tx_ring->dev, tx_ring->size,
287                           tx_ring->desc, tx_ring->dma);
288
289         tx_ring->desc = NULL;
290 }
291
292 /**
293  * igc_free_all_tx_resources - Free Tx Resources for All Queues
294  * @adapter: board private structure
295  *
296  * Free all transmit software resources
297  */
298 static void igc_free_all_tx_resources(struct igc_adapter *adapter)
299 {
300         int i;
301
302         for (i = 0; i < adapter->num_tx_queues; i++)
303                 igc_free_tx_resources(adapter->tx_ring[i]);
304 }
305
306 /**
307  * igc_clean_all_tx_rings - Free Tx Buffers for all queues
308  * @adapter: board private structure
309  */
310 static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
311 {
312         int i;
313
314         for (i = 0; i < adapter->num_tx_queues; i++)
315                 if (adapter->tx_ring[i])
316                         igc_clean_tx_ring(adapter->tx_ring[i]);
317 }
318
319 /**
320  * igc_setup_tx_resources - allocate Tx resources (Descriptors)
321  * @tx_ring: tx descriptor ring (for a specific queue) to setup
322  *
323  * Return 0 on success, negative on failure
324  */
325 int igc_setup_tx_resources(struct igc_ring *tx_ring)
326 {
327         struct net_device *ndev = tx_ring->netdev;
328         struct device *dev = tx_ring->dev;
329         int size = 0;
330
331         size = sizeof(struct igc_tx_buffer) * tx_ring->count;
332         tx_ring->tx_buffer_info = vzalloc(size);
333         if (!tx_ring->tx_buffer_info)
334                 goto err;
335
336         /* round up to nearest 4K */
337         tx_ring->size = tx_ring->count * sizeof(union igc_adv_tx_desc);
338         tx_ring->size = ALIGN(tx_ring->size, 4096);
339
340         tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size,
341                                            &tx_ring->dma, GFP_KERNEL);
342
343         if (!tx_ring->desc)
344                 goto err;
345
346         tx_ring->next_to_use = 0;
347         tx_ring->next_to_clean = 0;
348
349         return 0;
350
351 err:
352         vfree(tx_ring->tx_buffer_info);
353         netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
354         return -ENOMEM;
355 }
356
357 /**
358  * igc_setup_all_tx_resources - wrapper to allocate Tx resources for all queues
359  * @adapter: board private structure
360  *
361  * Return 0 on success, negative on failure
362  */
363 static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
364 {
365         struct net_device *dev = adapter->netdev;
366         int i, err = 0;
367
368         for (i = 0; i < adapter->num_tx_queues; i++) {
369                 err = igc_setup_tx_resources(adapter->tx_ring[i]);
370                 if (err) {
371                         netdev_err(dev, "Error on Tx queue %u setup\n", i);
372                         for (i--; i >= 0; i--)
373                                 igc_free_tx_resources(adapter->tx_ring[i]);
374                         break;
375                 }
376         }
377
378         return err;
379 }
380
381 static void igc_clean_rx_ring_page_shared(struct igc_ring *rx_ring)
382 {
383         u16 i = rx_ring->next_to_clean;
384
385         dev_kfree_skb(rx_ring->skb);
386         rx_ring->skb = NULL;
387
388         /* Free all the Rx ring sk_buffs */
389         while (i != rx_ring->next_to_alloc) {
390                 struct igc_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i];
391
392                 /* Invalidate cache lines that may have been written to by
393                  * device so that we avoid corrupting memory.
394                  */
395                 dma_sync_single_range_for_cpu(rx_ring->dev,
396                                               buffer_info->dma,
397                                               buffer_info->page_offset,
398                                               igc_rx_bufsz(rx_ring),
399                                               DMA_FROM_DEVICE);
400
401                 /* free resources associated with mapping */
402                 dma_unmap_page_attrs(rx_ring->dev,
403                                      buffer_info->dma,
404                                      igc_rx_pg_size(rx_ring),
405                                      DMA_FROM_DEVICE,
406                                      IGC_RX_DMA_ATTR);
407                 __page_frag_cache_drain(buffer_info->page,
408                                         buffer_info->pagecnt_bias);
409
410                 i++;
411                 if (i == rx_ring->count)
412                         i = 0;
413         }
414 }
415
416 static void igc_clean_rx_ring_xsk_pool(struct igc_ring *ring)
417 {
418         struct igc_rx_buffer *bi;
419         u16 i;
420
421         for (i = 0; i < ring->count; i++) {
422                 bi = &ring->rx_buffer_info[i];
423                 if (!bi->xdp)
424                         continue;
425
426                 xsk_buff_free(bi->xdp);
427                 bi->xdp = NULL;
428         }
429 }
430
431 /**
432  * igc_clean_rx_ring - Free Rx Buffers per Queue
433  * @ring: ring to free buffers from
434  */
435 static void igc_clean_rx_ring(struct igc_ring *ring)
436 {
437         if (ring->xsk_pool)
438                 igc_clean_rx_ring_xsk_pool(ring);
439         else
440                 igc_clean_rx_ring_page_shared(ring);
441
442         clear_ring_uses_large_buffer(ring);
443
444         ring->next_to_alloc = 0;
445         ring->next_to_clean = 0;
446         ring->next_to_use = 0;
447 }
448
449 /**
450  * igc_clean_all_rx_rings - Free Rx Buffers for all queues
451  * @adapter: board private structure
452  */
453 static void igc_clean_all_rx_rings(struct igc_adapter *adapter)
454 {
455         int i;
456
457         for (i = 0; i < adapter->num_rx_queues; i++)
458                 if (adapter->rx_ring[i])
459                         igc_clean_rx_ring(adapter->rx_ring[i]);
460 }
461
462 /**
463  * igc_free_rx_resources - Free Rx Resources
464  * @rx_ring: ring to clean the resources from
465  *
466  * Free all receive software resources
467  */
468 void igc_free_rx_resources(struct igc_ring *rx_ring)
469 {
470         igc_clean_rx_ring(rx_ring);
471
472         xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
473
474         vfree(rx_ring->rx_buffer_info);
475         rx_ring->rx_buffer_info = NULL;
476
477         /* if not set, then don't free */
478         if (!rx_ring->desc)
479                 return;
480
481         dma_free_coherent(rx_ring->dev, rx_ring->size,
482                           rx_ring->desc, rx_ring->dma);
483
484         rx_ring->desc = NULL;
485 }
486
487 /**
488  * igc_free_all_rx_resources - Free Rx Resources for All Queues
489  * @adapter: board private structure
490  *
491  * Free all receive software resources
492  */
493 static void igc_free_all_rx_resources(struct igc_adapter *adapter)
494 {
495         int i;
496
497         for (i = 0; i < adapter->num_rx_queues; i++)
498                 igc_free_rx_resources(adapter->rx_ring[i]);
499 }
500
501 /**
502  * igc_setup_rx_resources - allocate Rx resources (Descriptors)
503  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
504  *
505  * Returns 0 on success, negative on failure
506  */
507 int igc_setup_rx_resources(struct igc_ring *rx_ring)
508 {
509         struct net_device *ndev = rx_ring->netdev;
510         struct device *dev = rx_ring->dev;
511         u8 index = rx_ring->queue_index;
512         int size, desc_len, res;
513
514         /* XDP RX-queue info */
515         if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq))
516                 xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
517         res = xdp_rxq_info_reg(&rx_ring->xdp_rxq, ndev, index,
518                                rx_ring->q_vector->napi.napi_id);
519         if (res < 0) {
520                 netdev_err(ndev, "Failed to register xdp_rxq index %u\n",
521                            index);
522                 return res;
523         }
524
525         size = sizeof(struct igc_rx_buffer) * rx_ring->count;
526         rx_ring->rx_buffer_info = vzalloc(size);
527         if (!rx_ring->rx_buffer_info)
528                 goto err;
529
530         desc_len = sizeof(union igc_adv_rx_desc);
531
532         /* Round up to nearest 4K */
533         rx_ring->size = rx_ring->count * desc_len;
534         rx_ring->size = ALIGN(rx_ring->size, 4096);
535
536         rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size,
537                                            &rx_ring->dma, GFP_KERNEL);
538
539         if (!rx_ring->desc)
540                 goto err;
541
542         rx_ring->next_to_alloc = 0;
543         rx_ring->next_to_clean = 0;
544         rx_ring->next_to_use = 0;
545
546         return 0;
547
548 err:
549         xdp_rxq_info_unreg(&rx_ring->xdp_rxq);
550         vfree(rx_ring->rx_buffer_info);
551         rx_ring->rx_buffer_info = NULL;
552         netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
553         return -ENOMEM;
554 }
555
556 /**
557  * igc_setup_all_rx_resources - wrapper to allocate Rx resources
558  *                                (Descriptors) for all queues
559  * @adapter: board private structure
560  *
561  * Return 0 on success, negative on failure
562  */
563 static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
564 {
565         struct net_device *dev = adapter->netdev;
566         int i, err = 0;
567
568         for (i = 0; i < adapter->num_rx_queues; i++) {
569                 err = igc_setup_rx_resources(adapter->rx_ring[i]);
570                 if (err) {
571                         netdev_err(dev, "Error on Rx queue %u setup\n", i);
572                         for (i--; i >= 0; i--)
573                                 igc_free_rx_resources(adapter->rx_ring[i]);
574                         break;
575                 }
576         }
577
578         return err;
579 }
580
581 static struct xsk_buff_pool *igc_get_xsk_pool(struct igc_adapter *adapter,
582                                               struct igc_ring *ring)
583 {
584         if (!igc_xdp_is_enabled(adapter) ||
585             !test_bit(IGC_RING_FLAG_AF_XDP_ZC, &ring->flags))
586                 return NULL;
587
588         return xsk_get_pool_from_qid(ring->netdev, ring->queue_index);
589 }
590
591 /**
592  * igc_configure_rx_ring - Configure a receive ring after Reset
593  * @adapter: board private structure
594  * @ring: receive ring to be configured
595  *
596  * Configure the Rx unit of the MAC after a reset.
597  */
598 static void igc_configure_rx_ring(struct igc_adapter *adapter,
599                                   struct igc_ring *ring)
600 {
601         struct igc_hw *hw = &adapter->hw;
602         union igc_adv_rx_desc *rx_desc;
603         int reg_idx = ring->reg_idx;
604         u32 srrctl = 0, rxdctl = 0;
605         u64 rdba = ring->dma;
606         u32 buf_size;
607
608         xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
609         ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
610         if (ring->xsk_pool) {
611                 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
612                                                    MEM_TYPE_XSK_BUFF_POOL,
613                                                    NULL));
614                 xsk_pool_set_rxq_info(ring->xsk_pool, &ring->xdp_rxq);
615         } else {
616                 WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
617                                                    MEM_TYPE_PAGE_SHARED,
618                                                    NULL));
619         }
620
621         if (igc_xdp_is_enabled(adapter))
622                 set_ring_uses_large_buffer(ring);
623
624         /* disable the queue */
625         wr32(IGC_RXDCTL(reg_idx), 0);
626
627         /* Set DMA base address registers */
628         wr32(IGC_RDBAL(reg_idx),
629              rdba & 0x00000000ffffffffULL);
630         wr32(IGC_RDBAH(reg_idx), rdba >> 32);
631         wr32(IGC_RDLEN(reg_idx),
632              ring->count * sizeof(union igc_adv_rx_desc));
633
634         /* initialize head and tail */
635         ring->tail = adapter->io_addr + IGC_RDT(reg_idx);
636         wr32(IGC_RDH(reg_idx), 0);
637         writel(0, ring->tail);
638
639         /* reset next-to- use/clean to place SW in sync with hardware */
640         ring->next_to_clean = 0;
641         ring->next_to_use = 0;
642
643         if (ring->xsk_pool)
644                 buf_size = xsk_pool_get_rx_frame_size(ring->xsk_pool);
645         else if (ring_uses_large_buffer(ring))
646                 buf_size = IGC_RXBUFFER_3072;
647         else
648                 buf_size = IGC_RXBUFFER_2048;
649
650         srrctl = rd32(IGC_SRRCTL(reg_idx));
651         srrctl &= ~(IGC_SRRCTL_BSIZEPKT_MASK | IGC_SRRCTL_BSIZEHDR_MASK |
652                     IGC_SRRCTL_DESCTYPE_MASK);
653         srrctl |= IGC_SRRCTL_BSIZEHDR(IGC_RX_HDR_LEN);
654         srrctl |= IGC_SRRCTL_BSIZEPKT(buf_size);
655         srrctl |= IGC_SRRCTL_DESCTYPE_ADV_ONEBUF;
656
657         wr32(IGC_SRRCTL(reg_idx), srrctl);
658
659         rxdctl |= IGC_RX_PTHRESH;
660         rxdctl |= IGC_RX_HTHRESH << 8;
661         rxdctl |= IGC_RX_WTHRESH << 16;
662
663         /* initialize rx_buffer_info */
664         memset(ring->rx_buffer_info, 0,
665                sizeof(struct igc_rx_buffer) * ring->count);
666
667         /* initialize Rx descriptor 0 */
668         rx_desc = IGC_RX_DESC(ring, 0);
669         rx_desc->wb.upper.length = 0;
670
671         /* enable receive descriptor fetching */
672         rxdctl |= IGC_RXDCTL_QUEUE_ENABLE;
673
674         wr32(IGC_RXDCTL(reg_idx), rxdctl);
675 }
676
677 /**
678  * igc_configure_rx - Configure receive Unit after Reset
679  * @adapter: board private structure
680  *
681  * Configure the Rx unit of the MAC after a reset.
682  */
683 static void igc_configure_rx(struct igc_adapter *adapter)
684 {
685         int i;
686
687         /* Setup the HW Rx Head and Tail Descriptor Pointers and
688          * the Base and Length of the Rx Descriptor Ring
689          */
690         for (i = 0; i < adapter->num_rx_queues; i++)
691                 igc_configure_rx_ring(adapter, adapter->rx_ring[i]);
692 }
693
694 /**
695  * igc_configure_tx_ring - Configure transmit ring after Reset
696  * @adapter: board private structure
697  * @ring: tx ring to configure
698  *
699  * Configure a transmit ring after a reset.
700  */
701 static void igc_configure_tx_ring(struct igc_adapter *adapter,
702                                   struct igc_ring *ring)
703 {
704         struct igc_hw *hw = &adapter->hw;
705         int reg_idx = ring->reg_idx;
706         u64 tdba = ring->dma;
707         u32 txdctl = 0;
708
709         ring->xsk_pool = igc_get_xsk_pool(adapter, ring);
710
711         /* disable the queue */
712         wr32(IGC_TXDCTL(reg_idx), 0);
713         wrfl();
714         mdelay(10);
715
716         wr32(IGC_TDLEN(reg_idx),
717              ring->count * sizeof(union igc_adv_tx_desc));
718         wr32(IGC_TDBAL(reg_idx),
719              tdba & 0x00000000ffffffffULL);
720         wr32(IGC_TDBAH(reg_idx), tdba >> 32);
721
722         ring->tail = adapter->io_addr + IGC_TDT(reg_idx);
723         wr32(IGC_TDH(reg_idx), 0);
724         writel(0, ring->tail);
725
726         txdctl |= IGC_TX_PTHRESH;
727         txdctl |= IGC_TX_HTHRESH << 8;
728         txdctl |= IGC_TX_WTHRESH << 16;
729
730         txdctl |= IGC_TXDCTL_QUEUE_ENABLE;
731         wr32(IGC_TXDCTL(reg_idx), txdctl);
732 }
733
734 /**
735  * igc_configure_tx - Configure transmit Unit after Reset
736  * @adapter: board private structure
737  *
738  * Configure the Tx unit of the MAC after a reset.
739  */
740 static void igc_configure_tx(struct igc_adapter *adapter)
741 {
742         int i;
743
744         for (i = 0; i < adapter->num_tx_queues; i++)
745                 igc_configure_tx_ring(adapter, adapter->tx_ring[i]);
746 }
747
748 /**
749  * igc_setup_mrqc - configure the multiple receive queue control registers
750  * @adapter: Board private structure
751  */
752 static void igc_setup_mrqc(struct igc_adapter *adapter)
753 {
754         struct igc_hw *hw = &adapter->hw;
755         u32 j, num_rx_queues;
756         u32 mrqc, rxcsum;
757         u32 rss_key[10];
758
759         netdev_rss_key_fill(rss_key, sizeof(rss_key));
760         for (j = 0; j < 10; j++)
761                 wr32(IGC_RSSRK(j), rss_key[j]);
762
763         num_rx_queues = adapter->rss_queues;
764
765         if (adapter->rss_indir_tbl_init != num_rx_queues) {
766                 for (j = 0; j < IGC_RETA_SIZE; j++)
767                         adapter->rss_indir_tbl[j] =
768                         (j * num_rx_queues) / IGC_RETA_SIZE;
769                 adapter->rss_indir_tbl_init = num_rx_queues;
770         }
771         igc_write_rss_indir_tbl(adapter);
772
773         /* Disable raw packet checksumming so that RSS hash is placed in
774          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
775          * offloads as they are enabled by default
776          */
777         rxcsum = rd32(IGC_RXCSUM);
778         rxcsum |= IGC_RXCSUM_PCSD;
779
780         /* Enable Receive Checksum Offload for SCTP */
781         rxcsum |= IGC_RXCSUM_CRCOFL;
782
783         /* Don't need to set TUOFL or IPOFL, they default to 1 */
784         wr32(IGC_RXCSUM, rxcsum);
785
786         /* Generate RSS hash based on packet types, TCP/UDP
787          * port numbers and/or IPv4/v6 src and dst addresses
788          */
789         mrqc = IGC_MRQC_RSS_FIELD_IPV4 |
790                IGC_MRQC_RSS_FIELD_IPV4_TCP |
791                IGC_MRQC_RSS_FIELD_IPV6 |
792                IGC_MRQC_RSS_FIELD_IPV6_TCP |
793                IGC_MRQC_RSS_FIELD_IPV6_TCP_EX;
794
795         if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV4_UDP)
796                 mrqc |= IGC_MRQC_RSS_FIELD_IPV4_UDP;
797         if (adapter->flags & IGC_FLAG_RSS_FIELD_IPV6_UDP)
798                 mrqc |= IGC_MRQC_RSS_FIELD_IPV6_UDP;
799
800         mrqc |= IGC_MRQC_ENABLE_RSS_MQ;
801
802         wr32(IGC_MRQC, mrqc);
803 }
804
805 /**
806  * igc_setup_rctl - configure the receive control registers
807  * @adapter: Board private structure
808  */
809 static void igc_setup_rctl(struct igc_adapter *adapter)
810 {
811         struct igc_hw *hw = &adapter->hw;
812         u32 rctl;
813
814         rctl = rd32(IGC_RCTL);
815
816         rctl &= ~(3 << IGC_RCTL_MO_SHIFT);
817         rctl &= ~(IGC_RCTL_LBM_TCVR | IGC_RCTL_LBM_MAC);
818
819         rctl |= IGC_RCTL_EN | IGC_RCTL_BAM | IGC_RCTL_RDMTS_HALF |
820                 (hw->mac.mc_filter_type << IGC_RCTL_MO_SHIFT);
821
822         /* enable stripping of CRC. Newer features require
823          * that the HW strips the CRC.
824          */
825         rctl |= IGC_RCTL_SECRC;
826
827         /* disable store bad packets and clear size bits. */
828         rctl &= ~(IGC_RCTL_SBP | IGC_RCTL_SZ_256);
829
830         /* enable LPE to allow for reception of jumbo frames */
831         rctl |= IGC_RCTL_LPE;
832
833         /* disable queue 0 to prevent tail write w/o re-config */
834         wr32(IGC_RXDCTL(0), 0);
835
836         /* This is useful for sniffing bad packets. */
837         if (adapter->netdev->features & NETIF_F_RXALL) {
838                 /* UPE and MPE will be handled by normal PROMISC logic
839                  * in set_rx_mode
840                  */
841                 rctl |= (IGC_RCTL_SBP | /* Receive bad packets */
842                          IGC_RCTL_BAM | /* RX All Bcast Pkts */
843                          IGC_RCTL_PMCF); /* RX All MAC Ctrl Pkts */
844
845                 rctl &= ~(IGC_RCTL_DPF | /* Allow filtered pause */
846                           IGC_RCTL_CFIEN); /* Disable VLAN CFIEN Filter */
847         }
848
849         wr32(IGC_RCTL, rctl);
850 }
851
852 /**
853  * igc_setup_tctl - configure the transmit control registers
854  * @adapter: Board private structure
855  */
856 static void igc_setup_tctl(struct igc_adapter *adapter)
857 {
858         struct igc_hw *hw = &adapter->hw;
859         u32 tctl;
860
861         /* disable queue 0 which icould be enabled by default */
862         wr32(IGC_TXDCTL(0), 0);
863
864         /* Program the Transmit Control Register */
865         tctl = rd32(IGC_TCTL);
866         tctl &= ~IGC_TCTL_CT;
867         tctl |= IGC_TCTL_PSP | IGC_TCTL_RTLC |
868                 (IGC_COLLISION_THRESHOLD << IGC_CT_SHIFT);
869
870         /* Enable transmits */
871         tctl |= IGC_TCTL_EN;
872
873         wr32(IGC_TCTL, tctl);
874 }
875
876 /**
877  * igc_set_mac_filter_hw() - Set MAC address filter in hardware
878  * @adapter: Pointer to adapter where the filter should be set
879  * @index: Filter index
880  * @type: MAC address filter type (source or destination)
881  * @addr: MAC address
882  * @queue: If non-negative, queue assignment feature is enabled and frames
883  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
884  *         assignment is disabled.
885  */
886 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
887                                   enum igc_mac_filter_type type,
888                                   const u8 *addr, int queue)
889 {
890         struct net_device *dev = adapter->netdev;
891         struct igc_hw *hw = &adapter->hw;
892         u32 ral, rah;
893
894         if (WARN_ON(index >= hw->mac.rar_entry_count))
895                 return;
896
897         ral = le32_to_cpup((__le32 *)(addr));
898         rah = le16_to_cpup((__le16 *)(addr + 4));
899
900         if (type == IGC_MAC_FILTER_TYPE_SRC) {
901                 rah &= ~IGC_RAH_ASEL_MASK;
902                 rah |= IGC_RAH_ASEL_SRC_ADDR;
903         }
904
905         if (queue >= 0) {
906                 rah &= ~IGC_RAH_QSEL_MASK;
907                 rah |= (queue << IGC_RAH_QSEL_SHIFT);
908                 rah |= IGC_RAH_QSEL_ENABLE;
909         }
910
911         rah |= IGC_RAH_AV;
912
913         wr32(IGC_RAL(index), ral);
914         wr32(IGC_RAH(index), rah);
915
916         netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
917 }
918
919 /**
920  * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
921  * @adapter: Pointer to adapter where the filter should be cleared
922  * @index: Filter index
923  */
924 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
925 {
926         struct net_device *dev = adapter->netdev;
927         struct igc_hw *hw = &adapter->hw;
928
929         if (WARN_ON(index >= hw->mac.rar_entry_count))
930                 return;
931
932         wr32(IGC_RAL(index), 0);
933         wr32(IGC_RAH(index), 0);
934
935         netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
936 }
937
938 /* Set default MAC address for the PF in the first RAR entry */
939 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
940 {
941         struct net_device *dev = adapter->netdev;
942         u8 *addr = adapter->hw.mac.addr;
943
944         netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
945
946         igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
947 }
948
949 /**
950  * igc_set_mac - Change the Ethernet Address of the NIC
951  * @netdev: network interface device structure
952  * @p: pointer to an address structure
953  *
954  * Returns 0 on success, negative on failure
955  */
956 static int igc_set_mac(struct net_device *netdev, void *p)
957 {
958         struct igc_adapter *adapter = netdev_priv(netdev);
959         struct igc_hw *hw = &adapter->hw;
960         struct sockaddr *addr = p;
961
962         if (!is_valid_ether_addr(addr->sa_data))
963                 return -EADDRNOTAVAIL;
964
965         eth_hw_addr_set(netdev, addr->sa_data);
966         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
967
968         /* set the correct pool for the new PF MAC address in entry 0 */
969         igc_set_default_mac_filter(adapter);
970
971         return 0;
972 }
973
974 /**
975  *  igc_write_mc_addr_list - write multicast addresses to MTA
976  *  @netdev: network interface device structure
977  *
978  *  Writes multicast address list to the MTA hash table.
979  *  Returns: -ENOMEM on failure
980  *           0 on no addresses written
981  *           X on writing X addresses to MTA
982  **/
983 static int igc_write_mc_addr_list(struct net_device *netdev)
984 {
985         struct igc_adapter *adapter = netdev_priv(netdev);
986         struct igc_hw *hw = &adapter->hw;
987         struct netdev_hw_addr *ha;
988         u8  *mta_list;
989         int i;
990
991         if (netdev_mc_empty(netdev)) {
992                 /* nothing to program, so clear mc list */
993                 igc_update_mc_addr_list(hw, NULL, 0);
994                 return 0;
995         }
996
997         mta_list = kcalloc(netdev_mc_count(netdev), 6, GFP_ATOMIC);
998         if (!mta_list)
999                 return -ENOMEM;
1000
1001         /* The shared function expects a packed array of only addresses. */
1002         i = 0;
1003         netdev_for_each_mc_addr(ha, netdev)
1004                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
1005
1006         igc_update_mc_addr_list(hw, mta_list, i);
1007         kfree(mta_list);
1008
1009         return netdev_mc_count(netdev);
1010 }
1011
1012 static __le32 igc_tx_launchtime(struct igc_ring *ring, ktime_t txtime,
1013                                 bool *first_flag, bool *insert_empty)
1014 {
1015         struct igc_adapter *adapter = netdev_priv(ring->netdev);
1016         ktime_t cycle_time = adapter->cycle_time;
1017         ktime_t base_time = adapter->base_time;
1018         ktime_t now = ktime_get_clocktai();
1019         ktime_t baset_est, end_of_cycle;
1020         u32 launchtime;
1021         s64 n;
1022
1023         n = div64_s64(ktime_sub_ns(now, base_time), cycle_time);
1024
1025         baset_est = ktime_add_ns(base_time, cycle_time * (n));
1026         end_of_cycle = ktime_add_ns(baset_est, cycle_time);
1027
1028         if (ktime_compare(txtime, end_of_cycle) >= 0) {
1029                 if (baset_est != ring->last_ff_cycle) {
1030                         *first_flag = true;
1031                         ring->last_ff_cycle = baset_est;
1032
1033                         if (ktime_compare(txtime, ring->last_tx_cycle) > 0)
1034                                 *insert_empty = true;
1035                 }
1036         }
1037
1038         /* Introducing a window at end of cycle on which packets
1039          * potentially not honor launchtime. Window of 5us chosen
1040          * considering software update the tail pointer and packets
1041          * are dma'ed to packet buffer.
1042          */
1043         if ((ktime_sub_ns(end_of_cycle, now) < 5 * NSEC_PER_USEC))
1044                 netdev_warn(ring->netdev, "Packet with txtime=%llu may not be honoured\n",
1045                             txtime);
1046
1047         ring->last_tx_cycle = end_of_cycle;
1048
1049         launchtime = ktime_sub_ns(txtime, baset_est);
1050         if (launchtime > 0)
1051                 div_s64_rem(launchtime, cycle_time, &launchtime);
1052         else
1053                 launchtime = 0;
1054
1055         return cpu_to_le32(launchtime);
1056 }
1057
1058 static int igc_init_empty_frame(struct igc_ring *ring,
1059                                 struct igc_tx_buffer *buffer,
1060                                 struct sk_buff *skb)
1061 {
1062         unsigned int size;
1063         dma_addr_t dma;
1064
1065         size = skb_headlen(skb);
1066
1067         dma = dma_map_single(ring->dev, skb->data, size, DMA_TO_DEVICE);
1068         if (dma_mapping_error(ring->dev, dma)) {
1069                 netdev_err_once(ring->netdev, "Failed to map DMA for TX\n");
1070                 return -ENOMEM;
1071         }
1072
1073         buffer->skb = skb;
1074         buffer->protocol = 0;
1075         buffer->bytecount = skb->len;
1076         buffer->gso_segs = 1;
1077         buffer->time_stamp = jiffies;
1078         dma_unmap_len_set(buffer, len, skb->len);
1079         dma_unmap_addr_set(buffer, dma, dma);
1080
1081         return 0;
1082 }
1083
1084 static int igc_init_tx_empty_descriptor(struct igc_ring *ring,
1085                                         struct sk_buff *skb,
1086                                         struct igc_tx_buffer *first)
1087 {
1088         union igc_adv_tx_desc *desc;
1089         u32 cmd_type, olinfo_status;
1090         int err;
1091
1092         if (!igc_desc_unused(ring))
1093                 return -EBUSY;
1094
1095         err = igc_init_empty_frame(ring, first, skb);
1096         if (err)
1097                 return err;
1098
1099         cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
1100                    IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
1101                    first->bytecount;
1102         olinfo_status = first->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
1103
1104         desc = IGC_TX_DESC(ring, ring->next_to_use);
1105         desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1106         desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1107         desc->read.buffer_addr = cpu_to_le64(dma_unmap_addr(first, dma));
1108
1109         netdev_tx_sent_queue(txring_txq(ring), skb->len);
1110
1111         first->next_to_watch = desc;
1112
1113         ring->next_to_use++;
1114         if (ring->next_to_use == ring->count)
1115                 ring->next_to_use = 0;
1116
1117         return 0;
1118 }
1119
1120 #define IGC_EMPTY_FRAME_SIZE 60
1121
1122 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
1123                             __le32 launch_time, bool first_flag,
1124                             u32 vlan_macip_lens, u32 type_tucmd,
1125                             u32 mss_l4len_idx)
1126 {
1127         struct igc_adv_tx_context_desc *context_desc;
1128         u16 i = tx_ring->next_to_use;
1129
1130         context_desc = IGC_TX_CTXTDESC(tx_ring, i);
1131
1132         i++;
1133         tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
1134
1135         /* set bits to identify this as an advanced context descriptor */
1136         type_tucmd |= IGC_TXD_CMD_DEXT | IGC_ADVTXD_DTYP_CTXT;
1137
1138         /* For i225, context index must be unique per ring. */
1139         if (test_bit(IGC_RING_FLAG_TX_CTX_IDX, &tx_ring->flags))
1140                 mss_l4len_idx |= tx_ring->reg_idx << 4;
1141
1142         if (first_flag)
1143                 mss_l4len_idx |= IGC_ADVTXD_TSN_CNTX_FIRST;
1144
1145         context_desc->vlan_macip_lens   = cpu_to_le32(vlan_macip_lens);
1146         context_desc->type_tucmd_mlhl   = cpu_to_le32(type_tucmd);
1147         context_desc->mss_l4len_idx     = cpu_to_le32(mss_l4len_idx);
1148         context_desc->launch_time       = launch_time;
1149 }
1150
1151 static void igc_tx_csum(struct igc_ring *tx_ring, struct igc_tx_buffer *first,
1152                         __le32 launch_time, bool first_flag)
1153 {
1154         struct sk_buff *skb = first->skb;
1155         u32 vlan_macip_lens = 0;
1156         u32 type_tucmd = 0;
1157
1158         if (skb->ip_summed != CHECKSUM_PARTIAL) {
1159 csum_failed:
1160                 if (!(first->tx_flags & IGC_TX_FLAGS_VLAN) &&
1161                     !tx_ring->launchtime_enable)
1162                         return;
1163                 goto no_csum;
1164         }
1165
1166         switch (skb->csum_offset) {
1167         case offsetof(struct tcphdr, check):
1168                 type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1169                 fallthrough;
1170         case offsetof(struct udphdr, check):
1171                 break;
1172         case offsetof(struct sctphdr, checksum):
1173                 /* validate that this is actually an SCTP request */
1174                 if (skb_csum_is_sctp(skb)) {
1175                         type_tucmd = IGC_ADVTXD_TUCMD_L4T_SCTP;
1176                         break;
1177                 }
1178                 fallthrough;
1179         default:
1180                 skb_checksum_help(skb);
1181                 goto csum_failed;
1182         }
1183
1184         /* update TX checksum flag */
1185         first->tx_flags |= IGC_TX_FLAGS_CSUM;
1186         vlan_macip_lens = skb_checksum_start_offset(skb) -
1187                           skb_network_offset(skb);
1188 no_csum:
1189         vlan_macip_lens |= skb_network_offset(skb) << IGC_ADVTXD_MACLEN_SHIFT;
1190         vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1191
1192         igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
1193                         vlan_macip_lens, type_tucmd, 0);
1194 }
1195
1196 static int __igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1197 {
1198         struct net_device *netdev = tx_ring->netdev;
1199
1200         netif_stop_subqueue(netdev, tx_ring->queue_index);
1201
1202         /* memory barriier comment */
1203         smp_mb();
1204
1205         /* We need to check again in a case another CPU has just
1206          * made room available.
1207          */
1208         if (igc_desc_unused(tx_ring) < size)
1209                 return -EBUSY;
1210
1211         /* A reprieve! */
1212         netif_wake_subqueue(netdev, tx_ring->queue_index);
1213
1214         u64_stats_update_begin(&tx_ring->tx_syncp2);
1215         tx_ring->tx_stats.restart_queue2++;
1216         u64_stats_update_end(&tx_ring->tx_syncp2);
1217
1218         return 0;
1219 }
1220
1221 static inline int igc_maybe_stop_tx(struct igc_ring *tx_ring, const u16 size)
1222 {
1223         if (igc_desc_unused(tx_ring) >= size)
1224                 return 0;
1225         return __igc_maybe_stop_tx(tx_ring, size);
1226 }
1227
1228 #define IGC_SET_FLAG(_input, _flag, _result) \
1229         (((_flag) <= (_result)) ?                               \
1230          ((u32)((_input) & (_flag)) * ((_result) / (_flag))) :  \
1231          ((u32)((_input) & (_flag)) / ((_flag) / (_result))))
1232
1233 static u32 igc_tx_cmd_type(struct sk_buff *skb, u32 tx_flags)
1234 {
1235         /* set type for advanced descriptor with frame checksum insertion */
1236         u32 cmd_type = IGC_ADVTXD_DTYP_DATA |
1237                        IGC_ADVTXD_DCMD_DEXT |
1238                        IGC_ADVTXD_DCMD_IFCS;
1239
1240         /* set HW vlan bit if vlan is present */
1241         cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_VLAN,
1242                                  IGC_ADVTXD_DCMD_VLE);
1243
1244         /* set segmentation bits for TSO */
1245         cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSO,
1246                                  (IGC_ADVTXD_DCMD_TSE));
1247
1248         /* set timestamp bit if present */
1249         cmd_type |= IGC_SET_FLAG(tx_flags, IGC_TX_FLAGS_TSTAMP,
1250                                  (IGC_ADVTXD_MAC_TSTAMP));
1251
1252         /* insert frame checksum */
1253         cmd_type ^= IGC_SET_FLAG(skb->no_fcs, 1, IGC_ADVTXD_DCMD_IFCS);
1254
1255         return cmd_type;
1256 }
1257
1258 static void igc_tx_olinfo_status(struct igc_ring *tx_ring,
1259                                  union igc_adv_tx_desc *tx_desc,
1260                                  u32 tx_flags, unsigned int paylen)
1261 {
1262         u32 olinfo_status = paylen << IGC_ADVTXD_PAYLEN_SHIFT;
1263
1264         /* insert L4 checksum */
1265         olinfo_status |= (tx_flags & IGC_TX_FLAGS_CSUM) *
1266                           ((IGC_TXD_POPTS_TXSM << 8) /
1267                           IGC_TX_FLAGS_CSUM);
1268
1269         /* insert IPv4 checksum */
1270         olinfo_status |= (tx_flags & IGC_TX_FLAGS_IPV4) *
1271                           (((IGC_TXD_POPTS_IXSM << 8)) /
1272                           IGC_TX_FLAGS_IPV4);
1273
1274         tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
1275 }
1276
1277 static int igc_tx_map(struct igc_ring *tx_ring,
1278                       struct igc_tx_buffer *first,
1279                       const u8 hdr_len)
1280 {
1281         struct sk_buff *skb = first->skb;
1282         struct igc_tx_buffer *tx_buffer;
1283         union igc_adv_tx_desc *tx_desc;
1284         u32 tx_flags = first->tx_flags;
1285         skb_frag_t *frag;
1286         u16 i = tx_ring->next_to_use;
1287         unsigned int data_len, size;
1288         dma_addr_t dma;
1289         u32 cmd_type;
1290
1291         cmd_type = igc_tx_cmd_type(skb, tx_flags);
1292         tx_desc = IGC_TX_DESC(tx_ring, i);
1293
1294         igc_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len);
1295
1296         size = skb_headlen(skb);
1297         data_len = skb->data_len;
1298
1299         dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE);
1300
1301         tx_buffer = first;
1302
1303         for (frag = &skb_shinfo(skb)->frags[0];; frag++) {
1304                 if (dma_mapping_error(tx_ring->dev, dma))
1305                         goto dma_error;
1306
1307                 /* record length, and DMA address */
1308                 dma_unmap_len_set(tx_buffer, len, size);
1309                 dma_unmap_addr_set(tx_buffer, dma, dma);
1310
1311                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
1312
1313                 while (unlikely(size > IGC_MAX_DATA_PER_TXD)) {
1314                         tx_desc->read.cmd_type_len =
1315                                 cpu_to_le32(cmd_type ^ IGC_MAX_DATA_PER_TXD);
1316
1317                         i++;
1318                         tx_desc++;
1319                         if (i == tx_ring->count) {
1320                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
1321                                 i = 0;
1322                         }
1323                         tx_desc->read.olinfo_status = 0;
1324
1325                         dma += IGC_MAX_DATA_PER_TXD;
1326                         size -= IGC_MAX_DATA_PER_TXD;
1327
1328                         tx_desc->read.buffer_addr = cpu_to_le64(dma);
1329                 }
1330
1331                 if (likely(!data_len))
1332                         break;
1333
1334                 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size);
1335
1336                 i++;
1337                 tx_desc++;
1338                 if (i == tx_ring->count) {
1339                         tx_desc = IGC_TX_DESC(tx_ring, 0);
1340                         i = 0;
1341                 }
1342                 tx_desc->read.olinfo_status = 0;
1343
1344                 size = skb_frag_size(frag);
1345                 data_len -= size;
1346
1347                 dma = skb_frag_dma_map(tx_ring->dev, frag, 0,
1348                                        size, DMA_TO_DEVICE);
1349
1350                 tx_buffer = &tx_ring->tx_buffer_info[i];
1351         }
1352
1353         /* write last descriptor with RS and EOP bits */
1354         cmd_type |= size | IGC_TXD_DCMD;
1355         tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
1356
1357         netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount);
1358
1359         /* set the timestamp */
1360         first->time_stamp = jiffies;
1361
1362         skb_tx_timestamp(skb);
1363
1364         /* Force memory writes to complete before letting h/w know there
1365          * are new descriptors to fetch.  (Only applicable for weak-ordered
1366          * memory model archs, such as IA-64).
1367          *
1368          * We also need this memory barrier to make certain all of the
1369          * status bits have been updated before next_to_watch is written.
1370          */
1371         wmb();
1372
1373         /* set next_to_watch value indicating a packet is present */
1374         first->next_to_watch = tx_desc;
1375
1376         i++;
1377         if (i == tx_ring->count)
1378                 i = 0;
1379
1380         tx_ring->next_to_use = i;
1381
1382         /* Make sure there is space in the ring for the next send. */
1383         igc_maybe_stop_tx(tx_ring, DESC_NEEDED);
1384
1385         if (netif_xmit_stopped(txring_txq(tx_ring)) || !netdev_xmit_more()) {
1386                 writel(i, tx_ring->tail);
1387         }
1388
1389         return 0;
1390 dma_error:
1391         netdev_err(tx_ring->netdev, "TX DMA map failed\n");
1392         tx_buffer = &tx_ring->tx_buffer_info[i];
1393
1394         /* clear dma mappings for failed tx_buffer_info map */
1395         while (tx_buffer != first) {
1396                 if (dma_unmap_len(tx_buffer, len))
1397                         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1398
1399                 if (i-- == 0)
1400                         i += tx_ring->count;
1401                 tx_buffer = &tx_ring->tx_buffer_info[i];
1402         }
1403
1404         if (dma_unmap_len(tx_buffer, len))
1405                 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
1406
1407         dev_kfree_skb_any(tx_buffer->skb);
1408         tx_buffer->skb = NULL;
1409
1410         tx_ring->next_to_use = i;
1411
1412         return -1;
1413 }
1414
1415 static int igc_tso(struct igc_ring *tx_ring,
1416                    struct igc_tx_buffer *first,
1417                    __le32 launch_time, bool first_flag,
1418                    u8 *hdr_len)
1419 {
1420         u32 vlan_macip_lens, type_tucmd, mss_l4len_idx;
1421         struct sk_buff *skb = first->skb;
1422         union {
1423                 struct iphdr *v4;
1424                 struct ipv6hdr *v6;
1425                 unsigned char *hdr;
1426         } ip;
1427         union {
1428                 struct tcphdr *tcp;
1429                 struct udphdr *udp;
1430                 unsigned char *hdr;
1431         } l4;
1432         u32 paylen, l4_offset;
1433         int err;
1434
1435         if (skb->ip_summed != CHECKSUM_PARTIAL)
1436                 return 0;
1437
1438         if (!skb_is_gso(skb))
1439                 return 0;
1440
1441         err = skb_cow_head(skb, 0);
1442         if (err < 0)
1443                 return err;
1444
1445         ip.hdr = skb_network_header(skb);
1446         l4.hdr = skb_checksum_start(skb);
1447
1448         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
1449         type_tucmd = IGC_ADVTXD_TUCMD_L4T_TCP;
1450
1451         /* initialize outer IP header fields */
1452         if (ip.v4->version == 4) {
1453                 unsigned char *csum_start = skb_checksum_start(skb);
1454                 unsigned char *trans_start = ip.hdr + (ip.v4->ihl * 4);
1455
1456                 /* IP header will have to cancel out any data that
1457                  * is not a part of the outer IP header
1458                  */
1459                 ip.v4->check = csum_fold(csum_partial(trans_start,
1460                                                       csum_start - trans_start,
1461                                                       0));
1462                 type_tucmd |= IGC_ADVTXD_TUCMD_IPV4;
1463
1464                 ip.v4->tot_len = 0;
1465                 first->tx_flags |= IGC_TX_FLAGS_TSO |
1466                                    IGC_TX_FLAGS_CSUM |
1467                                    IGC_TX_FLAGS_IPV4;
1468         } else {
1469                 ip.v6->payload_len = 0;
1470                 first->tx_flags |= IGC_TX_FLAGS_TSO |
1471                                    IGC_TX_FLAGS_CSUM;
1472         }
1473
1474         /* determine offset of inner transport header */
1475         l4_offset = l4.hdr - skb->data;
1476
1477         /* remove payload length from inner checksum */
1478         paylen = skb->len - l4_offset;
1479         if (type_tucmd & IGC_ADVTXD_TUCMD_L4T_TCP) {
1480                 /* compute length of segmentation header */
1481                 *hdr_len = (l4.tcp->doff * 4) + l4_offset;
1482                 csum_replace_by_diff(&l4.tcp->check,
1483                                      (__force __wsum)htonl(paylen));
1484         } else {
1485                 /* compute length of segmentation header */
1486                 *hdr_len = sizeof(*l4.udp) + l4_offset;
1487                 csum_replace_by_diff(&l4.udp->check,
1488                                      (__force __wsum)htonl(paylen));
1489         }
1490
1491         /* update gso size and bytecount with header size */
1492         first->gso_segs = skb_shinfo(skb)->gso_segs;
1493         first->bytecount += (first->gso_segs - 1) * *hdr_len;
1494
1495         /* MSS L4LEN IDX */
1496         mss_l4len_idx = (*hdr_len - l4_offset) << IGC_ADVTXD_L4LEN_SHIFT;
1497         mss_l4len_idx |= skb_shinfo(skb)->gso_size << IGC_ADVTXD_MSS_SHIFT;
1498
1499         /* VLAN MACLEN IPLEN */
1500         vlan_macip_lens = l4.hdr - ip.hdr;
1501         vlan_macip_lens |= (ip.hdr - skb->data) << IGC_ADVTXD_MACLEN_SHIFT;
1502         vlan_macip_lens |= first->tx_flags & IGC_TX_FLAGS_VLAN_MASK;
1503
1504         igc_tx_ctxtdesc(tx_ring, launch_time, first_flag,
1505                         vlan_macip_lens, type_tucmd, mss_l4len_idx);
1506
1507         return 1;
1508 }
1509
1510 static netdev_tx_t igc_xmit_frame_ring(struct sk_buff *skb,
1511                                        struct igc_ring *tx_ring)
1512 {
1513         struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
1514         bool first_flag = false, insert_empty = false;
1515         u16 count = TXD_USE_COUNT(skb_headlen(skb));
1516         __be16 protocol = vlan_get_protocol(skb);
1517         struct igc_tx_buffer *first;
1518         __le32 launch_time = 0;
1519         u32 tx_flags = 0;
1520         unsigned short f;
1521         ktime_t txtime;
1522         u8 hdr_len = 0;
1523         int tso = 0;
1524
1525         /* need: 1 descriptor per page * PAGE_SIZE/IGC_MAX_DATA_PER_TXD,
1526          *      + 1 desc for skb_headlen/IGC_MAX_DATA_PER_TXD,
1527          *      + 2 desc gap to keep tail from touching head,
1528          *      + 1 desc for context descriptor,
1529          * otherwise try next time
1530          */
1531         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++)
1532                 count += TXD_USE_COUNT(skb_frag_size(
1533                                                 &skb_shinfo(skb)->frags[f]));
1534
1535         if (igc_maybe_stop_tx(tx_ring, count + 5)) {
1536                 /* this is a hard error */
1537                 return NETDEV_TX_BUSY;
1538         }
1539
1540         if (!tx_ring->launchtime_enable)
1541                 goto done;
1542
1543         txtime = skb->tstamp;
1544         skb->tstamp = ktime_set(0, 0);
1545         launch_time = igc_tx_launchtime(tx_ring, txtime, &first_flag, &insert_empty);
1546
1547         if (insert_empty) {
1548                 struct igc_tx_buffer *empty_info;
1549                 struct sk_buff *empty;
1550                 void *data;
1551
1552                 empty_info = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1553                 empty = alloc_skb(IGC_EMPTY_FRAME_SIZE, GFP_ATOMIC);
1554                 if (!empty)
1555                         goto done;
1556
1557                 data = skb_put(empty, IGC_EMPTY_FRAME_SIZE);
1558                 memset(data, 0, IGC_EMPTY_FRAME_SIZE);
1559
1560                 igc_tx_ctxtdesc(tx_ring, 0, false, 0, 0, 0);
1561
1562                 if (igc_init_tx_empty_descriptor(tx_ring,
1563                                                  empty,
1564                                                  empty_info) < 0)
1565                         dev_kfree_skb_any(empty);
1566         }
1567
1568 done:
1569         /* record the location of the first descriptor for this packet */
1570         first = &tx_ring->tx_buffer_info[tx_ring->next_to_use];
1571         first->type = IGC_TX_BUFFER_TYPE_SKB;
1572         first->skb = skb;
1573         first->bytecount = skb->len;
1574         first->gso_segs = 1;
1575
1576         if (tx_ring->max_sdu > 0) {
1577                 u32 max_sdu = 0;
1578
1579                 max_sdu = tx_ring->max_sdu +
1580                           (skb_vlan_tagged(first->skb) ? VLAN_HLEN : 0);
1581
1582                 if (first->bytecount > max_sdu) {
1583                         adapter->stats.txdrop++;
1584                         goto out_drop;
1585                 }
1586         }
1587
1588         if (unlikely(test_bit(IGC_RING_FLAG_TX_HWTSTAMP, &tx_ring->flags) &&
1589                      skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
1590                 /* FIXME: add support for retrieving timestamps from
1591                  * the other timer registers before skipping the
1592                  * timestamping request.
1593                  */
1594                 unsigned long flags;
1595
1596                 spin_lock_irqsave(&adapter->ptp_tx_lock, flags);
1597                 if (!adapter->ptp_tx_skb) {
1598                         skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
1599                         tx_flags |= IGC_TX_FLAGS_TSTAMP;
1600
1601                         adapter->ptp_tx_skb = skb_get(skb);
1602                         adapter->ptp_tx_start = jiffies;
1603                 } else {
1604                         adapter->tx_hwtstamp_skipped++;
1605                 }
1606
1607                 spin_unlock_irqrestore(&adapter->ptp_tx_lock, flags);
1608         }
1609
1610         if (skb_vlan_tag_present(skb)) {
1611                 tx_flags |= IGC_TX_FLAGS_VLAN;
1612                 tx_flags |= (skb_vlan_tag_get(skb) << IGC_TX_FLAGS_VLAN_SHIFT);
1613         }
1614
1615         /* record initial flags and protocol */
1616         first->tx_flags = tx_flags;
1617         first->protocol = protocol;
1618
1619         tso = igc_tso(tx_ring, first, launch_time, first_flag, &hdr_len);
1620         if (tso < 0)
1621                 goto out_drop;
1622         else if (!tso)
1623                 igc_tx_csum(tx_ring, first, launch_time, first_flag);
1624
1625         igc_tx_map(tx_ring, first, hdr_len);
1626
1627         return NETDEV_TX_OK;
1628
1629 out_drop:
1630         dev_kfree_skb_any(first->skb);
1631         first->skb = NULL;
1632
1633         return NETDEV_TX_OK;
1634 }
1635
1636 static inline struct igc_ring *igc_tx_queue_mapping(struct igc_adapter *adapter,
1637                                                     struct sk_buff *skb)
1638 {
1639         unsigned int r_idx = skb->queue_mapping;
1640
1641         if (r_idx >= adapter->num_tx_queues)
1642                 r_idx = r_idx % adapter->num_tx_queues;
1643
1644         return adapter->tx_ring[r_idx];
1645 }
1646
1647 static netdev_tx_t igc_xmit_frame(struct sk_buff *skb,
1648                                   struct net_device *netdev)
1649 {
1650         struct igc_adapter *adapter = netdev_priv(netdev);
1651
1652         /* The minimum packet size with TCTL.PSP set is 17 so pad the skb
1653          * in order to meet this minimum size requirement.
1654          */
1655         if (skb->len < 17) {
1656                 if (skb_padto(skb, 17))
1657                         return NETDEV_TX_OK;
1658                 skb->len = 17;
1659         }
1660
1661         return igc_xmit_frame_ring(skb, igc_tx_queue_mapping(adapter, skb));
1662 }
1663
1664 static void igc_rx_checksum(struct igc_ring *ring,
1665                             union igc_adv_rx_desc *rx_desc,
1666                             struct sk_buff *skb)
1667 {
1668         skb_checksum_none_assert(skb);
1669
1670         /* Ignore Checksum bit is set */
1671         if (igc_test_staterr(rx_desc, IGC_RXD_STAT_IXSM))
1672                 return;
1673
1674         /* Rx checksum disabled via ethtool */
1675         if (!(ring->netdev->features & NETIF_F_RXCSUM))
1676                 return;
1677
1678         /* TCP/UDP checksum error bit is set */
1679         if (igc_test_staterr(rx_desc,
1680                              IGC_RXDEXT_STATERR_L4E |
1681                              IGC_RXDEXT_STATERR_IPE)) {
1682                 /* work around errata with sctp packets where the TCPE aka
1683                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
1684                  * packets (aka let the stack check the crc32c)
1685                  */
1686                 if (!(skb->len == 60 &&
1687                       test_bit(IGC_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) {
1688                         u64_stats_update_begin(&ring->rx_syncp);
1689                         ring->rx_stats.csum_err++;
1690                         u64_stats_update_end(&ring->rx_syncp);
1691                 }
1692                 /* let the stack verify checksum errors */
1693                 return;
1694         }
1695         /* It must be a TCP or UDP packet with a valid checksum */
1696         if (igc_test_staterr(rx_desc, IGC_RXD_STAT_TCPCS |
1697                                       IGC_RXD_STAT_UDPCS))
1698                 skb->ip_summed = CHECKSUM_UNNECESSARY;
1699
1700         netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
1701                    le32_to_cpu(rx_desc->wb.upper.status_error));
1702 }
1703
1704 /* Mapping HW RSS Type to enum pkt_hash_types.
      * igc_rx_hash() indexes this table with the value returned by
      * igc_rss_type() and passes the result to skb_set_hash().
      */
1705 static const enum pkt_hash_types igc_rss_type_table[IGC_RSS_TYPE_MAX_TABLE] = {
1706         [IGC_RSS_TYPE_NO_HASH]          = PKT_HASH_TYPE_L2,
1707         [IGC_RSS_TYPE_HASH_TCP_IPV4]    = PKT_HASH_TYPE_L4,
1708         [IGC_RSS_TYPE_HASH_IPV4]        = PKT_HASH_TYPE_L3,
1709         [IGC_RSS_TYPE_HASH_TCP_IPV6]    = PKT_HASH_TYPE_L4,
1710         [IGC_RSS_TYPE_HASH_IPV6_EX]     = PKT_HASH_TYPE_L3,
1711         [IGC_RSS_TYPE_HASH_IPV6]        = PKT_HASH_TYPE_L3,
1712         [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = PKT_HASH_TYPE_L4,
1713         [IGC_RSS_TYPE_HASH_UDP_IPV4]    = PKT_HASH_TYPE_L4,
1714         [IGC_RSS_TYPE_HASH_UDP_IPV6]    = PKT_HASH_TYPE_L4,
1715         [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = PKT_HASH_TYPE_L4,
1716         [10] = PKT_HASH_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
1717         [11] = PKT_HASH_TYPE_NONE, /* keep array sized for SW bit-mask   */
1718         [12] = PKT_HASH_TYPE_NONE, /* to handle future HW revisions      */
1719         [13] = PKT_HASH_TYPE_NONE,
1720         [14] = PKT_HASH_TYPE_NONE,
1721         [15] = PKT_HASH_TYPE_NONE,
1722 };
1723
1724 static inline void igc_rx_hash(struct igc_ring *ring,
1725                                union igc_adv_rx_desc *rx_desc,
1726                                struct sk_buff *skb)
1727 {
1728         if (ring->netdev->features & NETIF_F_RXHASH) {
1729                 u32 rss_hash = le32_to_cpu(rx_desc->wb.lower.hi_dword.rss);
1730                 u32 rss_type = igc_rss_type(rx_desc);
1731
1732                 skb_set_hash(skb, rss_hash, igc_rss_type_table[rss_type]);
1733         }
1734 }
1735
1736 static void igc_rx_vlan(struct igc_ring *rx_ring,
1737                         union igc_adv_rx_desc *rx_desc,
1738                         struct sk_buff *skb)
1739 {
1740         struct net_device *dev = rx_ring->netdev;
1741         u16 vid;
1742
1743         if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
1744             igc_test_staterr(rx_desc, IGC_RXD_STAT_VP)) {
1745                 if (igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_LB) &&
1746                     test_bit(IGC_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags))
1747                         vid = be16_to_cpu((__force __be16)rx_desc->wb.upper.vlan);
1748                 else
1749                         vid = le16_to_cpu(rx_desc->wb.upper.vlan);
1750
1751                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid);
1752         }
1753 }
1754
1755 /**
1756  * igc_process_skb_fields - Populate skb header fields from Rx descriptor
1757  * @rx_ring: rx descriptor ring packet is being transacted on
1758  * @rx_desc: pointer to the EOP Rx descriptor
1759  * @skb: pointer to current skb being populated
1760  *
1761  * This function checks the ring, descriptor, and packet information in order
1762  * to populate the hash, checksum, VLAN, protocol, and other fields within the
1763  * skb.
1764  */
1765 static void igc_process_skb_fields(struct igc_ring *rx_ring,
1766                                    union igc_adv_rx_desc *rx_desc,
1767                                    struct sk_buff *skb)
1768 {
1769         igc_rx_hash(rx_ring, rx_desc, skb);
1770
1771         igc_rx_checksum(rx_ring, rx_desc, skb);
1772
1773         igc_rx_vlan(rx_ring, rx_desc, skb);
1774
1775         skb_record_rx_queue(skb, rx_ring->queue_index);
1776
1777         skb->protocol = eth_type_trans(skb, rx_ring->netdev);
1778 }
1779
1780 static void igc_vlan_mode(struct net_device *netdev, netdev_features_t features)
1781 {
1782         bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
1783         struct igc_adapter *adapter = netdev_priv(netdev);
1784         struct igc_hw *hw = &adapter->hw;
1785         u32 ctrl;
1786
1787         ctrl = rd32(IGC_CTRL);
1788
1789         if (enable) {
1790                 /* enable VLAN tag insert/strip */
1791                 ctrl |= IGC_CTRL_VME;
1792         } else {
1793                 /* disable VLAN tag insert/strip */
1794                 ctrl &= ~IGC_CTRL_VME;
1795         }
1796         wr32(IGC_CTRL, ctrl);
1797 }
1798
1799 static void igc_restore_vlan(struct igc_adapter *adapter)
1800 {
1801         igc_vlan_mode(adapter->netdev, adapter->netdev->features);
1802 }
1803
1804 static struct igc_rx_buffer *igc_get_rx_buffer(struct igc_ring *rx_ring,
1805                                                const unsigned int size,
1806                                                int *rx_buffer_pgcnt)
1807 {
1808         struct igc_rx_buffer *rx_buffer;
1809
1810         rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
1811         *rx_buffer_pgcnt =
1812 #if (PAGE_SIZE < 8192)
1813                 page_count(rx_buffer->page);
1814 #else
1815                 0;
1816 #endif
1817         prefetchw(rx_buffer->page);
1818
1819         /* we are reusing so sync this buffer for CPU use */
1820         dma_sync_single_range_for_cpu(rx_ring->dev,
1821                                       rx_buffer->dma,
1822                                       rx_buffer->page_offset,
1823                                       size,
1824                                       DMA_FROM_DEVICE);
1825
1826         rx_buffer->pagecnt_bias--;
1827
1828         return rx_buffer;
1829 }
1830
1831 static void igc_rx_buffer_flip(struct igc_rx_buffer *buffer,
1832                                unsigned int truesize)
1833 {
1834 #if (PAGE_SIZE < 8192)
1835         buffer->page_offset ^= truesize;
1836 #else
1837         buffer->page_offset += truesize;
1838 #endif
1839 }
1840
/* Return the truesize to account for a received frame of @size bytes.
 *
 * With PAGE_SIZE < 8192 this is half of the ring's Rx page size.  On larger
 * pages it is the cache-aligned frame size, plus IGC_SKB_PAD and the
 * aligned skb_shared_info when the ring uses build_skb.
 */
static unsigned int igc_get_rx_frame_truesize(struct igc_ring *ring,
					      unsigned int size)
{
#if (PAGE_SIZE < 8192)
	return igc_rx_pg_size(ring) / 2;
#else
	if (!ring_uses_build_skb(ring))
		return SKB_DATA_ALIGN(size);

	return SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) +
	       SKB_DATA_ALIGN(IGC_SKB_PAD + size);
#endif
}
1856
1857 /**
1858  * igc_add_rx_frag - Add contents of Rx buffer to sk_buff
1859  * @rx_ring: rx descriptor ring to transact packets on
1860  * @rx_buffer: buffer containing page to add
1861  * @skb: sk_buff to place the data into
1862  * @size: size of buffer to be added
1863  *
1864  * This function will add the data contained in rx_buffer->page to the skb.
1865  */
1866 static void igc_add_rx_frag(struct igc_ring *rx_ring,
1867                             struct igc_rx_buffer *rx_buffer,
1868                             struct sk_buff *skb,
1869                             unsigned int size)
1870 {
1871         unsigned int truesize;
1872
1873 #if (PAGE_SIZE < 8192)
1874         truesize = igc_rx_pg_size(rx_ring) / 2;
1875 #else
1876         truesize = ring_uses_build_skb(rx_ring) ?
1877                    SKB_DATA_ALIGN(IGC_SKB_PAD + size) :
1878                    SKB_DATA_ALIGN(size);
1879 #endif
1880         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_buffer->page,
1881                         rx_buffer->page_offset, size, truesize);
1882
1883         igc_rx_buffer_flip(rx_buffer, truesize);
1884 }
1885
1886 static struct sk_buff *igc_build_skb(struct igc_ring *rx_ring,
1887                                      struct igc_rx_buffer *rx_buffer,
1888                                      struct xdp_buff *xdp)
1889 {
1890         unsigned int size = xdp->data_end - xdp->data;
1891         unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1892         unsigned int metasize = xdp->data - xdp->data_meta;
1893         struct sk_buff *skb;
1894
1895         /* prefetch first cache line of first page */
1896         net_prefetch(xdp->data_meta);
1897
1898         /* build an skb around the page buffer */
1899         skb = napi_build_skb(xdp->data_hard_start, truesize);
1900         if (unlikely(!skb))
1901                 return NULL;
1902
1903         /* update pointers within the skb to store the data */
1904         skb_reserve(skb, xdp->data - xdp->data_hard_start);
1905         __skb_put(skb, size);
1906         if (metasize)
1907                 skb_metadata_set(skb, metasize);
1908
1909         igc_rx_buffer_flip(rx_buffer, truesize);
1910         return skb;
1911 }
1912
1913 static struct sk_buff *igc_construct_skb(struct igc_ring *rx_ring,
1914                                          struct igc_rx_buffer *rx_buffer,
1915                                          struct xdp_buff *xdp,
1916                                          ktime_t timestamp)
1917 {
1918         unsigned int metasize = xdp->data - xdp->data_meta;
1919         unsigned int size = xdp->data_end - xdp->data;
1920         unsigned int truesize = igc_get_rx_frame_truesize(rx_ring, size);
1921         void *va = xdp->data;
1922         unsigned int headlen;
1923         struct sk_buff *skb;
1924
1925         /* prefetch first cache line of first page */
1926         net_prefetch(xdp->data_meta);
1927
1928         /* allocate a skb to store the frags */
1929         skb = napi_alloc_skb(&rx_ring->q_vector->napi,
1930                              IGC_RX_HDR_LEN + metasize);
1931         if (unlikely(!skb))
1932                 return NULL;
1933
1934         if (timestamp)
1935                 skb_hwtstamps(skb)->hwtstamp = timestamp;
1936
1937         /* Determine available headroom for copy */
1938         headlen = size;
1939         if (headlen > IGC_RX_HDR_LEN)
1940                 headlen = eth_get_headlen(skb->dev, va, IGC_RX_HDR_LEN);
1941
1942         /* align pull length to size of long to optimize memcpy performance */
1943         memcpy(__skb_put(skb, headlen + metasize), xdp->data_meta,
1944                ALIGN(headlen + metasize, sizeof(long)));
1945
1946         if (metasize) {
1947                 skb_metadata_set(skb, metasize);
1948                 __skb_pull(skb, metasize);
1949         }
1950
1951         /* update all of the pointers */
1952         size -= headlen;
1953         if (size) {
1954                 skb_add_rx_frag(skb, 0, rx_buffer->page,
1955                                 (va + headlen) - page_address(rx_buffer->page),
1956                                 size, truesize);
1957                 igc_rx_buffer_flip(rx_buffer, truesize);
1958         } else {
1959                 rx_buffer->pagecnt_bias++;
1960         }
1961
1962         return skb;
1963 }
1964
1965 /**
1966  * igc_reuse_rx_page - page flip buffer and store it back on the ring
1967  * @rx_ring: rx descriptor ring to store buffers on
1968  * @old_buff: donor buffer to have page reused
1969  *
1970  * Synchronizes page for reuse by the adapter
1971  */
1972 static void igc_reuse_rx_page(struct igc_ring *rx_ring,
1973                               struct igc_rx_buffer *old_buff)
1974 {
1975         u16 nta = rx_ring->next_to_alloc;
1976         struct igc_rx_buffer *new_buff;
1977
1978         new_buff = &rx_ring->rx_buffer_info[nta];
1979
1980         /* update, and store next to alloc */
1981         nta++;
1982         rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
1983
1984         /* Transfer page from old buffer to new buffer.
1985          * Move each member individually to avoid possible store
1986          * forwarding stalls.
1987          */
1988         new_buff->dma           = old_buff->dma;
1989         new_buff->page          = old_buff->page;
1990         new_buff->page_offset   = old_buff->page_offset;
1991         new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
1992 }
1993
1994 static bool igc_can_reuse_rx_page(struct igc_rx_buffer *rx_buffer,
1995                                   int rx_buffer_pgcnt)
1996 {
1997         unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
1998         struct page *page = rx_buffer->page;
1999
2000         /* avoid re-using remote and pfmemalloc pages */
2001         if (!dev_page_is_reusable(page))
2002                 return false;
2003
2004 #if (PAGE_SIZE < 8192)
2005         /* if we are only owner of page we can reuse it */
2006         if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1))
2007                 return false;
2008 #else
2009 #define IGC_LAST_OFFSET \
2010         (SKB_WITH_OVERHEAD(PAGE_SIZE) - IGC_RXBUFFER_2048)
2011
2012         if (rx_buffer->page_offset > IGC_LAST_OFFSET)
2013                 return false;
2014 #endif
2015
2016         /* If we have drained the page fragment pool we need to update
2017          * the pagecnt_bias and page count so that we fully restock the
2018          * number of references the driver holds.
2019          */
2020         if (unlikely(pagecnt_bias == 1)) {
2021                 page_ref_add(page, USHRT_MAX - 1);
2022                 rx_buffer->pagecnt_bias = USHRT_MAX;
2023         }
2024
2025         return true;
2026 }
2027
2028 /**
2029  * igc_is_non_eop - process handling of non-EOP buffers
2030  * @rx_ring: Rx ring being processed
2031  * @rx_desc: Rx descriptor for current buffer
2032  *
2033  * This function updates next to clean.  If the buffer is an EOP buffer
2034  * this function exits returning false, otherwise it will place the
2035  * sk_buff in the next buffer to be chained and return true indicating
2036  * that this is in fact a non-EOP buffer.
2037  */
2038 static bool igc_is_non_eop(struct igc_ring *rx_ring,
2039                            union igc_adv_rx_desc *rx_desc)
2040 {
2041         u32 ntc = rx_ring->next_to_clean + 1;
2042
2043         /* fetch, update, and store next to clean */
2044         ntc = (ntc < rx_ring->count) ? ntc : 0;
2045         rx_ring->next_to_clean = ntc;
2046
2047         prefetch(IGC_RX_DESC(rx_ring, ntc));
2048
2049         if (likely(igc_test_staterr(rx_desc, IGC_RXD_STAT_EOP)))
2050                 return false;
2051
2052         return true;
2053 }
2054
2055 /**
2056  * igc_cleanup_headers - Correct corrupted or empty headers
2057  * @rx_ring: rx descriptor ring packet is being transacted on
2058  * @rx_desc: pointer to the EOP Rx descriptor
2059  * @skb: pointer to current skb being fixed
2060  *
2061  * Address the case where we are pulling data in on pages only
2062  * and as such no data is present in the skb header.
2063  *
2064  * In addition if skb is not at least 60 bytes we need to pad it so that
2065  * it is large enough to qualify as a valid Ethernet frame.
2066  *
2067  * Returns true if an error was encountered and skb was freed.
2068  */
2069 static bool igc_cleanup_headers(struct igc_ring *rx_ring,
2070                                 union igc_adv_rx_desc *rx_desc,
2071                                 struct sk_buff *skb)
2072 {
2073         /* XDP packets use error pointer so abort at this point */
2074         if (IS_ERR(skb))
2075                 return true;
2076
2077         if (unlikely(igc_test_staterr(rx_desc, IGC_RXDEXT_STATERR_RXE))) {
2078                 struct net_device *netdev = rx_ring->netdev;
2079
2080                 if (!(netdev->features & NETIF_F_RXALL)) {
2081                         dev_kfree_skb_any(skb);
2082                         return true;
2083                 }
2084         }
2085
2086         /* if eth_skb_pad returns an error the skb was freed */
2087         if (eth_skb_pad(skb))
2088                 return true;
2089
2090         return false;
2091 }
2092
2093 static void igc_put_rx_buffer(struct igc_ring *rx_ring,
2094                               struct igc_rx_buffer *rx_buffer,
2095                               int rx_buffer_pgcnt)
2096 {
2097         if (igc_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) {
2098                 /* hand second half of page back to the ring */
2099                 igc_reuse_rx_page(rx_ring, rx_buffer);
2100         } else {
2101                 /* We are not reusing the buffer so unmap it and free
2102                  * any references we are holding to it
2103                  */
2104                 dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma,
2105                                      igc_rx_pg_size(rx_ring), DMA_FROM_DEVICE,
2106                                      IGC_RX_DMA_ATTR);
2107                 __page_frag_cache_drain(rx_buffer->page,
2108                                         rx_buffer->pagecnt_bias);
2109         }
2110
2111         /* clear contents of rx_buffer */
2112         rx_buffer->page = NULL;
2113 }
2114
2115 static inline unsigned int igc_rx_offset(struct igc_ring *rx_ring)
2116 {
2117         struct igc_adapter *adapter = rx_ring->q_vector->adapter;
2118
2119         if (ring_uses_build_skb(rx_ring))
2120                 return IGC_SKB_PAD;
2121         if (igc_xdp_is_enabled(adapter))
2122                 return XDP_PACKET_HEADROOM;
2123
2124         return 0;
2125 }
2126
2127 static bool igc_alloc_mapped_page(struct igc_ring *rx_ring,
2128                                   struct igc_rx_buffer *bi)
2129 {
2130         struct page *page = bi->page;
2131         dma_addr_t dma;
2132
2133         /* since we are recycling buffers we should seldom need to alloc */
2134         if (likely(page))
2135                 return true;
2136
2137         /* alloc new page for storage */
2138         page = dev_alloc_pages(igc_rx_pg_order(rx_ring));
2139         if (unlikely(!page)) {
2140                 rx_ring->rx_stats.alloc_failed++;
2141                 return false;
2142         }
2143
2144         /* map page for use */
2145         dma = dma_map_page_attrs(rx_ring->dev, page, 0,
2146                                  igc_rx_pg_size(rx_ring),
2147                                  DMA_FROM_DEVICE,
2148                                  IGC_RX_DMA_ATTR);
2149
2150         /* if mapping failed free memory back to system since
2151          * there isn't much point in holding memory we can't use
2152          */
2153         if (dma_mapping_error(rx_ring->dev, dma)) {
2154                 __free_page(page);
2155
2156                 rx_ring->rx_stats.alloc_failed++;
2157                 return false;
2158         }
2159
2160         bi->dma = dma;
2161         bi->page = page;
2162         bi->page_offset = igc_rx_offset(rx_ring);
2163         page_ref_add(page, USHRT_MAX - 1);
2164         bi->pagecnt_bias = USHRT_MAX;
2165
2166         return true;
2167 }
2168
2169 /**
2170  * igc_alloc_rx_buffers - Replace used receive buffers; packet split
2171  * @rx_ring: rx descriptor ring
2172  * @cleaned_count: number of buffers to clean
      *
      * Starting at next_to_use, gives up to @cleaned_count descriptors a
      * mapped page and a fresh buffer address.  Stops early if a page cannot
      * be obtained.  If any descriptor was refilled, next_to_use,
      * next_to_alloc and the ring's tail are updated.
2173  */
2174 static void igc_alloc_rx_buffers(struct igc_ring *rx_ring, u16 cleaned_count)
2175 {
2176         union igc_adv_rx_desc *rx_desc;
2177         u16 i = rx_ring->next_to_use;
2178         struct igc_rx_buffer *bi;
2179         u16 bufsz;
2180
2181         /* nothing to do */
2182         if (!cleaned_count)
2183                 return;
2184
2185         rx_desc = IGC_RX_DESC(rx_ring, i);
2186         bi = &rx_ring->rx_buffer_info[i];
             /* Bias the u16 index by -count so that it reaches exactly 0 when
              * the end of the ring is hit; the loop's wrap test is then "!i".
              * The bias is removed again after the loop.
              */
2187         i -= rx_ring->count;
2188
2189         bufsz = igc_rx_bufsz(rx_ring);
2190
2191         do {
2192                 if (!igc_alloc_mapped_page(rx_ring, bi))
2193                         break;
2194
2195                 /* sync the buffer for use by the device */
2196                 dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
2197                                                  bi->page_offset, bufsz,
2198                                                  DMA_FROM_DEVICE);
2199
2200                 /* Refresh the desc even if buffer_addrs didn't change
2201                  * because each write-back erases this info.
2202                  */
2203                 rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset);
2204
2205                 rx_desc++;
2206                 bi++;
2207                 i++;
                     /* biased index hit 0: wrap to the start of the ring */
2208                 if (unlikely(!i)) {
2209                         rx_desc = IGC_RX_DESC(rx_ring, 0);
2210                         bi = rx_ring->rx_buffer_info;
2211                         i -= rx_ring->count;
2212                 }
2213
2214                 /* clear the length for the next_to_use descriptor */
2215                 rx_desc->wb.upper.length = 0;
2216
2217                 cleaned_count--;
2218         } while (cleaned_count);
2219
             /* undo the bias to get back a real ring index */
2220         i += rx_ring->count;
2221
2222         if (rx_ring->next_to_use != i) {
2223                 /* record the next descriptor to use */
2224                 rx_ring->next_to_use = i;
2225
2226                 /* update next to alloc since we have filled the ring */
2227                 rx_ring->next_to_alloc = i;
2228
2229                 /* Force memory writes to complete before letting h/w
2230                  * know there are new descriptors to fetch.  (Only
2231                  * applicable for weak-ordered memory model archs,
2232                  * such as IA-64).
2233                  */
2234                 wmb();
2235                 writel(i, rx_ring->tail);
2236         }
2237 }
2238
     /* Refill up to @count Rx descriptors, starting at next_to_use, with
      * buffers taken from ring->xsk_pool via xsk_buff_alloc().
      *
      * Returns false if an allocation failed before @count descriptors were
      * filled, true otherwise (including when @count is 0).  next_to_use and
      * the ring's tail are only written if at least one descriptor was filled.
      */
2239 static bool igc_alloc_rx_buffers_zc(struct igc_ring *ring, u16 count)
2240 {
2241         union igc_adv_rx_desc *desc;
2242         u16 i = ring->next_to_use;
2243         struct igc_rx_buffer *bi;
2244         dma_addr_t dma;
2245         bool ok = true;
2246
2247         if (!count)
2248                 return ok;
2249
2250         XSK_CHECK_PRIV_TYPE(struct igc_xdp_buff);
2251
2252         desc = IGC_RX_DESC(ring, i);
2253         bi = &ring->rx_buffer_info[i];
             /* Bias the u16 index by -count so the wrap test below is "!i";
              * the bias is removed again after the loop.
              */
2254         i -= ring->count;
2255
2256         do {
2257                 bi->xdp = xsk_buff_alloc(ring->xsk_pool);
2258                 if (!bi->xdp) {
2259                         ok = false;
2260                         break;
2261                 }
2262
2263                 dma = xsk_buff_xdp_get_dma(bi->xdp);
2264                 desc->read.pkt_addr = cpu_to_le64(dma);
2265
2266                 desc++;
2267                 bi++;
2268                 i++;
                     /* biased index hit 0: wrap to the start of the ring */
2269                 if (unlikely(!i)) {
2270                         desc = IGC_RX_DESC(ring, 0);
2271                         bi = ring->rx_buffer_info;
2272                         i -= ring->count;
2273                 }
2274
2275                 /* Clear the length for the next_to_use descriptor. */
2276                 desc->wb.upper.length = 0;
2277
2278                 count--;
2279         } while (count);
2280
             /* undo the bias to get back a real ring index */
2281         i += ring->count;
2282
2283         if (ring->next_to_use != i) {
2284                 ring->next_to_use = i;
2285
2286                 /* Force memory writes to complete before letting h/w
2287                  * know there are new descriptors to fetch.  (Only
2288                  * applicable for weak-ordered memory model archs,
2289                  * such as IA-64).
2290                  */
2291                 wmb();
2292                 writel(i, ring->tail);
2293         }
2294
2295         return ok;
2296 }
2297
2298 /* Fill Tx descriptors for one XDP frame: one for the linear data and one
      * per fragment.  Only descriptors and ring->next_to_use are updated; the
      * tail register is not written here.
      *
      * Returns 0 on success, -EBUSY if igc_maybe_stop_tx() reports that the
      * descriptors are not available, or -ENOMEM if a DMA mapping fails (any
      * mappings recorded so far for this frame are released first).
      *
      * The caller must hold __netif_tx_lock.
      */
2299 static int igc_xdp_init_tx_descriptor(struct igc_ring *ring,
2300                                       struct xdp_frame *xdpf)
2301 {
2302         struct skb_shared_info *sinfo = xdp_get_shared_info_from_frame(xdpf);
2303         u8 nr_frags = unlikely(xdp_frame_has_frags(xdpf)) ? sinfo->nr_frags : 0;
2304         u16 count, index = ring->next_to_use;
2305         struct igc_tx_buffer *head = &ring->tx_buffer_info[index];
2306         struct igc_tx_buffer *buffer = head;
2307         union igc_adv_tx_desc *desc = IGC_TX_DESC(ring, index);
2308         u32 olinfo_status, len = xdpf->len, cmd_type;
2309         void *data = xdpf->data;
2310         u16 i;
2311
             /* descriptors needed for the linear part plus every fragment */
2312         count = TXD_USE_COUNT(len);
2313         for (i = 0; i < nr_frags; i++)
2314                 count += TXD_USE_COUNT(skb_frag_size(&sinfo->frags[i]));
2315
2316         if (igc_maybe_stop_tx(ring, count + 3)) {
2317                 /* this is a hard error */
2318                 return -EBUSY;
2319         }
2320
             /* the head buffer carries the bookkeeping for the whole frame */
2321         i = 0;
2322         head->bytecount = xdp_get_frame_len(xdpf);
2323         head->type = IGC_TX_BUFFER_TYPE_XDP;
2324         head->gso_segs = 1;
2325         head->xdpf = xdpf;
2326
2327         olinfo_status = head->bytecount << IGC_ADVTXD_PAYLEN_SHIFT;
2328         desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2329
             /* first pass maps the linear data, later passes one frag each */
2330         for (;;) {
2331                 dma_addr_t dma;
2332
2333                 dma = dma_map_single(ring->dev, data, len, DMA_TO_DEVICE);
2334                 if (dma_mapping_error(ring->dev, dma)) {
2335                         netdev_err_once(ring->netdev,
2336                                         "Failed to map DMA for TX\n");
2337                         goto unmap;
2338                 }
2339
2340                 dma_unmap_len_set(buffer, len, len);
2341                 dma_unmap_addr_set(buffer, dma, dma);
2342
2343                 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2344                            IGC_ADVTXD_DCMD_IFCS | len;
2345
2346                 desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2347                 desc->read.buffer_addr = cpu_to_le64(dma);
2348
2349                 buffer->protocol = 0;
2350
2351                 if (++index == ring->count)
2352                         index = 0;
2353
                     /* i counts fragments already queued; done when all are in */
2354                 if (i == nr_frags)
2355                         break;
2356
2357                 buffer = &ring->tx_buffer_info[index];
2358                 desc = IGC_TX_DESC(ring, index);
2359                 desc->read.olinfo_status = 0;
2360
2361                 data = skb_frag_address(&sinfo->frags[i]);
2362                 len = skb_frag_size(&sinfo->frags[i]);
2363                 i++;
2364         }
             /* desc is the last descriptor of the frame at this point */
2365         desc->read.cmd_type_len |= cpu_to_le32(IGC_TXD_DCMD);
2366
2367         netdev_tx_sent_queue(txring_txq(ring), head->bytecount);
2368         /* set the timestamp */
2369         head->time_stamp = jiffies;
2370         /* set next_to_watch value indicating a packet is present */
2371         head->next_to_watch = desc;
2372         ring->next_to_use = index;
2373
2374         return 0;
2375
2376 unmap:
             /* Walk backwards from the current index to the head buffer,
              * releasing every mapping that has a non-zero recorded length.
              * NOTE(review): buffers are mapped with dma_map_single() above but
              * released with dma_unmap_page() here - confirm this is intended.
              */
2377         for (;;) {
2378                 buffer = &ring->tx_buffer_info[index];
2379                 if (dma_unmap_len(buffer, len))
2380                         dma_unmap_page(ring->dev,
2381                                        dma_unmap_addr(buffer, dma),
2382                                        dma_unmap_len(buffer, len),
2383                                        DMA_TO_DEVICE);
2384                 dma_unmap_len_set(buffer, len, 0);
2385                 if (buffer == head)
2386                         break;
2387
2388                 if (!index)
2389                         index += ring->count;
2390                 index--;
2391         }
2392
2393         return -ENOMEM;
2394 }
2395
2396 static struct igc_ring *igc_xdp_get_tx_ring(struct igc_adapter *adapter,
2397                                             int cpu)
2398 {
2399         int index = cpu;
2400
2401         if (unlikely(index < 0))
2402                 index = 0;
2403
2404         while (index >= adapter->num_tx_queues)
2405                 index -= adapter->num_tx_queues;
2406
2407         return adapter->tx_ring[index];
2408 }
2409
2410 static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
2411 {
2412         struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
2413         int cpu = smp_processor_id();
2414         struct netdev_queue *nq;
2415         struct igc_ring *ring;
2416         int res;
2417
2418         if (unlikely(!xdpf))
2419                 return -EFAULT;
2420
2421         ring = igc_xdp_get_tx_ring(adapter, cpu);
2422         nq = txring_txq(ring);
2423
2424         __netif_tx_lock(nq, cpu);
2425         /* Avoid transmit queue timeout since we share it with the slow path */
2426         txq_trans_cond_update(nq);
2427         res = igc_xdp_init_tx_descriptor(ring, xdpf);
2428         __netif_tx_unlock(nq);
2429         return res;
2430 }
2431
2432 /* This function assumes rcu_read_lock() is held by the caller. */
2433 static int __igc_xdp_run_prog(struct igc_adapter *adapter,
2434                               struct bpf_prog *prog,
2435                               struct xdp_buff *xdp)
2436 {
2437         u32 act = bpf_prog_run_xdp(prog, xdp);
2438
2439         switch (act) {
2440         case XDP_PASS:
2441                 return IGC_XDP_PASS;
2442         case XDP_TX:
2443                 if (igc_xdp_xmit_back(adapter, xdp) < 0)
2444                         goto out_failure;
2445                 return IGC_XDP_TX;
2446         case XDP_REDIRECT:
2447                 if (xdp_do_redirect(adapter->netdev, xdp, prog) < 0)
2448                         goto out_failure;
2449                 return IGC_XDP_REDIRECT;
2450                 break;
2451         default:
2452                 bpf_warn_invalid_xdp_action(adapter->netdev, prog, act);
2453                 fallthrough;
2454         case XDP_ABORTED:
2455 out_failure:
2456                 trace_xdp_exception(adapter->netdev, prog, act);
2457                 fallthrough;
2458         case XDP_DROP:
2459                 return IGC_XDP_CONSUMED;
2460         }
2461 }
2462
2463 static struct sk_buff *igc_xdp_run_prog(struct igc_adapter *adapter,
2464                                         struct xdp_buff *xdp)
2465 {
2466         struct bpf_prog *prog;
2467         int res;
2468
2469         prog = READ_ONCE(adapter->xdp_prog);
2470         if (!prog) {
2471                 res = IGC_XDP_PASS;
2472                 goto out;
2473         }
2474
2475         res = __igc_xdp_run_prog(adapter, prog, xdp);
2476
2477 out:
2478         return ERR_PTR(-res);
2479 }
2480
2481 /* Write ring->next_to_use to the ring's tail, after a write barrier.
      * This function assumes __netif_tx_lock is held by the caller.
      */
2482 static void igc_flush_tx_descriptors(struct igc_ring *ring)
2483 {
2484         /* Once tail pointer is updated, hardware can fetch the descriptors
2485          * any time so we issue a write membar here to ensure all memory
2486          * writes are complete before the tail pointer is updated.
2487          */
2488         wmb();
2489         writel(ring->next_to_use, ring->tail);
2490 }
2491
2492 static void igc_finalize_xdp(struct igc_adapter *adapter, int status)
2493 {
2494         int cpu = smp_processor_id();
2495         struct netdev_queue *nq;
2496         struct igc_ring *ring;
2497
2498         if (status & IGC_XDP_TX) {
2499                 ring = igc_xdp_get_tx_ring(adapter, cpu);
2500                 nq = txring_txq(ring);
2501
2502                 __netif_tx_lock(nq, cpu);
2503                 igc_flush_tx_descriptors(ring);
2504                 __netif_tx_unlock(nq);
2505         }
2506
2507         if (status & IGC_XDP_REDIRECT)
2508                 xdp_do_flush();
2509 }
2510
2511 static void igc_update_rx_stats(struct igc_q_vector *q_vector,
2512                                 unsigned int packets, unsigned int bytes)
2513 {
2514         struct igc_ring *ring = q_vector->rx.ring;
2515
2516         u64_stats_update_begin(&ring->rx_syncp);
2517         ring->rx_stats.packets += packets;
2518         ring->rx_stats.bytes += bytes;
2519         u64_stats_update_end(&ring->rx_syncp);
2520
2521         q_vector->rx.total_packets += packets;
2522         q_vector->rx.total_bytes += bytes;
2523 }
2524
2525 static int igc_clean_rx_irq(struct igc_q_vector *q_vector, const int budget)
2526 {
2527         unsigned int total_bytes = 0, total_packets = 0;
2528         struct igc_adapter *adapter = q_vector->adapter;
2529         struct igc_ring *rx_ring = q_vector->rx.ring;
2530         struct sk_buff *skb = rx_ring->skb;
2531         u16 cleaned_count = igc_desc_unused(rx_ring);
2532         int xdp_status = 0, rx_buffer_pgcnt;
2533
2534         while (likely(total_packets < budget)) {
2535                 union igc_adv_rx_desc *rx_desc;
2536                 struct igc_rx_buffer *rx_buffer;
2537                 unsigned int size, truesize;
2538                 struct igc_xdp_buff ctx;
2539                 ktime_t timestamp = 0;
2540                 int pkt_offset = 0;
2541                 void *pktbuf;
2542
2543                 /* return some buffers to hardware, one at a time is too slow */
2544                 if (cleaned_count >= IGC_RX_BUFFER_WRITE) {
2545                         igc_alloc_rx_buffers(rx_ring, cleaned_count);
2546                         cleaned_count = 0;
2547                 }
2548
2549                 rx_desc = IGC_RX_DESC(rx_ring, rx_ring->next_to_clean);
2550                 size = le16_to_cpu(rx_desc->wb.upper.length);
2551                 if (!size)
2552                         break;
2553
2554                 /* This memory barrier is needed to keep us from reading
2555                  * any other fields out of the rx_desc until we know the
2556                  * descriptor has been written back
2557                  */
2558                 dma_rmb();
2559
2560                 rx_buffer = igc_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt);
2561                 truesize = igc_get_rx_frame_truesize(rx_ring, size);
2562
2563                 pktbuf = page_address(rx_buffer->page) + rx_buffer->page_offset;
2564
2565                 if (igc_test_staterr(rx_desc, IGC_RXDADV_STAT_TSIP)) {
2566                         timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2567                                                         pktbuf);
2568                         ctx.rx_ts = timestamp;
2569                         pkt_offset = IGC_TS_HDR_LEN;
2570                         size -= IGC_TS_HDR_LEN;
2571                 }
2572
2573                 if (!skb) {
2574                         xdp_init_buff(&ctx.xdp, truesize, &rx_ring->xdp_rxq);
2575                         xdp_prepare_buff(&ctx.xdp, pktbuf - igc_rx_offset(rx_ring),
2576                                          igc_rx_offset(rx_ring) + pkt_offset,
2577                                          size, true);
2578                         xdp_buff_clear_frags_flag(&ctx.xdp);
2579                         ctx.rx_desc = rx_desc;
2580
2581                         skb = igc_xdp_run_prog(adapter, &ctx.xdp);
2582                 }
2583
2584                 if (IS_ERR(skb)) {
2585                         unsigned int xdp_res = -PTR_ERR(skb);
2586
2587                         switch (xdp_res) {
2588                         case IGC_XDP_CONSUMED:
2589                                 rx_buffer->pagecnt_bias++;
2590                                 break;
2591                         case IGC_XDP_TX:
2592                         case IGC_XDP_REDIRECT:
2593                                 igc_rx_buffer_flip(rx_buffer, truesize);
2594                                 xdp_status |= xdp_res;
2595                                 break;
2596                         }
2597
2598                         total_packets++;
2599                         total_bytes += size;
2600                 } else if (skb)
2601                         igc_add_rx_frag(rx_ring, rx_buffer, skb, size);
2602                 else if (ring_uses_build_skb(rx_ring))
2603                         skb = igc_build_skb(rx_ring, rx_buffer, &ctx.xdp);
2604                 else
2605                         skb = igc_construct_skb(rx_ring, rx_buffer, &ctx.xdp,
2606                                                 timestamp);
2607
2608                 /* exit if we failed to retrieve a buffer */
2609                 if (!skb) {
2610                         rx_ring->rx_stats.alloc_failed++;
2611                         rx_buffer->pagecnt_bias++;
2612                         break;
2613                 }
2614
2615                 igc_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt);
2616                 cleaned_count++;
2617
2618                 /* fetch next buffer in frame if non-eop */
2619                 if (igc_is_non_eop(rx_ring, rx_desc))
2620                         continue;
2621
2622                 /* verify the packet layout is correct */
2623                 if (igc_cleanup_headers(rx_ring, rx_desc, skb)) {
2624                         skb = NULL;
2625                         continue;
2626                 }
2627
2628                 /* probably a little skewed due to removing CRC */
2629                 total_bytes += skb->len;
2630
2631                 /* populate checksum, VLAN, and protocol */
2632                 igc_process_skb_fields(rx_ring, rx_desc, skb);
2633
2634                 napi_gro_receive(&q_vector->napi, skb);
2635
2636                 /* reset skb pointer */
2637                 skb = NULL;
2638
2639                 /* update budget accounting */
2640                 total_packets++;
2641         }
2642
2643         if (xdp_status)
2644                 igc_finalize_xdp(adapter, xdp_status);
2645
2646         /* place incomplete frames back on ring for completion */
2647         rx_ring->skb = skb;
2648
2649         igc_update_rx_stats(q_vector, total_packets, total_bytes);
2650
2651         if (cleaned_count)
2652                 igc_alloc_rx_buffers(rx_ring, cleaned_count);
2653
2654         return total_packets;
2655 }
2656
2657 static struct sk_buff *igc_construct_skb_zc(struct igc_ring *ring,
2658                                             struct xdp_buff *xdp)
2659 {
2660         unsigned int totalsize = xdp->data_end - xdp->data_meta;
2661         unsigned int metasize = xdp->data - xdp->data_meta;
2662         struct sk_buff *skb;
2663
2664         net_prefetch(xdp->data_meta);
2665
2666         skb = __napi_alloc_skb(&ring->q_vector->napi, totalsize,
2667                                GFP_ATOMIC | __GFP_NOWARN);
2668         if (unlikely(!skb))
2669                 return NULL;
2670
2671         memcpy(__skb_put(skb, totalsize), xdp->data_meta,
2672                ALIGN(totalsize, sizeof(long)));
2673
2674         if (metasize) {
2675                 skb_metadata_set(skb, metasize);
2676                 __skb_pull(skb, metasize);
2677         }
2678
2679         return skb;
2680 }
2681
2682 static void igc_dispatch_skb_zc(struct igc_q_vector *q_vector,
2683                                 union igc_adv_rx_desc *desc,
2684                                 struct xdp_buff *xdp,
2685                                 ktime_t timestamp)
2686 {
2687         struct igc_ring *ring = q_vector->rx.ring;
2688         struct sk_buff *skb;
2689
2690         skb = igc_construct_skb_zc(ring, xdp);
2691         if (!skb) {
2692                 ring->rx_stats.alloc_failed++;
2693                 return;
2694         }
2695
2696         if (timestamp)
2697                 skb_hwtstamps(skb)->hwtstamp = timestamp;
2698
2699         if (igc_cleanup_headers(ring, desc, skb))
2700                 return;
2701
2702         igc_process_skb_fields(ring, desc, skb);
2703         napi_gro_receive(&q_vector->napi, skb);
2704 }
2705
/**
 * xsk_buff_to_igc_ctx - View a zero-copy xdp_buff as the driver's context
 * @xdp: buffer pointer used by the zero-copy path
 *
 * Buffers on the ZC path are allocated as xdp_buff_xsk. igc_xdp_buff has
 * the same layout, with its private fields landing in xdp_buff_xsk->cb,
 * so the pointer can simply be reinterpreted.
 *
 * Return: @xdp cast to struct igc_xdp_buff.
 */
static struct igc_xdp_buff *xsk_buff_to_igc_ctx(struct xdp_buff *xdp)
{
	struct igc_xdp_buff *ctx = (struct igc_xdp_buff *)xdp;

	return ctx;
}
2714
2715 static int igc_clean_rx_irq_zc(struct igc_q_vector *q_vector, const int budget)
2716 {
2717         struct igc_adapter *adapter = q_vector->adapter;
2718         struct igc_ring *ring = q_vector->rx.ring;
2719         u16 cleaned_count = igc_desc_unused(ring);
2720         int total_bytes = 0, total_packets = 0;
2721         u16 ntc = ring->next_to_clean;
2722         struct bpf_prog *prog;
2723         bool failure = false;
2724         int xdp_status = 0;
2725
2726         rcu_read_lock();
2727
2728         prog = READ_ONCE(adapter->xdp_prog);
2729
2730         while (likely(total_packets < budget)) {
2731                 union igc_adv_rx_desc *desc;
2732                 struct igc_rx_buffer *bi;
2733                 struct igc_xdp_buff *ctx;
2734                 ktime_t timestamp = 0;
2735                 unsigned int size;
2736                 int res;
2737
2738                 desc = IGC_RX_DESC(ring, ntc);
2739                 size = le16_to_cpu(desc->wb.upper.length);
2740                 if (!size)
2741                         break;
2742
2743                 /* This memory barrier is needed to keep us from reading
2744                  * any other fields out of the rx_desc until we know the
2745                  * descriptor has been written back
2746                  */
2747                 dma_rmb();
2748
2749                 bi = &ring->rx_buffer_info[ntc];
2750
2751                 ctx = xsk_buff_to_igc_ctx(bi->xdp);
2752                 ctx->rx_desc = desc;
2753
2754                 if (igc_test_staterr(desc, IGC_RXDADV_STAT_TSIP)) {
2755                         timestamp = igc_ptp_rx_pktstamp(q_vector->adapter,
2756                                                         bi->xdp->data);
2757                         ctx->rx_ts = timestamp;
2758
2759                         bi->xdp->data += IGC_TS_HDR_LEN;
2760
2761                         /* HW timestamp has been copied into local variable. Metadata
2762                          * length when XDP program is called should be 0.
2763                          */
2764                         bi->xdp->data_meta += IGC_TS_HDR_LEN;
2765                         size -= IGC_TS_HDR_LEN;
2766                 }
2767
2768                 bi->xdp->data_end = bi->xdp->data + size;
2769                 xsk_buff_dma_sync_for_cpu(bi->xdp, ring->xsk_pool);
2770
2771                 res = __igc_xdp_run_prog(adapter, prog, bi->xdp);
2772                 switch (res) {
2773                 case IGC_XDP_PASS:
2774                         igc_dispatch_skb_zc(q_vector, desc, bi->xdp, timestamp);
2775                         fallthrough;
2776                 case IGC_XDP_CONSUMED:
2777                         xsk_buff_free(bi->xdp);
2778                         break;
2779                 case IGC_XDP_TX:
2780                 case IGC_XDP_REDIRECT:
2781                         xdp_status |= res;
2782                         break;
2783                 }
2784
2785                 bi->xdp = NULL;
2786                 total_bytes += size;
2787                 total_packets++;
2788                 cleaned_count++;
2789                 ntc++;
2790                 if (ntc == ring->count)
2791                         ntc = 0;
2792         }
2793
2794         ring->next_to_clean = ntc;
2795         rcu_read_unlock();
2796
2797         if (cleaned_count >= IGC_RX_BUFFER_WRITE)
2798                 failure = !igc_alloc_rx_buffers_zc(ring, cleaned_count);
2799
2800         if (xdp_status)
2801                 igc_finalize_xdp(adapter, xdp_status);
2802
2803         igc_update_rx_stats(q_vector, total_packets, total_bytes);
2804
2805         if (xsk_uses_need_wakeup(ring->xsk_pool)) {
2806                 if (failure || ring->next_to_clean == ring->next_to_use)
2807                         xsk_set_rx_need_wakeup(ring->xsk_pool);
2808                 else
2809                         xsk_clear_rx_need_wakeup(ring->xsk_pool);
2810                 return total_packets;
2811         }
2812
2813         return failure ? budget : total_packets;
2814 }
2815
2816 static void igc_update_tx_stats(struct igc_q_vector *q_vector,
2817                                 unsigned int packets, unsigned int bytes)
2818 {
2819         struct igc_ring *ring = q_vector->tx.ring;
2820
2821         u64_stats_update_begin(&ring->tx_syncp);
2822         ring->tx_stats.bytes += bytes;
2823         ring->tx_stats.packets += packets;
2824         u64_stats_update_end(&ring->tx_syncp);
2825
2826         q_vector->tx.total_bytes += bytes;
2827         q_vector->tx.total_packets += packets;
2828 }
2829
2830 static void igc_xdp_xmit_zc(struct igc_ring *ring)
2831 {
2832         struct xsk_buff_pool *pool = ring->xsk_pool;
2833         struct netdev_queue *nq = txring_txq(ring);
2834         union igc_adv_tx_desc *tx_desc = NULL;
2835         int cpu = smp_processor_id();
2836         u16 ntu = ring->next_to_use;
2837         struct xdp_desc xdp_desc;
2838         u16 budget;
2839
2840         if (!netif_carrier_ok(ring->netdev))
2841                 return;
2842
2843         __netif_tx_lock(nq, cpu);
2844
2845         /* Avoid transmit queue timeout since we share it with the slow path */
2846         txq_trans_cond_update(nq);
2847
2848         budget = igc_desc_unused(ring);
2849
2850         while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
2851                 u32 cmd_type, olinfo_status;
2852                 struct igc_tx_buffer *bi;
2853                 dma_addr_t dma;
2854
2855                 cmd_type = IGC_ADVTXD_DTYP_DATA | IGC_ADVTXD_DCMD_DEXT |
2856                            IGC_ADVTXD_DCMD_IFCS | IGC_TXD_DCMD |
2857                            xdp_desc.len;
2858                 olinfo_status = xdp_desc.len << IGC_ADVTXD_PAYLEN_SHIFT;
2859
2860                 dma = xsk_buff_raw_get_dma(pool, xdp_desc.addr);
2861                 xsk_buff_raw_dma_sync_for_device(pool, dma, xdp_desc.len);
2862
2863                 tx_desc = IGC_TX_DESC(ring, ntu);
2864                 tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type);
2865                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
2866                 tx_desc->read.buffer_addr = cpu_to_le64(dma);
2867
2868                 bi = &ring->tx_buffer_info[ntu];
2869                 bi->type = IGC_TX_BUFFER_TYPE_XSK;
2870                 bi->protocol = 0;
2871                 bi->bytecount = xdp_desc.len;
2872                 bi->gso_segs = 1;
2873                 bi->time_stamp = jiffies;
2874                 bi->next_to_watch = tx_desc;
2875
2876                 netdev_tx_sent_queue(txring_txq(ring), xdp_desc.len);
2877
2878                 ntu++;
2879                 if (ntu == ring->count)
2880                         ntu = 0;
2881         }
2882
2883         ring->next_to_use = ntu;
2884         if (tx_desc) {
2885                 igc_flush_tx_descriptors(ring);
2886                 xsk_tx_release(pool);
2887         }
2888
2889         __netif_tx_unlock(nq);
2890 }
2891
2892 /**
2893  * igc_clean_tx_irq - Reclaim resources after transmit completes
2894  * @q_vector: pointer to q_vector containing needed info
2895  * @napi_budget: Used to determine if we are in netpoll
2896  *
2897  * returns true if ring is completely cleaned
2898  */
2899 static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
2900 {
2901         struct igc_adapter *adapter = q_vector->adapter;
2902         unsigned int total_bytes = 0, total_packets = 0;
2903         unsigned int budget = q_vector->tx.work_limit;
2904         struct igc_ring *tx_ring = q_vector->tx.ring;
2905         unsigned int i = tx_ring->next_to_clean;
2906         struct igc_tx_buffer *tx_buffer;
2907         union igc_adv_tx_desc *tx_desc;
2908         u32 xsk_frames = 0;
2909
2910         if (test_bit(__IGC_DOWN, &adapter->state))
2911                 return true;
2912
2913         tx_buffer = &tx_ring->tx_buffer_info[i];
2914         tx_desc = IGC_TX_DESC(tx_ring, i);
2915         i -= tx_ring->count;
2916
2917         do {
2918                 union igc_adv_tx_desc *eop_desc = tx_buffer->next_to_watch;
2919
2920                 /* if next_to_watch is not set then there is no work pending */
2921                 if (!eop_desc)
2922                         break;
2923
2924                 /* prevent any other reads prior to eop_desc */
2925                 smp_rmb();
2926
2927                 /* if DD is not set pending work has not been completed */
2928                 if (!(eop_desc->wb.status & cpu_to_le32(IGC_TXD_STAT_DD)))
2929                         break;
2930
2931                 /* clear next_to_watch to prevent false hangs */
2932                 tx_buffer->next_to_watch = NULL;
2933
2934                 /* update the statistics for this packet */
2935                 total_bytes += tx_buffer->bytecount;
2936                 total_packets += tx_buffer->gso_segs;
2937
2938                 switch (tx_buffer->type) {
2939                 case IGC_TX_BUFFER_TYPE_XSK:
2940                         xsk_frames++;
2941                         break;
2942                 case IGC_TX_BUFFER_TYPE_XDP:
2943                         xdp_return_frame(tx_buffer->xdpf);
2944                         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2945                         break;
2946                 case IGC_TX_BUFFER_TYPE_SKB:
2947                         napi_consume_skb(tx_buffer->skb, napi_budget);
2948                         igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2949                         break;
2950                 default:
2951                         netdev_warn_once(tx_ring->netdev, "Unknown Tx buffer type\n");
2952                         break;
2953                 }
2954
2955                 /* clear last DMA location and unmap remaining buffers */
2956                 while (tx_desc != eop_desc) {
2957                         tx_buffer++;
2958                         tx_desc++;
2959                         i++;
2960                         if (unlikely(!i)) {
2961                                 i -= tx_ring->count;
2962                                 tx_buffer = tx_ring->tx_buffer_info;
2963                                 tx_desc = IGC_TX_DESC(tx_ring, 0);
2964                         }
2965
2966                         /* unmap any remaining paged data */
2967                         if (dma_unmap_len(tx_buffer, len))
2968                                 igc_unmap_tx_buffer(tx_ring->dev, tx_buffer);
2969                 }
2970
2971                 /* move us one more past the eop_desc for start of next pkt */
2972                 tx_buffer++;
2973                 tx_desc++;
2974                 i++;
2975                 if (unlikely(!i)) {
2976                         i -= tx_ring->count;
2977                         tx_buffer = tx_ring->tx_buffer_info;
2978                         tx_desc = IGC_TX_DESC(tx_ring, 0);
2979                 }
2980
2981                 /* issue prefetch for next Tx descriptor */
2982                 prefetch(tx_desc);
2983
2984                 /* update budget accounting */
2985                 budget--;
2986         } while (likely(budget));
2987
2988         netdev_tx_completed_queue(txring_txq(tx_ring),
2989                                   total_packets, total_bytes);
2990
2991         i += tx_ring->count;
2992         tx_ring->next_to_clean = i;
2993
2994         igc_update_tx_stats(q_vector, total_packets, total_bytes);
2995
2996         if (tx_ring->xsk_pool) {
2997                 if (xsk_frames)
2998                         xsk_tx_completed(tx_ring->xsk_pool, xsk_frames);
2999                 if (xsk_uses_need_wakeup(tx_ring->xsk_pool))
3000                         xsk_set_tx_need_wakeup(tx_ring->xsk_pool);
3001                 igc_xdp_xmit_zc(tx_ring);
3002         }
3003
3004         if (test_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) {
3005                 struct igc_hw *hw = &adapter->hw;
3006
3007                 /* Detect a transmit hang in hardware, this serializes the
3008                  * check with the clearing of time_stamp and movement of i
3009                  */
3010                 clear_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
3011                 if (tx_buffer->next_to_watch &&
3012                     time_after(jiffies, tx_buffer->time_stamp +
3013                     (adapter->tx_timeout_factor * HZ)) &&
3014                     !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF) &&
3015                     (rd32(IGC_TDH(tx_ring->reg_idx)) !=
3016                      readl(tx_ring->tail))) {
3017                         /* detected Tx unit hang */
3018                         netdev_err(tx_ring->netdev,
3019                                    "Detected Tx Unit Hang\n"
3020                                    "  Tx Queue             <%d>\n"
3021                                    "  TDH                  <%x>\n"
3022                                    "  TDT                  <%x>\n"
3023                                    "  next_to_use          <%x>\n"
3024                                    "  next_to_clean        <%x>\n"
3025                                    "buffer_info[next_to_clean]\n"
3026                                    "  time_stamp           <%lx>\n"
3027                                    "  next_to_watch        <%p>\n"
3028                                    "  jiffies              <%lx>\n"
3029                                    "  desc.status          <%x>\n",
3030                                    tx_ring->queue_index,
3031                                    rd32(IGC_TDH(tx_ring->reg_idx)),
3032                                    readl(tx_ring->tail),
3033                                    tx_ring->next_to_use,
3034                                    tx_ring->next_to_clean,
3035                                    tx_buffer->time_stamp,
3036                                    tx_buffer->next_to_watch,
3037                                    jiffies,
3038                                    tx_buffer->next_to_watch->wb.status);
3039                         netif_stop_subqueue(tx_ring->netdev,
3040                                             tx_ring->queue_index);
3041
3042                         /* we are about to reset, no point in enabling stuff */
3043                         return true;
3044                 }
3045         }
3046
3047 #define TX_WAKE_THRESHOLD (DESC_NEEDED * 2)
3048         if (unlikely(total_packets &&
3049                      netif_carrier_ok(tx_ring->netdev) &&
3050                      igc_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) {
3051                 /* Make sure that anybody stopping the queue after this
3052                  * sees the new next_to_clean.
3053                  */
3054                 smp_mb();
3055                 if (__netif_subqueue_stopped(tx_ring->netdev,
3056                                              tx_ring->queue_index) &&
3057                     !(test_bit(__IGC_DOWN, &adapter->state))) {
3058                         netif_wake_subqueue(tx_ring->netdev,
3059                                             tx_ring->queue_index);
3060
3061                         u64_stats_update_begin(&tx_ring->tx_syncp);
3062                         tx_ring->tx_stats.restart_queue++;
3063                         u64_stats_update_end(&tx_ring->tx_syncp);
3064                 }
3065         }
3066
3067         return !!budget;
3068 }
3069
3070 static int igc_find_mac_filter(struct igc_adapter *adapter,
3071                                enum igc_mac_filter_type type, const u8 *addr)
3072 {
3073         struct igc_hw *hw = &adapter->hw;
3074         int max_entries = hw->mac.rar_entry_count;
3075         u32 ral, rah;
3076         int i;
3077
3078         for (i = 0; i < max_entries; i++) {
3079                 ral = rd32(IGC_RAL(i));
3080                 rah = rd32(IGC_RAH(i));
3081
3082                 if (!(rah & IGC_RAH_AV))
3083                         continue;
3084                 if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
3085                         continue;
3086                 if ((rah & IGC_RAH_RAH_MASK) !=
3087                     le16_to_cpup((__le16 *)(addr + 4)))
3088                         continue;
3089                 if (ral != le32_to_cpup((__le32 *)(addr)))
3090                         continue;
3091
3092                 return i;
3093         }
3094
3095         return -1;
3096 }
3097
3098 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
3099 {
3100         struct igc_hw *hw = &adapter->hw;
3101         int max_entries = hw->mac.rar_entry_count;
3102         u32 rah;
3103         int i;
3104
3105         for (i = 0; i < max_entries; i++) {
3106                 rah = rd32(IGC_RAH(i));
3107
3108                 if (!(rah & IGC_RAH_AV))
3109                         return i;
3110         }
3111
3112         return -1;
3113 }
3114
3115 /**
3116  * igc_add_mac_filter() - Add MAC address filter
3117  * @adapter: Pointer to adapter where the filter should be added
3118  * @type: MAC address filter type (source or destination)
3119  * @addr: MAC address
3120  * @queue: If non-negative, queue assignment feature is enabled and frames
3121  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3122  *         assignment is disabled.
3123  *
3124  * Return: 0 in case of success, negative errno code otherwise.
3125  */
3126 static int igc_add_mac_filter(struct igc_adapter *adapter,
3127                               enum igc_mac_filter_type type, const u8 *addr,
3128                               int queue)
3129 {
3130         struct net_device *dev = adapter->netdev;
3131         int index;
3132
3133         index = igc_find_mac_filter(adapter, type, addr);
3134         if (index >= 0)
3135                 goto update_filter;
3136
3137         index = igc_get_avail_mac_filter_slot(adapter);
3138         if (index < 0)
3139                 return -ENOSPC;
3140
3141         netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
3142                    index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
3143                    addr, queue);
3144
3145 update_filter:
3146         igc_set_mac_filter_hw(adapter, index, type, addr, queue);
3147         return 0;
3148 }
3149
3150 /**
3151  * igc_del_mac_filter() - Delete MAC address filter
3152  * @adapter: Pointer to adapter where the filter should be deleted from
3153  * @type: MAC address filter type (source or destination)
3154  * @addr: MAC address
3155  */
3156 static void igc_del_mac_filter(struct igc_adapter *adapter,
3157                                enum igc_mac_filter_type type, const u8 *addr)
3158 {
3159         struct net_device *dev = adapter->netdev;
3160         int index;
3161
3162         index = igc_find_mac_filter(adapter, type, addr);
3163         if (index < 0)
3164                 return;
3165
3166         if (index == 0) {
3167                 /* If this is the default filter, we don't actually delete it.
3168                  * We just reset to its default value i.e. disable queue
3169                  * assignment.
3170                  */
3171                 netdev_dbg(dev, "Disable default MAC filter queue assignment");
3172
3173                 igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
3174         } else {
3175                 netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
3176                            index,
3177                            type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
3178                            addr);
3179
3180                 igc_clear_mac_filter_hw(adapter, index);
3181         }
3182 }
3183
3184 /**
3185  * igc_add_vlan_prio_filter() - Add VLAN priority filter
3186  * @adapter: Pointer to adapter where the filter should be added
3187  * @prio: VLAN priority value
3188  * @queue: Queue number which matching frames are assigned to
3189  *
3190  * Return: 0 in case of success, negative errno code otherwise.
3191  */
3192 static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
3193                                     int queue)
3194 {
3195         struct net_device *dev = adapter->netdev;
3196         struct igc_hw *hw = &adapter->hw;
3197         u32 vlanpqf;
3198
3199         vlanpqf = rd32(IGC_VLANPQF);
3200
3201         if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
3202                 netdev_dbg(dev, "VLAN priority filter already in use\n");
3203                 return -EEXIST;
3204         }
3205
3206         vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
3207         vlanpqf |= IGC_VLANPQF_VALID(prio);
3208
3209         wr32(IGC_VLANPQF, vlanpqf);
3210
3211         netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
3212                    prio, queue);
3213         return 0;
3214 }
3215
3216 /**
3217  * igc_del_vlan_prio_filter() - Delete VLAN priority filter
3218  * @adapter: Pointer to adapter where the filter should be deleted from
3219  * @prio: VLAN priority value
3220  */
3221 static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
3222 {
3223         struct igc_hw *hw = &adapter->hw;
3224         u32 vlanpqf;
3225
3226         vlanpqf = rd32(IGC_VLANPQF);
3227
3228         vlanpqf &= ~IGC_VLANPQF_VALID(prio);
3229         vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
3230
3231         wr32(IGC_VLANPQF, vlanpqf);
3232
3233         netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
3234                    prio);
3235 }
3236
3237 static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
3238 {
3239         struct igc_hw *hw = &adapter->hw;
3240         int i;
3241
3242         for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3243                 u32 etqf = rd32(IGC_ETQF(i));
3244
3245                 if (!(etqf & IGC_ETQF_FILTER_ENABLE))
3246                         return i;
3247         }
3248
3249         return -1;
3250 }
3251
3252 /**
3253  * igc_add_etype_filter() - Add ethertype filter
3254  * @adapter: Pointer to adapter where the filter should be added
3255  * @etype: Ethertype value
3256  * @queue: If non-negative, queue assignment feature is enabled and frames
3257  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
3258  *         assignment is disabled.
3259  *
3260  * Return: 0 in case of success, negative errno code otherwise.
3261  */
3262 static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
3263                                 int queue)
3264 {
3265         struct igc_hw *hw = &adapter->hw;
3266         int index;
3267         u32 etqf;
3268
3269         index = igc_get_avail_etype_filter_slot(adapter);
3270         if (index < 0)
3271                 return -ENOSPC;
3272
3273         etqf = rd32(IGC_ETQF(index));
3274
3275         etqf &= ~IGC_ETQF_ETYPE_MASK;
3276         etqf |= etype;
3277
3278         if (queue >= 0) {
3279                 etqf &= ~IGC_ETQF_QUEUE_MASK;
3280                 etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
3281                 etqf |= IGC_ETQF_QUEUE_ENABLE;
3282         }
3283
3284         etqf |= IGC_ETQF_FILTER_ENABLE;
3285
3286         wr32(IGC_ETQF(index), etqf);
3287
3288         netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
3289                    etype, queue);
3290         return 0;
3291 }
3292
3293 static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
3294 {
3295         struct igc_hw *hw = &adapter->hw;
3296         int i;
3297
3298         for (i = 0; i < MAX_ETYPE_FILTER; i++) {
3299                 u32 etqf = rd32(IGC_ETQF(i));
3300
3301                 if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
3302                         return i;
3303         }
3304
3305         return -1;
3306 }
3307
3308 /**
3309  * igc_del_etype_filter() - Delete ethertype filter
3310  * @adapter: Pointer to adapter where the filter should be deleted from
3311  * @etype: Ethertype value
3312  */
3313 static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
3314 {
3315         struct igc_hw *hw = &adapter->hw;
3316         int index;
3317
3318         index = igc_find_etype_filter(adapter, etype);
3319         if (index < 0)
3320                 return;
3321
3322         wr32(IGC_ETQF(index), 0);
3323
3324         netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
3325                    etype);
3326 }
3327
3328 static int igc_flex_filter_select(struct igc_adapter *adapter,
3329                                   struct igc_flex_filter *input,
3330                                   u32 *fhft)
3331 {
3332         struct igc_hw *hw = &adapter->hw;
3333         u8 fhft_index;
3334         u32 fhftsl;
3335
3336         if (input->index >= MAX_FLEX_FILTER) {
3337                 dev_err(&adapter->pdev->dev, "Wrong Flex Filter index selected!\n");
3338                 return -EINVAL;
3339         }
3340
3341         /* Indirect table select register */
3342         fhftsl = rd32(IGC_FHFTSL);
3343         fhftsl &= ~IGC_FHFTSL_FTSL_MASK;
3344         switch (input->index) {
3345         case 0 ... 7:
3346                 fhftsl |= 0x00;
3347                 break;
3348         case 8 ... 15:
3349                 fhftsl |= 0x01;
3350                 break;
3351         case 16 ... 23:
3352                 fhftsl |= 0x02;
3353                 break;
3354         case 24 ... 31:
3355                 fhftsl |= 0x03;
3356                 break;
3357         }
3358         wr32(IGC_FHFTSL, fhftsl);
3359
3360         /* Normalize index down to host table register */
3361         fhft_index = input->index % 8;
3362
3363         *fhft = (fhft_index < 4) ? IGC_FHFT(fhft_index) :
3364                 IGC_FHFT_EXT(fhft_index - 4);
3365
3366         return 0;
3367 }
3368
3369 static int igc_write_flex_filter_ll(struct igc_adapter *adapter,
3370                                     struct igc_flex_filter *input)
3371 {
3372         struct device *dev = &adapter->pdev->dev;
3373         struct igc_hw *hw = &adapter->hw;
3374         u8 *data = input->data;
3375         u8 *mask = input->mask;
3376         u32 queuing;
3377         u32 fhft;
3378         u32 wufc;
3379         int ret;
3380         int i;
3381
3382         /* Length has to be aligned to 8. Otherwise the filter will fail. Bail
3383          * out early to avoid surprises later.
3384          */
3385         if (input->length % 8 != 0) {
3386                 dev_err(dev, "The length of a flex filter has to be 8 byte aligned!\n");
3387                 return -EINVAL;
3388         }
3389
3390         /* Select corresponding flex filter register and get base for host table. */
3391         ret = igc_flex_filter_select(adapter, input, &fhft);
3392         if (ret)
3393                 return ret;
3394
3395         /* When adding a filter globally disable flex filter feature. That is
3396          * recommended within the datasheet.
3397          */
3398         wufc = rd32(IGC_WUFC);
3399         wufc &= ~IGC_WUFC_FLEX_HQ;
3400         wr32(IGC_WUFC, wufc);
3401
3402         /* Configure filter */
3403         queuing = input->length & IGC_FHFT_LENGTH_MASK;
3404         queuing |= (input->rx_queue << IGC_FHFT_QUEUE_SHIFT) & IGC_FHFT_QUEUE_MASK;
3405         queuing |= (input->prio << IGC_FHFT_PRIO_SHIFT) & IGC_FHFT_PRIO_MASK;
3406
3407         if (input->immediate_irq)
3408                 queuing |= IGC_FHFT_IMM_INT;
3409
3410         if (input->drop)
3411                 queuing |= IGC_FHFT_DROP;
3412
3413         wr32(fhft + 0xFC, queuing);
3414
3415         /* Write data (128 byte) and mask (128 bit) */
3416         for (i = 0; i < 16; ++i) {
3417                 const size_t data_idx = i * 8;
3418                 const size_t row_idx = i * 16;
3419                 u32 dw0 =
3420                         (data[data_idx + 0] << 0) |
3421                         (data[data_idx + 1] << 8) |
3422                         (data[data_idx + 2] << 16) |
3423                         (data[data_idx + 3] << 24);
3424                 u32 dw1 =
3425                         (data[data_idx + 4] << 0) |
3426                         (data[data_idx + 5] << 8) |
3427                         (data[data_idx + 6] << 16) |
3428                         (data[data_idx + 7] << 24);
3429                 u32 tmp;
3430
3431                 /* Write row: dw0, dw1 and mask */
3432                 wr32(fhft + row_idx, dw0);
3433                 wr32(fhft + row_idx + 4, dw1);
3434
3435                 /* mask is only valid for MASK(7, 0) */
3436                 tmp = rd32(fhft + row_idx + 8);
3437                 tmp &= ~GENMASK(7, 0);
3438                 tmp |= mask[i];
3439                 wr32(fhft + row_idx + 8, tmp);
3440         }
3441
3442         /* Enable filter. */
3443         wufc |= IGC_WUFC_FLEX_HQ;
3444         if (input->index > 8) {
3445                 /* Filter 0-7 are enabled via WUFC. The other 24 filters are not. */
3446                 u32 wufc_ext = rd32(IGC_WUFC_EXT);
3447
3448                 wufc_ext |= (IGC_WUFC_EXT_FLX8 << (input->index - 8));
3449
3450                 wr32(IGC_WUFC_EXT, wufc_ext);
3451         } else {
3452                 wufc |= (IGC_WUFC_FLX0 << input->index);
3453         }
3454         wr32(IGC_WUFC, wufc);
3455
3456         dev_dbg(&adapter->pdev->dev, "Added flex filter %u to HW.\n",
3457                 input->index);
3458
3459         return 0;
3460 }
3461
3462 static void igc_flex_filter_add_field(struct igc_flex_filter *flex,
3463                                       const void *src, unsigned int offset,
3464                                       size_t len, const void *mask)
3465 {
3466         int i;
3467
3468         /* data */
3469         memcpy(&flex->data[offset], src, len);
3470
3471         /* mask */
3472         for (i = 0; i < len; ++i) {
3473                 const unsigned int idx = i + offset;
3474                 const u8 *ptr = mask;
3475
3476                 if (mask) {
3477                         if (ptr[i] & 0xff)
3478                                 flex->mask[idx / 8] |= BIT(idx % 8);
3479
3480                         continue;
3481                 }
3482
3483                 flex->mask[idx / 8] |= BIT(idx % 8);
3484         }
3485 }
3486
3487 static int igc_find_avail_flex_filter_slot(struct igc_adapter *adapter)
3488 {
3489         struct igc_hw *hw = &adapter->hw;
3490         u32 wufc, wufc_ext;
3491         int i;
3492
3493         wufc = rd32(IGC_WUFC);
3494         wufc_ext = rd32(IGC_WUFC_EXT);
3495
3496         for (i = 0; i < MAX_FLEX_FILTER; i++) {
3497                 if (i < 8) {
3498                         if (!(wufc & (IGC_WUFC_FLX0 << i)))
3499                                 return i;
3500                 } else {
3501                         if (!(wufc_ext & (IGC_WUFC_EXT_FLX8 << (i - 8))))
3502                                 return i;
3503                 }
3504         }
3505
3506         return -ENOSPC;
3507 }
3508
3509 static bool igc_flex_filter_in_use(struct igc_adapter *adapter)
3510 {
3511         struct igc_hw *hw = &adapter->hw;
3512         u32 wufc, wufc_ext;
3513
3514         wufc = rd32(IGC_WUFC);
3515         wufc_ext = rd32(IGC_WUFC_EXT);
3516
3517         if (wufc & IGC_WUFC_FILTER_MASK)
3518                 return true;
3519
3520         if (wufc_ext & IGC_WUFC_EXT_FILTER_MASK)
3521                 return true;
3522
3523         return false;
3524 }
3525
3526 static int igc_add_flex_filter(struct igc_adapter *adapter,
3527                                struct igc_nfc_rule *rule)
3528 {
3529         struct igc_flex_filter flex = { };
3530         struct igc_nfc_filter *filter = &rule->filter;
3531         unsigned int eth_offset, user_offset;
3532         int ret, index;
3533         bool vlan;
3534
3535         index = igc_find_avail_flex_filter_slot(adapter);
3536         if (index < 0)
3537                 return -ENOSPC;
3538
3539         /* Construct the flex filter:
3540          *  -> dest_mac [6]
3541          *  -> src_mac [6]
3542          *  -> tpid [2]
3543          *  -> vlan tci [2]
3544          *  -> ether type [2]
3545          *  -> user data [8]
3546          *  -> = 26 bytes => 32 length
3547          */
3548         flex.index    = index;
3549         flex.length   = 32;
3550         flex.rx_queue = rule->action;
3551
3552         vlan = rule->filter.vlan_tci || rule->filter.vlan_etype;
3553         eth_offset = vlan ? 16 : 12;
3554         user_offset = vlan ? 18 : 14;
3555
3556         /* Add destination MAC  */
3557         if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3558                 igc_flex_filter_add_field(&flex, &filter->dst_addr, 0,
3559                                           ETH_ALEN, NULL);
3560
3561         /* Add source MAC */
3562         if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3563                 igc_flex_filter_add_field(&flex, &filter->src_addr, 6,
3564                                           ETH_ALEN, NULL);
3565
3566         /* Add VLAN etype */
3567         if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_ETYPE)
3568                 igc_flex_filter_add_field(&flex, &filter->vlan_etype, 12,
3569                                           sizeof(filter->vlan_etype),
3570                                           NULL);
3571
3572         /* Add VLAN TCI */
3573         if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
3574                 igc_flex_filter_add_field(&flex, &filter->vlan_tci, 14,
3575                                           sizeof(filter->vlan_tci), NULL);
3576
3577         /* Add Ether type */
3578         if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3579                 __be16 etype = cpu_to_be16(filter->etype);
3580
3581                 igc_flex_filter_add_field(&flex, &etype, eth_offset,
3582                                           sizeof(etype), NULL);
3583         }
3584
3585         /* Add user data */
3586         if (rule->filter.match_flags & IGC_FILTER_FLAG_USER_DATA)
3587                 igc_flex_filter_add_field(&flex, &filter->user_data,
3588                                           user_offset,
3589                                           sizeof(filter->user_data),
3590                                           filter->user_mask);
3591
3592         /* Add it down to the hardware and enable it. */
3593         ret = igc_write_flex_filter_ll(adapter, &flex);
3594         if (ret)
3595                 return ret;
3596
3597         filter->flex_index = index;
3598
3599         return 0;
3600 }
3601
3602 static void igc_del_flex_filter(struct igc_adapter *adapter,
3603                                 u16 reg_index)
3604 {
3605         struct igc_hw *hw = &adapter->hw;
3606         u32 wufc;
3607
3608         /* Just disable the filter. The filter table itself is kept
3609          * intact. Another flex_filter_add() should override the "old" data
3610          * then.
3611          */
3612         if (reg_index > 8) {
3613                 u32 wufc_ext = rd32(IGC_WUFC_EXT);
3614
3615                 wufc_ext &= ~(IGC_WUFC_EXT_FLX8 << (reg_index - 8));
3616                 wr32(IGC_WUFC_EXT, wufc_ext);
3617         } else {
3618                 wufc = rd32(IGC_WUFC);
3619
3620                 wufc &= ~(IGC_WUFC_FLX0 << reg_index);
3621                 wr32(IGC_WUFC, wufc);
3622         }
3623
3624         if (igc_flex_filter_in_use(adapter))
3625                 return;
3626
3627         /* No filters are in use, we may disable flex filters */
3628         wufc = rd32(IGC_WUFC);
3629         wufc &= ~IGC_WUFC_FLEX_HQ;
3630         wr32(IGC_WUFC, wufc);
3631 }
3632
3633 static int igc_enable_nfc_rule(struct igc_adapter *adapter,
3634                                struct igc_nfc_rule *rule)
3635 {
3636         int err;
3637
3638         if (rule->flex) {
3639                 return igc_add_flex_filter(adapter, rule);
3640         }
3641
3642         if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
3643                 err = igc_add_etype_filter(adapter, rule->filter.etype,
3644                                            rule->action);
3645                 if (err)
3646                         return err;
3647         }
3648
3649         if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
3650                 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3651                                          rule->filter.src_addr, rule->action);
3652                 if (err)
3653                         return err;
3654         }
3655
3656         if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
3657                 err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3658                                          rule->filter.dst_addr, rule->action);
3659                 if (err)
3660                         return err;
3661         }
3662
3663         if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3664                 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3665                            VLAN_PRIO_SHIFT;
3666
3667                 err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
3668                 if (err)
3669                         return err;
3670         }
3671
3672         return 0;
3673 }
3674
3675 static void igc_disable_nfc_rule(struct igc_adapter *adapter,
3676                                  const struct igc_nfc_rule *rule)
3677 {
3678         if (rule->flex) {
3679                 igc_del_flex_filter(adapter, rule->filter.flex_index);
3680                 return;
3681         }
3682
3683         if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
3684                 igc_del_etype_filter(adapter, rule->filter.etype);
3685
3686         if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
3687                 int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
3688                            VLAN_PRIO_SHIFT;
3689
3690                 igc_del_vlan_prio_filter(adapter, prio);
3691         }
3692
3693         if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
3694                 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
3695                                    rule->filter.src_addr);
3696
3697         if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
3698                 igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
3699                                    rule->filter.dst_addr);
3700 }
3701
3702 /**
3703  * igc_get_nfc_rule() - Get NFC rule
3704  * @adapter: Pointer to adapter
3705  * @location: Rule location
3706  *
3707  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3708  *
3709  * Return: Pointer to NFC rule at @location. If not found, NULL.
3710  */
3711 struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
3712                                       u32 location)
3713 {
3714         struct igc_nfc_rule *rule;
3715
3716         list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
3717                 if (rule->location == location)
3718                         return rule;
3719                 if (rule->location > location)
3720                         break;
3721         }
3722
3723         return NULL;
3724 }
3725
3726 /**
3727  * igc_del_nfc_rule() - Delete NFC rule
3728  * @adapter: Pointer to adapter
3729  * @rule: Pointer to rule to be deleted
3730  *
3731  * Disable NFC rule in hardware and delete it from adapter.
3732  *
3733  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3734  */
3735 void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3736 {
3737         igc_disable_nfc_rule(adapter, rule);
3738
3739         list_del(&rule->list);
3740         adapter->nfc_rule_count--;
3741
3742         kfree(rule);
3743 }
3744
3745 static void igc_flush_nfc_rules(struct igc_adapter *adapter)
3746 {
3747         struct igc_nfc_rule *rule, *tmp;
3748
3749         mutex_lock(&adapter->nfc_rule_lock);
3750
3751         list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
3752                 igc_del_nfc_rule(adapter, rule);
3753
3754         mutex_unlock(&adapter->nfc_rule_lock);
3755 }
3756
3757 /**
3758  * igc_add_nfc_rule() - Add NFC rule
3759  * @adapter: Pointer to adapter
3760  * @rule: Pointer to rule to be added
3761  *
3762  * Enable NFC rule in hardware and add it to adapter.
3763  *
3764  * Context: Expects adapter->nfc_rule_lock to be held by caller.
3765  *
3766  * Return: 0 on success, negative errno on failure.
3767  */
3768 int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
3769 {
3770         struct igc_nfc_rule *pred, *cur;
3771         int err;
3772
3773         err = igc_enable_nfc_rule(adapter, rule);
3774         if (err)
3775                 return err;
3776
3777         pred = NULL;
3778         list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
3779                 if (cur->location >= rule->location)
3780                         break;
3781                 pred = cur;
3782         }
3783
3784         list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
3785         adapter->nfc_rule_count++;
3786         return 0;
3787 }
3788
3789 static void igc_restore_nfc_rules(struct igc_adapter *adapter)
3790 {
3791         struct igc_nfc_rule *rule;
3792
3793         mutex_lock(&adapter->nfc_rule_lock);
3794
3795         list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
3796                 igc_enable_nfc_rule(adapter, rule);
3797
3798         mutex_unlock(&adapter->nfc_rule_lock);
3799 }
3800
3801 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
3802 {
3803         struct igc_adapter *adapter = netdev_priv(netdev);
3804
3805         return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
3806 }
3807
3808 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
3809 {
3810         struct igc_adapter *adapter = netdev_priv(netdev);
3811
3812         igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
3813         return 0;
3814 }
3815
3816 /**
3817  * igc_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3818  * @netdev: network interface device structure
3819  *
3820  * The set_rx_mode entry point is called whenever the unicast or multicast
3821  * address lists or the network interface flags are updated.  This routine is
3822  * responsible for configuring the hardware for proper unicast, multicast,
3823  * promiscuous mode, and all-multi behavior.
3824  */
3825 static void igc_set_rx_mode(struct net_device *netdev)
3826 {
3827         struct igc_adapter *adapter = netdev_priv(netdev);
3828         struct igc_hw *hw = &adapter->hw;
3829         u32 rctl = 0, rlpml = MAX_JUMBO_FRAME_SIZE;
3830         int count;
3831
3832         /* Check for Promiscuous and All Multicast modes */
3833         if (netdev->flags & IFF_PROMISC) {
3834                 rctl |= IGC_RCTL_UPE | IGC_RCTL_MPE;
3835         } else {
3836                 if (netdev->flags & IFF_ALLMULTI) {
3837                         rctl |= IGC_RCTL_MPE;
3838                 } else {
3839                         /* Write addresses to the MTA, if the attempt fails
3840                          * then we should just turn on promiscuous mode so
3841                          * that we can at least receive multicast traffic
3842                          */
3843                         count = igc_write_mc_addr_list(netdev);
3844                         if (count < 0)
3845                                 rctl |= IGC_RCTL_MPE;
3846                 }
3847         }
3848
3849         /* Write addresses to available RAR registers, if there is not
3850          * sufficient space to store all the addresses then enable
3851          * unicast promiscuous mode
3852          */
3853         if (__dev_uc_sync(netdev, igc_uc_sync, igc_uc_unsync))
3854                 rctl |= IGC_RCTL_UPE;
3855
3856         /* update state of unicast and multicast */
3857         rctl |= rd32(IGC_RCTL) & ~(IGC_RCTL_UPE | IGC_RCTL_MPE);
3858         wr32(IGC_RCTL, rctl);
3859
3860 #if (PAGE_SIZE < 8192)
3861         if (adapter->max_frame_size <= IGC_MAX_FRAME_BUILD_SKB)
3862                 rlpml = IGC_MAX_FRAME_BUILD_SKB;
3863 #endif
3864         wr32(IGC_RLPML, rlpml);
3865 }
3866
3867 /**
3868  * igc_configure - configure the hardware for RX and TX
3869  * @adapter: private board structure
3870  */
3871 static void igc_configure(struct igc_adapter *adapter)
3872 {
3873         struct net_device *netdev = adapter->netdev;
3874         int i = 0;
3875
3876         igc_get_hw_control(adapter);
3877         igc_set_rx_mode(netdev);
3878
3879         igc_restore_vlan(adapter);
3880
3881         igc_setup_tctl(adapter);
3882         igc_setup_mrqc(adapter);
3883         igc_setup_rctl(adapter);
3884
3885         igc_set_default_mac_filter(adapter);
3886         igc_restore_nfc_rules(adapter);
3887
3888         igc_configure_tx(adapter);
3889         igc_configure_rx(adapter);
3890
3891         igc_rx_fifo_flush_base(&adapter->hw);
3892
3893         /* call igc_desc_unused which always leaves
3894          * at least 1 descriptor unused to make sure
3895          * next_to_use != next_to_clean
3896          */
3897         for (i = 0; i < adapter->num_rx_queues; i++) {
3898                 struct igc_ring *ring = adapter->rx_ring[i];
3899
3900                 if (ring->xsk_pool)
3901                         igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
3902                 else
3903                         igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
3904         }
3905 }
3906
3907 /**
3908  * igc_write_ivar - configure ivar for given MSI-X vector
3909  * @hw: pointer to the HW structure
3910  * @msix_vector: vector number we are allocating to a given ring
3911  * @index: row index of IVAR register to write within IVAR table
3912  * @offset: column offset of in IVAR, should be multiple of 8
3913  *
3914  * The IVAR table consists of 2 columns,
3915  * each containing an cause allocation for an Rx and Tx ring, and a
3916  * variable number of rows depending on the number of queues supported.
3917  */
3918 static void igc_write_ivar(struct igc_hw *hw, int msix_vector,
3919                            int index, int offset)
3920 {
3921         u32 ivar = array_rd32(IGC_IVAR0, index);
3922
3923         /* clear any bits that are currently set */
3924         ivar &= ~((u32)0xFF << offset);
3925
3926         /* write vector and valid bit */
3927         ivar |= (msix_vector | IGC_IVAR_VALID) << offset;
3928
3929         array_wr32(IGC_IVAR0, index, ivar);
3930 }
3931
3932 static void igc_assign_vector(struct igc_q_vector *q_vector, int msix_vector)
3933 {
3934         struct igc_adapter *adapter = q_vector->adapter;
3935         struct igc_hw *hw = &adapter->hw;
3936         int rx_queue = IGC_N0_QUEUE;
3937         int tx_queue = IGC_N0_QUEUE;
3938
3939         if (q_vector->rx.ring)
3940                 rx_queue = q_vector->rx.ring->reg_idx;
3941         if (q_vector->tx.ring)
3942                 tx_queue = q_vector->tx.ring->reg_idx;
3943
3944         switch (hw->mac.type) {
3945         case igc_i225:
3946                 if (rx_queue > IGC_N0_QUEUE)
3947                         igc_write_ivar(hw, msix_vector,
3948                                        rx_queue >> 1,
3949                                        (rx_queue & 0x1) << 4);
3950                 if (tx_queue > IGC_N0_QUEUE)
3951                         igc_write_ivar(hw, msix_vector,
3952                                        tx_queue >> 1,
3953                                        ((tx_queue & 0x1) << 4) + 8);
3954                 q_vector->eims_value = BIT(msix_vector);
3955                 break;
3956         default:
3957                 WARN_ONCE(hw->mac.type != igc_i225, "Wrong MAC type\n");
3958                 break;
3959         }
3960
3961         /* add q_vector eims value to global eims_enable_mask */
3962         adapter->eims_enable_mask |= q_vector->eims_value;
3963
3964         /* configure q_vector to set itr on first interrupt */
3965         q_vector->set_itr = 1;
3966 }
3967
3968 /**
3969  * igc_configure_msix - Configure MSI-X hardware
3970  * @adapter: Pointer to adapter structure
3971  *
3972  * igc_configure_msix sets up the hardware to properly
3973  * generate MSI-X interrupts.
3974  */
3975 static void igc_configure_msix(struct igc_adapter *adapter)
3976 {
3977         struct igc_hw *hw = &adapter->hw;
3978         int i, vector = 0;
3979         u32 tmp;
3980
3981         adapter->eims_enable_mask = 0;
3982
3983         /* set vector for other causes, i.e. link changes */
3984         switch (hw->mac.type) {
3985         case igc_i225:
3986                 /* Turn on MSI-X capability first, or our settings
3987                  * won't stick.  And it will take days to debug.
3988                  */
3989                 wr32(IGC_GPIE, IGC_GPIE_MSIX_MODE |
3990                      IGC_GPIE_PBA | IGC_GPIE_EIAME |
3991                      IGC_GPIE_NSICR);
3992
3993                 /* enable msix_other interrupt */
3994                 adapter->eims_other = BIT(vector);
3995                 tmp = (vector++ | IGC_IVAR_VALID) << 8;
3996
3997                 wr32(IGC_IVAR_MISC, tmp);
3998                 break;
3999         default:
4000                 /* do nothing, since nothing else supports MSI-X */
4001                 break;
4002         } /* switch (hw->mac.type) */
4003
4004         adapter->eims_enable_mask |= adapter->eims_other;
4005
4006         for (i = 0; i < adapter->num_q_vectors; i++)
4007                 igc_assign_vector(adapter->q_vector[i], vector++);
4008
4009         wrfl();
4010 }
4011
4012 /**
4013  * igc_irq_enable - Enable default interrupt generation settings
4014  * @adapter: board private structure
4015  */
4016 static void igc_irq_enable(struct igc_adapter *adapter)
4017 {
4018         struct igc_hw *hw = &adapter->hw;
4019
4020         if (adapter->msix_entries) {
4021                 u32 ims = IGC_IMS_LSC | IGC_IMS_DOUTSYNC | IGC_IMS_DRSTA;
4022                 u32 regval = rd32(IGC_EIAC);
4023
4024                 wr32(IGC_EIAC, regval | adapter->eims_enable_mask);
4025                 regval = rd32(IGC_EIAM);
4026                 wr32(IGC_EIAM, regval | adapter->eims_enable_mask);
4027                 wr32(IGC_EIMS, adapter->eims_enable_mask);
4028                 wr32(IGC_IMS, ims);
4029         } else {
4030                 wr32(IGC_IMS, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
4031                 wr32(IGC_IAM, IMS_ENABLE_MASK | IGC_IMS_DRSTA);
4032         }
4033 }
4034
4035 /**
4036  * igc_irq_disable - Mask off interrupt generation on the NIC
4037  * @adapter: board private structure
4038  */
4039 static void igc_irq_disable(struct igc_adapter *adapter)
4040 {
4041         struct igc_hw *hw = &adapter->hw;
4042
4043         if (adapter->msix_entries) {
4044                 u32 regval = rd32(IGC_EIAM);
4045
4046                 wr32(IGC_EIAM, regval & ~adapter->eims_enable_mask);
4047                 wr32(IGC_EIMC, adapter->eims_enable_mask);
4048                 regval = rd32(IGC_EIAC);
4049                 wr32(IGC_EIAC, regval & ~adapter->eims_enable_mask);
4050         }
4051
4052         wr32(IGC_IAM, 0);
4053         wr32(IGC_IMC, ~0);
4054         wrfl();
4055
4056         if (adapter->msix_entries) {
4057                 int vector = 0, i;
4058
4059                 synchronize_irq(adapter->msix_entries[vector++].vector);
4060
4061                 for (i = 0; i < adapter->num_q_vectors; i++)
4062                         synchronize_irq(adapter->msix_entries[vector++].vector);
4063         } else {
4064                 synchronize_irq(adapter->pdev->irq);
4065         }
4066 }
4067
4068 void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
4069                               const u32 max_rss_queues)
4070 {
4071         /* Determine if we need to pair queues. */
4072         /* If rss_queues > half of max_rss_queues, pair the queues in
4073          * order to conserve interrupts due to limited supply.
4074          */
4075         if (adapter->rss_queues > (max_rss_queues / 2))
4076                 adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4077         else
4078                 adapter->flags &= ~IGC_FLAG_QUEUE_PAIRS;
4079 }
4080
4081 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
4082 {
4083         return IGC_MAX_RX_QUEUES;
4084 }
4085
4086 static void igc_init_queue_configuration(struct igc_adapter *adapter)
4087 {
4088         u32 max_rss_queues;
4089
4090         max_rss_queues = igc_get_max_rss_queues(adapter);
4091         adapter->rss_queues = min_t(u32, max_rss_queues, num_online_cpus());
4092
4093         igc_set_flag_queue_pairs(adapter, max_rss_queues);
4094 }
4095
4096 /**
4097  * igc_reset_q_vector - Reset config for interrupt vector
4098  * @adapter: board private structure to initialize
4099  * @v_idx: Index of vector to be reset
4100  *
4101  * If NAPI is enabled it will delete any references to the
4102  * NAPI struct. This is preparation for igc_free_q_vector.
4103  */
4104 static void igc_reset_q_vector(struct igc_adapter *adapter, int v_idx)
4105 {
4106         struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
4107
4108         /* if we're coming from igc_set_interrupt_capability, the vectors are
4109          * not yet allocated
4110          */
4111         if (!q_vector)
4112                 return;
4113
4114         if (q_vector->tx.ring)
4115                 adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL;
4116
4117         if (q_vector->rx.ring)
4118                 adapter->rx_ring[q_vector->rx.ring->queue_index] = NULL;
4119
4120         netif_napi_del(&q_vector->napi);
4121 }
4122
4123 /**
4124  * igc_free_q_vector - Free memory allocated for specific interrupt vector
4125  * @adapter: board private structure to initialize
4126  * @v_idx: Index of vector to be freed
4127  *
4128  * This function frees the memory allocated to the q_vector.
4129  */
4130 static void igc_free_q_vector(struct igc_adapter *adapter, int v_idx)
4131 {
4132         struct igc_q_vector *q_vector = adapter->q_vector[v_idx];
4133
4134         adapter->q_vector[v_idx] = NULL;
4135
4136         /* igc_get_stats64() might access the rings on this vector,
4137          * we must wait a grace period before freeing it.
4138          */
4139         if (q_vector)
4140                 kfree_rcu(q_vector, rcu);
4141 }
4142
4143 /**
4144  * igc_free_q_vectors - Free memory allocated for interrupt vectors
4145  * @adapter: board private structure to initialize
4146  *
4147  * This function frees the memory allocated to the q_vectors.  In addition if
4148  * NAPI is enabled it will delete any references to the NAPI struct prior
4149  * to freeing the q_vector.
4150  */
4151 static void igc_free_q_vectors(struct igc_adapter *adapter)
4152 {
4153         int v_idx = adapter->num_q_vectors;
4154
4155         adapter->num_tx_queues = 0;
4156         adapter->num_rx_queues = 0;
4157         adapter->num_q_vectors = 0;
4158
4159         while (v_idx--) {
4160                 igc_reset_q_vector(adapter, v_idx);
4161                 igc_free_q_vector(adapter, v_idx);
4162         }
4163 }
4164
4165 /**
4166  * igc_update_itr - update the dynamic ITR value based on statistics
4167  * @q_vector: pointer to q_vector
4168  * @ring_container: ring info to update the itr for
4169  *
4170  * Stores a new ITR value based on packets and byte
4171  * counts during the last interrupt.  The advantage of per interrupt
4172  * computation is faster updates and more accurate ITR for the current
4173  * traffic pattern.  Constants in this function were computed
4174  * based on theoretical maximum wire speed and thresholds were set based
4175  * on testing data as well as attempting to minimize response time
4176  * while increasing bulk throughput.
4177  * NOTE: These calculations are only valid when operating in a single-
4178  * queue environment.
4179  */
4180 static void igc_update_itr(struct igc_q_vector *q_vector,
4181                            struct igc_ring_container *ring_container)
4182 {
4183         unsigned int packets = ring_container->total_packets;
4184         unsigned int bytes = ring_container->total_bytes;
4185         u8 itrval = ring_container->itr;
4186
4187         /* no packets, exit with status unchanged */
4188         if (packets == 0)
4189                 return;
4190
4191         switch (itrval) {
4192         case lowest_latency:
4193                 /* handle TSO and jumbo frames */
4194                 if (bytes / packets > 8000)
4195                         itrval = bulk_latency;
4196                 else if ((packets < 5) && (bytes > 512))
4197                         itrval = low_latency;
4198                 break;
4199         case low_latency:  /* 50 usec aka 20000 ints/s */
4200                 if (bytes > 10000) {
4201                         /* this if handles the TSO accounting */
4202                         if (bytes / packets > 8000)
4203                                 itrval = bulk_latency;
4204                         else if ((packets < 10) || ((bytes / packets) > 1200))
4205                                 itrval = bulk_latency;
4206                         else if ((packets > 35))
4207                                 itrval = lowest_latency;
4208                 } else if (bytes / packets > 2000) {
4209                         itrval = bulk_latency;
4210                 } else if (packets <= 2 && bytes < 512) {
4211                         itrval = lowest_latency;
4212                 }
4213                 break;
4214         case bulk_latency: /* 250 usec aka 4000 ints/s */
4215                 if (bytes > 25000) {
4216                         if (packets > 35)
4217                                 itrval = low_latency;
4218                 } else if (bytes < 1500) {
4219                         itrval = low_latency;
4220                 }
4221                 break;
4222         }
4223
4224         /* clear work counters since we have the values we need */
4225         ring_container->total_bytes = 0;
4226         ring_container->total_packets = 0;
4227
4228         /* write updated itr to ring container */
4229         ring_container->itr = itrval;
4230 }
4231
4232 static void igc_set_itr(struct igc_q_vector *q_vector)
4233 {
4234         struct igc_adapter *adapter = q_vector->adapter;
4235         u32 new_itr = q_vector->itr_val;
4236         u8 current_itr = 0;
4237
4238         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
4239         switch (adapter->link_speed) {
4240         case SPEED_10:
4241         case SPEED_100:
4242                 current_itr = 0;
4243                 new_itr = IGC_4K_ITR;
4244                 goto set_itr_now;
4245         default:
4246                 break;
4247         }
4248
4249         igc_update_itr(q_vector, &q_vector->tx);
4250         igc_update_itr(q_vector, &q_vector->rx);
4251
4252         current_itr = max(q_vector->rx.itr, q_vector->tx.itr);
4253
4254         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4255         if (current_itr == lowest_latency &&
4256             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4257             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4258                 current_itr = low_latency;
4259
4260         switch (current_itr) {
4261         /* counts and packets in update_itr are dependent on these numbers */
4262         case lowest_latency:
4263                 new_itr = IGC_70K_ITR; /* 70,000 ints/sec */
4264                 break;
4265         case low_latency:
4266                 new_itr = IGC_20K_ITR; /* 20,000 ints/sec */
4267                 break;
4268         case bulk_latency:
4269                 new_itr = IGC_4K_ITR;  /* 4,000 ints/sec */
4270                 break;
4271         default:
4272                 break;
4273         }
4274
4275 set_itr_now:
4276         if (new_itr != q_vector->itr_val) {
4277                 /* this attempts to bias the interrupt rate towards Bulk
4278                  * by adding intermediate steps when interrupt rate is
4279                  * increasing
4280                  */
4281                 new_itr = new_itr > q_vector->itr_val ?
4282                           max((new_itr * q_vector->itr_val) /
4283                           (new_itr + (q_vector->itr_val >> 2)),
4284                           new_itr) : new_itr;
4285                 /* Don't write the value here; it resets the adapter's
4286                  * internal timer, and causes us to delay far longer than
4287                  * we should between interrupts.  Instead, we write the ITR
4288                  * value at the beginning of the next interrupt so the timing
4289                  * ends up being correct.
4290                  */
4291                 q_vector->itr_val = new_itr;
4292                 q_vector->set_itr = 1;
4293         }
4294 }
4295
4296 static void igc_reset_interrupt_capability(struct igc_adapter *adapter)
4297 {
4298         int v_idx = adapter->num_q_vectors;
4299
4300         if (adapter->msix_entries) {
4301                 pci_disable_msix(adapter->pdev);
4302                 kfree(adapter->msix_entries);
4303                 adapter->msix_entries = NULL;
4304         } else if (adapter->flags & IGC_FLAG_HAS_MSI) {
4305                 pci_disable_msi(adapter->pdev);
4306         }
4307
4308         while (v_idx--)
4309                 igc_reset_q_vector(adapter, v_idx);
4310 }
4311
4312 /**
4313  * igc_set_interrupt_capability - set MSI or MSI-X if supported
4314  * @adapter: Pointer to adapter structure
4315  * @msix: boolean value for MSI-X capability
4316  *
4317  * Attempt to configure interrupts using the best available
4318  * capabilities of the hardware and kernel.
4319  */
4320 static void igc_set_interrupt_capability(struct igc_adapter *adapter,
4321                                          bool msix)
4322 {
4323         int numvecs, i;
4324         int err;
4325
4326         if (!msix)
4327                 goto msi_only;
4328         adapter->flags |= IGC_FLAG_HAS_MSIX;
4329
4330         /* Number of supported queues. */
4331         adapter->num_rx_queues = adapter->rss_queues;
4332
4333         adapter->num_tx_queues = adapter->rss_queues;
4334
4335         /* start with one vector for every Rx queue */
4336         numvecs = adapter->num_rx_queues;
4337
4338         /* if Tx handler is separate add 1 for every Tx queue */
4339         if (!(adapter->flags & IGC_FLAG_QUEUE_PAIRS))
4340                 numvecs += adapter->num_tx_queues;
4341
4342         /* store the number of vectors reserved for queues */
4343         adapter->num_q_vectors = numvecs;
4344
4345         /* add 1 vector for link status interrupts */
4346         numvecs++;
4347
4348         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
4349                                         GFP_KERNEL);
4350
4351         if (!adapter->msix_entries)
4352                 return;
4353
4354         /* populate entry values */
4355         for (i = 0; i < numvecs; i++)
4356                 adapter->msix_entries[i].entry = i;
4357
4358         err = pci_enable_msix_range(adapter->pdev,
4359                                     adapter->msix_entries,
4360                                     numvecs,
4361                                     numvecs);
4362         if (err > 0)
4363                 return;
4364
4365         kfree(adapter->msix_entries);
4366         adapter->msix_entries = NULL;
4367
4368         igc_reset_interrupt_capability(adapter);
4369
4370 msi_only:
4371         adapter->flags &= ~IGC_FLAG_HAS_MSIX;
4372
4373         adapter->rss_queues = 1;
4374         adapter->flags |= IGC_FLAG_QUEUE_PAIRS;
4375         adapter->num_rx_queues = 1;
4376         adapter->num_tx_queues = 1;
4377         adapter->num_q_vectors = 1;
4378         if (!pci_enable_msi(adapter->pdev))
4379                 adapter->flags |= IGC_FLAG_HAS_MSI;
4380 }
4381
4382 /**
4383  * igc_update_ring_itr - update the dynamic ITR value based on packet size
4384  * @q_vector: pointer to q_vector
4385  *
4386  * Stores a new ITR value based on strictly on packet size.  This
4387  * algorithm is less sophisticated than that used in igc_update_itr,
4388  * due to the difficulty of synchronizing statistics across multiple
4389  * receive rings.  The divisors and thresholds used by this function
4390  * were determined based on theoretical maximum wire speed and testing
4391  * data, in order to minimize response time while increasing bulk
4392  * throughput.
4393  * NOTE: This function is called only when operating in a multiqueue
4394  * receive environment.
4395  */
4396 static void igc_update_ring_itr(struct igc_q_vector *q_vector)
4397 {
4398         struct igc_adapter *adapter = q_vector->adapter;
4399         int new_val = q_vector->itr_val;
4400         int avg_wire_size = 0;
4401         unsigned int packets;
4402
4403         /* For non-gigabit speeds, just fix the interrupt rate at 4000
4404          * ints/sec - ITR timer value of 120 ticks.
4405          */
4406         switch (adapter->link_speed) {
4407         case SPEED_10:
4408         case SPEED_100:
4409                 new_val = IGC_4K_ITR;
4410                 goto set_itr_val;
4411         default:
4412                 break;
4413         }
4414
4415         packets = q_vector->rx.total_packets;
4416         if (packets)
4417                 avg_wire_size = q_vector->rx.total_bytes / packets;
4418
4419         packets = q_vector->tx.total_packets;
4420         if (packets)
4421                 avg_wire_size = max_t(u32, avg_wire_size,
4422                                       q_vector->tx.total_bytes / packets);
4423
4424         /* if avg_wire_size isn't set no work was done */
4425         if (!avg_wire_size)
4426                 goto clear_counts;
4427
4428         /* Add 24 bytes to size to account for CRC, preamble, and gap */
4429         avg_wire_size += 24;
4430
4431         /* Don't starve jumbo frames */
4432         avg_wire_size = min(avg_wire_size, 3000);
4433
4434         /* Give a little boost to mid-size frames */
4435         if (avg_wire_size > 300 && avg_wire_size < 1200)
4436                 new_val = avg_wire_size / 3;
4437         else
4438                 new_val = avg_wire_size / 2;
4439
4440         /* conservative mode (itr 3) eliminates the lowest_latency setting */
4441         if (new_val < IGC_20K_ITR &&
4442             ((q_vector->rx.ring && adapter->rx_itr_setting == 3) ||
4443             (!q_vector->rx.ring && adapter->tx_itr_setting == 3)))
4444                 new_val = IGC_20K_ITR;
4445
4446 set_itr_val:
4447         if (new_val != q_vector->itr_val) {
4448                 q_vector->itr_val = new_val;
4449                 q_vector->set_itr = 1;
4450         }
4451 clear_counts:
4452         q_vector->rx.total_bytes = 0;
4453         q_vector->rx.total_packets = 0;
4454         q_vector->tx.total_bytes = 0;
4455         q_vector->tx.total_packets = 0;
4456 }
4457
4458 static void igc_ring_irq_enable(struct igc_q_vector *q_vector)
4459 {
4460         struct igc_adapter *adapter = q_vector->adapter;
4461         struct igc_hw *hw = &adapter->hw;
4462
4463         if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) ||
4464             (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) {
4465                 if (adapter->num_q_vectors == 1)
4466                         igc_set_itr(q_vector);
4467                 else
4468                         igc_update_ring_itr(q_vector);
4469         }
4470
4471         if (!test_bit(__IGC_DOWN, &adapter->state)) {
4472                 if (adapter->msix_entries)
4473                         wr32(IGC_EIMS, q_vector->eims_value);
4474                 else
4475                         igc_irq_enable(adapter);
4476         }
4477 }
4478
4479 static void igc_add_ring(struct igc_ring *ring,
4480                          struct igc_ring_container *head)
4481 {
4482         head->ring = ring;
4483         head->count++;
4484 }
4485
4486 /**
4487  * igc_cache_ring_register - Descriptor ring to register mapping
4488  * @adapter: board private structure to initialize
4489  *
4490  * Once we know the feature-set enabled for the device, we'll cache
4491  * the register offset the descriptor ring is assigned to.
4492  */
4493 static void igc_cache_ring_register(struct igc_adapter *adapter)
4494 {
4495         int i = 0, j = 0;
4496
4497         switch (adapter->hw.mac.type) {
4498         case igc_i225:
4499         default:
4500                 for (; i < adapter->num_rx_queues; i++)
4501                         adapter->rx_ring[i]->reg_idx = i;
4502                 for (; j < adapter->num_tx_queues; j++)
4503                         adapter->tx_ring[j]->reg_idx = j;
4504                 break;
4505         }
4506 }
4507
4508 /**
4509  * igc_poll - NAPI Rx polling callback
4510  * @napi: napi polling structure
4511  * @budget: count of how many packets we should handle
4512  */
4513 static int igc_poll(struct napi_struct *napi, int budget)
4514 {
4515         struct igc_q_vector *q_vector = container_of(napi,
4516                                                      struct igc_q_vector,
4517                                                      napi);
4518         struct igc_ring *rx_ring = q_vector->rx.ring;
4519         bool clean_complete = true;
4520         int work_done = 0;
4521
4522         if (q_vector->tx.ring)
4523                 clean_complete = igc_clean_tx_irq(q_vector, budget);
4524
4525         if (rx_ring) {
4526                 int cleaned = rx_ring->xsk_pool ?
4527                               igc_clean_rx_irq_zc(q_vector, budget) :
4528                               igc_clean_rx_irq(q_vector, budget);
4529
4530                 work_done += cleaned;
4531                 if (cleaned >= budget)
4532                         clean_complete = false;
4533         }
4534
4535         /* If all work not completed, return budget and keep polling */
4536         if (!clean_complete)
4537                 return budget;
4538
4539         /* Exit the polling mode, but don't re-enable interrupts if stack might
4540          * poll us due to busy-polling
4541          */
4542         if (likely(napi_complete_done(napi, work_done)))
4543                 igc_ring_irq_enable(q_vector);
4544
4545         return min(work_done, budget - 1);
4546 }
4547
4548 /**
4549  * igc_alloc_q_vector - Allocate memory for a single interrupt vector
4550  * @adapter: board private structure to initialize
4551  * @v_count: q_vectors allocated on adapter, used for ring interleaving
4552  * @v_idx: index of vector in adapter struct
4553  * @txr_count: total number of Tx rings to allocate
4554  * @txr_idx: index of first Tx ring to allocate
4555  * @rxr_count: total number of Rx rings to allocate
4556  * @rxr_idx: index of first Rx ring to allocate
4557  *
4558  * We allocate one q_vector.  If allocation fails we return -ENOMEM.
4559  */
4560 static int igc_alloc_q_vector(struct igc_adapter *adapter,
4561                               unsigned int v_count, unsigned int v_idx,
4562                               unsigned int txr_count, unsigned int txr_idx,
4563                               unsigned int rxr_count, unsigned int rxr_idx)
4564 {
4565         struct igc_q_vector *q_vector;
4566         struct igc_ring *ring;
4567         int ring_count;
4568
4569         /* igc only supports 1 Tx and/or 1 Rx queue per vector */
4570         if (txr_count > 1 || rxr_count > 1)
4571                 return -ENOMEM;
4572
4573         ring_count = txr_count + rxr_count;
4574
4575         /* allocate q_vector and rings */
4576         q_vector = adapter->q_vector[v_idx];
4577         if (!q_vector)
4578                 q_vector = kzalloc(struct_size(q_vector, ring, ring_count),
4579                                    GFP_KERNEL);
4580         else
4581                 memset(q_vector, 0, struct_size(q_vector, ring, ring_count));
4582         if (!q_vector)
4583                 return -ENOMEM;
4584
4585         /* initialize NAPI */
4586         netif_napi_add(adapter->netdev, &q_vector->napi, igc_poll);
4587
4588         /* tie q_vector and adapter together */
4589         adapter->q_vector[v_idx] = q_vector;
4590         q_vector->adapter = adapter;
4591
4592         /* initialize work limits */
4593         q_vector->tx.work_limit = adapter->tx_work_limit;
4594
4595         /* initialize ITR configuration */
4596         q_vector->itr_register = adapter->io_addr + IGC_EITR(0);
4597         q_vector->itr_val = IGC_START_ITR;
4598
4599         /* initialize pointer to rings */
4600         ring = q_vector->ring;
4601
4602         /* initialize ITR */
4603         if (rxr_count) {
4604                 /* rx or rx/tx vector */
4605                 if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3)
4606                         q_vector->itr_val = adapter->rx_itr_setting;
4607         } else {
4608                 /* tx only vector */
4609                 if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3)
4610                         q_vector->itr_val = adapter->tx_itr_setting;
4611         }
4612
4613         if (txr_count) {
4614                 /* assign generic ring traits */
4615                 ring->dev = &adapter->pdev->dev;
4616                 ring->netdev = adapter->netdev;
4617
4618                 /* configure backlink on ring */
4619                 ring->q_vector = q_vector;
4620
4621                 /* update q_vector Tx values */
4622                 igc_add_ring(ring, &q_vector->tx);
4623
4624                 /* apply Tx specific ring traits */
4625                 ring->count = adapter->tx_ring_count;
4626                 ring->queue_index = txr_idx;
4627
4628                 /* assign ring to adapter */
4629                 adapter->tx_ring[txr_idx] = ring;
4630
4631                 /* push pointer to next ring */
4632                 ring++;
4633         }
4634
4635         if (rxr_count) {
4636                 /* assign generic ring traits */
4637                 ring->dev = &adapter->pdev->dev;
4638                 ring->netdev = adapter->netdev;
4639
4640                 /* configure backlink on ring */
4641                 ring->q_vector = q_vector;
4642
4643                 /* update q_vector Rx values */
4644                 igc_add_ring(ring, &q_vector->rx);
4645
4646                 /* apply Rx specific ring traits */
4647                 ring->count = adapter->rx_ring_count;
4648                 ring->queue_index = rxr_idx;
4649
4650                 /* assign ring to adapter */
4651                 adapter->rx_ring[rxr_idx] = ring;
4652         }
4653
4654         return 0;
4655 }
4656
4657 /**
4658  * igc_alloc_q_vectors - Allocate memory for interrupt vectors
4659  * @adapter: board private structure to initialize
4660  *
4661  * We allocate one q_vector per queue interrupt.  If allocation fails we
4662  * return -ENOMEM.
4663  */
4664 static int igc_alloc_q_vectors(struct igc_adapter *adapter)
4665 {
4666         int rxr_remaining = adapter->num_rx_queues;
4667         int txr_remaining = adapter->num_tx_queues;
4668         int rxr_idx = 0, txr_idx = 0, v_idx = 0;
4669         int q_vectors = adapter->num_q_vectors;
4670         int err;
4671
4672         if (q_vectors >= (rxr_remaining + txr_remaining)) {
4673                 for (; rxr_remaining; v_idx++) {
4674                         err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4675                                                  0, 0, 1, rxr_idx);
4676
4677                         if (err)
4678                                 goto err_out;
4679
4680                         /* update counts and index */
4681                         rxr_remaining--;
4682                         rxr_idx++;
4683                 }
4684         }
4685
4686         for (; v_idx < q_vectors; v_idx++) {
4687                 int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx);
4688                 int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx);
4689
4690                 err = igc_alloc_q_vector(adapter, q_vectors, v_idx,
4691                                          tqpv, txr_idx, rqpv, rxr_idx);
4692
4693                 if (err)
4694                         goto err_out;
4695
4696                 /* update counts and index */
4697                 rxr_remaining -= rqpv;
4698                 txr_remaining -= tqpv;
4699                 rxr_idx++;
4700                 txr_idx++;
4701         }
4702
4703         return 0;
4704
4705 err_out:
4706         adapter->num_tx_queues = 0;
4707         adapter->num_rx_queues = 0;
4708         adapter->num_q_vectors = 0;
4709
4710         while (v_idx--)
4711                 igc_free_q_vector(adapter, v_idx);
4712
4713         return -ENOMEM;
4714 }
4715
4716 /**
4717  * igc_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
4718  * @adapter: Pointer to adapter structure
4719  * @msix: boolean for MSI-X capability
4720  *
4721  * This function initializes the interrupts and allocates all of the queues.
4722  */
4723 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
4724 {
4725         struct net_device *dev = adapter->netdev;
4726         int err = 0;
4727
4728         igc_set_interrupt_capability(adapter, msix);
4729
4730         err = igc_alloc_q_vectors(adapter);
4731         if (err) {
4732                 netdev_err(dev, "Unable to allocate memory for vectors\n");
4733                 goto err_alloc_q_vectors;
4734         }
4735
4736         igc_cache_ring_register(adapter);
4737
4738         return 0;
4739
4740 err_alloc_q_vectors:
4741         igc_reset_interrupt_capability(adapter);
4742         return err;
4743 }
4744
4745 /**
4746  * igc_sw_init - Initialize general software structures (struct igc_adapter)
4747  * @adapter: board private structure to initialize
4748  *
4749  * igc_sw_init initializes the Adapter private data structure.
4750  * Fields are initialized based on PCI device information and
4751  * OS network device settings (MTU size).
4752  */
4753 static int igc_sw_init(struct igc_adapter *adapter)
4754 {
4755         struct net_device *netdev = adapter->netdev;
4756         struct pci_dev *pdev = adapter->pdev;
4757         struct igc_hw *hw = &adapter->hw;
4758
4759         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
4760
4761         /* set default ring sizes */
4762         adapter->tx_ring_count = IGC_DEFAULT_TXD;
4763         adapter->rx_ring_count = IGC_DEFAULT_RXD;
4764
4765         /* set default ITR values */
4766         adapter->rx_itr_setting = IGC_DEFAULT_ITR;
4767         adapter->tx_itr_setting = IGC_DEFAULT_ITR;
4768
4769         /* set default work limits */
4770         adapter->tx_work_limit = IGC_DEFAULT_TX_WORK;
4771
4772         /* adjust max frame to be at least the size of a standard frame */
4773         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN +
4774                                 VLAN_HLEN;
4775         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
4776
4777         mutex_init(&adapter->nfc_rule_lock);
4778         INIT_LIST_HEAD(&adapter->nfc_rule_list);
4779         adapter->nfc_rule_count = 0;
4780
4781         spin_lock_init(&adapter->stats64_lock);
4782         /* Assume MSI-X interrupts, will be checked during IRQ allocation */
4783         adapter->flags |= IGC_FLAG_HAS_MSIX;
4784
4785         igc_init_queue_configuration(adapter);
4786
4787         /* This call may decrease the number of queues */
4788         if (igc_init_interrupt_scheme(adapter, true)) {
4789                 netdev_err(netdev, "Unable to allocate memory for queues\n");
4790                 return -ENOMEM;
4791         }
4792
4793         /* Explicitly disable IRQ since the NIC can be in any state. */
4794         igc_irq_disable(adapter);
4795
4796         set_bit(__IGC_DOWN, &adapter->state);
4797
4798         return 0;
4799 }
4800
4801 /**
4802  * igc_up - Open the interface and prepare it to handle traffic
4803  * @adapter: board private structure
4804  */
4805 void igc_up(struct igc_adapter *adapter)
4806 {
4807         struct igc_hw *hw = &adapter->hw;
4808         int i = 0;
4809
4810         /* hardware has been reset, we need to reload some things */
4811         igc_configure(adapter);
4812
4813         clear_bit(__IGC_DOWN, &adapter->state);
4814
4815         for (i = 0; i < adapter->num_q_vectors; i++)
4816                 napi_enable(&adapter->q_vector[i]->napi);
4817
4818         if (adapter->msix_entries)
4819                 igc_configure_msix(adapter);
4820         else
4821                 igc_assign_vector(adapter->q_vector[0], 0);
4822
4823         /* Clear any pending interrupts. */
4824         rd32(IGC_ICR);
4825         igc_irq_enable(adapter);
4826
4827         netif_tx_start_all_queues(adapter->netdev);
4828
4829         /* start the watchdog. */
4830         hw->mac.get_link_status = true;
4831         schedule_work(&adapter->watchdog_task);
4832 }
4833
4834 /**
4835  * igc_update_stats - Update the board statistics counters
4836  * @adapter: board private structure
4837  */
4838 void igc_update_stats(struct igc_adapter *adapter)
4839 {
4840         struct rtnl_link_stats64 *net_stats = &adapter->stats64;
4841         struct pci_dev *pdev = adapter->pdev;
4842         struct igc_hw *hw = &adapter->hw;
4843         u64 _bytes, _packets;
4844         u64 bytes, packets;
4845         unsigned int start;
4846         u32 mpc;
4847         int i;
4848
4849         /* Prevent stats update while adapter is being reset, or if the pci
4850          * connection is down.
4851          */
4852         if (adapter->link_speed == 0)
4853                 return;
4854         if (pci_channel_offline(pdev))
4855                 return;
4856
4857         packets = 0;
4858         bytes = 0;
4859
4860         rcu_read_lock();
4861         for (i = 0; i < adapter->num_rx_queues; i++) {
4862                 struct igc_ring *ring = adapter->rx_ring[i];
4863                 u32 rqdpc = rd32(IGC_RQDPC(i));
4864
4865                 if (hw->mac.type >= igc_i225)
4866                         wr32(IGC_RQDPC(i), 0);
4867
4868                 if (rqdpc) {
4869                         ring->rx_stats.drops += rqdpc;
4870                         net_stats->rx_fifo_errors += rqdpc;
4871                 }
4872
4873                 do {
4874                         start = u64_stats_fetch_begin(&ring->rx_syncp);
4875                         _bytes = ring->rx_stats.bytes;
4876                         _packets = ring->rx_stats.packets;
4877                 } while (u64_stats_fetch_retry(&ring->rx_syncp, start));
4878                 bytes += _bytes;
4879                 packets += _packets;
4880         }
4881
4882         net_stats->rx_bytes = bytes;
4883         net_stats->rx_packets = packets;
4884
4885         packets = 0;
4886         bytes = 0;
4887         for (i = 0; i < adapter->num_tx_queues; i++) {
4888                 struct igc_ring *ring = adapter->tx_ring[i];
4889
4890                 do {
4891                         start = u64_stats_fetch_begin(&ring->tx_syncp);
4892                         _bytes = ring->tx_stats.bytes;
4893                         _packets = ring->tx_stats.packets;
4894                 } while (u64_stats_fetch_retry(&ring->tx_syncp, start));
4895                 bytes += _bytes;
4896                 packets += _packets;
4897         }
4898         net_stats->tx_bytes = bytes;
4899         net_stats->tx_packets = packets;
4900         rcu_read_unlock();
4901
4902         /* read stats registers */
4903         adapter->stats.crcerrs += rd32(IGC_CRCERRS);
4904         adapter->stats.gprc += rd32(IGC_GPRC);
4905         adapter->stats.gorc += rd32(IGC_GORCL);
4906         rd32(IGC_GORCH); /* clear GORCL */
4907         adapter->stats.bprc += rd32(IGC_BPRC);
4908         adapter->stats.mprc += rd32(IGC_MPRC);
4909         adapter->stats.roc += rd32(IGC_ROC);
4910
4911         adapter->stats.prc64 += rd32(IGC_PRC64);
4912         adapter->stats.prc127 += rd32(IGC_PRC127);
4913         adapter->stats.prc255 += rd32(IGC_PRC255);
4914         adapter->stats.prc511 += rd32(IGC_PRC511);
4915         adapter->stats.prc1023 += rd32(IGC_PRC1023);
4916         adapter->stats.prc1522 += rd32(IGC_PRC1522);
4917         adapter->stats.tlpic += rd32(IGC_TLPIC);
4918         adapter->stats.rlpic += rd32(IGC_RLPIC);
4919         adapter->stats.hgptc += rd32(IGC_HGPTC);
4920
4921         mpc = rd32(IGC_MPC);
4922         adapter->stats.mpc += mpc;
4923         net_stats->rx_fifo_errors += mpc;
4924         adapter->stats.scc += rd32(IGC_SCC);
4925         adapter->stats.ecol += rd32(IGC_ECOL);
4926         adapter->stats.mcc += rd32(IGC_MCC);
4927         adapter->stats.latecol += rd32(IGC_LATECOL);
4928         adapter->stats.dc += rd32(IGC_DC);
4929         adapter->stats.rlec += rd32(IGC_RLEC);
4930         adapter->stats.xonrxc += rd32(IGC_XONRXC);
4931         adapter->stats.xontxc += rd32(IGC_XONTXC);
4932         adapter->stats.xoffrxc += rd32(IGC_XOFFRXC);
4933         adapter->stats.xofftxc += rd32(IGC_XOFFTXC);
4934         adapter->stats.fcruc += rd32(IGC_FCRUC);
4935         adapter->stats.gptc += rd32(IGC_GPTC);
4936         adapter->stats.gotc += rd32(IGC_GOTCL);
4937         rd32(IGC_GOTCH); /* clear GOTCL */
4938         adapter->stats.rnbc += rd32(IGC_RNBC);
4939         adapter->stats.ruc += rd32(IGC_RUC);
4940         adapter->stats.rfc += rd32(IGC_RFC);
4941         adapter->stats.rjc += rd32(IGC_RJC);
4942         adapter->stats.tor += rd32(IGC_TORH);
4943         adapter->stats.tot += rd32(IGC_TOTH);
4944         adapter->stats.tpr += rd32(IGC_TPR);
4945
4946         adapter->stats.ptc64 += rd32(IGC_PTC64);
4947         adapter->stats.ptc127 += rd32(IGC_PTC127);
4948         adapter->stats.ptc255 += rd32(IGC_PTC255);
4949         adapter->stats.ptc511 += rd32(IGC_PTC511);
4950         adapter->stats.ptc1023 += rd32(IGC_PTC1023);
4951         adapter->stats.ptc1522 += rd32(IGC_PTC1522);
4952
4953         adapter->stats.mptc += rd32(IGC_MPTC);
4954         adapter->stats.bptc += rd32(IGC_BPTC);
4955
4956         adapter->stats.tpt += rd32(IGC_TPT);
4957         adapter->stats.colc += rd32(IGC_COLC);
4958         adapter->stats.colc += rd32(IGC_RERC);
4959
4960         adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
4961
4962         adapter->stats.tsctc += rd32(IGC_TSCTC);
4963
4964         adapter->stats.iac += rd32(IGC_IAC);
4965
4966         /* Fill out the OS statistics structure */
4967         net_stats->multicast = adapter->stats.mprc;
4968         net_stats->collisions = adapter->stats.colc;
4969
4970         /* Rx Errors */
4971
4972         /* RLEC on some newer hardware can be incorrect so build
4973          * our own version based on RUC and ROC
4974          */
4975         net_stats->rx_errors = adapter->stats.rxerrc +
4976                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4977                 adapter->stats.ruc + adapter->stats.roc +
4978                 adapter->stats.cexterr;
4979         net_stats->rx_length_errors = adapter->stats.ruc +
4980                                       adapter->stats.roc;
4981         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4982         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4983         net_stats->rx_missed_errors = adapter->stats.mpc;
4984
4985         /* Tx Errors */
4986         net_stats->tx_errors = adapter->stats.ecol +
4987                                adapter->stats.latecol;
4988         net_stats->tx_aborted_errors = adapter->stats.ecol;
4989         net_stats->tx_window_errors = adapter->stats.latecol;
4990         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4991
4992         /* Tx Dropped */
4993         net_stats->tx_dropped = adapter->stats.txdrop;
4994
4995         /* Management Stats */
4996         adapter->stats.mgptc += rd32(IGC_MGTPTC);
4997         adapter->stats.mgprc += rd32(IGC_MGTPRC);
4998         adapter->stats.mgpdc += rd32(IGC_MGTPDC);
4999 }
5000
5001 /**
5002  * igc_down - Close the interface
5003  * @adapter: board private structure
5004  */
5005 void igc_down(struct igc_adapter *adapter)
5006 {
5007         struct net_device *netdev = adapter->netdev;
5008         struct igc_hw *hw = &adapter->hw;
5009         u32 tctl, rctl;
5010         int i = 0;
5011
5012         set_bit(__IGC_DOWN, &adapter->state);
5013
5014         igc_ptp_suspend(adapter);
5015
5016         if (pci_device_is_present(adapter->pdev)) {
5017                 /* disable receives in the hardware */
5018                 rctl = rd32(IGC_RCTL);
5019                 wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
5020                 /* flush and sleep below */
5021         }
5022         /* set trans_start so we don't get spurious watchdogs during reset */
5023         netif_trans_update(netdev);
5024
5025         netif_carrier_off(netdev);
5026         netif_tx_stop_all_queues(netdev);
5027
5028         if (pci_device_is_present(adapter->pdev)) {
5029                 /* disable transmits in the hardware */
5030                 tctl = rd32(IGC_TCTL);
5031                 tctl &= ~IGC_TCTL_EN;
5032                 wr32(IGC_TCTL, tctl);
5033                 /* flush both disables and wait for them to finish */
5034                 wrfl();
5035                 usleep_range(10000, 20000);
5036
5037                 igc_irq_disable(adapter);
5038         }
5039
5040         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5041
5042         for (i = 0; i < adapter->num_q_vectors; i++) {
5043                 if (adapter->q_vector[i]) {
5044                         napi_synchronize(&adapter->q_vector[i]->napi);
5045                         napi_disable(&adapter->q_vector[i]->napi);
5046                 }
5047         }
5048
5049         del_timer_sync(&adapter->watchdog_timer);
5050         del_timer_sync(&adapter->phy_info_timer);
5051
5052         /* record the stats before reset*/
5053         spin_lock(&adapter->stats64_lock);
5054         igc_update_stats(adapter);
5055         spin_unlock(&adapter->stats64_lock);
5056
5057         adapter->link_speed = 0;
5058         adapter->link_duplex = 0;
5059
5060         if (!pci_channel_offline(adapter->pdev))
5061                 igc_reset(adapter);
5062
5063         /* clear VLAN promisc flag so VFTA will be updated if necessary */
5064         adapter->flags &= ~IGC_FLAG_VLAN_PROMISC;
5065
5066         igc_clean_all_tx_rings(adapter);
5067         igc_clean_all_rx_rings(adapter);
5068 }
5069
5070 void igc_reinit_locked(struct igc_adapter *adapter)
5071 {
5072         while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
5073                 usleep_range(1000, 2000);
5074         igc_down(adapter);
5075         igc_up(adapter);
5076         clear_bit(__IGC_RESETTING, &adapter->state);
5077 }
5078
5079 static void igc_reset_task(struct work_struct *work)
5080 {
5081         struct igc_adapter *adapter;
5082
5083         adapter = container_of(work, struct igc_adapter, reset_task);
5084
5085         rtnl_lock();
5086         /* If we're already down or resetting, just bail */
5087         if (test_bit(__IGC_DOWN, &adapter->state) ||
5088             test_bit(__IGC_RESETTING, &adapter->state)) {
5089                 rtnl_unlock();
5090                 return;
5091         }
5092
5093         igc_rings_dump(adapter);
5094         igc_regs_dump(adapter);
5095         netdev_err(adapter->netdev, "Reset adapter\n");
5096         igc_reinit_locked(adapter);
5097         rtnl_unlock();
5098 }
5099
5100 /**
5101  * igc_change_mtu - Change the Maximum Transfer Unit
5102  * @netdev: network interface device structure
5103  * @new_mtu: new value for maximum frame size
5104  *
5105  * Returns 0 on success, negative on failure
5106  */
5107 static int igc_change_mtu(struct net_device *netdev, int new_mtu)
5108 {
5109         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
5110         struct igc_adapter *adapter = netdev_priv(netdev);
5111
5112         if (igc_xdp_is_enabled(adapter) && new_mtu > ETH_DATA_LEN) {
5113                 netdev_dbg(netdev, "Jumbo frames not supported with XDP");
5114                 return -EINVAL;
5115         }
5116
5117         /* adjust max frame to be at least the size of a standard frame */
5118         if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN))
5119                 max_frame = ETH_FRAME_LEN + ETH_FCS_LEN;
5120
5121         while (test_and_set_bit(__IGC_RESETTING, &adapter->state))
5122                 usleep_range(1000, 2000);
5123
5124         /* igc_down has a dependency on max_frame_size */
5125         adapter->max_frame_size = max_frame;
5126
5127         if (netif_running(netdev))
5128                 igc_down(adapter);
5129
5130         netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
5131         netdev->mtu = new_mtu;
5132
5133         if (netif_running(netdev))
5134                 igc_up(adapter);
5135         else
5136                 igc_reset(adapter);
5137
5138         clear_bit(__IGC_RESETTING, &adapter->state);
5139
5140         return 0;
5141 }
5142
5143 /**
5144  * igc_tx_timeout - Respond to a Tx Hang
5145  * @netdev: network interface device structure
5146  * @txqueue: queue number that timed out
5147  **/
5148 static void igc_tx_timeout(struct net_device *netdev,
5149                            unsigned int __always_unused txqueue)
5150 {
5151         struct igc_adapter *adapter = netdev_priv(netdev);
5152         struct igc_hw *hw = &adapter->hw;
5153
5154         /* Do the reset outside of interrupt context */
5155         adapter->tx_timeout_count++;
5156         schedule_work(&adapter->reset_task);
5157         wr32(IGC_EICS,
5158              (adapter->eims_enable_mask & ~adapter->eims_other));
5159 }
5160
5161 /**
5162  * igc_get_stats64 - Get System Network Statistics
5163  * @netdev: network interface device structure
5164  * @stats: rtnl_link_stats64 pointer
5165  *
5166  * Returns the address of the device statistics structure.
5167  * The statistics are updated here and also from the timer callback.
5168  */
5169 static void igc_get_stats64(struct net_device *netdev,
5170                             struct rtnl_link_stats64 *stats)
5171 {
5172         struct igc_adapter *adapter = netdev_priv(netdev);
5173
5174         spin_lock(&adapter->stats64_lock);
5175         if (!test_bit(__IGC_RESETTING, &adapter->state))
5176                 igc_update_stats(adapter);
5177         memcpy(stats, &adapter->stats64, sizeof(*stats));
5178         spin_unlock(&adapter->stats64_lock);
5179 }
5180
5181 static netdev_features_t igc_fix_features(struct net_device *netdev,
5182                                           netdev_features_t features)
5183 {
5184         /* Since there is no support for separate Rx/Tx vlan accel
5185          * enable/disable make sure Tx flag is always in same state as Rx.
5186          */
5187         if (features & NETIF_F_HW_VLAN_CTAG_RX)
5188                 features |= NETIF_F_HW_VLAN_CTAG_TX;
5189         else
5190                 features &= ~NETIF_F_HW_VLAN_CTAG_TX;
5191
5192         return features;
5193 }
5194
5195 static int igc_set_features(struct net_device *netdev,
5196                             netdev_features_t features)
5197 {
5198         netdev_features_t changed = netdev->features ^ features;
5199         struct igc_adapter *adapter = netdev_priv(netdev);
5200
5201         if (changed & NETIF_F_HW_VLAN_CTAG_RX)
5202                 igc_vlan_mode(netdev, features);
5203
5204         /* Add VLAN support */
5205         if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
5206                 return 0;
5207
5208         if (!(features & NETIF_F_NTUPLE))
5209                 igc_flush_nfc_rules(adapter);
5210
5211         netdev->features = features;
5212
5213         if (netif_running(netdev))
5214                 igc_reinit_locked(adapter);
5215         else
5216                 igc_reset(adapter);
5217
5218         return 1;
5219 }
5220
5221 static netdev_features_t
5222 igc_features_check(struct sk_buff *skb, struct net_device *dev,
5223                    netdev_features_t features)
5224 {
5225         unsigned int network_hdr_len, mac_hdr_len;
5226
5227         /* Make certain the headers can be described by a context descriptor */
5228         mac_hdr_len = skb_network_header(skb) - skb->data;
5229         if (unlikely(mac_hdr_len > IGC_MAX_MAC_HDR_LEN))
5230                 return features & ~(NETIF_F_HW_CSUM |
5231                                     NETIF_F_SCTP_CRC |
5232                                     NETIF_F_HW_VLAN_CTAG_TX |
5233                                     NETIF_F_TSO |
5234                                     NETIF_F_TSO6);
5235
5236         network_hdr_len = skb_checksum_start(skb) - skb_network_header(skb);
5237         if (unlikely(network_hdr_len >  IGC_MAX_NETWORK_HDR_LEN))
5238                 return features & ~(NETIF_F_HW_CSUM |
5239                                     NETIF_F_SCTP_CRC |
5240                                     NETIF_F_TSO |
5241                                     NETIF_F_TSO6);
5242
5243         /* We can only support IPv4 TSO in tunnels if we can mangle the
5244          * inner IP ID field, so strip TSO if MANGLEID is not supported.
5245          */
5246         if (skb->encapsulation && !(features & NETIF_F_TSO_MANGLEID))
5247                 features &= ~NETIF_F_TSO;
5248
5249         return features;
5250 }
5251
5252 static void igc_tsync_interrupt(struct igc_adapter *adapter)
5253 {
5254         u32 ack, tsauxc, sec, nsec, tsicr;
5255         struct igc_hw *hw = &adapter->hw;
5256         struct ptp_clock_event event;
5257         struct timespec64 ts;
5258
5259         tsicr = rd32(IGC_TSICR);
5260         ack = 0;
5261
5262         if (tsicr & IGC_TSICR_SYS_WRAP) {
5263                 event.type = PTP_CLOCK_PPS;
5264                 if (adapter->ptp_caps.pps)
5265                         ptp_clock_event(adapter->ptp_clock, &event);
5266                 ack |= IGC_TSICR_SYS_WRAP;
5267         }
5268
5269         if (tsicr & IGC_TSICR_TXTS) {
5270                 /* retrieve hardware timestamp */
5271                 igc_ptp_tx_tstamp_event(adapter);
5272                 ack |= IGC_TSICR_TXTS;
5273         }
5274
5275         if (tsicr & IGC_TSICR_TT0) {
5276                 spin_lock(&adapter->tmreg_lock);
5277                 ts = timespec64_add(adapter->perout[0].start,
5278                                     adapter->perout[0].period);
5279                 wr32(IGC_TRGTTIML0, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5280                 wr32(IGC_TRGTTIMH0, (u32)ts.tv_sec);
5281                 tsauxc = rd32(IGC_TSAUXC);
5282                 tsauxc |= IGC_TSAUXC_EN_TT0;
5283                 wr32(IGC_TSAUXC, tsauxc);
5284                 adapter->perout[0].start = ts;
5285                 spin_unlock(&adapter->tmreg_lock);
5286                 ack |= IGC_TSICR_TT0;
5287         }
5288
5289         if (tsicr & IGC_TSICR_TT1) {
5290                 spin_lock(&adapter->tmreg_lock);
5291                 ts = timespec64_add(adapter->perout[1].start,
5292                                     adapter->perout[1].period);
5293                 wr32(IGC_TRGTTIML1, ts.tv_nsec | IGC_TT_IO_TIMER_SEL_SYSTIM0);
5294                 wr32(IGC_TRGTTIMH1, (u32)ts.tv_sec);
5295                 tsauxc = rd32(IGC_TSAUXC);
5296                 tsauxc |= IGC_TSAUXC_EN_TT1;
5297                 wr32(IGC_TSAUXC, tsauxc);
5298                 adapter->perout[1].start = ts;
5299                 spin_unlock(&adapter->tmreg_lock);
5300                 ack |= IGC_TSICR_TT1;
5301         }
5302
5303         if (tsicr & IGC_TSICR_AUTT0) {
5304                 nsec = rd32(IGC_AUXSTMPL0);
5305                 sec  = rd32(IGC_AUXSTMPH0);
5306                 event.type = PTP_CLOCK_EXTTS;
5307                 event.index = 0;
5308                 event.timestamp = sec * NSEC_PER_SEC + nsec;
5309                 ptp_clock_event(adapter->ptp_clock, &event);
5310                 ack |= IGC_TSICR_AUTT0;
5311         }
5312
5313         if (tsicr & IGC_TSICR_AUTT1) {
5314                 nsec = rd32(IGC_AUXSTMPL1);
5315                 sec  = rd32(IGC_AUXSTMPH1);
5316                 event.type = PTP_CLOCK_EXTTS;
5317                 event.index = 1;
5318                 event.timestamp = sec * NSEC_PER_SEC + nsec;
5319                 ptp_clock_event(adapter->ptp_clock, &event);
5320                 ack |= IGC_TSICR_AUTT1;
5321         }
5322
5323         /* acknowledge the interrupts */
5324         wr32(IGC_TSICR, ack);
5325 }
5326
5327 /**
5328  * igc_msix_other - msix other interrupt handler
5329  * @irq: interrupt number
5330  * @data: pointer to a q_vector
5331  */
5332 static irqreturn_t igc_msix_other(int irq, void *data)
5333 {
5334         struct igc_adapter *adapter = data;
5335         struct igc_hw *hw = &adapter->hw;
5336         u32 icr = rd32(IGC_ICR);
5337
5338         /* reading ICR causes bit 31 of EICR to be cleared */
5339         if (icr & IGC_ICR_DRSTA)
5340                 schedule_work(&adapter->reset_task);
5341
5342         if (icr & IGC_ICR_DOUTSYNC) {
5343                 /* HW is reporting DMA is out of sync */
5344                 adapter->stats.doosync++;
5345         }
5346
5347         if (icr & IGC_ICR_LSC) {
5348                 hw->mac.get_link_status = true;
5349                 /* guard against interrupt when we're going down */
5350                 if (!test_bit(__IGC_DOWN, &adapter->state))
5351                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5352         }
5353
5354         if (icr & IGC_ICR_TS)
5355                 igc_tsync_interrupt(adapter);
5356
5357         wr32(IGC_EIMS, adapter->eims_other);
5358
5359         return IRQ_HANDLED;
5360 }
5361
5362 static void igc_write_itr(struct igc_q_vector *q_vector)
5363 {
5364         u32 itr_val = q_vector->itr_val & IGC_QVECTOR_MASK;
5365
5366         if (!q_vector->set_itr)
5367                 return;
5368
5369         if (!itr_val)
5370                 itr_val = IGC_ITR_VAL_MASK;
5371
5372         itr_val |= IGC_EITR_CNT_IGNR;
5373
5374         writel(itr_val, q_vector->itr_register);
5375         q_vector->set_itr = 0;
5376 }
5377
5378 static irqreturn_t igc_msix_ring(int irq, void *data)
5379 {
5380         struct igc_q_vector *q_vector = data;
5381
5382         /* Write the ITR value calculated from the previous interrupt. */
5383         igc_write_itr(q_vector);
5384
5385         napi_schedule(&q_vector->napi);
5386
5387         return IRQ_HANDLED;
5388 }
5389
5390 /**
5391  * igc_request_msix - Initialize MSI-X interrupts
5392  * @adapter: Pointer to adapter structure
5393  *
5394  * igc_request_msix allocates MSI-X vectors and requests interrupts from the
5395  * kernel.
5396  */
5397 static int igc_request_msix(struct igc_adapter *adapter)
5398 {
5399         unsigned int num_q_vectors = adapter->num_q_vectors;
5400         int i = 0, err = 0, vector = 0, free_vector = 0;
5401         struct net_device *netdev = adapter->netdev;
5402
5403         err = request_irq(adapter->msix_entries[vector].vector,
5404                           &igc_msix_other, 0, netdev->name, adapter);
5405         if (err)
5406                 goto err_out;
5407
5408         if (num_q_vectors > MAX_Q_VECTORS) {
5409                 num_q_vectors = MAX_Q_VECTORS;
5410                 dev_warn(&adapter->pdev->dev,
5411                          "The number of queue vectors (%d) is higher than max allowed (%d)\n",
5412                          adapter->num_q_vectors, MAX_Q_VECTORS);
5413         }
5414         for (i = 0; i < num_q_vectors; i++) {
5415                 struct igc_q_vector *q_vector = adapter->q_vector[i];
5416
5417                 vector++;
5418
5419                 q_vector->itr_register = adapter->io_addr + IGC_EITR(vector);
5420
5421                 if (q_vector->rx.ring && q_vector->tx.ring)
5422                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
5423                                 q_vector->rx.ring->queue_index);
5424                 else if (q_vector->tx.ring)
5425                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
5426                                 q_vector->tx.ring->queue_index);
5427                 else if (q_vector->rx.ring)
5428                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
5429                                 q_vector->rx.ring->queue_index);
5430                 else
5431                         sprintf(q_vector->name, "%s-unused", netdev->name);
5432
5433                 err = request_irq(adapter->msix_entries[vector].vector,
5434                                   igc_msix_ring, 0, q_vector->name,
5435                                   q_vector);
5436                 if (err)
5437                         goto err_free;
5438         }
5439
5440         igc_configure_msix(adapter);
5441         return 0;
5442
5443 err_free:
5444         /* free already assigned IRQs */
5445         free_irq(adapter->msix_entries[free_vector++].vector, adapter);
5446
5447         vector--;
5448         for (i = 0; i < vector; i++) {
5449                 free_irq(adapter->msix_entries[free_vector++].vector,
5450                          adapter->q_vector[i]);
5451         }
5452 err_out:
5453         return err;
5454 }
5455
/**
 * igc_clear_interrupt_scheme - reset the device to a state of no interrupts
 * @adapter: Pointer to adapter structure
 *
 * Frees the queue vectors and then resets the interrupt capability, leaving
 * the device with no Rx queues, Tx queues or MSI-X interrupts allocated.
 */
static void igc_clear_interrupt_scheme(struct igc_adapter *adapter)
{
	/* queue vectors go first, then the interrupt capability itself */
	igc_free_q_vectors(adapter);
	igc_reset_interrupt_capability(adapter);
}
5468
5469 /* Need to wait a few seconds after link up to get diagnostic information from
5470  * the phy
5471  */
5472 static void igc_update_phy_info(struct timer_list *t)
5473 {
5474         struct igc_adapter *adapter = from_timer(adapter, t, phy_info_timer);
5475
5476         igc_get_phy_info(&adapter->hw);
5477 }
5478
5479 /**
5480  * igc_has_link - check shared code for link and determine up/down
5481  * @adapter: pointer to driver private info
5482  */
5483 bool igc_has_link(struct igc_adapter *adapter)
5484 {
5485         struct igc_hw *hw = &adapter->hw;
5486         bool link_active = false;
5487
5488         /* get_link_status is set on LSC (link status) interrupt or
5489          * rx sequence error interrupt.  get_link_status will stay
5490          * false until the igc_check_for_link establishes link
5491          * for copper adapters ONLY
5492          */
5493         if (!hw->mac.get_link_status)
5494                 return true;
5495         hw->mac.ops.check_for_link(hw);
5496         link_active = !hw->mac.get_link_status;
5497
5498         if (hw->mac.type == igc_i225) {
5499                 if (!netif_carrier_ok(adapter->netdev)) {
5500                         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5501                 } else if (!(adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)) {
5502                         adapter->flags |= IGC_FLAG_NEED_LINK_UPDATE;
5503                         adapter->link_check_timeout = jiffies;
5504                 }
5505         }
5506
5507         return link_active;
5508 }
5509
5510 /**
5511  * igc_watchdog - Timer Call-back
5512  * @t: timer for the watchdog
5513  */
5514 static void igc_watchdog(struct timer_list *t)
5515 {
5516         struct igc_adapter *adapter = from_timer(adapter, t, watchdog_timer);
5517         /* Do the rest outside of interrupt context */
5518         schedule_work(&adapter->watchdog_task);
5519 }
5520
5521 static void igc_watchdog_task(struct work_struct *work)
5522 {
5523         struct igc_adapter *adapter = container_of(work,
5524                                                    struct igc_adapter,
5525                                                    watchdog_task);
5526         struct net_device *netdev = adapter->netdev;
5527         struct igc_hw *hw = &adapter->hw;
5528         struct igc_phy_info *phy = &hw->phy;
5529         u16 phy_data, retry_count = 20;
5530         u32 link;
5531         int i;
5532
5533         link = igc_has_link(adapter);
5534
5535         if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE) {
5536                 if (time_after(jiffies, (adapter->link_check_timeout + HZ)))
5537                         adapter->flags &= ~IGC_FLAG_NEED_LINK_UPDATE;
5538                 else
5539                         link = false;
5540         }
5541
5542         if (link) {
5543                 /* Cancel scheduled suspend requests. */
5544                 pm_runtime_resume(netdev->dev.parent);
5545
5546                 if (!netif_carrier_ok(netdev)) {
5547                         u32 ctrl;
5548
5549                         hw->mac.ops.get_speed_and_duplex(hw,
5550                                                          &adapter->link_speed,
5551                                                          &adapter->link_duplex);
5552
5553                         ctrl = rd32(IGC_CTRL);
5554                         /* Link status message must follow this format */
5555                         netdev_info(netdev,
5556                                     "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
5557                                     adapter->link_speed,
5558                                     adapter->link_duplex == FULL_DUPLEX ?
5559                                     "Full" : "Half",
5560                                     (ctrl & IGC_CTRL_TFCE) &&
5561                                     (ctrl & IGC_CTRL_RFCE) ? "RX/TX" :
5562                                     (ctrl & IGC_CTRL_RFCE) ?  "RX" :
5563                                     (ctrl & IGC_CTRL_TFCE) ?  "TX" : "None");
5564
5565                         /* disable EEE if enabled */
5566                         if ((adapter->flags & IGC_FLAG_EEE) &&
5567                             adapter->link_duplex == HALF_DUPLEX) {
5568                                 netdev_info(netdev,
5569                                             "EEE Disabled: unsupported at half duplex. Re-enable using ethtool when at full duplex\n");
5570                                 adapter->hw.dev_spec._base.eee_enable = false;
5571                                 adapter->flags &= ~IGC_FLAG_EEE;
5572                         }
5573
5574                         /* check if SmartSpeed worked */
5575                         igc_check_downshift(hw);
5576                         if (phy->speed_downgraded)
5577                                 netdev_warn(netdev, "Link Speed was downgraded by SmartSpeed\n");
5578
5579                         /* adjust timeout factor according to speed/duplex */
5580                         adapter->tx_timeout_factor = 1;
5581                         switch (adapter->link_speed) {
5582                         case SPEED_10:
5583                                 adapter->tx_timeout_factor = 14;
5584                                 break;
5585                         case SPEED_100:
5586                         case SPEED_1000:
5587                         case SPEED_2500:
5588                                 adapter->tx_timeout_factor = 1;
5589                                 break;
5590                         }
5591
5592                         /* Once the launch time has been set on the wire, there
5593                          * is a delay before the link speed can be determined
5594                          * based on link-up activity. Write into the register
5595                          * as soon as we know the correct link speed.
5596                          */
5597                         igc_tsn_adjust_txtime_offset(adapter);
5598
5599                         if (adapter->link_speed != SPEED_1000)
5600                                 goto no_wait;
5601
5602                         /* wait for Remote receiver status OK */
5603 retry_read_status:
5604                         if (!igc_read_phy_reg(hw, PHY_1000T_STATUS,
5605                                               &phy_data)) {
5606                                 if (!(phy_data & SR_1000T_REMOTE_RX_STATUS) &&
5607                                     retry_count) {
5608                                         msleep(100);
5609                                         retry_count--;
5610                                         goto retry_read_status;
5611                                 } else if (!retry_count) {
5612                                         netdev_err(netdev, "exceed max 2 second\n");
5613                                 }
5614                         } else {
5615                                 netdev_err(netdev, "read 1000Base-T Status Reg\n");
5616                         }
5617 no_wait:
5618                         netif_carrier_on(netdev);
5619
5620                         /* link state has changed, schedule phy info update */
5621                         if (!test_bit(__IGC_DOWN, &adapter->state))
5622                                 mod_timer(&adapter->phy_info_timer,
5623                                           round_jiffies(jiffies + 2 * HZ));
5624                 }
5625         } else {
5626                 if (netif_carrier_ok(netdev)) {
5627                         adapter->link_speed = 0;
5628                         adapter->link_duplex = 0;
5629
5630                         /* Links status message must follow this format */
5631                         netdev_info(netdev, "NIC Link is Down\n");
5632                         netif_carrier_off(netdev);
5633
5634                         /* link state has changed, schedule phy info update */
5635                         if (!test_bit(__IGC_DOWN, &adapter->state))
5636                                 mod_timer(&adapter->phy_info_timer,
5637                                           round_jiffies(jiffies + 2 * HZ));
5638
5639                         pm_schedule_suspend(netdev->dev.parent,
5640                                             MSEC_PER_SEC * 5);
5641                 }
5642         }
5643
5644         spin_lock(&adapter->stats64_lock);
5645         igc_update_stats(adapter);
5646         spin_unlock(&adapter->stats64_lock);
5647
5648         for (i = 0; i < adapter->num_tx_queues; i++) {
5649                 struct igc_ring *tx_ring = adapter->tx_ring[i];
5650
5651                 if (!netif_carrier_ok(netdev)) {
5652                         /* We've lost link, so the controller stops DMA,
5653                          * but we've got queued Tx work that's never going
5654                          * to get done, so reset controller to flush Tx.
5655                          * (Do the reset outside of interrupt context).
5656                          */
5657                         if (igc_desc_unused(tx_ring) + 1 < tx_ring->count) {
5658                                 adapter->tx_timeout_count++;
5659                                 schedule_work(&adapter->reset_task);
5660                                 /* return immediately since reset is imminent */
5661                                 return;
5662                         }
5663                 }
5664
5665                 /* Force detection of hung controller every watchdog period */
5666                 set_bit(IGC_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags);
5667         }
5668
5669         /* Cause software interrupt to ensure Rx ring is cleaned */
5670         if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5671                 u32 eics = 0;
5672
5673                 for (i = 0; i < adapter->num_q_vectors; i++)
5674                         eics |= adapter->q_vector[i]->eims_value;
5675                 wr32(IGC_EICS, eics);
5676         } else {
5677                 wr32(IGC_ICS, IGC_ICS_RXDMT0);
5678         }
5679
5680         igc_ptp_tx_hang(adapter);
5681
5682         /* Reset the timer */
5683         if (!test_bit(__IGC_DOWN, &adapter->state)) {
5684                 if (adapter->flags & IGC_FLAG_NEED_LINK_UPDATE)
5685                         mod_timer(&adapter->watchdog_timer,
5686                                   round_jiffies(jiffies +  HZ));
5687                 else
5688                         mod_timer(&adapter->watchdog_timer,
5689                                   round_jiffies(jiffies + 2 * HZ));
5690         }
5691 }
5692
5693 /**
5694  * igc_intr_msi - Interrupt Handler
5695  * @irq: interrupt number
5696  * @data: pointer to a network interface device structure
5697  */
5698 static irqreturn_t igc_intr_msi(int irq, void *data)
5699 {
5700         struct igc_adapter *adapter = data;
5701         struct igc_q_vector *q_vector = adapter->q_vector[0];
5702         struct igc_hw *hw = &adapter->hw;
5703         /* read ICR disables interrupts using IAM */
5704         u32 icr = rd32(IGC_ICR);
5705
5706         igc_write_itr(q_vector);
5707
5708         if (icr & IGC_ICR_DRSTA)
5709                 schedule_work(&adapter->reset_task);
5710
5711         if (icr & IGC_ICR_DOUTSYNC) {
5712                 /* HW is reporting DMA is out of sync */
5713                 adapter->stats.doosync++;
5714         }
5715
5716         if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5717                 hw->mac.get_link_status = true;
5718                 if (!test_bit(__IGC_DOWN, &adapter->state))
5719                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5720         }
5721
5722         if (icr & IGC_ICR_TS)
5723                 igc_tsync_interrupt(adapter);
5724
5725         napi_schedule(&q_vector->napi);
5726
5727         return IRQ_HANDLED;
5728 }
5729
5730 /**
5731  * igc_intr - Legacy Interrupt Handler
5732  * @irq: interrupt number
5733  * @data: pointer to a network interface device structure
5734  */
5735 static irqreturn_t igc_intr(int irq, void *data)
5736 {
5737         struct igc_adapter *adapter = data;
5738         struct igc_q_vector *q_vector = adapter->q_vector[0];
5739         struct igc_hw *hw = &adapter->hw;
5740         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5741          * need for the IMC write
5742          */
5743         u32 icr = rd32(IGC_ICR);
5744
5745         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5746          * not set, then the adapter didn't send an interrupt
5747          */
5748         if (!(icr & IGC_ICR_INT_ASSERTED))
5749                 return IRQ_NONE;
5750
5751         igc_write_itr(q_vector);
5752
5753         if (icr & IGC_ICR_DRSTA)
5754                 schedule_work(&adapter->reset_task);
5755
5756         if (icr & IGC_ICR_DOUTSYNC) {
5757                 /* HW is reporting DMA is out of sync */
5758                 adapter->stats.doosync++;
5759         }
5760
5761         if (icr & (IGC_ICR_RXSEQ | IGC_ICR_LSC)) {
5762                 hw->mac.get_link_status = true;
5763                 /* guard against interrupt when we're going down */
5764                 if (!test_bit(__IGC_DOWN, &adapter->state))
5765                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5766         }
5767
5768         if (icr & IGC_ICR_TS)
5769                 igc_tsync_interrupt(adapter);
5770
5771         napi_schedule(&q_vector->napi);
5772
5773         return IRQ_HANDLED;
5774 }
5775
5776 static void igc_free_irq(struct igc_adapter *adapter)
5777 {
5778         if (adapter->msix_entries) {
5779                 int vector = 0, i;
5780
5781                 free_irq(adapter->msix_entries[vector++].vector, adapter);
5782
5783                 for (i = 0; i < adapter->num_q_vectors; i++)
5784                         free_irq(adapter->msix_entries[vector++].vector,
5785                                  adapter->q_vector[i]);
5786         } else {
5787                 free_irq(adapter->pdev->irq, adapter);
5788         }
5789 }
5790
5791 /**
5792  * igc_request_irq - initialize interrupts
5793  * @adapter: Pointer to adapter structure
5794  *
5795  * Attempts to configure interrupts using the best available
5796  * capabilities of the hardware and kernel.
5797  */
5798 static int igc_request_irq(struct igc_adapter *adapter)
5799 {
5800         struct net_device *netdev = adapter->netdev;
5801         struct pci_dev *pdev = adapter->pdev;
5802         int err = 0;
5803
5804         if (adapter->flags & IGC_FLAG_HAS_MSIX) {
5805                 err = igc_request_msix(adapter);
5806                 if (!err)
5807                         goto request_done;
5808                 /* fall back to MSI */
5809                 igc_free_all_tx_resources(adapter);
5810                 igc_free_all_rx_resources(adapter);
5811
5812                 igc_clear_interrupt_scheme(adapter);
5813                 err = igc_init_interrupt_scheme(adapter, false);
5814                 if (err)
5815                         goto request_done;
5816                 igc_setup_all_tx_resources(adapter);
5817                 igc_setup_all_rx_resources(adapter);
5818                 igc_configure(adapter);
5819         }
5820
5821         igc_assign_vector(adapter->q_vector[0], 0);
5822
5823         if (adapter->flags & IGC_FLAG_HAS_MSI) {
5824                 err = request_irq(pdev->irq, &igc_intr_msi, 0,
5825                                   netdev->name, adapter);
5826                 if (!err)
5827                         goto request_done;
5828
5829                 /* fall back to legacy interrupts */
5830                 igc_reset_interrupt_capability(adapter);
5831                 adapter->flags &= ~IGC_FLAG_HAS_MSI;
5832         }
5833
5834         err = request_irq(pdev->irq, &igc_intr, IRQF_SHARED,
5835                           netdev->name, adapter);
5836
5837         if (err)
5838                 netdev_err(netdev, "Error %d getting interrupt\n", err);
5839
5840 request_done:
5841         return err;
5842 }
5843
5844 /**
5845  * __igc_open - Called when a network interface is made active
5846  * @netdev: network interface device structure
5847  * @resuming: boolean indicating if the device is resuming
5848  *
5849  * Returns 0 on success, negative value on failure
5850  *
5851  * The open entry point is called when a network interface is made
5852  * active by the system (IFF_UP).  At this point all resources needed
5853  * for transmit and receive operations are allocated, the interrupt
5854  * handler is registered with the OS, the watchdog timer is started,
5855  * and the stack is notified that the interface is ready.
5856  */
5857 static int __igc_open(struct net_device *netdev, bool resuming)
5858 {
5859         struct igc_adapter *adapter = netdev_priv(netdev);
5860         struct pci_dev *pdev = adapter->pdev;
5861         struct igc_hw *hw = &adapter->hw;
5862         int err = 0;
5863         int i = 0;
5864
5865         /* disallow open during test */
5866
5867         if (test_bit(__IGC_TESTING, &adapter->state)) {
5868                 WARN_ON(resuming);
5869                 return -EBUSY;
5870         }
5871
5872         if (!resuming)
5873                 pm_runtime_get_sync(&pdev->dev);
5874
5875         netif_carrier_off(netdev);
5876
5877         /* allocate transmit descriptors */
5878         err = igc_setup_all_tx_resources(adapter);
5879         if (err)
5880                 goto err_setup_tx;
5881
5882         /* allocate receive descriptors */
5883         err = igc_setup_all_rx_resources(adapter);
5884         if (err)
5885                 goto err_setup_rx;
5886
5887         igc_power_up_link(adapter);
5888
5889         igc_configure(adapter);
5890
5891         err = igc_request_irq(adapter);
5892         if (err)
5893                 goto err_req_irq;
5894
5895         /* Notify the stack of the actual queue counts. */
5896         err = netif_set_real_num_tx_queues(netdev, adapter->num_tx_queues);
5897         if (err)
5898                 goto err_set_queues;
5899
5900         err = netif_set_real_num_rx_queues(netdev, adapter->num_rx_queues);
5901         if (err)
5902                 goto err_set_queues;
5903
5904         clear_bit(__IGC_DOWN, &adapter->state);
5905
5906         for (i = 0; i < adapter->num_q_vectors; i++)
5907                 napi_enable(&adapter->q_vector[i]->napi);
5908
5909         /* Clear any pending interrupts. */
5910         rd32(IGC_ICR);
5911         igc_irq_enable(adapter);
5912
5913         if (!resuming)
5914                 pm_runtime_put(&pdev->dev);
5915
5916         netif_tx_start_all_queues(netdev);
5917
5918         /* start the watchdog. */
5919         hw->mac.get_link_status = true;
5920         schedule_work(&adapter->watchdog_task);
5921
5922         return IGC_SUCCESS;
5923
5924 err_set_queues:
5925         igc_free_irq(adapter);
5926 err_req_irq:
5927         igc_release_hw_control(adapter);
5928         igc_power_down_phy_copper_base(&adapter->hw);
5929         igc_free_all_rx_resources(adapter);
5930 err_setup_rx:
5931         igc_free_all_tx_resources(adapter);
5932 err_setup_tx:
5933         igc_reset(adapter);
5934         if (!resuming)
5935                 pm_runtime_put(&pdev->dev);
5936
5937         return err;
5938 }
5939
5940 int igc_open(struct net_device *netdev)
5941 {
5942         return __igc_open(netdev, false);
5943 }
5944
5945 /**
5946  * __igc_close - Disables a network interface
5947  * @netdev: network interface device structure
5948  * @suspending: boolean indicating the device is suspending
5949  *
5950  * Returns 0, this is not allowed to fail
5951  *
5952  * The close entry point is called when an interface is de-activated
5953  * by the OS.  The hardware is still under the driver's control, but
5954  * needs to be disabled.  A global MAC reset is issued to stop the
5955  * hardware, and all transmit and receive resources are freed.
5956  */
5957 static int __igc_close(struct net_device *netdev, bool suspending)
5958 {
5959         struct igc_adapter *adapter = netdev_priv(netdev);
5960         struct pci_dev *pdev = adapter->pdev;
5961
5962         WARN_ON(test_bit(__IGC_RESETTING, &adapter->state));
5963
5964         if (!suspending)
5965                 pm_runtime_get_sync(&pdev->dev);
5966
5967         igc_down(adapter);
5968
5969         igc_release_hw_control(adapter);
5970
5971         igc_free_irq(adapter);
5972
5973         igc_free_all_tx_resources(adapter);
5974         igc_free_all_rx_resources(adapter);
5975
5976         if (!suspending)
5977                 pm_runtime_put_sync(&pdev->dev);
5978
5979         return 0;
5980 }
5981
5982 int igc_close(struct net_device *netdev)
5983 {
5984         if (netif_device_present(netdev) || netdev->dismantle)
5985                 return __igc_close(netdev, false);
5986         return 0;
5987 }
5988
5989 /**
5990  * igc_ioctl - Access the hwtstamp interface
5991  * @netdev: network interface device structure
5992  * @ifr: interface request data
5993  * @cmd: ioctl command
5994  **/
5995 static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5996 {
5997         switch (cmd) {
5998         case SIOCGHWTSTAMP:
5999                 return igc_ptp_get_ts_config(netdev, ifr);
6000         case SIOCSHWTSTAMP:
6001                 return igc_ptp_set_ts_config(netdev, ifr);
6002         default:
6003                 return -EOPNOTSUPP;
6004         }
6005 }
6006
6007 static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
6008                                       bool enable)
6009 {
6010         struct igc_ring *ring;
6011
6012         if (queue < 0 || queue >= adapter->num_tx_queues)
6013                 return -EINVAL;
6014
6015         ring = adapter->tx_ring[queue];
6016         ring->launchtime_enable = enable;
6017
6018         return 0;
6019 }
6020
6021 static bool is_base_time_past(ktime_t base_time, const struct timespec64 *now)
6022 {
6023         struct timespec64 b;
6024
6025         b = ktime_to_timespec64(base_time);
6026
6027         return timespec64_compare(now, &b) > 0;
6028 }
6029
6030 static bool validate_schedule(struct igc_adapter *adapter,
6031                               const struct tc_taprio_qopt_offload *qopt)
6032 {
6033         int queue_uses[IGC_MAX_TX_QUEUES] = { };
6034         struct igc_hw *hw = &adapter->hw;
6035         struct timespec64 now;
6036         size_t n;
6037
6038         if (qopt->cycle_time_extension)
6039                 return false;
6040
6041         igc_ptp_read(adapter, &now);
6042
6043         /* If we program the controller's BASET registers with a time
6044          * in the future, it will hold all the packets until that
6045          * time, causing a lot of TX Hangs, so to avoid that, we
6046          * reject schedules that would start in the future.
6047          * Note: Limitation above is no longer in i226.
6048          */
6049         if (!is_base_time_past(qopt->base_time, &now) &&
6050             igc_is_device_id_i225(hw))
6051                 return false;
6052
6053         for (n = 0; n < qopt->num_entries; n++) {
6054                 const struct tc_taprio_sched_entry *e, *prev;
6055                 int i;
6056
6057                 prev = n ? &qopt->entries[n - 1] : NULL;
6058                 e = &qopt->entries[n];
6059
6060                 /* i225 only supports "global" frame preemption
6061                  * settings.
6062                  */
6063                 if (e->command != TC_TAPRIO_CMD_SET_GATES)
6064                         return false;
6065
6066                 for (i = 0; i < adapter->num_tx_queues; i++)
6067                         if (e->gate_mask & BIT(i)) {
6068                                 queue_uses[i]++;
6069
6070                                 /* There are limitations: A single queue cannot
6071                                  * be opened and closed multiple times per cycle
6072                                  * unless the gate stays open. Check for it.
6073                                  */
6074                                 if (queue_uses[i] > 1 &&
6075                                     !(prev->gate_mask & BIT(i)))
6076                                         return false;
6077                         }
6078         }
6079
6080         return true;
6081 }
6082
6083 static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
6084                                      struct tc_etf_qopt_offload *qopt)
6085 {
6086         struct igc_hw *hw = &adapter->hw;
6087         int err;
6088
6089         if (hw->mac.type != igc_i225)
6090                 return -EOPNOTSUPP;
6091
6092         err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
6093         if (err)
6094                 return err;
6095
6096         return igc_tsn_offload_apply(adapter);
6097 }
6098
6099 static int igc_tsn_clear_schedule(struct igc_adapter *adapter)
6100 {
6101         int i;
6102
6103         adapter->base_time = 0;
6104         adapter->cycle_time = NSEC_PER_SEC;
6105         adapter->qbv_config_change_errors = 0;
6106
6107         for (i = 0; i < adapter->num_tx_queues; i++) {
6108                 struct igc_ring *ring = adapter->tx_ring[i];
6109
6110                 ring->start_time = 0;
6111                 ring->end_time = NSEC_PER_SEC;
6112                 ring->max_sdu = 0;
6113         }
6114
6115         return 0;
6116 }
6117
6118 static int igc_save_qbv_schedule(struct igc_adapter *adapter,
6119                                  struct tc_taprio_qopt_offload *qopt)
6120 {
6121         bool queue_configured[IGC_MAX_TX_QUEUES] = { };
6122         struct igc_hw *hw = &adapter->hw;
6123         u32 start_time = 0, end_time = 0;
6124         size_t n;
6125         int i;
6126
6127         switch (qopt->cmd) {
6128         case TAPRIO_CMD_REPLACE:
6129                 adapter->qbv_enable = true;
6130                 break;
6131         case TAPRIO_CMD_DESTROY:
6132                 adapter->qbv_enable = false;
6133                 break;
6134         default:
6135                 return -EOPNOTSUPP;
6136         }
6137
6138         if (!adapter->qbv_enable)
6139                 return igc_tsn_clear_schedule(adapter);
6140
6141         if (qopt->base_time < 0)
6142                 return -ERANGE;
6143
6144         if (igc_is_device_id_i225(hw) && adapter->base_time)
6145                 return -EALREADY;
6146
6147         if (!validate_schedule(adapter, qopt))
6148                 return -EINVAL;
6149
6150         adapter->cycle_time = qopt->cycle_time;
6151         adapter->base_time = qopt->base_time;
6152
6153         for (n = 0; n < qopt->num_entries; n++) {
6154                 struct tc_taprio_sched_entry *e = &qopt->entries[n];
6155
6156                 end_time += e->interval;
6157
6158                 /* If any of the conditions below are true, we need to manually
6159                  * control the end time of the cycle.
6160                  * 1. Qbv users can specify a cycle time that is not equal
6161                  * to the total GCL intervals. Hence, recalculation is
6162                  * necessary here to exclude the time interval that
6163                  * exceeds the cycle time.
6164                  * 2. According to IEEE Std. 802.1Q-2018 section 8.6.9.2,
6165                  * once the end of the list is reached, it will switch
6166                  * to the END_OF_CYCLE state and leave the gates in the
6167                  * same state until the next cycle is started.
6168                  */
6169                 if (end_time > adapter->cycle_time ||
6170                     n + 1 == qopt->num_entries)
6171                         end_time = adapter->cycle_time;
6172
6173                 for (i = 0; i < adapter->num_tx_queues; i++) {
6174                         struct igc_ring *ring = adapter->tx_ring[i];
6175
6176                         if (!(e->gate_mask & BIT(i)))
6177                                 continue;
6178
6179                         /* Check whether a queue stays open for more than one
6180                          * entry. If so, keep the start and advance the end
6181                          * time.
6182                          */
6183                         if (!queue_configured[i])
6184                                 ring->start_time = start_time;
6185                         ring->end_time = end_time;
6186
6187                         queue_configured[i] = true;
6188                 }
6189
6190                 start_time += e->interval;
6191         }
6192
6193         /* Check whether a queue gets configured.
6194          * If not, set the start and end time to be end time.
6195          */
6196         for (i = 0; i < adapter->num_tx_queues; i++) {
6197                 if (!queue_configured[i]) {
6198                         struct igc_ring *ring = adapter->tx_ring[i];
6199
6200                         ring->start_time = end_time;
6201                         ring->end_time = end_time;
6202                 }
6203         }
6204
6205         for (i = 0; i < adapter->num_tx_queues; i++) {
6206                 struct igc_ring *ring = adapter->tx_ring[i];
6207                 struct net_device *dev = adapter->netdev;
6208
6209                 if (qopt->max_sdu[i])
6210                         ring->max_sdu = qopt->max_sdu[i] + dev->hard_header_len;
6211                 else
6212                         ring->max_sdu = 0;
6213         }
6214
6215         return 0;
6216 }
6217
6218 static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
6219                                          struct tc_taprio_qopt_offload *qopt)
6220 {
6221         struct igc_hw *hw = &adapter->hw;
6222         int err;
6223
6224         if (hw->mac.type != igc_i225)
6225                 return -EOPNOTSUPP;
6226
6227         err = igc_save_qbv_schedule(adapter, qopt);
6228         if (err)
6229                 return err;
6230
6231         return igc_tsn_offload_apply(adapter);
6232 }
6233
6234 static int igc_save_cbs_params(struct igc_adapter *adapter, int queue,
6235                                bool enable, int idleslope, int sendslope,
6236                                int hicredit, int locredit)
6237 {
6238         bool cbs_status[IGC_MAX_SR_QUEUES] = { false };
6239         struct net_device *netdev = adapter->netdev;
6240         struct igc_ring *ring;
6241         int i;
6242
6243         /* i225 has two sets of credit-based shaper logic.
6244          * Supporting it only on the top two priority queues
6245          */
6246         if (queue < 0 || queue > 1)
6247                 return -EINVAL;
6248
6249         ring = adapter->tx_ring[queue];
6250
6251         for (i = 0; i < IGC_MAX_SR_QUEUES; i++)
6252                 if (adapter->tx_ring[i])
6253                         cbs_status[i] = adapter->tx_ring[i]->cbs_enable;
6254
6255         /* CBS should be enabled on the highest priority queue first in order
6256          * for the CBS algorithm to operate as intended.
6257          */
6258         if (enable) {
6259                 if (queue == 1 && !cbs_status[0]) {
6260                         netdev_err(netdev,
6261                                    "Enabling CBS on queue1 before queue0\n");
6262                         return -EINVAL;
6263                 }
6264         } else {
6265                 if (queue == 0 && cbs_status[1]) {
6266                         netdev_err(netdev,
6267                                    "Disabling CBS on queue0 before queue1\n");
6268                         return -EINVAL;
6269                 }
6270         }
6271
6272         ring->cbs_enable = enable;
6273         ring->idleslope = idleslope;
6274         ring->sendslope = sendslope;
6275         ring->hicredit = hicredit;
6276         ring->locredit = locredit;
6277
6278         return 0;
6279 }
6280
6281 static int igc_tsn_enable_cbs(struct igc_adapter *adapter,
6282                               struct tc_cbs_qopt_offload *qopt)
6283 {
6284         struct igc_hw *hw = &adapter->hw;
6285         int err;
6286
6287         if (hw->mac.type != igc_i225)
6288                 return -EOPNOTSUPP;
6289
6290         if (qopt->queue < 0 || qopt->queue > 1)
6291                 return -EINVAL;
6292
6293         err = igc_save_cbs_params(adapter, qopt->queue, qopt->enable,
6294                                   qopt->idleslope, qopt->sendslope,
6295                                   qopt->hicredit, qopt->locredit);
6296         if (err)
6297                 return err;
6298
6299         return igc_tsn_offload_apply(adapter);
6300 }
6301
6302 static int igc_tc_query_caps(struct igc_adapter *adapter,
6303                              struct tc_query_caps_base *base)
6304 {
6305         struct igc_hw *hw = &adapter->hw;
6306
6307         switch (base->type) {
6308         case TC_SETUP_QDISC_TAPRIO: {
6309                 struct tc_taprio_caps *caps = base->caps;
6310
6311                 caps->broken_mqprio = true;
6312
6313                 if (hw->mac.type == igc_i225) {
6314                         caps->supports_queue_max_sdu = true;
6315                         caps->gate_mask_per_txq = true;
6316                 }
6317
6318                 return 0;
6319         }
6320         default:
6321                 return -EOPNOTSUPP;
6322         }
6323 }
6324
6325 static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
6326                         void *type_data)
6327 {
6328         struct igc_adapter *adapter = netdev_priv(dev);
6329
6330         switch (type) {
6331         case TC_QUERY_CAPS:
6332                 return igc_tc_query_caps(adapter, type_data);
6333         case TC_SETUP_QDISC_TAPRIO:
6334                 return igc_tsn_enable_qbv_scheduling(adapter, type_data);
6335
6336         case TC_SETUP_QDISC_ETF:
6337                 return igc_tsn_enable_launchtime(adapter, type_data);
6338
6339         case TC_SETUP_QDISC_CBS:
6340                 return igc_tsn_enable_cbs(adapter, type_data);
6341
6342         default:
6343                 return -EOPNOTSUPP;
6344         }
6345 }
6346
6347 static int igc_bpf(struct net_device *dev, struct netdev_bpf *bpf)
6348 {
6349         struct igc_adapter *adapter = netdev_priv(dev);
6350
6351         switch (bpf->command) {
6352         case XDP_SETUP_PROG:
6353                 return igc_xdp_set_prog(adapter, bpf->prog, bpf->extack);
6354         case XDP_SETUP_XSK_POOL:
6355                 return igc_xdp_setup_pool(adapter, bpf->xsk.pool,
6356                                           bpf->xsk.queue_id);
6357         default:
6358                 return -EOPNOTSUPP;
6359         }
6360 }
6361
6362 static int igc_xdp_xmit(struct net_device *dev, int num_frames,
6363                         struct xdp_frame **frames, u32 flags)
6364 {
6365         struct igc_adapter *adapter = netdev_priv(dev);
6366         int cpu = smp_processor_id();
6367         struct netdev_queue *nq;
6368         struct igc_ring *ring;
6369         int i, drops;
6370
6371         if (unlikely(test_bit(__IGC_DOWN, &adapter->state)))
6372                 return -ENETDOWN;
6373
6374         if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
6375                 return -EINVAL;
6376
6377         ring = igc_xdp_get_tx_ring(adapter, cpu);
6378         nq = txring_txq(ring);
6379
6380         __netif_tx_lock(nq, cpu);
6381
6382         /* Avoid transmit queue timeout since we share it with the slow path */
6383         txq_trans_cond_update(nq);
6384
6385         drops = 0;
6386         for (i = 0; i < num_frames; i++) {
6387                 int err;
6388                 struct xdp_frame *xdpf = frames[i];
6389
6390                 err = igc_xdp_init_tx_descriptor(ring, xdpf);
6391                 if (err) {
6392                         xdp_return_frame_rx_napi(xdpf);
6393                         drops++;
6394                 }
6395         }
6396
6397         if (flags & XDP_XMIT_FLUSH)
6398                 igc_flush_tx_descriptors(ring);
6399
6400         __netif_tx_unlock(nq);
6401
6402         return num_frames - drops;
6403 }
6404
6405 static void igc_trigger_rxtxq_interrupt(struct igc_adapter *adapter,
6406                                         struct igc_q_vector *q_vector)
6407 {
6408         struct igc_hw *hw = &adapter->hw;
6409         u32 eics = 0;
6410
6411         eics |= q_vector->eims_value;
6412         wr32(IGC_EICS, eics);
6413 }
6414
6415 int igc_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags)
6416 {
6417         struct igc_adapter *adapter = netdev_priv(dev);
6418         struct igc_q_vector *q_vector;
6419         struct igc_ring *ring;
6420
6421         if (test_bit(__IGC_DOWN, &adapter->state))
6422                 return -ENETDOWN;
6423
6424         if (!igc_xdp_is_enabled(adapter))
6425                 return -ENXIO;
6426
6427         if (queue_id >= adapter->num_rx_queues)
6428                 return -EINVAL;
6429
6430         ring = adapter->rx_ring[queue_id];
6431
6432         if (!ring->xsk_pool)
6433                 return -ENXIO;
6434
6435         q_vector = adapter->q_vector[queue_id];
6436         if (!napi_if_scheduled_mark_missed(&q_vector->napi))
6437                 igc_trigger_rxtxq_interrupt(adapter, q_vector);
6438
6439         return 0;
6440 }
6441
6442 static const struct net_device_ops igc_netdev_ops = {
6443         .ndo_open               = igc_open,
6444         .ndo_stop               = igc_close,
6445         .ndo_start_xmit         = igc_xmit_frame,
6446         .ndo_set_rx_mode        = igc_set_rx_mode,
6447         .ndo_set_mac_address    = igc_set_mac,
6448         .ndo_change_mtu         = igc_change_mtu,
6449         .ndo_tx_timeout         = igc_tx_timeout,
6450         .ndo_get_stats64        = igc_get_stats64,
6451         .ndo_fix_features       = igc_fix_features,
6452         .ndo_set_features       = igc_set_features,
6453         .ndo_features_check     = igc_features_check,
6454         .ndo_eth_ioctl          = igc_ioctl,
6455         .ndo_setup_tc           = igc_setup_tc,
6456         .ndo_bpf                = igc_bpf,
6457         .ndo_xdp_xmit           = igc_xdp_xmit,
6458         .ndo_xsk_wakeup         = igc_xsk_wakeup,
6459 };
6460
6461 /* PCIe configuration access */
6462 void igc_read_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6463 {
6464         struct igc_adapter *adapter = hw->back;
6465
6466         pci_read_config_word(adapter->pdev, reg, value);
6467 }
6468
6469 void igc_write_pci_cfg(struct igc_hw *hw, u32 reg, u16 *value)
6470 {
6471         struct igc_adapter *adapter = hw->back;
6472
6473         pci_write_config_word(adapter->pdev, reg, *value);
6474 }
6475
6476 s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6477 {
6478         struct igc_adapter *adapter = hw->back;
6479
6480         if (!pci_is_pcie(adapter->pdev))
6481                 return -IGC_ERR_CONFIG;
6482
6483         pcie_capability_read_word(adapter->pdev, reg, value);
6484
6485         return IGC_SUCCESS;
6486 }
6487
6488 s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
6489 {
6490         struct igc_adapter *adapter = hw->back;
6491
6492         if (!pci_is_pcie(adapter->pdev))
6493                 return -IGC_ERR_CONFIG;
6494
6495         pcie_capability_write_word(adapter->pdev, reg, *value);
6496
6497         return IGC_SUCCESS;
6498 }
6499
6500 u32 igc_rd32(struct igc_hw *hw, u32 reg)
6501 {
6502         struct igc_adapter *igc = container_of(hw, struct igc_adapter, hw);
6503         u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
6504         u32 value = 0;
6505
6506         if (IGC_REMOVED(hw_addr))
6507                 return ~value;
6508
6509         value = readl(&hw_addr[reg]);
6510
6511         /* reads should not return all F's */
6512         if (!(~value) && (!reg || !(~readl(hw_addr)))) {
6513                 struct net_device *netdev = igc->netdev;
6514
6515                 hw->hw_addr = NULL;
6516                 netif_device_detach(netdev);
6517                 netdev_err(netdev, "PCIe link lost, device now detached\n");
6518                 WARN(pci_device_is_present(igc->pdev),
6519                      "igc: Failed to read reg 0x%x!\n", reg);
6520         }
6521
6522         return value;
6523 }
6524
6525 /* Mapping HW RSS Type to enum xdp_rss_hash_type */
6526 static enum xdp_rss_hash_type igc_xdp_rss_type[IGC_RSS_TYPE_MAX_TABLE] = {
6527         [IGC_RSS_TYPE_NO_HASH]          = XDP_RSS_TYPE_L2,
6528         [IGC_RSS_TYPE_HASH_TCP_IPV4]    = XDP_RSS_TYPE_L4_IPV4_TCP,
6529         [IGC_RSS_TYPE_HASH_IPV4]        = XDP_RSS_TYPE_L3_IPV4,
6530         [IGC_RSS_TYPE_HASH_TCP_IPV6]    = XDP_RSS_TYPE_L4_IPV6_TCP,
6531         [IGC_RSS_TYPE_HASH_IPV6_EX]     = XDP_RSS_TYPE_L3_IPV6_EX,
6532         [IGC_RSS_TYPE_HASH_IPV6]        = XDP_RSS_TYPE_L3_IPV6,
6533         [IGC_RSS_TYPE_HASH_TCP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_TCP_EX,
6534         [IGC_RSS_TYPE_HASH_UDP_IPV4]    = XDP_RSS_TYPE_L4_IPV4_UDP,
6535         [IGC_RSS_TYPE_HASH_UDP_IPV6]    = XDP_RSS_TYPE_L4_IPV6_UDP,
6536         [IGC_RSS_TYPE_HASH_UDP_IPV6_EX] = XDP_RSS_TYPE_L4_IPV6_UDP_EX,
6537         [10] = XDP_RSS_TYPE_NONE, /* RSS Type above 9 "Reserved" by HW  */
6538         [11] = XDP_RSS_TYPE_NONE, /* keep array sized for SW bit-mask   */
6539         [12] = XDP_RSS_TYPE_NONE, /* to handle future HW revisons       */
6540         [13] = XDP_RSS_TYPE_NONE,
6541         [14] = XDP_RSS_TYPE_NONE,
6542         [15] = XDP_RSS_TYPE_NONE,
6543 };
6544
6545 static int igc_xdp_rx_hash(const struct xdp_md *_ctx, u32 *hash,
6546                            enum xdp_rss_hash_type *rss_type)
6547 {
6548         const struct igc_xdp_buff *ctx = (void *)_ctx;
6549
6550         if (!(ctx->xdp.rxq->dev->features & NETIF_F_RXHASH))
6551                 return -ENODATA;
6552
6553         *hash = le32_to_cpu(ctx->rx_desc->wb.lower.hi_dword.rss);
6554         *rss_type = igc_xdp_rss_type[igc_rss_type(ctx->rx_desc)];
6555
6556         return 0;
6557 }
6558
6559 static int igc_xdp_rx_timestamp(const struct xdp_md *_ctx, u64 *timestamp)
6560 {
6561         const struct igc_xdp_buff *ctx = (void *)_ctx;
6562
6563         if (igc_test_staterr(ctx->rx_desc, IGC_RXDADV_STAT_TSIP)) {
6564                 *timestamp = ctx->rx_ts;
6565
6566                 return 0;
6567         }
6568
6569         return -ENODATA;
6570 }
6571
6572 static const struct xdp_metadata_ops igc_xdp_metadata_ops = {
6573         .xmo_rx_hash                    = igc_xdp_rx_hash,
6574         .xmo_rx_timestamp               = igc_xdp_rx_timestamp,
6575 };
6576
6577 /**
6578  * igc_probe - Device Initialization Routine
6579  * @pdev: PCI device information struct
6580  * @ent: entry in igc_pci_tbl
6581  *
6582  * Returns 0 on success, negative on failure
6583  *
6584  * igc_probe initializes an adapter identified by a pci_dev structure.
6585  * The OS initialization, configuring the adapter private structure,
6586  * and a hardware reset occur.
6587  */
6588 static int igc_probe(struct pci_dev *pdev,
6589                      const struct pci_device_id *ent)
6590 {
6591         struct igc_adapter *adapter;
6592         struct net_device *netdev;
6593         struct igc_hw *hw;
6594         const struct igc_info *ei = igc_info_tbl[ent->driver_data];
6595         int err;
6596
6597         err = pci_enable_device_mem(pdev);
6598         if (err)
6599                 return err;
6600
6601         err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
6602         if (err) {
6603                 dev_err(&pdev->dev,
6604                         "No usable DMA configuration, aborting\n");
6605                 goto err_dma;
6606         }
6607
6608         err = pci_request_mem_regions(pdev, igc_driver_name);
6609         if (err)
6610                 goto err_pci_reg;
6611
6612         err = pci_enable_ptm(pdev, NULL);
6613         if (err < 0)
6614                 dev_info(&pdev->dev, "PCIe PTM not supported by PCIe bus/controller\n");
6615
6616         pci_set_master(pdev);
6617
6618         err = -ENOMEM;
6619         netdev = alloc_etherdev_mq(sizeof(struct igc_adapter),
6620                                    IGC_MAX_TX_QUEUES);
6621
6622         if (!netdev)
6623                 goto err_alloc_etherdev;
6624
6625         SET_NETDEV_DEV(netdev, &pdev->dev);
6626
6627         pci_set_drvdata(pdev, netdev);
6628         adapter = netdev_priv(netdev);
6629         adapter->netdev = netdev;
6630         adapter->pdev = pdev;
6631         hw = &adapter->hw;
6632         hw->back = adapter;
6633         adapter->port_num = hw->bus.func;
6634         adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE);
6635
6636         err = pci_save_state(pdev);
6637         if (err)
6638                 goto err_ioremap;
6639
6640         err = -EIO;
6641         adapter->io_addr = ioremap(pci_resource_start(pdev, 0),
6642                                    pci_resource_len(pdev, 0));
6643         if (!adapter->io_addr)
6644                 goto err_ioremap;
6645
6646         /* hw->hw_addr can be zeroed, so use adapter->io_addr for unmap */
6647         hw->hw_addr = adapter->io_addr;
6648
6649         netdev->netdev_ops = &igc_netdev_ops;
6650         netdev->xdp_metadata_ops = &igc_xdp_metadata_ops;
6651         igc_ethtool_set_ops(netdev);
6652         netdev->watchdog_timeo = 5 * HZ;
6653
6654         netdev->mem_start = pci_resource_start(pdev, 0);
6655         netdev->mem_end = pci_resource_end(pdev, 0);
6656
6657         /* PCI config space info */
6658         hw->vendor_id = pdev->vendor;
6659         hw->device_id = pdev->device;
6660         hw->revision_id = pdev->revision;
6661         hw->subsystem_vendor_id = pdev->subsystem_vendor;
6662         hw->subsystem_device_id = pdev->subsystem_device;
6663
6664         /* Copy the default MAC and PHY function pointers */
6665         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
6666         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
6667
6668         /* Initialize skew-specific constants */
6669         err = ei->get_invariants(hw);
6670         if (err)
6671                 goto err_sw_init;
6672
6673         /* Add supported features to the features list*/
6674         netdev->features |= NETIF_F_SG;
6675         netdev->features |= NETIF_F_TSO;
6676         netdev->features |= NETIF_F_TSO6;
6677         netdev->features |= NETIF_F_TSO_ECN;
6678         netdev->features |= NETIF_F_RXHASH;
6679         netdev->features |= NETIF_F_RXCSUM;
6680         netdev->features |= NETIF_F_HW_CSUM;
6681         netdev->features |= NETIF_F_SCTP_CRC;
6682         netdev->features |= NETIF_F_HW_TC;
6683
6684 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
6685                                   NETIF_F_GSO_GRE_CSUM | \
6686                                   NETIF_F_GSO_IPXIP4 | \
6687                                   NETIF_F_GSO_IPXIP6 | \
6688                                   NETIF_F_GSO_UDP_TUNNEL | \
6689                                   NETIF_F_GSO_UDP_TUNNEL_CSUM)
6690
6691         netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
6692         netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
6693
6694         /* setup the private structure */
6695         err = igc_sw_init(adapter);
6696         if (err)
6697                 goto err_sw_init;
6698
6699         /* copy netdev features into list of user selectable features */
6700         netdev->hw_features |= NETIF_F_NTUPLE;
6701         netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_TX;
6702         netdev->hw_features |= NETIF_F_HW_VLAN_CTAG_RX;
6703         netdev->hw_features |= netdev->features;
6704
6705         netdev->features |= NETIF_F_HIGHDMA;
6706
6707         netdev->vlan_features |= netdev->features | NETIF_F_TSO_MANGLEID;
6708         netdev->mpls_features |= NETIF_F_HW_CSUM;
6709         netdev->hw_enc_features |= netdev->vlan_features;
6710
6711         netdev->xdp_features = NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
6712                                NETDEV_XDP_ACT_XSK_ZEROCOPY;
6713
6714         /* MTU range: 68 - 9216 */
6715         netdev->min_mtu = ETH_MIN_MTU;
6716         netdev->max_mtu = MAX_STD_JUMBO_FRAME_SIZE;
6717
6718         /* before reading the NVM, reset the controller to put the device in a
6719          * known good starting state
6720          */
6721         hw->mac.ops.reset_hw(hw);
6722
6723         if (igc_get_flash_presence_i225(hw)) {
6724                 if (hw->nvm.ops.validate(hw) < 0) {
6725                         dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
6726                         err = -EIO;
6727                         goto err_eeprom;
6728                 }
6729         }
6730
6731         if (eth_platform_get_mac_address(&pdev->dev, hw->mac.addr)) {
6732                 /* copy the MAC address out of the NVM */
6733                 if (hw->mac.ops.read_mac_addr(hw))
6734                         dev_err(&pdev->dev, "NVM Read Error\n");
6735         }
6736
6737         eth_hw_addr_set(netdev, hw->mac.addr);
6738
6739         if (!is_valid_ether_addr(netdev->dev_addr)) {
6740                 dev_err(&pdev->dev, "Invalid MAC Address\n");
6741                 err = -EIO;
6742                 goto err_eeprom;
6743         }
6744
6745         /* configure RXPBSIZE and TXPBSIZE */
6746         wr32(IGC_RXPBS, I225_RXPBSIZE_DEFAULT);
6747         wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
6748
6749         timer_setup(&adapter->watchdog_timer, igc_watchdog, 0);
6750         timer_setup(&adapter->phy_info_timer, igc_update_phy_info, 0);
6751
6752         INIT_WORK(&adapter->reset_task, igc_reset_task);
6753         INIT_WORK(&adapter->watchdog_task, igc_watchdog_task);
6754
6755         /* Initialize link properties that are user-changeable */
6756         adapter->fc_autoneg = true;
6757         hw->mac.autoneg = true;
6758         hw->phy.autoneg_advertised = 0xaf;
6759
6760         hw->fc.requested_mode = igc_fc_default;
6761         hw->fc.current_mode = igc_fc_default;
6762
6763         /* By default, support wake on port A */
6764         adapter->flags |= IGC_FLAG_WOL_SUPPORTED;
6765
6766         /* initialize the wol settings based on the eeprom settings */
6767         if (adapter->flags & IGC_FLAG_WOL_SUPPORTED)
6768                 adapter->wol |= IGC_WUFC_MAG;
6769
6770         device_set_wakeup_enable(&adapter->pdev->dev,
6771                                  adapter->flags & IGC_FLAG_WOL_SUPPORTED);
6772
6773         igc_ptp_init(adapter);
6774
6775         igc_tsn_clear_schedule(adapter);
6776
6777         /* reset the hardware with the new settings */
6778         igc_reset(adapter);
6779
6780         /* let the f/w know that the h/w is now under the control of the
6781          * driver.
6782          */
6783         igc_get_hw_control(adapter);
6784
6785         strncpy(netdev->name, "eth%d", IFNAMSIZ);
6786         err = register_netdev(netdev);
6787         if (err)
6788                 goto err_register;
6789
6790          /* carrier off reporting is important to ethtool even BEFORE open */
6791         netif_carrier_off(netdev);
6792
6793         /* Check if Media Autosense is enabled */
6794         adapter->ei = *ei;
6795
6796         /* print pcie link status and MAC address */
6797         pcie_print_link_status(pdev);
6798         netdev_info(netdev, "MAC: %pM\n", netdev->dev_addr);
6799
6800         dev_pm_set_driver_flags(&pdev->dev, DPM_FLAG_NO_DIRECT_COMPLETE);
6801         /* Disable EEE for internal PHY devices */
6802         hw->dev_spec._base.eee_enable = false;
6803         adapter->flags &= ~IGC_FLAG_EEE;
6804         igc_set_eee_i225(hw, false, false, false);
6805
6806         pm_runtime_put_noidle(&pdev->dev);
6807
6808         return 0;
6809
6810 err_register:
6811         igc_release_hw_control(adapter);
6812 err_eeprom:
6813         if (!igc_check_reset_block(hw))
6814                 igc_reset_phy(hw);
6815 err_sw_init:
6816         igc_clear_interrupt_scheme(adapter);
6817         iounmap(adapter->io_addr);
6818 err_ioremap:
6819         free_netdev(netdev);
6820 err_alloc_etherdev:
6821         pci_release_mem_regions(pdev);
6822 err_pci_reg:
6823 err_dma:
6824         pci_disable_device(pdev);
6825         return err;
6826 }
6827
6828 /**
6829  * igc_remove - Device Removal Routine
6830  * @pdev: PCI device information struct
6831  *
6832  * igc_remove is called by the PCI subsystem to alert the driver
6833  * that it should release a PCI device.  This could be caused by a
6834  * Hot-Plug event, or because the driver is going to be removed from
6835  * memory.
6836  */
6837 static void igc_remove(struct pci_dev *pdev)
6838 {
6839         struct net_device *netdev = pci_get_drvdata(pdev);
6840         struct igc_adapter *adapter = netdev_priv(netdev);
6841
6842         pm_runtime_get_noresume(&pdev->dev);
6843
6844         igc_flush_nfc_rules(adapter);
6845
6846         igc_ptp_stop(adapter);
6847
6848         pci_disable_ptm(pdev);
6849         pci_clear_master(pdev);
6850
6851         set_bit(__IGC_DOWN, &adapter->state);
6852
6853         del_timer_sync(&adapter->watchdog_timer);
6854         del_timer_sync(&adapter->phy_info_timer);
6855
6856         cancel_work_sync(&adapter->reset_task);
6857         cancel_work_sync(&adapter->watchdog_task);
6858
6859         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6860          * would have already happened in close and is redundant.
6861          */
6862         igc_release_hw_control(adapter);
6863         unregister_netdev(netdev);
6864
6865         igc_clear_interrupt_scheme(adapter);
6866         pci_iounmap(pdev, adapter->io_addr);
6867         pci_release_mem_regions(pdev);
6868
6869         free_netdev(netdev);
6870
6871         pci_disable_device(pdev);
6872 }
6873
6874 static int __igc_shutdown(struct pci_dev *pdev, bool *enable_wake,
6875                           bool runtime)
6876 {
6877         struct net_device *netdev = pci_get_drvdata(pdev);
6878         struct igc_adapter *adapter = netdev_priv(netdev);
6879         u32 wufc = runtime ? IGC_WUFC_LNKC : adapter->wol;
6880         struct igc_hw *hw = &adapter->hw;
6881         u32 ctrl, rctl, status;
6882         bool wake;
6883
6884         rtnl_lock();
6885         netif_device_detach(netdev);
6886
6887         if (netif_running(netdev))
6888                 __igc_close(netdev, true);
6889
6890         igc_ptp_suspend(adapter);
6891
6892         igc_clear_interrupt_scheme(adapter);
6893         rtnl_unlock();
6894
6895         status = rd32(IGC_STATUS);
6896         if (status & IGC_STATUS_LU)
6897                 wufc &= ~IGC_WUFC_LNKC;
6898
6899         if (wufc) {
6900                 igc_setup_rctl(adapter);
6901                 igc_set_rx_mode(netdev);
6902
6903                 /* turn on all-multi mode if wake on multicast is enabled */
6904                 if (wufc & IGC_WUFC_MC) {
6905                         rctl = rd32(IGC_RCTL);
6906                         rctl |= IGC_RCTL_MPE;
6907                         wr32(IGC_RCTL, rctl);
6908                 }
6909
6910                 ctrl = rd32(IGC_CTRL);
6911                 ctrl |= IGC_CTRL_ADVD3WUC;
6912                 wr32(IGC_CTRL, ctrl);
6913
6914                 /* Allow time for pending master requests to run */
6915                 igc_disable_pcie_master(hw);
6916
6917                 wr32(IGC_WUC, IGC_WUC_PME_EN);
6918                 wr32(IGC_WUFC, wufc);
6919         } else {
6920                 wr32(IGC_WUC, 0);
6921                 wr32(IGC_WUFC, 0);
6922         }
6923
6924         wake = wufc || adapter->en_mng_pt;
6925         if (!wake)
6926                 igc_power_down_phy_copper_base(&adapter->hw);
6927         else
6928                 igc_power_up_link(adapter);
6929
6930         if (enable_wake)
6931                 *enable_wake = wake;
6932
6933         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6934          * would have already happened in close and is redundant.
6935          */
6936         igc_release_hw_control(adapter);
6937
6938         pci_disable_device(pdev);
6939
6940         return 0;
6941 }
6942
6943 #ifdef CONFIG_PM
6944 static int __maybe_unused igc_runtime_suspend(struct device *dev)
6945 {
6946         return __igc_shutdown(to_pci_dev(dev), NULL, 1);
6947 }
6948
6949 static void igc_deliver_wake_packet(struct net_device *netdev)
6950 {
6951         struct igc_adapter *adapter = netdev_priv(netdev);
6952         struct igc_hw *hw = &adapter->hw;
6953         struct sk_buff *skb;
6954         u32 wupl;
6955
6956         wupl = rd32(IGC_WUPL) & IGC_WUPL_MASK;
6957
6958         /* WUPM stores only the first 128 bytes of the wake packet.
6959          * Read the packet only if we have the whole thing.
6960          */
6961         if (wupl == 0 || wupl > IGC_WUPM_BYTES)
6962                 return;
6963
6964         skb = netdev_alloc_skb_ip_align(netdev, IGC_WUPM_BYTES);
6965         if (!skb)
6966                 return;
6967
6968         skb_put(skb, wupl);
6969
6970         /* Ensure reads are 32-bit aligned */
6971         wupl = roundup(wupl, 4);
6972
6973         memcpy_fromio(skb->data, hw->hw_addr + IGC_WUPM_REG(0), wupl);
6974
6975         skb->protocol = eth_type_trans(skb, netdev);
6976         netif_rx(skb);
6977 }
6978
6979 static int __maybe_unused igc_resume(struct device *dev)
6980 {
6981         struct pci_dev *pdev = to_pci_dev(dev);
6982         struct net_device *netdev = pci_get_drvdata(pdev);
6983         struct igc_adapter *adapter = netdev_priv(netdev);
6984         struct igc_hw *hw = &adapter->hw;
6985         u32 err, val;
6986
6987         pci_set_power_state(pdev, PCI_D0);
6988         pci_restore_state(pdev);
6989         pci_save_state(pdev);
6990
6991         if (!pci_device_is_present(pdev))
6992                 return -ENODEV;
6993         err = pci_enable_device_mem(pdev);
6994         if (err) {
6995                 netdev_err(netdev, "Cannot enable PCI device from suspend\n");
6996                 return err;
6997         }
6998         pci_set_master(pdev);
6999
7000         pci_enable_wake(pdev, PCI_D3hot, 0);
7001         pci_enable_wake(pdev, PCI_D3cold, 0);
7002
7003         if (igc_init_interrupt_scheme(adapter, true)) {
7004                 netdev_err(netdev, "Unable to allocate memory for queues\n");
7005                 return -ENOMEM;
7006         }
7007
7008         igc_reset(adapter);
7009
7010         /* let the f/w know that the h/w is now under the control of the
7011          * driver.
7012          */
7013         igc_get_hw_control(adapter);
7014
7015         val = rd32(IGC_WUS);
7016         if (val & WAKE_PKT_WUS)
7017                 igc_deliver_wake_packet(netdev);
7018
7019         wr32(IGC_WUS, ~0);
7020
7021         rtnl_lock();
7022         if (!err && netif_running(netdev))
7023                 err = __igc_open(netdev, true);
7024
7025         if (!err)
7026                 netif_device_attach(netdev);
7027         rtnl_unlock();
7028
7029         return err;
7030 }
7031
7032 static int __maybe_unused igc_runtime_resume(struct device *dev)
7033 {
7034         return igc_resume(dev);
7035 }
7036
7037 static int __maybe_unused igc_suspend(struct device *dev)
7038 {
7039         return __igc_shutdown(to_pci_dev(dev), NULL, 0);
7040 }
7041
7042 static int __maybe_unused igc_runtime_idle(struct device *dev)
7043 {
7044         struct net_device *netdev = dev_get_drvdata(dev);
7045         struct igc_adapter *adapter = netdev_priv(netdev);
7046
7047         if (!igc_has_link(adapter))
7048                 pm_schedule_suspend(dev, MSEC_PER_SEC * 5);
7049
7050         return -EBUSY;
7051 }
7052 #endif /* CONFIG_PM */
7053
7054 static void igc_shutdown(struct pci_dev *pdev)
7055 {
7056         bool wake;
7057
7058         __igc_shutdown(pdev, &wake, 0);
7059
7060         if (system_state == SYSTEM_POWER_OFF) {
7061                 pci_wake_from_d3(pdev, wake);
7062                 pci_set_power_state(pdev, PCI_D3hot);
7063         }
7064 }
7065
7066 /**
7067  *  igc_io_error_detected - called when PCI error is detected
7068  *  @pdev: Pointer to PCI device
7069  *  @state: The current PCI connection state
7070  *
7071  *  This function is called after a PCI bus error affecting
7072  *  this device has been detected.
7073  **/
7074 static pci_ers_result_t igc_io_error_detected(struct pci_dev *pdev,
7075                                               pci_channel_state_t state)
7076 {
7077         struct net_device *netdev = pci_get_drvdata(pdev);
7078         struct igc_adapter *adapter = netdev_priv(netdev);
7079
7080         netif_device_detach(netdev);
7081
7082         if (state == pci_channel_io_perm_failure)
7083                 return PCI_ERS_RESULT_DISCONNECT;
7084
7085         if (netif_running(netdev))
7086                 igc_down(adapter);
7087         pci_disable_device(pdev);
7088
7089         /* Request a slot reset. */
7090         return PCI_ERS_RESULT_NEED_RESET;
7091 }
7092
7093 /**
7094  *  igc_io_slot_reset - called after the PCI bus has been reset.
7095  *  @pdev: Pointer to PCI device
7096  *
7097  *  Restart the card from scratch, as if from a cold-boot. Implementation
7098  *  resembles the first-half of the igc_resume routine.
7099  **/
7100 static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
7101 {
7102         struct net_device *netdev = pci_get_drvdata(pdev);
7103         struct igc_adapter *adapter = netdev_priv(netdev);
7104         struct igc_hw *hw = &adapter->hw;
7105         pci_ers_result_t result;
7106
7107         if (pci_enable_device_mem(pdev)) {
7108                 netdev_err(netdev, "Could not re-enable PCI device after reset\n");
7109                 result = PCI_ERS_RESULT_DISCONNECT;
7110         } else {
7111                 pci_set_master(pdev);
7112                 pci_restore_state(pdev);
7113                 pci_save_state(pdev);
7114
7115                 pci_enable_wake(pdev, PCI_D3hot, 0);
7116                 pci_enable_wake(pdev, PCI_D3cold, 0);
7117
7118                 /* In case of PCI error, adapter loses its HW address
7119                  * so we should re-assign it here.
7120                  */
7121                 hw->hw_addr = adapter->io_addr;
7122
7123                 igc_reset(adapter);
7124                 wr32(IGC_WUS, ~0);
7125                 result = PCI_ERS_RESULT_RECOVERED;
7126         }
7127
7128         return result;
7129 }
7130
/**
 *  igc_io_resume - called when traffic can start to flow again.
 *  @pdev: Pointer to PCI device
 *
 *  This callback is called when the error recovery driver tells us that
 *  its OK to resume normal operation. Implementation resembles the
 *  second-half of the igc_resume routine.
 *
 *  RTNL is held while the interface is reopened and re-attached and is
 *  released on every exit path, including the one where igc_open() fails.
 */
static void igc_io_resume(struct pci_dev *pdev)
{
	struct net_device *netdev = pci_get_drvdata(pdev);
	struct igc_adapter *adapter = netdev_priv(netdev);

	rtnl_lock();
	if (netif_running(netdev) && igc_open(netdev)) {
		/* Drop RTNL before bailing out so the lock is not leaked */
		rtnl_unlock();
		netdev_err(netdev, "igc_open failed after reset\n");
		return;
	}

	netif_device_attach(netdev);

	/* let the f/w know that the h/w is now under the control of the
	 * driver.
	 */
	igc_get_hw_control(adapter);
	rtnl_unlock();
}
7160
7161 static const struct pci_error_handlers igc_err_handler = {
7162         .error_detected = igc_io_error_detected,
7163         .slot_reset = igc_io_slot_reset,
7164         .resume = igc_io_resume,
7165 };
7166
#ifdef CONFIG_PM
/* Runtime and system-sleep power-management callbacks */
static const struct dev_pm_ops igc_pm_ops = {
	SET_RUNTIME_PM_OPS(igc_runtime_suspend, igc_runtime_resume,
			   igc_runtime_idle)
	SET_SYSTEM_SLEEP_PM_OPS(igc_suspend, igc_resume)
};
#endif /* CONFIG_PM */
7174
7175 static struct pci_driver igc_driver = {
7176         .name     = igc_driver_name,
7177         .id_table = igc_pci_tbl,
7178         .probe    = igc_probe,
7179         .remove   = igc_remove,
7180 #ifdef CONFIG_PM
7181         .driver.pm = &igc_pm_ops,
7182 #endif
7183         .shutdown = igc_shutdown,
7184         .err_handler = &igc_err_handler,
7185 };
7186
7187 /**
7188  * igc_reinit_queues - return error
7189  * @adapter: pointer to adapter structure
7190  */
7191 int igc_reinit_queues(struct igc_adapter *adapter)
7192 {
7193         struct net_device *netdev = adapter->netdev;
7194         int err = 0;
7195
7196         if (netif_running(netdev))
7197                 igc_close(netdev);
7198
7199         igc_reset_interrupt_capability(adapter);
7200
7201         if (igc_init_interrupt_scheme(adapter, true)) {
7202                 netdev_err(netdev, "Unable to allocate memory for queues\n");
7203                 return -ENOMEM;
7204         }
7205
7206         if (netif_running(netdev))
7207                 err = igc_open(netdev);
7208
7209         return err;
7210 }
7211
7212 /**
7213  * igc_get_hw_dev - return device
7214  * @hw: pointer to hardware structure
7215  *
7216  * used by hardware layer to print debugging information
7217  */
7218 struct net_device *igc_get_hw_dev(struct igc_hw *hw)
7219 {
7220         struct igc_adapter *adapter = hw->back;
7221
7222         return adapter->netdev;
7223 }
7224
7225 static void igc_disable_rx_ring_hw(struct igc_ring *ring)
7226 {
7227         struct igc_hw *hw = &ring->q_vector->adapter->hw;
7228         u8 idx = ring->reg_idx;
7229         u32 rxdctl;
7230
7231         rxdctl = rd32(IGC_RXDCTL(idx));
7232         rxdctl &= ~IGC_RXDCTL_QUEUE_ENABLE;
7233         rxdctl |= IGC_RXDCTL_SWFLUSH;
7234         wr32(IGC_RXDCTL(idx), rxdctl);
7235 }
7236
/* Disable an Rx ring: first stop the queue in hardware, then clean the
 * ring with igc_clean_rx_ring().
 */
void igc_disable_rx_ring(struct igc_ring *ring)
{
	igc_disable_rx_ring_hw(ring);
	igc_clean_rx_ring(ring);
}
7242
7243 void igc_enable_rx_ring(struct igc_ring *ring)
7244 {
7245         struct igc_adapter *adapter = ring->q_vector->adapter;
7246
7247         igc_configure_rx_ring(adapter, ring);
7248
7249         if (ring->xsk_pool)
7250                 igc_alloc_rx_buffers_zc(ring, igc_desc_unused(ring));
7251         else
7252                 igc_alloc_rx_buffers(ring, igc_desc_unused(ring));
7253 }
7254
7255 static void igc_disable_tx_ring_hw(struct igc_ring *ring)
7256 {
7257         struct igc_hw *hw = &ring->q_vector->adapter->hw;
7258         u8 idx = ring->reg_idx;
7259         u32 txdctl;
7260
7261         txdctl = rd32(IGC_TXDCTL(idx));
7262         txdctl &= ~IGC_TXDCTL_QUEUE_ENABLE;
7263         txdctl |= IGC_TXDCTL_SWFLUSH;
7264         wr32(IGC_TXDCTL(idx), txdctl);
7265 }
7266
/* Disable a Tx ring: first stop the queue in hardware, then clean the
 * ring with igc_clean_tx_ring().
 */
void igc_disable_tx_ring(struct igc_ring *ring)
{
	igc_disable_tx_ring_hw(ring);
	igc_clean_tx_ring(ring);
}
7272
7273 void igc_enable_tx_ring(struct igc_ring *ring)
7274 {
7275         struct igc_adapter *adapter = ring->q_vector->adapter;
7276
7277         igc_configure_tx_ring(adapter, ring);
7278 }
7279
7280 /**
7281  * igc_init_module - Driver Registration Routine
7282  *
7283  * igc_init_module is the first routine called when the driver is
7284  * loaded. All it does is register with the PCI subsystem.
7285  */
7286 static int __init igc_init_module(void)
7287 {
7288         int ret;
7289
7290         pr_info("%s\n", igc_driver_string);
7291         pr_info("%s\n", igc_copyright);
7292
7293         ret = pci_register_driver(&igc_driver);
7294         return ret;
7295 }
7296
7297 module_init(igc_init_module);
7298
7299 /**
7300  * igc_exit_module - Driver Exit Cleanup Routine
7301  *
7302  * igc_exit_module is called just before the driver is removed
7303  * from memory.
7304  */
7305 static void __exit igc_exit_module(void)
7306 {
7307         pci_unregister_driver(&igc_driver);
7308 }
7309
7310 module_exit(igc_exit_module);
7311 /* igc_main.c */