1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, 0444);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, 0444);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50 #ifdef CONFIG_BE2NET_BE2
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53 #endif /* CONFIG_BE2NET_BE2 */
54 #ifdef CONFIG_BE2NET_BE3
55         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
56         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
57 #endif /* CONFIG_BE2NET_BE3 */
58 #ifdef CONFIG_BE2NET_LANCER
59         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
60         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
61 #endif /* CONFIG_BE2NET_LANCER */
62 #ifdef CONFIG_BE2NET_SKYHAWK
63         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
64         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
65 #endif /* CONFIG_BE2NET_SKYHAWK */
66         { 0 }
67 };
68 MODULE_DEVICE_TABLE(pci, be_dev_ids);
69
 70 /* Workqueue used by all functions for deferring cmd calls to the adapter */
71 static struct workqueue_struct *be_wq;
72
73 /* UE Status Low CSR */
74 static const char * const ue_status_low_desc[] = {
75         "CEV",
76         "CTX",
77         "DBUF",
78         "ERX",
79         "Host",
80         "MPU",
81         "NDMA",
82         "PTC ",
83         "RDMA ",
84         "RXF ",
85         "RXIPS ",
86         "RXULP0 ",
87         "RXULP1 ",
88         "RXULP2 ",
89         "TIM ",
90         "TPOST ",
91         "TPRE ",
92         "TXIPS ",
93         "TXULP0 ",
94         "TXULP1 ",
95         "UC ",
96         "WDMA ",
97         "TXULP2 ",
98         "HOST1 ",
99         "P0_OB_LINK ",
100         "P1_OB_LINK ",
101         "HOST_GPIO ",
102         "MBOX ",
103         "ERX2 ",
104         "SPARE ",
105         "JTAG ",
106         "MPU_INTPEND "
107 };
108
109 /* UE Status High CSR */
110 static const char * const ue_status_hi_desc[] = {
111         "LPCMEMHOST",
112         "MGMT_MAC",
113         "PCS0ONLINE",
114         "MPU_IRAM",
115         "PCS1ONLINE",
116         "PCTL0",
117         "PCTL1",
118         "PMEM",
119         "RR",
120         "TXPB",
121         "RXPP",
122         "XAUI",
123         "TXP",
124         "ARM",
125         "IPC",
126         "HOST2",
127         "HOST3",
128         "HOST4",
129         "HOST5",
130         "HOST6",
131         "HOST7",
132         "ECRC",
133         "Poison TLP",
134         "NETC",
135         "PERIPH",
136         "LLTXULP",
137         "D2P",
138         "RCON",
139         "LDMA",
140         "LLTXP",
141         "LLTXPB",
142         "Unknown"
143 };
144
145 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
146                                  BE_IF_FLAGS_BROADCAST | \
147                                  BE_IF_FLAGS_MULTICAST | \
148                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
149
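/* Helpers to allocate and free the DMA-coherent ring memory backing a queue */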
150 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
151 {
152         struct be_dma_mem *mem = &q->dma_mem;
153
154         if (mem->va) {
155                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
156                                   mem->dma);
157                 mem->va = NULL;
158         }
159 }
160
161 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
162                           u16 len, u16 entry_size)
163 {
164         struct be_dma_mem *mem = &q->dma_mem;
165
166         memset(q, 0, sizeof(*q));
167         q->len = len;
168         q->entry_size = entry_size;
169         mem->size = len * entry_size;
170         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
171                                       GFP_KERNEL);
172         if (!mem->va)
173                 return -ENOMEM;
174         return 0;
175 }
176
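/* Enable/disable host interrupts by toggling the HOSTINTR bit in the
 * MEMBAR control register, accessed through PCI config space.
 */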
177 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
178 {
179         u32 reg, enabled;
180
181         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
182                               &reg);
183         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
184
185         if (!enabled && enable)
186                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
187         else if (enabled && !enable)
188                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
189         else
190                 return;
191
192         pci_write_config_dword(adapter->pdev,
193                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
194 }
195
196 static void be_intr_set(struct be_adapter *adapter, bool enable)
197 {
198         int status = 0;
199
 200         /* On Lancer, interrupts can't be controlled via this register */
201         if (lancer_chip(adapter))
202                 return;
203
204         if (be_check_error(adapter, BE_ERROR_EEH))
205                 return;
206
207         status = be_cmd_intr_set(adapter, enable);
208         if (status)
209                 be_reg_intr_set(adapter, enable);
210 }
211
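/* Ring the RX queue doorbell with the number of receive buffers posted */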
212 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
213 {
214         u32 val = 0;
215
216         if (be_check_error(adapter, BE_ERROR_HW))
217                 return;
218
219         val |= qid & DB_RQ_RING_ID_MASK;
220         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
221
222         wmb();
223         iowrite32(val, adapter->db + DB_RQ_OFFSET);
224 }
225
226 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
227                           u16 posted)
228 {
229         u32 val = 0;
230
231         if (be_check_error(adapter, BE_ERROR_HW))
232                 return;
233
234         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
235         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
236
237         wmb();
238         iowrite32(val, adapter->db + txo->db_offset);
239 }
240
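/* Ring the event-queue doorbell: optionally re-arm the EQ and clear the
 * interrupt, ack num_popped events and program the interrupt delay encoding.
 */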
241 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
242                          bool arm, bool clear_int, u16 num_popped,
243                          u32 eq_delay_mult_enc)
244 {
245         u32 val = 0;
246
247         val |= qid & DB_EQ_RING_ID_MASK;
248         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
249
250         if (be_check_error(adapter, BE_ERROR_HW))
251                 return;
252
253         if (arm)
254                 val |= 1 << DB_EQ_REARM_SHIFT;
255         if (clear_int)
256                 val |= 1 << DB_EQ_CLR_SHIFT;
257         val |= 1 << DB_EQ_EVNT_SHIFT;
258         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
259         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
260         iowrite32(val, adapter->db + DB_EQ_OFFSET);
261 }
262
263 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
264 {
265         u32 val = 0;
266
267         val |= qid & DB_CQ_RING_ID_MASK;
268         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
269                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
270
271         if (be_check_error(adapter, BE_ERROR_HW))
272                 return;
273
274         if (arm)
275                 val |= 1 << DB_CQ_REARM_SHIFT;
276         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
277         iowrite32(val, adapter->db + DB_CQ_OFFSET);
278 }
279
280 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
281 {
282         int i;
283
284         /* Check if mac has already been added as part of uc-list */
285         for (i = 0; i < adapter->uc_macs; i++) {
286                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
287                         /* mac already added, skip addition */
288                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
289                         return 0;
290                 }
291         }
292
293         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
294                                &adapter->pmac_id[0], 0);
295 }
296
297 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
298 {
299         int i;
300
301         /* Skip deletion if the programmed mac is
302          * being used in uc-list
303          */
304         for (i = 0; i < adapter->uc_macs; i++) {
305                 if (adapter->pmac_id[i + 1] == pmac_id)
306                         return;
307         }
308         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
309 }
310
311 static int be_mac_addr_set(struct net_device *netdev, void *p)
312 {
313         struct be_adapter *adapter = netdev_priv(netdev);
314         struct device *dev = &adapter->pdev->dev;
315         struct sockaddr *addr = p;
316         int status;
317         u8 mac[ETH_ALEN];
318         u32 old_pmac_id = adapter->pmac_id[0];
319
320         if (!is_valid_ether_addr(addr->sa_data))
321                 return -EADDRNOTAVAIL;
322
 323         /* Proceed further only if the user-provided MAC is different
 324          * from the active MAC
 325          */
326         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
327                 return 0;
328
 329         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
 330          * MAC address
 331          */
332         if (BEx_chip(adapter) && be_virtfn(adapter) &&
333             !check_privilege(adapter, BE_PRIV_FILTMGMT))
334                 return -EPERM;
335
336         /* if device is not running, copy MAC to netdev->dev_addr */
337         if (!netif_running(netdev))
338                 goto done;
339
 340         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
 341          * privilege or if the PF did not provision the new MAC address.
 342          * On BE3, this cmd will always fail if the VF doesn't have the
 343          * FILTMGMT privilege. This failure is OK only if the PF has
 344          * programmed the MAC for the VF.
 345          */
346         mutex_lock(&adapter->rx_filter_lock);
347         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
348         if (!status) {
349
350                 /* Delete the old programmed MAC. This call may fail if the
351                  * old MAC was already deleted by the PF driver.
352                  */
353                 if (adapter->pmac_id[0] != old_pmac_id)
354                         be_dev_mac_del(adapter, old_pmac_id);
355         }
356
357         mutex_unlock(&adapter->rx_filter_lock);
358         /* Decide if the new MAC is successfully activated only after
359          * querying the FW
360          */
361         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
362                                        adapter->if_handle, true, 0);
363         if (status)
364                 goto err;
365
 366         /* The MAC change did not happen, either due to lack of privilege
 367          * or because the PF didn't pre-provision the new MAC.
 368          */
369         if (!ether_addr_equal(addr->sa_data, mac)) {
370                 status = -EPERM;
371                 goto err;
372         }
373
374         /* Remember currently programmed MAC */
375         ether_addr_copy(adapter->dev_mac, addr->sa_data);
376 done:
377         ether_addr_copy(netdev->dev_addr, addr->sa_data);
378         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
379         return 0;
380 err:
381         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
382         return status;
383 }
384
385 /* BE2 supports only v0 cmd */
386 static void *hw_stats_from_cmd(struct be_adapter *adapter)
387 {
388         if (BE2_chip(adapter)) {
389                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         } else if (BE3_chip(adapter)) {
393                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
394
395                 return &cmd->hw_stats;
396         } else {
397                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
398
399                 return &cmd->hw_stats;
400         }
401 }
402
403 /* BE2 supports only v0 cmd */
404 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
405 {
406         if (BE2_chip(adapter)) {
407                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         } else if (BE3_chip(adapter)) {
411                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
412
413                 return &hw_stats->erx;
414         } else {
415                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
416
417                 return &hw_stats->erx;
418         }
419 }
420
421 static void populate_be_v0_stats(struct be_adapter *adapter)
422 {
423         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
424         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
425         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
426         struct be_port_rxf_stats_v0 *port_stats =
427                                         &rxf_stats->port[adapter->port_num];
428         struct be_drv_stats *drvs = &adapter->drv_stats;
429
430         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
431         drvs->rx_pause_frames = port_stats->rx_pause_frames;
432         drvs->rx_crc_errors = port_stats->rx_crc_errors;
433         drvs->rx_control_frames = port_stats->rx_control_frames;
434         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
435         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
436         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
437         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
438         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
439         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
440         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
441         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
442         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
443         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
444         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
445         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
446         drvs->rx_dropped_header_too_small =
447                 port_stats->rx_dropped_header_too_small;
448         drvs->rx_address_filtered =
449                                         port_stats->rx_address_filtered +
450                                         port_stats->rx_vlan_filtered;
451         drvs->rx_alignment_symbol_errors =
452                 port_stats->rx_alignment_symbol_errors;
453
454         drvs->tx_pauseframes = port_stats->tx_pauseframes;
455         drvs->tx_controlframes = port_stats->tx_controlframes;
456
457         if (adapter->port_num)
458                 drvs->jabber_events = rxf_stats->port1_jabber_events;
459         else
460                 drvs->jabber_events = rxf_stats->port0_jabber_events;
461         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
462         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
463         drvs->forwarded_packets = rxf_stats->forwarded_packets;
464         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
465         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
466         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
467         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
468 }
469
470 static void populate_be_v1_stats(struct be_adapter *adapter)
471 {
472         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
473         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
474         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
475         struct be_port_rxf_stats_v1 *port_stats =
476                                         &rxf_stats->port[adapter->port_num];
477         struct be_drv_stats *drvs = &adapter->drv_stats;
478
479         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
480         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
481         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
482         drvs->rx_pause_frames = port_stats->rx_pause_frames;
483         drvs->rx_crc_errors = port_stats->rx_crc_errors;
484         drvs->rx_control_frames = port_stats->rx_control_frames;
485         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
486         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
487         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
488         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
489         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
490         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
491         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
492         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
493         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
494         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
495         drvs->rx_dropped_header_too_small =
496                 port_stats->rx_dropped_header_too_small;
497         drvs->rx_input_fifo_overflow_drop =
498                 port_stats->rx_input_fifo_overflow_drop;
499         drvs->rx_address_filtered = port_stats->rx_address_filtered;
500         drvs->rx_alignment_symbol_errors =
501                 port_stats->rx_alignment_symbol_errors;
502         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
503         drvs->tx_pauseframes = port_stats->tx_pauseframes;
504         drvs->tx_controlframes = port_stats->tx_controlframes;
505         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
506         drvs->jabber_events = port_stats->jabber_events;
507         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
508         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
509         drvs->forwarded_packets = rxf_stats->forwarded_packets;
510         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
511         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
512         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
513         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
514 }
515
516 static void populate_be_v2_stats(struct be_adapter *adapter)
517 {
518         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
519         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
520         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
521         struct be_port_rxf_stats_v2 *port_stats =
522                                         &rxf_stats->port[adapter->port_num];
523         struct be_drv_stats *drvs = &adapter->drv_stats;
524
525         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
526         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
527         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
528         drvs->rx_pause_frames = port_stats->rx_pause_frames;
529         drvs->rx_crc_errors = port_stats->rx_crc_errors;
530         drvs->rx_control_frames = port_stats->rx_control_frames;
531         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
532         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
533         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
534         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
535         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
536         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
537         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
538         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
539         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
540         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
541         drvs->rx_dropped_header_too_small =
542                 port_stats->rx_dropped_header_too_small;
543         drvs->rx_input_fifo_overflow_drop =
544                 port_stats->rx_input_fifo_overflow_drop;
545         drvs->rx_address_filtered = port_stats->rx_address_filtered;
546         drvs->rx_alignment_symbol_errors =
547                 port_stats->rx_alignment_symbol_errors;
548         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
549         drvs->tx_pauseframes = port_stats->tx_pauseframes;
550         drvs->tx_controlframes = port_stats->tx_controlframes;
551         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
552         drvs->jabber_events = port_stats->jabber_events;
553         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
554         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
555         drvs->forwarded_packets = rxf_stats->forwarded_packets;
556         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
557         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
558         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
559         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
560         if (be_roce_supported(adapter)) {
561                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
562                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
563                 drvs->rx_roce_frames = port_stats->roce_frames_received;
564                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
565                 drvs->roce_drops_payload_len =
566                         port_stats->roce_drops_payload_len;
567         }
568 }
569
570 static void populate_lancer_stats(struct be_adapter *adapter)
571 {
572         struct be_drv_stats *drvs = &adapter->drv_stats;
573         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
574
575         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
576         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
577         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
578         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
579         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
580         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
581         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
582         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
583         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
584         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
585         drvs->rx_dropped_tcp_length =
586                                 pport_stats->rx_dropped_invalid_tcp_length;
587         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
588         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
589         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
590         drvs->rx_dropped_header_too_small =
591                                 pport_stats->rx_dropped_header_too_small;
592         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
593         drvs->rx_address_filtered =
594                                         pport_stats->rx_address_filtered +
595                                         pport_stats->rx_vlan_filtered;
596         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
597         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
598         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
599         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
600         drvs->jabber_events = pport_stats->rx_jabbers;
601         drvs->forwarded_packets = pport_stats->num_forwards_lo;
602         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
603         drvs->rx_drops_too_many_frags =
604                                 pport_stats->rx_drops_too_many_frags_lo;
605 }
606
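/* Accumulate a 16-bit HW counter (which wraps around at 65535) into a
 * 32-bit driver counter.
 */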
607 static void accumulate_16bit_val(u32 *acc, u16 val)
608 {
609 #define lo(x)                   (x & 0xFFFF)
610 #define hi(x)                   (x & 0xFFFF0000)
611         bool wrapped = val < lo(*acc);
612         u32 newacc = hi(*acc) + val;
613
614         if (wrapped)
615                 newacc += 65536;
616         WRITE_ONCE(*acc, newacc);
617 }
618
619 static void populate_erx_stats(struct be_adapter *adapter,
620                                struct be_rx_obj *rxo, u32 erx_stat)
621 {
622         if (!BEx_chip(adapter))
623                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
624         else
625                 /* below erx HW counter can actually wrap around after
626                  * 65535. Driver accumulates a 32-bit value
627                  */
628                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
629                                      (u16)erx_stat);
630 }
631
632 void be_parse_stats(struct be_adapter *adapter)
633 {
634         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
635         struct be_rx_obj *rxo;
636         int i;
637         u32 erx_stat;
638
639         if (lancer_chip(adapter)) {
640                 populate_lancer_stats(adapter);
641         } else {
642                 if (BE2_chip(adapter))
643                         populate_be_v0_stats(adapter);
644                 else if (BE3_chip(adapter))
645                         /* for BE3 */
646                         populate_be_v1_stats(adapter);
647                 else
648                         populate_be_v2_stats(adapter);
649
650                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
651                 for_all_rx_queues(adapter, rxo, i) {
652                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
653                         populate_erx_stats(adapter, rxo, erx_stat);
654                 }
655         }
656 }
657
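/* Aggregate per-queue SW rx/tx counters (sampled under u64_stats sync) and
 * the HW error counters from drv_stats into the netdev stats.
 */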
658 static void be_get_stats64(struct net_device *netdev,
659                            struct rtnl_link_stats64 *stats)
660 {
661         struct be_adapter *adapter = netdev_priv(netdev);
662         struct be_drv_stats *drvs = &adapter->drv_stats;
663         struct be_rx_obj *rxo;
664         struct be_tx_obj *txo;
665         u64 pkts, bytes;
666         unsigned int start;
667         int i;
668
669         for_all_rx_queues(adapter, rxo, i) {
670                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
671
672                 do {
673                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
674                         pkts = rx_stats(rxo)->rx_pkts;
675                         bytes = rx_stats(rxo)->rx_bytes;
676                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
677                 stats->rx_packets += pkts;
678                 stats->rx_bytes += bytes;
679                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
680                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
681                                         rx_stats(rxo)->rx_drops_no_frags;
682         }
683
684         for_all_tx_queues(adapter, txo, i) {
685                 const struct be_tx_stats *tx_stats = tx_stats(txo);
686
687                 do {
688                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
689                         pkts = tx_stats(txo)->tx_pkts;
690                         bytes = tx_stats(txo)->tx_bytes;
691                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
692                 stats->tx_packets += pkts;
693                 stats->tx_bytes += bytes;
694         }
695
696         /* bad pkts received */
697         stats->rx_errors = drvs->rx_crc_errors +
698                 drvs->rx_alignment_symbol_errors +
699                 drvs->rx_in_range_errors +
700                 drvs->rx_out_range_errors +
701                 drvs->rx_frame_too_long +
702                 drvs->rx_dropped_too_small +
703                 drvs->rx_dropped_too_short +
704                 drvs->rx_dropped_header_too_small +
705                 drvs->rx_dropped_tcp_length +
706                 drvs->rx_dropped_runt;
707
708         /* detailed rx errors */
709         stats->rx_length_errors = drvs->rx_in_range_errors +
710                 drvs->rx_out_range_errors +
711                 drvs->rx_frame_too_long;
712
713         stats->rx_crc_errors = drvs->rx_crc_errors;
714
715         /* frame alignment errors */
716         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
717
718         /* receiver fifo overrun */
 719         /* drops_no_pbuf is not per i/f; it's per BE card */
720         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
721                                 drvs->rx_input_fifo_overflow_drop +
722                                 drvs->rx_drops_no_pbuf;
723 }
724
725 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
726 {
727         struct net_device *netdev = adapter->netdev;
728
729         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
730                 netif_carrier_off(netdev);
731                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
732         }
733
734         if (link_status)
735                 netif_carrier_on(netdev);
736         else
737                 netif_carrier_off(netdev);
738
739         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
740 }
741
742 static int be_gso_hdr_len(struct sk_buff *skb)
743 {
744         if (skb->encapsulation)
745                 return skb_inner_transport_offset(skb) +
746                        inner_tcp_hdrlen(skb);
747         return skb_transport_offset(skb) + tcp_hdrlen(skb);
748 }
749
750 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
751 {
752         struct be_tx_stats *stats = tx_stats(txo);
753         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
754         /* Account for headers which get duplicated in TSO pkt */
755         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
756
757         u64_stats_update_begin(&stats->sync);
758         stats->tx_reqs++;
759         stats->tx_bytes += skb->len + dup_hdr_len;
760         stats->tx_pkts += tx_pkts;
761         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
762                 stats->tx_vxlan_offload_pkts += tx_pkts;
763         u64_stats_update_end(&stats->sync);
764 }
765
766 /* Returns number of WRBs needed for the skb */
767 static u32 skb_wrb_cnt(struct sk_buff *skb)
768 {
769         /* +1 for the header wrb */
770         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
771 }
772
773 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
774 {
775         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
776         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
777         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
778         wrb->rsvd0 = 0;
779 }
780
781 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
782  * to avoid the swap and shift/mask operations in wrb_fill().
783  */
784 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
785 {
786         wrb->frag_pa_hi = 0;
787         wrb->frag_pa_lo = 0;
788         wrb->frag_len = 0;
789         wrb->rsvd0 = 0;
790 }
791
792 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
793                                      struct sk_buff *skb)
794 {
795         u8 vlan_prio;
796         u16 vlan_tag;
797
798         vlan_tag = skb_vlan_tag_get(skb);
799         vlan_prio = skb_vlan_tag_get_prio(skb);
800         /* If vlan priority provided by OS is NOT in available bmap */
801         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
802                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
803                                 adapter->recommended_prio_bits;
804
805         return vlan_tag;
806 }
807
808 /* Used only for IP tunnel packets */
809 static u16 skb_inner_ip_proto(struct sk_buff *skb)
810 {
811         return (inner_ip_hdr(skb)->version == 4) ?
812                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
813 }
814
815 static u16 skb_ip_proto(struct sk_buff *skb)
816 {
817         return (ip_hdr(skb)->version == 4) ?
818                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
819 }
820
821 static inline bool be_is_txq_full(struct be_tx_obj *txo)
822 {
823         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
824 }
825
826 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
827 {
828         return atomic_read(&txo->q.used) < txo->q.len / 2;
829 }
830
831 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
832 {
833         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
834 }
835
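/* Translate the skb's offload requests (TSO, L3/L4 checksum, VLAN tag)
 * into TX WRB header params.
 */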
836 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
837                                        struct sk_buff *skb,
838                                        struct be_wrb_params *wrb_params)
839 {
840         u16 proto;
841
842         if (skb_is_gso(skb)) {
843                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
844                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
845                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
846                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
847         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
848                 if (skb->encapsulation) {
849                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
850                         proto = skb_inner_ip_proto(skb);
851                 } else {
852                         proto = skb_ip_proto(skb);
853                 }
854                 if (proto == IPPROTO_TCP)
855                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
856                 else if (proto == IPPROTO_UDP)
857                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
858         }
859
860         if (skb_vlan_tag_present(skb)) {
861                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
862                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
863         }
864
865         BE_WRB_F_SET(wrb_params->features, CRC, 1);
866 }
867
868 static void wrb_fill_hdr(struct be_adapter *adapter,
869                          struct be_eth_hdr_wrb *hdr,
870                          struct be_wrb_params *wrb_params,
871                          struct sk_buff *skb)
872 {
873         memset(hdr, 0, sizeof(*hdr));
874
875         SET_TX_WRB_HDR_BITS(crc, hdr,
876                             BE_WRB_F_GET(wrb_params->features, CRC));
877         SET_TX_WRB_HDR_BITS(ipcs, hdr,
878                             BE_WRB_F_GET(wrb_params->features, IPCS));
879         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
880                             BE_WRB_F_GET(wrb_params->features, TCPCS));
881         SET_TX_WRB_HDR_BITS(udpcs, hdr,
882                             BE_WRB_F_GET(wrb_params->features, UDPCS));
883
884         SET_TX_WRB_HDR_BITS(lso, hdr,
885                             BE_WRB_F_GET(wrb_params->features, LSO));
886         SET_TX_WRB_HDR_BITS(lso6, hdr,
887                             BE_WRB_F_GET(wrb_params->features, LSO6));
888         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
889
890         /* Hack to skip HW VLAN tagging needs evt = 1, compl = 0. When this
891          * hack is not needed, the evt bit is set while ringing DB.
892          */
893         SET_TX_WRB_HDR_BITS(event, hdr,
894                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
895         SET_TX_WRB_HDR_BITS(vlan, hdr,
896                             BE_WRB_F_GET(wrb_params->features, VLAN));
897         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
898
899         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
900         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
901         SET_TX_WRB_HDR_BITS(mgmt, hdr,
902                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
903 }
904
905 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
906                           bool unmap_single)
907 {
908         dma_addr_t dma;
909         u32 frag_len = le32_to_cpu(wrb->frag_len);
 910
912         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
913                 (u64)le32_to_cpu(wrb->frag_pa_lo);
914         if (frag_len) {
915                 if (unmap_single)
916                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
917                 else
918                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
919         }
920 }
921
922 /* Grab a WRB header for xmit */
923 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
924 {
925         u32 head = txo->q.head;
926
927         queue_head_inc(&txo->q);
928         return head;
929 }
930
931 /* Set up the WRB header for xmit */
932 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
933                                 struct be_tx_obj *txo,
934                                 struct be_wrb_params *wrb_params,
935                                 struct sk_buff *skb, u16 head)
936 {
937         u32 num_frags = skb_wrb_cnt(skb);
938         struct be_queue_info *txq = &txo->q;
939         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
940
941         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
942         be_dws_cpu_to_le(hdr, sizeof(*hdr));
943
944         BUG_ON(txo->sent_skb_list[head]);
945         txo->sent_skb_list[head] = skb;
946         txo->last_req_hdr = head;
947         atomic_add(num_frags, &txq->used);
948         txo->last_req_wrb_cnt = num_frags;
949         txo->pend_wrb_cnt += num_frags;
950 }
951
952 /* Setup a WRB fragment (buffer descriptor) for xmit */
953 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
954                                  int len)
955 {
956         struct be_eth_wrb *wrb;
957         struct be_queue_info *txq = &txo->q;
958
959         wrb = queue_head_node(txq);
960         wrb_fill(wrb, busaddr, len);
961         queue_head_inc(txq);
962 }
963
964 /* Bring the queue back to the state it was in before be_xmit_enqueue() routine
965  * was invoked. The producer index is restored to the previous packet and the
966  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
967  */
968 static void be_xmit_restore(struct be_adapter *adapter,
969                             struct be_tx_obj *txo, u32 head, bool map_single,
970                             u32 copied)
971 {
972         struct device *dev;
973         struct be_eth_wrb *wrb;
974         struct be_queue_info *txq = &txo->q;
975
976         dev = &adapter->pdev->dev;
977         txq->head = head;
978
979         /* skip the first wrb (hdr); it's not mapped */
980         queue_head_inc(txq);
981         while (copied) {
982                 wrb = queue_head_node(txq);
983                 unmap_tx_frag(dev, wrb, map_single);
984                 map_single = false;
985                 copied -= le32_to_cpu(wrb->frag_len);
986                 queue_head_inc(txq);
987         }
988
989         txq->head = head;
990 }
991
992 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
993  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
994  * of WRBs used up by the packet.
995  */
996 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
997                            struct sk_buff *skb,
998                            struct be_wrb_params *wrb_params)
999 {
1000         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
1001         struct device *dev = &adapter->pdev->dev;
1002         bool map_single = false;
1003         u32 head;
1004         dma_addr_t busaddr;
1005         int len;
1006
1007         head = be_tx_get_wrb_hdr(txo);
1008
1009         if (skb->len > skb->data_len) {
1010                 len = skb_headlen(skb);
1011
1012                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1013                 if (dma_mapping_error(dev, busaddr))
1014                         goto dma_err;
1015                 map_single = true;
1016                 be_tx_setup_wrb_frag(txo, busaddr, len);
1017                 copied += len;
1018         }
1019
1020         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1021                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1022                 len = skb_frag_size(frag);
1023
1024                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1025                 if (dma_mapping_error(dev, busaddr))
1026                         goto dma_err;
1027                 be_tx_setup_wrb_frag(txo, busaddr, len);
1028                 copied += len;
1029         }
1030
1031         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1032
1033         be_tx_stats_update(txo, skb);
1034         return wrb_cnt;
1035
1036 dma_err:
1037         adapter->drv_stats.dma_map_errors++;
1038         be_xmit_restore(adapter, txo, head, map_single, copied);
1039         return 0;
1040 }
1041
1042 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1043 {
1044         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1045 }
1046
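/* Insert the VLAN tag (and the outer QnQ tag, if configured) directly into
 * the packet data; used when HW VLAN insertion must be skipped.
 */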
1047 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1048                                              struct sk_buff *skb,
1049                                              struct be_wrb_params
1050                                              *wrb_params)
1051 {
1052         bool insert_vlan = false;
1053         u16 vlan_tag = 0;
1054
1055         skb = skb_share_check(skb, GFP_ATOMIC);
1056         if (unlikely(!skb))
1057                 return skb;
1058
1059         if (skb_vlan_tag_present(skb)) {
1060                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1061                 insert_vlan = true;
1062         }
1063
1064         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1065                 if (!insert_vlan) {
1066                         vlan_tag = adapter->pvid;
1067                         insert_vlan = true;
1068                 }
 1069                 /* F/W workaround: setting skip_hw_vlan = 1 informs the F/W
 1070                  * to skip VLAN insertion
 1071                  */
1072                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1073         }
1074
1075         if (insert_vlan) {
1076                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1077                                                 vlan_tag);
1078                 if (unlikely(!skb))
1079                         return skb;
1080                 __vlan_hwaccel_clear_tag(skb);
1081         }
1082
1083         /* Insert the outer VLAN, if any */
1084         if (adapter->qnq_vid) {
1085                 vlan_tag = adapter->qnq_vid;
1086                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1087                                                 vlan_tag);
1088                 if (unlikely(!skb))
1089                         return skb;
1090                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1091         }
1092
1093         return skb;
1094 }
1095
1096 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1097 {
1098         struct ethhdr *eh = (struct ethhdr *)skb->data;
1099         u16 offset = ETH_HLEN;
1100
1101         if (eh->h_proto == htons(ETH_P_IPV6)) {
1102                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1103
1104                 offset += sizeof(struct ipv6hdr);
1105                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1106                     ip6h->nexthdr != NEXTHDR_UDP) {
1107                         struct ipv6_opt_hdr *ehdr =
1108                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1109
1110                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1111                         if (ehdr->hdrlen == 0xff)
1112                                 return true;
1113                 }
1114         }
1115         return false;
1116 }
1117
1118 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1119 {
1120         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1121 }
1122
1123 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1124 {
1125         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1126 }
1127
1128 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1129                                                   struct sk_buff *skb,
1130                                                   struct be_wrb_params
1131                                                   *wrb_params)
1132 {
1133         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1134         unsigned int eth_hdr_len;
1135         struct iphdr *ip;
1136
 1137         /* For padded packets, BE HW modifies the tot_len field in the IP
 1138          * header incorrectly when a VLAN tag is inserted by HW.
 1139          * For padded packets, Lancer computes an incorrect checksum.
 1140          */
1141         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1142                                                 VLAN_ETH_HLEN : ETH_HLEN;
1143         if (skb->len <= 60 &&
1144             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1145             is_ipv4_pkt(skb)) {
1146                 ip = (struct iphdr *)ip_hdr(skb);
1147                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1148         }
1149
1150         /* If vlan tag is already inlined in the packet, skip HW VLAN
1151          * tagging in pvid-tagging mode
1152          */
1153         if (be_pvid_tagging_enabled(adapter) &&
1154             veh->h_vlan_proto == htons(ETH_P_8021Q))
1155                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1156
 1157         /* HW has a bug wherein it will calculate CSUM for VLAN
 1158          * pkts even though checksum offload is disabled.
 1159          * Manually insert the VLAN tag in such pkts.
 1160          */
1161         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1162             skb_vlan_tag_present(skb)) {
1163                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1164                 if (unlikely(!skb))
1165                         goto err;
1166         }
1167
1168         /* HW may lockup when VLAN HW tagging is requested on
1169          * certain ipv6 packets. Drop such pkts if the HW workaround to
1170          * skip HW tagging is not enabled by FW.
1171          */
1172         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1173                      (adapter->pvid || adapter->qnq_vid) &&
1174                      !qnq_async_evt_rcvd(adapter)))
1175                 goto tx_drop;
1176
1177         /* Manual VLAN tag insertion to prevent:
1178          * ASIC lockup when the ASIC inserts VLAN tag into
1179          * certain ipv6 packets. Insert VLAN tags in driver,
1180          * and set event, completion, vlan bits accordingly
1181          * in the Tx WRB.
1182          */
1183         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1184             be_vlan_tag_tx_chk(adapter, skb)) {
1185                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1186                 if (unlikely(!skb))
1187                         goto err;
1188         }
1189
1190         return skb;
1191 tx_drop:
1192         dev_kfree_skb_any(skb);
1193 err:
1194         return NULL;
1195 }
1196
1197 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1198                                            struct sk_buff *skb,
1199                                            struct be_wrb_params *wrb_params)
1200 {
1201         int err;
1202
1203         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1204          * packets that are 32b or less may cause a transmit stall
1205          * on that port. The workaround is to pad such packets
1206          * (len <= 32 bytes) to a minimum length of 36b.
1207          */
1208         if (skb->len <= 32) {
1209                 if (skb_put_padto(skb, 36))
1210                         return NULL;
1211         }
1212
1213         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1214                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1215                 if (!skb)
1216                         return NULL;
1217         }
1218
1219         /* The stack can send us skbs with length greater than
1220          * what the HW can handle. Trim the extra bytes.
1221          */
1222         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1223         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1224         WARN_ON(err);
1225
1226         return skb;
1227 }
1228
1229 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1230 {
1231         struct be_queue_info *txq = &txo->q;
1232         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1233
1234         /* Mark the last request eventable if it hasn't been marked already */
1235         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1236                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1237
 1238         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1239         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1240                 wrb_fill_dummy(queue_head_node(txq));
1241                 queue_head_inc(txq);
1242                 atomic_inc(&txq->used);
1243                 txo->pend_wrb_cnt++;
1244                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1245                                            TX_HDR_WRB_NUM_SHIFT);
1246                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1247                                           TX_HDR_WRB_NUM_SHIFT);
1248         }
1249         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1250         txo->pend_wrb_cnt = 0;
1251 }
1252
 1253 /* OS2BMC: forwarding of selected host TX packets to the BMC, per bmc_filt_mask */
1254
1255 #define DHCP_CLIENT_PORT        68
1256 #define DHCP_SERVER_PORT        67
1257 #define NET_BIOS_PORT1          137
1258 #define NET_BIOS_PORT2          138
1259 #define DHCPV6_RAS_PORT         547
1260
1261 #define is_mc_allowed_on_bmc(adapter, eh)       \
1262         (!is_multicast_filt_enabled(adapter) && \
1263          is_multicast_ether_addr(eh->h_dest) && \
1264          !is_broadcast_ether_addr(eh->h_dest))
1265
1266 #define is_bc_allowed_on_bmc(adapter, eh)       \
1267         (!is_broadcast_filt_enabled(adapter) && \
1268          is_broadcast_ether_addr(eh->h_dest))
1269
1270 #define is_arp_allowed_on_bmc(adapter, skb)     \
1271         (is_arp(skb) && is_arp_filt_enabled(adapter))
1272
1273 #define is_broadcast_packet(eh, adapter)        \
1274                 (is_multicast_ether_addr(eh->h_dest) && \
1275                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1276
1277 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1278
1279 #define is_arp_filt_enabled(adapter)    \
1280                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1281
1282 #define is_dhcp_client_filt_enabled(adapter)    \
1283                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1284
1285 #define is_dhcp_srvr_filt_enabled(adapter)      \
1286                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1287
1288 #define is_nbios_filt_enabled(adapter)  \
1289                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1290
1291 #define is_ipv6_na_filt_enabled(adapter)        \
1292                 (adapter->bmc_filt_mask &       \
1293                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1294
1295 #define is_ipv6_ra_filt_enabled(adapter)        \
1296                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1297
1298 #define is_ipv6_ras_filt_enabled(adapter)       \
1299                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1300
1301 #define is_broadcast_filt_enabled(adapter)      \
1302                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1303
1304 #define is_multicast_filt_enabled(adapter)      \
1305                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1306
1307 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1308                                struct sk_buff **skb)
1309 {
1310         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1311         bool os2bmc = false;
1312
1313         if (!be_is_os2bmc_enabled(adapter))
1314                 goto done;
1315
1316         if (!is_multicast_ether_addr(eh->h_dest))
1317                 goto done;
1318
1319         if (is_mc_allowed_on_bmc(adapter, eh) ||
1320             is_bc_allowed_on_bmc(adapter, eh) ||
1321             is_arp_allowed_on_bmc(adapter, (*skb))) {
1322                 os2bmc = true;
1323                 goto done;
1324         }
1325
1326         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1327                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1328                 u8 nexthdr = hdr->nexthdr;
1329
1330                 if (nexthdr == IPPROTO_ICMPV6) {
1331                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1332
1333                         switch (icmp6->icmp6_type) {
1334                         case NDISC_ROUTER_ADVERTISEMENT:
1335                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1336                                 goto done;
1337                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1338                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1339                                 goto done;
1340                         default:
1341                                 break;
1342                         }
1343                 }
1344         }
1345
1346         if (is_udp_pkt((*skb))) {
1347                 struct udphdr *udp = udp_hdr((*skb));
1348
1349                 switch (ntohs(udp->dest)) {
1350                 case DHCP_CLIENT_PORT:
1351                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1352                         goto done;
1353                 case DHCP_SERVER_PORT:
1354                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1355                         goto done;
1356                 case NET_BIOS_PORT1:
1357                 case NET_BIOS_PORT2:
1358                         os2bmc = is_nbios_filt_enabled(adapter);
1359                         goto done;
1360                 case DHCPV6_RAS_PORT:
1361                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1362                         goto done;
1363                 default:
1364                         break;
1365                 }
1366         }
1367 done:
 1368         /* For VLAN packets destined to the BMC, the ASIC expects the
 1369          * VLAN tag to be inline in the packet.
 1370          */
1371         if (os2bmc)
1372                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1373
1374         return os2bmc;
1375 }
1376
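/* Transmit entry point: applies HW workarounds, maps and enqueues the skb's
 * WRBs, optionally enqueues a second copy for the BMC (OS2BMC), and rings
 * the TX doorbell when the queue needs to be flushed.
 */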
1377 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1378 {
1379         struct be_adapter *adapter = netdev_priv(netdev);
1380         u16 q_idx = skb_get_queue_mapping(skb);
1381         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1382         struct be_wrb_params wrb_params = { 0 };
1383         bool flush = !skb->xmit_more;
1384         u16 wrb_cnt;
1385
1386         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1387         if (unlikely(!skb))
1388                 goto drop;
1389
1390         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1391
1392         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1393         if (unlikely(!wrb_cnt)) {
1394                 dev_kfree_skb_any(skb);
1395                 goto drop;
1396         }
1397
1398         /* If OS2BMC is enabled and the pkt is destined to the BMC,
1399          * enqueue the pkt a 2nd time with the mgmt bit set.
1400          */
1401         if (be_send_pkt_to_bmc(adapter, &skb)) {
1402                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1403                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1404                 if (unlikely(!wrb_cnt))
1405                         goto drop;
1406                 else
1407                         skb_get(skb);
1408         }
1409
1410         if (be_is_txq_full(txo)) {
1411                 netif_stop_subqueue(netdev, q_idx);
1412                 tx_stats(txo)->tx_stops++;
1413         }
1414
1415         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1416                 be_xmit_flush(adapter, txo);
1417
1418         return NETDEV_TX_OK;
1419 drop:
1420         tx_stats(txo)->tx_drv_drops++;
1421         /* Flush the already enqueued tx requests */
1422         if (flush && txo->pend_wrb_cnt)
1423                 be_xmit_flush(adapter, txo);
1424
1425         return NETDEV_TX_OK;
1426 }
1427
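/* ndo_tx_timeout handler: for debugging, dump the non-zero TXQ and TX-CQ
 * entries and the skbs still pending on every TX queue; on Lancer chips,
 * also trigger a firmware reset to try to recover from the stall.
 */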
1428 static void be_tx_timeout(struct net_device *netdev)
1429 {
1430         struct be_adapter *adapter = netdev_priv(netdev);
1431         struct device *dev = &adapter->pdev->dev;
1432         struct be_tx_obj *txo;
1433         struct sk_buff *skb;
1434         struct tcphdr *tcphdr;
1435         struct udphdr *udphdr;
1436         u32 *entry;
1437         int status;
1438         int i, j;
1439
1440         for_all_tx_queues(adapter, txo, i) {
1441                 dev_info(dev, "TXQ Dump: %d H: %d T: %d used: %d, qid: 0x%x\n",
1442                          i, txo->q.head, txo->q.tail,
1443                          atomic_read(&txo->q.used), txo->q.id);
1444
1445                 entry = txo->q.dma_mem.va;
1446                 for (j = 0; j < TX_Q_LEN * 4; j += 4) {
1447                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1448                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1449                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1450                                          j, entry[j], entry[j + 1],
1451                                          entry[j + 2], entry[j + 3]);
1452                         }
1453                 }
1454
1455                 entry = txo->cq.dma_mem.va;
1456                 dev_info(dev, "TXCQ Dump: %d  H: %d T: %d used: %d\n",
1457                          i, txo->cq.head, txo->cq.tail,
1458                          atomic_read(&txo->cq.used));
1459                 for (j = 0; j < TX_CQ_LEN * 4; j += 4) {
1460                         if (entry[j] != 0 || entry[j + 1] != 0 ||
1461                             entry[j + 2] != 0 || entry[j + 3] != 0) {
1462                                 dev_info(dev, "Entry %d 0x%x 0x%x 0x%x 0x%x\n",
1463                                          j, entry[j], entry[j + 1],
1464                                          entry[j + 2], entry[j + 3]);
1465                         }
1466                 }
1467
1468                 for (j = 0; j < TX_Q_LEN; j++) {
1469                         if (txo->sent_skb_list[j]) {
1470                                 skb = txo->sent_skb_list[j];
1471                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP) {
1472                                         tcphdr = tcp_hdr(skb);
1473                                         dev_info(dev, "TCP source port %d\n",
1474                                                  ntohs(tcphdr->source));
1475                                         dev_info(dev, "TCP dest port %d\n",
1476                                                  ntohs(tcphdr->dest));
1477                                         dev_info(dev, "TCP sequence num %u\n",
1478                                                  ntohl(tcphdr->seq));
1479                                         dev_info(dev, "TCP ack_seq %u\n",
1480                                                  ntohl(tcphdr->ack_seq));
1481                                 } else if (ip_hdr(skb)->protocol ==
1482                                            IPPROTO_UDP) {
1483                                         udphdr = udp_hdr(skb);
1484                                         dev_info(dev, "UDP source port %d\n",
1485                                                  ntohs(udphdr->source));
1486                                         dev_info(dev, "UDP dest port %d\n",
1487                                                  ntohs(udphdr->dest));
1488                                 }
1489                                 dev_info(dev, "skb[%d] %p len %d proto 0x%x\n",
1490                                          j, skb, skb->len, skb->protocol);
1491                         }
1492                 }
1493         }
1494
1495         if (lancer_chip(adapter)) {
1496                 dev_info(dev, "Initiating reset due to tx timeout\n");
1497                 dev_info(dev, "Resetting adapter\n");
1498                 status = lancer_physdev_ctrl(adapter,
1499                                              PHYSDEV_CONTROL_FW_RESET_MASK);
1500                 if (status)
1501                         dev_err(dev, "Reset failed .. Reboot server\n");
1502         }
1503 }
1504
1505 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1506 {
1507         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1508                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1509 }
1510
1511 static int be_set_vlan_promisc(struct be_adapter *adapter)
1512 {
1513         struct device *dev = &adapter->pdev->dev;
1514         int status;
1515
1516         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1517                 return 0;
1518
1519         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1520         if (!status) {
1521                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1522                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1523         } else {
1524                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1525         }
1526         return status;
1527 }
1528
1529 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1530 {
1531         struct device *dev = &adapter->pdev->dev;
1532         int status;
1533
1534         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1535         if (!status) {
1536                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1537                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1538         }
1539         return status;
1540 }
1541
1542 /*
1543  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1544  * If the user configures more, place BE in vlan promiscuous mode.
1545  */
1546 static int be_vid_config(struct be_adapter *adapter)
1547 {
1548         struct device *dev = &adapter->pdev->dev;
1549         u16 vids[BE_NUM_VLANS_SUPPORTED];
1550         u16 num = 0, i = 0;
1551         int status = 0;
1552
1553         /* No need to change the VLAN state if the I/F is in promiscuous */
1554         if (adapter->netdev->flags & IFF_PROMISC)
1555                 return 0;
1556
1557         if (adapter->vlans_added > be_max_vlans(adapter))
1558                 return be_set_vlan_promisc(adapter);
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1561                 status = be_clear_vlan_promisc(adapter);
1562                 if (status)
1563                         return status;
1564         }
1565         /* Construct VLAN Table to give to HW */
1566         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1567                 vids[num++] = cpu_to_le16(i);
1568
1569         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1570         if (status) {
1571                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1572                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1573                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1574                     addl_status(status) ==
1575                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1576                         return be_set_vlan_promisc(adapter);
1577         }
1578         return status;
1579 }
1580
1581 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1582 {
1583         struct be_adapter *adapter = netdev_priv(netdev);
1584         int status = 0;
1585
1586         mutex_lock(&adapter->rx_filter_lock);
1587
1588         /* Packets with VID 0 are always received by Lancer by default */
1589         if (lancer_chip(adapter) && vid == 0)
1590                 goto done;
1591
1592         if (test_bit(vid, adapter->vids))
1593                 goto done;
1594
1595         set_bit(vid, adapter->vids);
1596         adapter->vlans_added++;
1597
1598         status = be_vid_config(adapter);
1599 done:
1600         mutex_unlock(&adapter->rx_filter_lock);
1601         return status;
1602 }
1603
1604 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1605 {
1606         struct be_adapter *adapter = netdev_priv(netdev);
1607         int status = 0;
1608
1609         mutex_lock(&adapter->rx_filter_lock);
1610
1611         /* Packets with VID 0 are always received by Lancer by default */
1612         if (lancer_chip(adapter) && vid == 0)
1613                 goto done;
1614
1615         if (!test_bit(vid, adapter->vids))
1616                 goto done;
1617
1618         clear_bit(vid, adapter->vids);
1619         adapter->vlans_added--;
1620
1621         status = be_vid_config(adapter);
1622 done:
1623         mutex_unlock(&adapter->rx_filter_lock);
1624         return status;
1625 }
1626
1627 static void be_set_all_promisc(struct be_adapter *adapter)
1628 {
1629         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1630         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1631 }
1632
1633 static void be_set_mc_promisc(struct be_adapter *adapter)
1634 {
1635         int status;
1636
1637         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1638                 return;
1639
1640         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1641         if (!status)
1642                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1643 }
1644
1645 static void be_set_uc_promisc(struct be_adapter *adapter)
1646 {
1647         int status;
1648
1649         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1650                 return;
1651
1652         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1653         if (!status)
1654                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1655 }
1656
1657 static void be_clear_uc_promisc(struct be_adapter *adapter)
1658 {
1659         int status;
1660
1661         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1662                 return;
1663
1664         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1665         if (!status)
1666                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1667 }
1668
1669 /* The two functions below are the sync/unsync callbacks passed to
1670  * __dev_uc_sync()/__dev_mc_sync(). The same callback handles both cases;
1671  * addresses are not added or removed here, it only flags that the uc/mc
1672  * list has changed. The entire uc/mc list is programmed in be_set_rx_mode().
1673  */
1674 static int be_uc_list_update(struct net_device *netdev,
1675                              const unsigned char *addr)
1676 {
1677         struct be_adapter *adapter = netdev_priv(netdev);
1678
1679         adapter->update_uc_list = true;
1680         return 0;
1681 }
1682
1683 static int be_mc_list_update(struct net_device *netdev,
1684                              const unsigned char *addr)
1685 {
1686         struct be_adapter *adapter = netdev_priv(netdev);
1687
1688         adapter->update_mc_list = true;
1689         return 0;
1690 }
1691
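/* Program the HW multicast filter from the netdev mc-list. If more addresses
 * are configured than the interface supports (or IFF_ALLMULTI is set), fall
 * back to multicast-promiscuous mode instead.
 */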
1692 static void be_set_mc_list(struct be_adapter *adapter)
1693 {
1694         struct net_device *netdev = adapter->netdev;
1695         struct netdev_hw_addr *ha;
1696         bool mc_promisc = false;
1697         int status;
1698
1699         netif_addr_lock_bh(netdev);
1700         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1701
1702         if (netdev->flags & IFF_PROMISC) {
1703                 adapter->update_mc_list = false;
1704         } else if (netdev->flags & IFF_ALLMULTI ||
1705                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1706                 /* Enable multicast promisc if num configured exceeds
1707                  * what we support
1708                  */
1709                 mc_promisc = true;
1710                 adapter->update_mc_list = false;
1711         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1712                 /* Update mc-list unconditionally if the iface was previously
1713                  * in mc-promisc mode and now is out of that mode.
1714                  */
1715                 adapter->update_mc_list = true;
1716         }
1717
1718         if (adapter->update_mc_list) {
1719                 int i = 0;
1720
1721                 /* cache the mc-list in adapter */
1722                 netdev_for_each_mc_addr(ha, netdev) {
1723                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1724                         i++;
1725                 }
1726                 adapter->mc_count = netdev_mc_count(netdev);
1727         }
1728         netif_addr_unlock_bh(netdev);
1729
1730         if (mc_promisc) {
1731                 be_set_mc_promisc(adapter);
1732         } else if (adapter->update_mc_list) {
1733                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1734                 if (!status)
1735                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1736                 else
1737                         be_set_mc_promisc(adapter);
1738
1739                 adapter->update_mc_list = false;
1740         }
1741 }
1742
1743 static void be_clear_mc_list(struct be_adapter *adapter)
1744 {
1745         struct net_device *netdev = adapter->netdev;
1746
1747         __dev_mc_unsync(netdev, NULL);
1748         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1749         adapter->mc_count = 0;
1750 }
1751
1752 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1753 {
1754         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1755                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1756                 return 0;
1757         }
1758
1759         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1760                                adapter->if_handle,
1761                                &adapter->pmac_id[uc_idx + 1], 0);
1762 }
1763
1764 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1765 {
1766         if (pmac_id == adapter->pmac_id[0])
1767                 return;
1768
1769         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1770 }
1771
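/* Program the HW unicast MAC filters from the netdev uc-list. If more
 * addresses are configured than the interface supports, switch to unicast
 * promiscuous mode; otherwise delete the previously programmed MACs and add
 * the current list.
 */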
1772 static void be_set_uc_list(struct be_adapter *adapter)
1773 {
1774         struct net_device *netdev = adapter->netdev;
1775         struct netdev_hw_addr *ha;
1776         bool uc_promisc = false;
1777         int curr_uc_macs = 0, i;
1778
1779         netif_addr_lock_bh(netdev);
1780         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1781
1782         if (netdev->flags & IFF_PROMISC) {
1783                 adapter->update_uc_list = false;
1784         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1785                 uc_promisc = true;
1786                 adapter->update_uc_list = false;
1787         }  else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1788                 /* Update uc-list unconditionally if the iface was previously
1789                  * in uc-promisc mode and now is out of that mode.
1790                  */
1791                 adapter->update_uc_list = true;
1792         }
1793
1794         if (adapter->update_uc_list) {
1795                 /* cache the uc-list in adapter array */
1796                 i = 0;
1797                 netdev_for_each_uc_addr(ha, netdev) {
1798                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1799                         i++;
1800                 }
1801                 curr_uc_macs = netdev_uc_count(netdev);
1802         }
1803         netif_addr_unlock_bh(netdev);
1804
1805         if (uc_promisc) {
1806                 be_set_uc_promisc(adapter);
1807         } else if (adapter->update_uc_list) {
1808                 be_clear_uc_promisc(adapter);
1809
1810                 for (i = 0; i < adapter->uc_macs; i++)
1811                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1812
1813                 for (i = 0; i < curr_uc_macs; i++)
1814                         be_uc_mac_add(adapter, i);
1815                 adapter->uc_macs = curr_uc_macs;
1816                 adapter->update_uc_list = false;
1817         }
1818 }
1819
1820 static void be_clear_uc_list(struct be_adapter *adapter)
1821 {
1822         struct net_device *netdev = adapter->netdev;
1823         int i;
1824
1825         __dev_uc_unsync(netdev, NULL);
1826         for (i = 0; i < adapter->uc_macs; i++)
1827                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1828
1829         adapter->uc_macs = 0;
1830 }
1831
1832 static void __be_set_rx_mode(struct be_adapter *adapter)
1833 {
1834         struct net_device *netdev = adapter->netdev;
1835
1836         mutex_lock(&adapter->rx_filter_lock);
1837
1838         if (netdev->flags & IFF_PROMISC) {
1839                 if (!be_in_all_promisc(adapter))
1840                         be_set_all_promisc(adapter);
1841         } else if (be_in_all_promisc(adapter)) {
1842                 /* We need to re-program the vlan-list or clear
1843                  * vlan-promisc mode (if needed) when the interface
1844                  * comes out of promisc mode.
1845                  */
1846                 be_vid_config(adapter);
1847         }
1848
1849         be_set_uc_list(adapter);
1850         be_set_mc_list(adapter);
1851
1852         mutex_unlock(&adapter->rx_filter_lock);
1853 }
1854
1855 static void be_work_set_rx_mode(struct work_struct *work)
1856 {
1857         struct be_cmd_work *cmd_work =
1858                                 container_of(work, struct be_cmd_work, work);
1859
1860         __be_set_rx_mode(cmd_work->adapter);
1861         kfree(cmd_work);
1862 }
1863
1864 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1865 {
1866         struct be_adapter *adapter = netdev_priv(netdev);
1867         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1868         int status;
1869
1870         if (!sriov_enabled(adapter))
1871                 return -EPERM;
1872
1873         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1874                 return -EINVAL;
1875
1876         /* Proceed further only if user provided MAC is different
1877          * from active MAC
1878          */
1879         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1880                 return 0;
1881
1882         if (BEx_chip(adapter)) {
1883                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1884                                 vf + 1);
1885
1886                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1887                                          &vf_cfg->pmac_id, vf + 1);
1888         } else {
1889                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1890                                         vf + 1);
1891         }
1892
1893         if (status) {
1894                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d Failed: %#x",
1895                         mac, vf, status);
1896                 return be_cmd_status(status);
1897         }
1898
1899         ether_addr_copy(vf_cfg->mac_addr, mac);
1900
1901         return 0;
1902 }
1903
1904 static int be_get_vf_config(struct net_device *netdev, int vf,
1905                             struct ifla_vf_info *vi)
1906 {
1907         struct be_adapter *adapter = netdev_priv(netdev);
1908         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1909
1910         if (!sriov_enabled(adapter))
1911                 return -EPERM;
1912
1913         if (vf >= adapter->num_vfs)
1914                 return -EINVAL;
1915
1916         vi->vf = vf;
1917         vi->max_tx_rate = vf_cfg->tx_rate;
1918         vi->min_tx_rate = 0;
1919         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1920         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1921         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1922         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1923         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1924
1925         return 0;
1926 }
1927
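/* Enable Transparent VLAN Tagging (TVT) on a VF: program the given VLAN into
 * the hypervisor switch, clear any VLAN filters the guest had programmed and
 * revoke the VF's FILTMGMT privilege so it cannot program VLAN filters while
 * TVT is active.
 */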
1928 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1929 {
1930         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1931         u16 vids[BE_NUM_VLANS_SUPPORTED];
1932         int vf_if_id = vf_cfg->if_handle;
1933         int status;
1934
1935         /* Enable Transparent VLAN Tagging */
1936         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1937         if (status)
1938                 return status;
1939
1940         /* Once TVT is enabled, clear any VLAN filters pre-programmed on the VF */
1941         vids[0] = 0;
1942         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1943         if (!status)
1944                 dev_info(&adapter->pdev->dev,
1945                          "Cleared guest VLANs on VF%d", vf);
1946
1947         /* After TVT is enabled, disallow VFs from programming VLAN filters */
1948         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1949                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1950                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1951                 if (!status)
1952                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1953         }
1954         return 0;
1955 }
1956
1957 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1958 {
1959         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1960         struct device *dev = &adapter->pdev->dev;
1961         int status;
1962
1963         /* Reset Transparent VLAN Tagging. */
1964         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1965                                        vf_cfg->if_handle, 0, 0);
1966         if (status)
1967                 return status;
1968
1969         /* Allow VFs to program VLAN filtering */
1970         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1971                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1972                                                   BE_PRIV_FILTMGMT, vf + 1);
1973                 if (!status) {
1974                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1975                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1976                 }
1977         }
1978
1979         dev_info(dev,
1980                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1981         return 0;
1982 }
1983
1984 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1985                           __be16 vlan_proto)
1986 {
1987         struct be_adapter *adapter = netdev_priv(netdev);
1988         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1989         int status;
1990
1991         if (!sriov_enabled(adapter))
1992                 return -EPERM;
1993
1994         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1995                 return -EINVAL;
1996
1997         if (vlan_proto != htons(ETH_P_8021Q))
1998                 return -EPROTONOSUPPORT;
1999
2000         if (vlan || qos) {
2001                 vlan |= qos << VLAN_PRIO_SHIFT;
2002                 status = be_set_vf_tvt(adapter, vf, vlan);
2003         } else {
2004                 status = be_clear_vf_tvt(adapter, vf);
2005         }
2006
2007         if (status) {
2008                 dev_err(&adapter->pdev->dev,
2009                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
2010                         status);
2011                 return be_cmd_status(status);
2012         }
2013
2014         vf_cfg->vlan_tag = vlan;
2015         return 0;
2016 }
2017
2018 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
2019                              int min_tx_rate, int max_tx_rate)
2020 {
2021         struct be_adapter *adapter = netdev_priv(netdev);
2022         struct device *dev = &adapter->pdev->dev;
2023         int percent_rate, status = 0;
2024         u16 link_speed = 0;
2025         u8 link_status;
2026
2027         if (!sriov_enabled(adapter))
2028                 return -EPERM;
2029
2030         if (vf >= adapter->num_vfs)
2031                 return -EINVAL;
2032
2033         if (min_tx_rate)
2034                 return -EINVAL;
2035
2036         if (!max_tx_rate)
2037                 goto config_qos;
2038
2039         status = be_cmd_link_status_query(adapter, &link_speed,
2040                                           &link_status, 0);
2041         if (status)
2042                 goto err;
2043
2044         if (!link_status) {
2045                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
2046                 status = -ENETDOWN;
2047                 goto err;
2048         }
2049
2050         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
2051                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
2052                         link_speed);
2053                 status = -EINVAL;
2054                 goto err;
2055         }
2056
2057         /* On Skyhawk the QOS setting must be done only as a % value */
2058         percent_rate = link_speed / 100;
2059         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
2060                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
2061                         percent_rate);
2062                 status = -EINVAL;
2063                 goto err;
2064         }
2065
2066 config_qos:
2067         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
2068         if (status)
2069                 goto err;
2070
2071         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
2072         return 0;
2073
2074 err:
2075         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
2076                 max_tx_rate, vf);
2077         return be_cmd_status(status);
2078 }
2079
2080 static int be_set_vf_link_state(struct net_device *netdev, int vf,
2081                                 int link_state)
2082 {
2083         struct be_adapter *adapter = netdev_priv(netdev);
2084         int status;
2085
2086         if (!sriov_enabled(adapter))
2087                 return -EPERM;
2088
2089         if (vf >= adapter->num_vfs)
2090                 return -EINVAL;
2091
2092         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2093         if (status) {
2094                 dev_err(&adapter->pdev->dev,
2095                         "Link state change on VF %d failed: %#x\n", vf, status);
2096                 return be_cmd_status(status);
2097         }
2098
2099         adapter->vf_cfg[vf].plink_tracking = link_state;
2100
2101         return 0;
2102 }
2103
2104 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2105 {
2106         struct be_adapter *adapter = netdev_priv(netdev);
2107         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2108         u8 spoofchk;
2109         int status;
2110
2111         if (!sriov_enabled(adapter))
2112                 return -EPERM;
2113
2114         if (vf >= adapter->num_vfs)
2115                 return -EINVAL;
2116
2117         if (BEx_chip(adapter))
2118                 return -EOPNOTSUPP;
2119
2120         if (enable == vf_cfg->spoofchk)
2121                 return 0;
2122
2123         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2124
2125         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2126                                        0, spoofchk);
2127         if (status) {
2128                 dev_err(&adapter->pdev->dev,
2129                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2130                 return be_cmd_status(status);
2131         }
2132
2133         vf_cfg->spoofchk = enable;
2134         return 0;
2135 }
2136
2137 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2138                           ulong now)
2139 {
2140         aic->rx_pkts_prev = rx_pkts;
2141         aic->tx_reqs_prev = tx_pkts;
2142         aic->jiffies = now;
2143 }
2144
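/* Adaptive interrupt coalescing: derive a new EQ delay for this EQ from the
 * RX/TX packet rate observed since the last update. The combined packets/sec
 * figure is mapped to a delay value and clamped to the [min_eqd, max_eqd]
 * range; when AIC is disabled, the static ethtool-configured delay is used.
 */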
2145 static int be_get_new_eqd(struct be_eq_obj *eqo)
2146 {
2147         struct be_adapter *adapter = eqo->adapter;
2148         int eqd, start;
2149         struct be_aic_obj *aic;
2150         struct be_rx_obj *rxo;
2151         struct be_tx_obj *txo;
2152         u64 rx_pkts = 0, tx_pkts = 0;
2153         ulong now;
2154         u32 pps, delta;
2155         int i;
2156
2157         aic = &adapter->aic_obj[eqo->idx];
2158         if (!aic->enable) {
2159                 if (aic->jiffies)
2160                         aic->jiffies = 0;
2161                 eqd = aic->et_eqd;
2162                 return eqd;
2163         }
2164
2165         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2166                 do {
2167                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2168                         rx_pkts += rxo->stats.rx_pkts;
2169                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2170         }
2171
2172         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2173                 do {
2174                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2175                         tx_pkts += txo->stats.tx_reqs;
2176                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2177         }
2178
2179         /* Skip, if wrapped around or first calculation */
2180         now = jiffies;
2181         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2182             rx_pkts < aic->rx_pkts_prev ||
2183             tx_pkts < aic->tx_reqs_prev) {
2184                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2185                 return aic->prev_eqd;
2186         }
2187
2188         delta = jiffies_to_msecs(now - aic->jiffies);
2189         if (delta == 0)
2190                 return aic->prev_eqd;
2191
2192         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2193                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2194         eqd = (pps / 15000) << 2;
2195
2196         if (eqd < 8)
2197                 eqd = 0;
2198         eqd = min_t(u32, eqd, aic->max_eqd);
2199         eqd = max_t(u32, eqd, aic->min_eqd);
2200
2201         be_aic_update(aic, rx_pkts, tx_pkts, now);
2202
2203         return eqd;
2204 }
2205
2206 /* For Skyhawk-R only */
2207 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2208 {
2209         struct be_adapter *adapter = eqo->adapter;
2210         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2211         ulong now = jiffies;
2212         int eqd;
2213         u32 mult_enc;
2214
2215         if (!aic->enable)
2216                 return 0;
2217
2218         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2219                 eqd = aic->prev_eqd;
2220         else
2221                 eqd = be_get_new_eqd(eqo);
2222
2223         if (eqd > 100)
2224                 mult_enc = R2I_DLY_ENC_1;
2225         else if (eqd > 60)
2226                 mult_enc = R2I_DLY_ENC_2;
2227         else if (eqd > 20)
2228                 mult_enc = R2I_DLY_ENC_3;
2229         else
2230                 mult_enc = R2I_DLY_ENC_0;
2231
2232         aic->prev_eqd = eqd;
2233
2234         return mult_enc;
2235 }
2236
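/* Recompute the EQ delay for every event queue and issue a single
 * MODIFY_EQ_DELAY command for those queues whose delay changed (or for all
 * of them when force_update is set).
 */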
2237 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2238 {
2239         struct be_set_eqd set_eqd[MAX_EVT_QS];
2240         struct be_aic_obj *aic;
2241         struct be_eq_obj *eqo;
2242         int i, num = 0, eqd;
2243
2244         for_all_evt_queues(adapter, eqo, i) {
2245                 aic = &adapter->aic_obj[eqo->idx];
2246                 eqd = be_get_new_eqd(eqo);
2247                 if (force_update || eqd != aic->prev_eqd) {
2248                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2249                         set_eqd[num].eq_id = eqo->q.id;
2250                         aic->prev_eqd = eqd;
2251                         num++;
2252                 }
2253         }
2254
2255         if (num)
2256                 be_cmd_modify_eqd(adapter, set_eqd, num);
2257 }
2258
2259 static void be_rx_stats_update(struct be_rx_obj *rxo,
2260                                struct be_rx_compl_info *rxcp)
2261 {
2262         struct be_rx_stats *stats = rx_stats(rxo);
2263
2264         u64_stats_update_begin(&stats->sync);
2265         stats->rx_compl++;
2266         stats->rx_bytes += rxcp->pkt_size;
2267         stats->rx_pkts++;
2268         if (rxcp->tunneled)
2269                 stats->rx_vxlan_offload_pkts++;
2270         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2271                 stats->rx_mcast_pkts++;
2272         if (rxcp->err)
2273                 stats->rx_compl_err++;
2274         u64_stats_update_end(&stats->sync);
2275 }
2276
2277 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2278 {
2279         /* L4 checksum is not reliable for non-TCP/UDP packets.
2280          * Also ignore ipcksm for IPv6 packets.
2281          */
2282         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2283                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2284 }
2285
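/* Pop the page_info at the RXQ tail. If this is the last fragment carved out
 * of its backing page, DMA-unmap the whole page; otherwise just sync the
 * fragment for CPU access.
 */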
2286 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2287 {
2288         struct be_adapter *adapter = rxo->adapter;
2289         struct be_rx_page_info *rx_page_info;
2290         struct be_queue_info *rxq = &rxo->q;
2291         u32 frag_idx = rxq->tail;
2292
2293         rx_page_info = &rxo->page_info_tbl[frag_idx];
2294         BUG_ON(!rx_page_info->page);
2295
2296         if (rx_page_info->last_frag) {
2297                 dma_unmap_page(&adapter->pdev->dev,
2298                                dma_unmap_addr(rx_page_info, bus),
2299                                adapter->big_page_size, DMA_FROM_DEVICE);
2300                 rx_page_info->last_frag = false;
2301         } else {
2302                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2303                                         dma_unmap_addr(rx_page_info, bus),
2304                                         rx_frag_size, DMA_FROM_DEVICE);
2305         }
2306
2307         queue_tail_inc(rxq);
2308         atomic_dec(&rxq->used);
2309         return rx_page_info;
2310 }
2311
2312 /* Throw away the data in the Rx completion */
2313 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2314                                 struct be_rx_compl_info *rxcp)
2315 {
2316         struct be_rx_page_info *page_info;
2317         u16 i, num_rcvd = rxcp->num_rcvd;
2318
2319         for (i = 0; i < num_rcvd; i++) {
2320                 page_info = get_rx_page_info(rxo);
2321                 put_page(page_info->page);
2322                 memset(page_info, 0, sizeof(*page_info));
2323         }
2324 }
2325
2326 /*
2327  * skb_fill_rx_data forms a complete skb for an ether frame
2328  * indicated by rxcp.
2329  */
2330 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2331                              struct be_rx_compl_info *rxcp)
2332 {
2333         struct be_rx_page_info *page_info;
2334         u16 i, j;
2335         u16 hdr_len, curr_frag_len, remaining;
2336         u8 *start;
2337
2338         page_info = get_rx_page_info(rxo);
2339         start = page_address(page_info->page) + page_info->page_offset;
2340         prefetch(start);
2341
2342         /* Copy data in the first descriptor of this completion */
2343         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2344
2345         skb->len = curr_frag_len;
2346         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2347                 memcpy(skb->data, start, curr_frag_len);
2348                 /* Complete packet has now been moved to data */
2349                 put_page(page_info->page);
2350                 skb->data_len = 0;
2351                 skb->tail += curr_frag_len;
2352         } else {
2353                 hdr_len = ETH_HLEN;
2354                 memcpy(skb->data, start, hdr_len);
2355                 skb_shinfo(skb)->nr_frags = 1;
2356                 skb_frag_set_page(skb, 0, page_info->page);
2357                 skb_shinfo(skb)->frags[0].page_offset =
2358                                         page_info->page_offset + hdr_len;
2359                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2360                                   curr_frag_len - hdr_len);
2361                 skb->data_len = curr_frag_len - hdr_len;
2362                 skb->truesize += rx_frag_size;
2363                 skb->tail += hdr_len;
2364         }
2365         page_info->page = NULL;
2366
2367         if (rxcp->pkt_size <= rx_frag_size) {
2368                 BUG_ON(rxcp->num_rcvd != 1);
2369                 return;
2370         }
2371
2372         /* More frags present for this completion */
2373         remaining = rxcp->pkt_size - curr_frag_len;
2374         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2375                 page_info = get_rx_page_info(rxo);
2376                 curr_frag_len = min(remaining, rx_frag_size);
2377
2378                 /* Coalesce all frags from the same physical page in one slot */
2379                 if (page_info->page_offset == 0) {
2380                         /* Fresh page */
2381                         j++;
2382                         skb_frag_set_page(skb, j, page_info->page);
2383                         skb_shinfo(skb)->frags[j].page_offset =
2384                                                         page_info->page_offset;
2385                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2386                         skb_shinfo(skb)->nr_frags++;
2387                 } else {
2388                         put_page(page_info->page);
2389                 }
2390
2391                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2392                 skb->len += curr_frag_len;
2393                 skb->data_len += curr_frag_len;
2394                 skb->truesize += rx_frag_size;
2395                 remaining -= curr_frag_len;
2396                 page_info->page = NULL;
2397         }
2398         BUG_ON(j > MAX_SKB_FRAGS);
2399 }
2400
2401 /* Process the RX completion indicated by rxcp when GRO is disabled */
2402 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2403                                 struct be_rx_compl_info *rxcp)
2404 {
2405         struct be_adapter *adapter = rxo->adapter;
2406         struct net_device *netdev = adapter->netdev;
2407         struct sk_buff *skb;
2408
2409         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2410         if (unlikely(!skb)) {
2411                 rx_stats(rxo)->rx_drops_no_skbs++;
2412                 be_rx_compl_discard(rxo, rxcp);
2413                 return;
2414         }
2415
2416         skb_fill_rx_data(rxo, skb, rxcp);
2417
2418         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2419                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2420         else
2421                 skb_checksum_none_assert(skb);
2422
2423         skb->protocol = eth_type_trans(skb, netdev);
2424         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2425         if (netdev->features & NETIF_F_RXHASH)
2426                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2427
2428         skb->csum_level = rxcp->tunneled;
2429         skb_mark_napi_id(skb, napi);
2430
2431         if (rxcp->vlanf)
2432                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2433
2434         netif_receive_skb(skb);
2435 }
2436
2437 /* Process the RX completion indicated by rxcp when GRO is enabled */
2438 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2439                                     struct napi_struct *napi,
2440                                     struct be_rx_compl_info *rxcp)
2441 {
2442         struct be_adapter *adapter = rxo->adapter;
2443         struct be_rx_page_info *page_info;
2444         struct sk_buff *skb = NULL;
2445         u16 remaining, curr_frag_len;
2446         u16 i, j;
2447
2448         skb = napi_get_frags(napi);
2449         if (!skb) {
2450                 be_rx_compl_discard(rxo, rxcp);
2451                 return;
2452         }
2453
2454         remaining = rxcp->pkt_size;
2455         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2456                 page_info = get_rx_page_info(rxo);
2457
2458                 curr_frag_len = min(remaining, rx_frag_size);
2459
2460                 /* Coalesce all frags from the same physical page in one slot */
2461                 if (i == 0 || page_info->page_offset == 0) {
2462                         /* First frag or Fresh page */
2463                         j++;
2464                         skb_frag_set_page(skb, j, page_info->page);
2465                         skb_shinfo(skb)->frags[j].page_offset =
2466                                                         page_info->page_offset;
2467                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2468                 } else {
2469                         put_page(page_info->page);
2470                 }
2471                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2472                 skb->truesize += rx_frag_size;
2473                 remaining -= curr_frag_len;
2474                 memset(page_info, 0, sizeof(*page_info));
2475         }
2476         BUG_ON(j > MAX_SKB_FRAGS);
2477
2478         skb_shinfo(skb)->nr_frags = j + 1;
2479         skb->len = rxcp->pkt_size;
2480         skb->data_len = rxcp->pkt_size;
2481         skb->ip_summed = CHECKSUM_UNNECESSARY;
2482         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2483         if (adapter->netdev->features & NETIF_F_RXHASH)
2484                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2485
2486         skb->csum_level = rxcp->tunneled;
2487
2488         if (rxcp->vlanf)
2489                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2490
2491         napi_gro_frags(napi);
2492 }
2493
2494 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2495                                  struct be_rx_compl_info *rxcp)
2496 {
2497         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2498         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2499         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2500         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2501         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2502         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2503         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2504         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2505         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2506         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2507         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2508         if (rxcp->vlanf) {
2509                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2510                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2511         }
2512         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2513         rxcp->tunneled =
2514                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2515 }
2516
2517 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2518                                  struct be_rx_compl_info *rxcp)
2519 {
2520         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2521         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2522         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2523         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2524         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2525         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2526         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2527         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2528         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2529         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2530         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2531         if (rxcp->vlanf) {
2532                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2533                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2534         }
2535         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2536         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2537 }
2538
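/* Fetch the next valid RX completion from the CQ, parse it (v0 or v1 format
 * depending on the chip) into rxo->rxcp and invalidate the CQ entry.
 * Returns NULL when no new completion is available.
 */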
2539 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2540 {
2541         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2542         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2543         struct be_adapter *adapter = rxo->adapter;
2544
2545         /* For checking the valid bit, it is OK to use either definition as the
2546          * valid bit is at the same position in both v0 and v1 Rx compls */
2547         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2548                 return NULL;
2549
2550         rmb();
2551         be_dws_le_to_cpu(compl, sizeof(*compl));
2552
2553         if (adapter->be3_native)
2554                 be_parse_rx_compl_v1(compl, rxcp);
2555         else
2556                 be_parse_rx_compl_v0(compl, rxcp);
2557
2558         if (rxcp->ip_frag)
2559                 rxcp->l4_csum = 0;
2560
2561         if (rxcp->vlanf) {
2562                 /* In QNQ modes, if qnq bit is not set, then the packet was
2563                  * tagged only with the transparent outer vlan-tag and must
2564                  * not be treated as a vlan packet by host
2565                  */
2566                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2567                         rxcp->vlanf = 0;
2568
2569                 if (!lancer_chip(adapter))
2570                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2571
2572                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2573                     !test_bit(rxcp->vlan_tag, adapter->vids))
2574                         rxcp->vlanf = 0;
2575         }
2576
2577         /* As the compl has been parsed, reset it; we won't touch it again */
2578         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2579
2580         queue_tail_inc(&rxo->cq);
2581         return rxcp;
2582 }
2583
2584 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2585 {
2586         u32 order = get_order(size);
2587
2588         if (order > 0)
2589                 gfp |= __GFP_COMP;
2590         return  alloc_pages(gfp, order);
2591 }
2592
2593 /*
2594  * Allocate a page, split it to fragments of size rx_frag_size and post as
2595  * receive buffers to BE
2596  */
2597 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2598 {
2599         struct be_adapter *adapter = rxo->adapter;
2600         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2601         struct be_queue_info *rxq = &rxo->q;
2602         struct page *pagep = NULL;
2603         struct device *dev = &adapter->pdev->dev;
2604         struct be_eth_rx_d *rxd;
2605         u64 page_dmaaddr = 0, frag_dmaaddr;
2606         u32 posted, page_offset = 0, notify = 0;
2607
2608         page_info = &rxo->page_info_tbl[rxq->head];
2609         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2610                 if (!pagep) {
2611                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2612                         if (unlikely(!pagep)) {
2613                                 rx_stats(rxo)->rx_post_fail++;
2614                                 break;
2615                         }
2616                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2617                                                     adapter->big_page_size,
2618                                                     DMA_FROM_DEVICE);
2619                         if (dma_mapping_error(dev, page_dmaaddr)) {
2620                                 put_page(pagep);
2621                                 pagep = NULL;
2622                                 adapter->drv_stats.dma_map_errors++;
2623                                 break;
2624                         }
2625                         page_offset = 0;
2626                 } else {
2627                         get_page(pagep);
2628                         page_offset += rx_frag_size;
2629                 }
2630                 page_info->page_offset = page_offset;
2631                 page_info->page = pagep;
2632
2633                 rxd = queue_head_node(rxq);
2634                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2635                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2636                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2637
2638                 /* Any space left in the current big page for another frag? */
2639                 if ((page_offset + rx_frag_size + rx_frag_size) >
2640                                         adapter->big_page_size) {
2641                         pagep = NULL;
2642                         page_info->last_frag = true;
2643                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2644                 } else {
2645                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2646                 }
2647
2648                 prev_page_info = page_info;
2649                 queue_head_inc(rxq);
2650                 page_info = &rxo->page_info_tbl[rxq->head];
2651         }
2652
2653         /* Mark the last frag of a page when we break out of the above loop
2654          * with no more slots available in the RXQ
2655          */
2656         if (pagep) {
2657                 prev_page_info->last_frag = true;
2658                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2659         }
2660
2661         if (posted) {
2662                 atomic_add(posted, &rxq->used);
2663                 if (rxo->rx_post_starved)
2664                         rxo->rx_post_starved = false;
2665                 do {
2666                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2667                         be_rxq_notify(adapter, rxq->id, notify);
2668                         posted -= notify;
2669                 } while (posted);
2670         } else if (atomic_read(&rxq->used) == 0) {
2671                 /* Let be_worker replenish when memory is available */
2672                 rxo->rx_post_starved = true;
2673         }
2674 }
2675
2676 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
2677 {
2678         switch (status) {
2679         case BE_TX_COMP_HDR_PARSE_ERR:
2680                 tx_stats(txo)->tx_hdr_parse_err++;
2681                 break;
2682         case BE_TX_COMP_NDMA_ERR:
2683                 tx_stats(txo)->tx_dma_err++;
2684                 break;
2685         case BE_TX_COMP_ACL_ERR:
2686                 tx_stats(txo)->tx_spoof_check_err++;
2687                 break;
2688         }
2689 }
2690
2691 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
2692 {
2693         switch (status) {
2694         case LANCER_TX_COMP_LSO_ERR:
2695                 tx_stats(txo)->tx_tso_err++;
2696                 break;
2697         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
2698         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
2699                 tx_stats(txo)->tx_spoof_check_err++;
2700                 break;
2701         case LANCER_TX_COMP_QINQ_ERR:
2702                 tx_stats(txo)->tx_qinq_err++;
2703                 break;
2704         case LANCER_TX_COMP_PARITY_ERR:
2705                 tx_stats(txo)->tx_internal_parity_err++;
2706                 break;
2707         case LANCER_TX_COMP_DMA_ERR:
2708                 tx_stats(txo)->tx_dma_err++;
2709                 break;
2710         case LANCER_TX_COMP_SGE_ERR:
2711                 tx_stats(txo)->tx_sge_err++;
2712                 break;
2713         }
2714 }
2715
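/* Fetch the next valid TX completion from the CQ, account any error status
 * in the TX stats and invalidate the entry. On Lancer, fatal completion
 * errors (LSO, parity, SGE) additionally mark the adapter with BE_ERROR_TX.
 */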
2716 static struct be_tx_compl_info *be_tx_compl_get(struct be_adapter *adapter,
2717                                                 struct be_tx_obj *txo)
2718 {
2719         struct be_queue_info *tx_cq = &txo->cq;
2720         struct be_tx_compl_info *txcp = &txo->txcp;
2721         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2722
2723         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2724                 return NULL;
2725
2726         /* Ensure load ordering of valid bit dword and other dwords below */
2727         rmb();
2728         be_dws_le_to_cpu(compl, sizeof(*compl));
2729
2730         txcp->status = GET_TX_COMPL_BITS(status, compl);
2731         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2732
2733         if (txcp->status) {
2734                 if (lancer_chip(adapter)) {
2735                         lancer_update_tx_err(txo, txcp->status);
2736                         /* Reset the adapter in case of TSO,
2737                          * SGE or parity error
2738                          */
2739                         if (txcp->status == LANCER_TX_COMP_LSO_ERR ||
2740                             txcp->status == LANCER_TX_COMP_PARITY_ERR ||
2741                             txcp->status == LANCER_TX_COMP_SGE_ERR)
2742                                 be_set_error(adapter, BE_ERROR_TX);
2743                 } else {
2744                         be_update_tx_err(txo, txcp->status);
2745                 }
2746         }
2747
2748         if (be_check_error(adapter, BE_ERROR_TX))
2749                 return NULL;
2750
2751         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2752         queue_tail_inc(tx_cq);
2753         return txcp;
2754 }
2755
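/* Unmap and free the skb(s) whose WRBs, up to and including last_index, have
 * been completed by HW. Returns the number of WRBs (header + fragment
 * descriptors) that were processed.
 */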
2756 static u16 be_tx_compl_process(struct be_adapter *adapter,
2757                                struct be_tx_obj *txo, u16 last_index)
2758 {
2759         struct sk_buff **sent_skbs = txo->sent_skb_list;
2760         struct be_queue_info *txq = &txo->q;
2761         struct sk_buff *skb = NULL;
2762         bool unmap_skb_hdr = false;
2763         struct be_eth_wrb *wrb;
2764         u16 num_wrbs = 0;
2765         u32 frag_index;
2766
2767         do {
2768                 if (sent_skbs[txq->tail]) {
2769                         /* Free skb from prev req */
2770                         if (skb)
2771                                 dev_consume_skb_any(skb);
2772                         skb = sent_skbs[txq->tail];
2773                         sent_skbs[txq->tail] = NULL;
2774                         queue_tail_inc(txq);  /* skip hdr wrb */
2775                         num_wrbs++;
2776                         unmap_skb_hdr = true;
2777                 }
2778                 wrb = queue_tail_node(txq);
2779                 frag_index = txq->tail;
2780                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2781                               (unmap_skb_hdr && skb_headlen(skb)));
2782                 unmap_skb_hdr = false;
2783                 queue_tail_inc(txq);
2784                 num_wrbs++;
2785         } while (frag_index != last_index);
2786         dev_consume_skb_any(skb);
2787
2788         return num_wrbs;
2789 }
2790
2791 /* Return the number of events in the event queue */
2792 static inline int events_get(struct be_eq_obj *eqo)
2793 {
2794         struct be_eq_entry *eqe;
2795         int num = 0;
2796
2797         do {
2798                 eqe = queue_tail_node(&eqo->q);
2799                 if (eqe->evt == 0)
2800                         break;
2801
2802                 rmb();
2803                 eqe->evt = 0;
2804                 num++;
2805                 queue_tail_inc(&eqo->q);
2806         } while (true);
2807
2808         return num;
2809 }
2810
2811 /* Leaves the EQ in a disarmed state */
2812 static void be_eq_clean(struct be_eq_obj *eqo)
2813 {
2814         int num = events_get(eqo);
2815
2816         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2817 }
2818
2819 /* Free posted rx buffers that were not used */
2820 static void be_rxq_clean(struct be_rx_obj *rxo)
2821 {
2822         struct be_queue_info *rxq = &rxo->q;
2823         struct be_rx_page_info *page_info;
2824
2825         while (atomic_read(&rxq->used) > 0) {
2826                 page_info = get_rx_page_info(rxo);
2827                 put_page(page_info->page);
2828                 memset(page_info, 0, sizeof(*page_info));
2829         }
2830         BUG_ON(atomic_read(&rxq->used));
2831         rxq->tail = 0;
2832         rxq->head = 0;
2833 }
2834
2835 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2836 {
2837         struct be_queue_info *rx_cq = &rxo->cq;
2838         struct be_rx_compl_info *rxcp;
2839         struct be_adapter *adapter = rxo->adapter;
2840         int flush_wait = 0;
2841
2842         /* Consume pending rx completions.
2843          * Wait for the flush completion (identified by zero num_rcvd)
2844          * to arrive. Notify CQ even when there are no more CQ entries
2845          * for HW to flush partially coalesced CQ entries.
2846          * In Lancer, there is no need to wait for flush compl.
2847          */
2848         for (;;) {
2849                 rxcp = be_rx_compl_get(rxo);
2850                 if (!rxcp) {
2851                         if (lancer_chip(adapter))
2852                                 break;
2853
2854                         if (flush_wait++ > 50 ||
2855                             be_check_error(adapter,
2856                                            BE_ERROR_HW)) {
2857                                 dev_warn(&adapter->pdev->dev,
2858                                          "did not receive flush compl\n");
2859                                 break;
2860                         }
2861                         be_cq_notify(adapter, rx_cq->id, true, 0);
2862                         mdelay(1);
2863                 } else {
2864                         be_rx_compl_discard(rxo, rxcp);
2865                         be_cq_notify(adapter, rx_cq->id, false, 1);
2866                         if (rxcp->num_rcvd == 0)
2867                                 break;
2868                 }
2869         }
2870
2871         /* After cleanup, leave the CQ in unarmed state */
2872         be_cq_notify(adapter, rx_cq->id, false, 0);
2873 }
2874
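/* Drain all pending TX completions and free any TX requests that were
 * queued but never notified to the HW, so that no skbs are leaked when
 * the interface is being brought down.
 */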
2875 static void be_tx_compl_clean(struct be_adapter *adapter)
2876 {
2877         struct device *dev = &adapter->pdev->dev;
2878         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2879         struct be_tx_compl_info *txcp;
2880         struct be_queue_info *txq;
2881         u32 end_idx, notified_idx;
2882         struct be_tx_obj *txo;
2883         int i, pending_txqs;
2884
2885         /* Stop polling for compls when HW has been silent for 10ms */
2886         do {
2887                 pending_txqs = adapter->num_tx_qs;
2888
2889                 for_all_tx_queues(adapter, txo, i) {
2890                         cmpl = 0;
2891                         num_wrbs = 0;
2892                         txq = &txo->q;
2893                         while ((txcp = be_tx_compl_get(adapter, txo))) {
2894                                 num_wrbs +=
2895                                         be_tx_compl_process(adapter, txo,
2896                                                             txcp->end_index);
2897                                 cmpl++;
2898                         }
2899                         if (cmpl) {
2900                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2901                                 atomic_sub(num_wrbs, &txq->used);
2902                                 timeo = 0;
2903                         }
2904                         if (!be_is_tx_compl_pending(txo))
2905                                 pending_txqs--;
2906                 }
2907
2908                 if (pending_txqs == 0 || ++timeo > 10 ||
2909                     be_check_error(adapter, BE_ERROR_HW))
2910                         break;
2911
2912                 mdelay(1);
2913         } while (true);
2914
2915         /* Free enqueued TX that was never notified to HW */
2916         for_all_tx_queues(adapter, txo, i) {
2917                 txq = &txo->q;
2918
2919                 if (atomic_read(&txq->used)) {
2920                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2921                                  i, atomic_read(&txq->used));
2922                         notified_idx = txq->tail;
2923                         end_idx = txq->tail;
2924                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2925                                   txq->len);
2926                         /* Use the tx-compl process logic to handle requests
2927                          * that were not sent to the HW.
2928                          */
2929                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2930                         atomic_sub(num_wrbs, &txq->used);
2931                         BUG_ON(atomic_read(&txq->used));
2932                         txo->pend_wrb_cnt = 0;
2933                         /* Since hw was never notified of these requests,
2934                          * reset TXQ indices
2935                          */
2936                         txq->head = notified_idx;
2937                         txq->tail = notified_idx;
2938                 }
2939         }
2940 }
2941
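/* Destroy all event queues: drain pending events, issue the EQ destroy
 * cmd, remove the NAPI context and free the queue memory.
 */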
2942 static void be_evt_queues_destroy(struct be_adapter *adapter)
2943 {
2944         struct be_eq_obj *eqo;
2945         int i;
2946
2947         for_all_evt_queues(adapter, eqo, i) {
2948                 if (eqo->q.created) {
2949                         be_eq_clean(eqo);
2950                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2951                         netif_napi_del(&eqo->napi);
2952                         free_cpumask_var(eqo->affinity_mask);
2953                 }
2954                 be_queue_free(adapter, &eqo->q);
2955         }
2956 }
2957
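/* Create one EQ per interrupt vector, register a NAPI context for each
 * and pick a CPU for each EQ's affinity mask from the device's NUMA node.
 */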
2958 static int be_evt_queues_create(struct be_adapter *adapter)
2959 {
2960         struct be_queue_info *eq;
2961         struct be_eq_obj *eqo;
2962         struct be_aic_obj *aic;
2963         int i, rc;
2964
2965         /* need enough EQs to service both RX and TX queues */
2966         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2967                                     max(adapter->cfg_num_rx_irqs,
2968                                         adapter->cfg_num_tx_irqs));
2969
2970         for_all_evt_queues(adapter, eqo, i) {
2971                 int numa_node = dev_to_node(&adapter->pdev->dev);
2972
2973                 aic = &adapter->aic_obj[i];
2974                 eqo->adapter = adapter;
2975                 eqo->idx = i;
2976                 aic->max_eqd = BE_MAX_EQD;
2977                 aic->enable = true;
2978
2979                 eq = &eqo->q;
2980                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2981                                     sizeof(struct be_eq_entry));
2982                 if (rc)
2983                         return rc;
2984
2985                 rc = be_cmd_eq_create(adapter, eqo);
2986                 if (rc)
2987                         return rc;
2988
2989                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2990                         return -ENOMEM;
2991                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2992                                 eqo->affinity_mask);
2993                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2994                                BE_NAPI_WEIGHT);
2995         }
2996         return 0;
2997 }
2998
2999 static void be_mcc_queues_destroy(struct be_adapter *adapter)
3000 {
3001         struct be_queue_info *q;
3002
3003         q = &adapter->mcc_obj.q;
3004         if (q->created)
3005                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
3006         be_queue_free(adapter, q);
3007
3008         q = &adapter->mcc_obj.cq;
3009         if (q->created)
3010                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3011         be_queue_free(adapter, q);
3012 }
3013
3014 /* Must be called only after TX qs are created as MCC shares TX EQ */
3015 static int be_mcc_queues_create(struct be_adapter *adapter)
3016 {
3017         struct be_queue_info *q, *cq;
3018
3019         cq = &adapter->mcc_obj.cq;
3020         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
3021                            sizeof(struct be_mcc_compl)))
3022                 goto err;
3023
3024         /* Use the default EQ for MCC completions */
3025         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
3026                 goto mcc_cq_free;
3027
3028         q = &adapter->mcc_obj.q;
3029         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
3030                 goto mcc_cq_destroy;
3031
3032         if (be_cmd_mccq_create(adapter, q, cq))
3033                 goto mcc_q_free;
3034
3035         return 0;
3036
3037 mcc_q_free:
3038         be_queue_free(adapter, q);
3039 mcc_cq_destroy:
3040         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
3041 mcc_cq_free:
3042         be_queue_free(adapter, cq);
3043 err:
3044         return -1;
3045 }
3046
3047 static void be_tx_queues_destroy(struct be_adapter *adapter)
3048 {
3049         struct be_queue_info *q;
3050         struct be_tx_obj *txo;
3051         u8 i;
3052
3053         for_all_tx_queues(adapter, txo, i) {
3054                 q = &txo->q;
3055                 if (q->created)
3056                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
3057                 be_queue_free(adapter, q);
3058
3059                 q = &txo->cq;
3060                 if (q->created)
3061                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3062                 be_queue_free(adapter, q);
3063         }
3064 }
3065
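/* Create the TX queues and their CQs; each TX CQ is bound to an EQ in
 * round-robin order and XPS is programmed with that EQ's affinity mask.
 */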
3066 static int be_tx_qs_create(struct be_adapter *adapter)
3067 {
3068         struct be_queue_info *cq;
3069         struct be_tx_obj *txo;
3070         struct be_eq_obj *eqo;
3071         int status, i;
3072
3073         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
3074
3075         for_all_tx_queues(adapter, txo, i) {
3076                 cq = &txo->cq;
3077                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
3078                                         sizeof(struct be_eth_tx_compl));
3079                 if (status)
3080                         return status;
3081
3082                 u64_stats_init(&txo->stats.sync);
3083                 u64_stats_init(&txo->stats.sync_compl);
3084
3085                 /* If num_evt_qs is less than num_tx_qs, then more than
3086                  * one txq shares an eq
3087                  */
3088                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
3089                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
3090                 if (status)
3091                         return status;
3092
3093                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
3094                                         sizeof(struct be_eth_wrb));
3095                 if (status)
3096                         return status;
3097
3098                 status = be_cmd_txq_create(adapter, txo);
3099                 if (status)
3100                         return status;
3101
3102                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
3103                                     eqo->idx);
3104         }
3105
3106         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
3107                  adapter->num_tx_qs);
3108         return 0;
3109 }
3110
3111 static void be_rx_cqs_destroy(struct be_adapter *adapter)
3112 {
3113         struct be_queue_info *q;
3114         struct be_rx_obj *rxo;
3115         int i;
3116
3117         for_all_rx_queues(adapter, rxo, i) {
3118                 q = &rxo->cq;
3119                 if (q->created)
3120                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
3121                 be_queue_free(adapter, q);
3122         }
3123 }
3124
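/* Work out how many RX queues (RSS rings plus an optional default RXQ)
 * will be used and create a completion queue for each, sharing the EQs
 * in round-robin order.
 */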
3125 static int be_rx_cqs_create(struct be_adapter *adapter)
3126 {
3127         struct be_queue_info *eq, *cq;
3128         struct be_rx_obj *rxo;
3129         int rc, i;
3130
3131         adapter->num_rss_qs =
3132                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
3133
3134         /* We'll use RSS only if at least 2 RSS rings are supported. */
3135         if (adapter->num_rss_qs < 2)
3136                 adapter->num_rss_qs = 0;
3137
3138         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
3139
3140         /* When the interface is not capable of RSS rings (and there is no
3141          * need to create a default RXQ) we'll still need one RXQ
3142          */
3143         if (adapter->num_rx_qs == 0)
3144                 adapter->num_rx_qs = 1;
3145
3146         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
3147         for_all_rx_queues(adapter, rxo, i) {
3148                 rxo->adapter = adapter;
3149                 cq = &rxo->cq;
3150                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3151                                     sizeof(struct be_eth_rx_compl));
3152                 if (rc)
3153                         return rc;
3154
3155                 u64_stats_init(&rxo->stats.sync);
3156                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3157                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3158                 if (rc)
3159                         return rc;
3160         }
3161
3162         dev_info(&adapter->pdev->dev,
3163                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3164         return 0;
3165 }
3166
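/* INTx interrupt handler: schedule NAPI (if not already running),
 * acknowledge the counted events without re-arming the EQ, and report
 * repeated spurious interrupts as IRQ_NONE.
 */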
3167 static irqreturn_t be_intx(int irq, void *dev)
3168 {
3169         struct be_eq_obj *eqo = dev;
3170         struct be_adapter *adapter = eqo->adapter;
3171         int num_evts = 0;
3172
3173         /* IRQ is not expected when NAPI is scheduled as the EQ
3174          * will not be armed.
3175          * But, this can happen on Lancer INTx where it takes
3176          * a while to de-assert INTx or in BE2 where occasionally
3177          * an interrupt may be raised even when EQ is unarmed.
3178          * If NAPI is already scheduled, then counting & notifying
3179          * events will orphan them.
3180          */
3181         if (napi_schedule_prep(&eqo->napi)) {
3182                 num_evts = events_get(eqo);
3183                 __napi_schedule(&eqo->napi);
3184                 if (num_evts)
3185                         eqo->spurious_intr = 0;
3186         }
3187         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3188
3189         /* Return IRQ_HANDLED only for the first spurious intr
3190          * after a valid intr to stop the kernel from branding
3191          * this irq as a bad one!
3192          */
3193         if (num_evts || eqo->spurious_intr++ == 0)
3194                 return IRQ_HANDLED;
3195         else
3196                 return IRQ_NONE;
3197 }
3198
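/* MSI-X interrupt handler: clear the interrupt without re-arming the EQ
 * and schedule NAPI processing.
 */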
3199 static irqreturn_t be_msix(int irq, void *dev)
3200 {
3201         struct be_eq_obj *eqo = dev;
3202
3203         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3204         napi_schedule(&eqo->napi);
3205         return IRQ_HANDLED;
3206 }
3207
3208 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3209 {
3210         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3211 }
3212
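/* Process up to 'budget' RX completions on this RX object, handing good
 * pkts to GRO or the regular RX path, and replenish RX fragment buffers
 * when the RXQ runs below the refill watermark.
 */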
3213 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3214                          int budget)
3215 {
3216         struct be_adapter *adapter = rxo->adapter;
3217         struct be_queue_info *rx_cq = &rxo->cq;
3218         struct be_rx_compl_info *rxcp;
3219         u32 work_done;
3220         u32 frags_consumed = 0;
3221
3222         for (work_done = 0; work_done < budget; work_done++) {
3223                 rxcp = be_rx_compl_get(rxo);
3224                 if (!rxcp)
3225                         break;
3226
3227                 /* Is it a flush compl that has no data */
3228                 if (unlikely(rxcp->num_rcvd == 0))
3229                         goto loop_continue;
3230
3231                 /* Discard compl with partial DMA Lancer B0 */
3232                 if (unlikely(!rxcp->pkt_size)) {
3233                         be_rx_compl_discard(rxo, rxcp);
3234                         goto loop_continue;
3235                 }
3236
3237                 /* On BE drop pkts that arrive due to imperfect filtering in
3238                  * promiscuous mode on some SKUs
3239                  */
3240                 if (unlikely(rxcp->port != adapter->port_num &&
3241                              !lancer_chip(adapter))) {
3242                         be_rx_compl_discard(rxo, rxcp);
3243                         goto loop_continue;
3244                 }
3245
3246                 if (do_gro(rxcp))
3247                         be_rx_compl_process_gro(rxo, napi, rxcp);
3248                 else
3249                         be_rx_compl_process(rxo, napi, rxcp);
3250
3251 loop_continue:
3252                 frags_consumed += rxcp->num_rcvd;
3253                 be_rx_stats_update(rxo, rxcp);
3254         }
3255
3256         if (work_done) {
3257                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3258
3259                 /* When an rx-obj gets into post_starved state, just
3260                  * let be_worker do the posting.
3261                  */
3262                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3263                     !rxo->rx_post_starved)
3264                         be_post_rx_frags(rxo, GFP_ATOMIC,
3265                                          max_t(u32, MAX_RX_POST,
3266                                                frags_consumed));
3267         }
3268
3269         return work_done;
3270 }
3271
3272
3273 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3274                           int idx)
3275 {
3276         int num_wrbs = 0, work_done = 0;
3277         struct be_tx_compl_info *txcp;
3278
3279         while ((txcp = be_tx_compl_get(adapter, txo))) {
3280                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3281                 work_done++;
3282         }
3283
3284         if (work_done) {
3285                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3286                 atomic_sub(num_wrbs, &txo->q.used);
3287
3288                 /* As Tx wrbs have been freed up, wake up netdev queue
3289                  * if it was stopped due to lack of tx wrbs.  */
3290                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3291                     be_can_txq_wake(txo)) {
3292                         netif_wake_subqueue(adapter->netdev, idx);
3293                 }
3294
3295                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3296                 tx_stats(txo)->tx_compl += work_done;
3297                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3298         }
3299 }
3300
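/* NAPI poll handler: reap TX completions, process RX (and MCC events on
 * the MCC EQ) and re-arm the EQ only when the RX budget was not consumed.
 */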
3301 int be_poll(struct napi_struct *napi, int budget)
3302 {
3303         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3304         struct be_adapter *adapter = eqo->adapter;
3305         int max_work = 0, work, i, num_evts;
3306         struct be_rx_obj *rxo;
3307         struct be_tx_obj *txo;
3308         u32 mult_enc = 0;
3309
3310         num_evts = events_get(eqo);
3311
3312         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3313                 be_process_tx(adapter, txo, i);
3314
3315         /* This loop will iterate twice for EQ0 in which
3316          * completions of the last RXQ (default one) are also processed.
3317          * For other EQs the loop iterates only once
3318          */
3319         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3320                 work = be_process_rx(rxo, napi, budget);
3321                 max_work = max(work, max_work);
3322         }
3323
3324         if (is_mcc_eqo(eqo))
3325                 be_process_mcc(adapter);
3326
3327         if (max_work < budget) {
3328                 napi_complete_done(napi, max_work);
3329
3330                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3331                  * delay via a delay multiplier encoding value
3332                  */
3333                 if (skyhawk_chip(adapter))
3334                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3335
3336                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3337                              mult_enc);
3338         } else {
3339                 /* As we'll continue in polling mode, count and clear events */
3340                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3341         }
3342         return max_work;
3343 }
3344
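/* Check the SLIPORT status registers (Lancer) or the UE status registers
 * (BEx/Skyhawk) for unrecoverable errors and mark the adapter state
 * accordingly.
 */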
3345 void be_detect_error(struct be_adapter *adapter)
3346 {
3347         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3348         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3349         struct device *dev = &adapter->pdev->dev;
3350         u16 val;
3351         u32 i;
3352
3353         if (be_check_error(adapter, BE_ERROR_HW))
3354                 return;
3355
3356         if (lancer_chip(adapter)) {
3357                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3358                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3359                         be_set_error(adapter, BE_ERROR_UE);
3360                         sliport_err1 = ioread32(adapter->db +
3361                                                 SLIPORT_ERROR1_OFFSET);
3362                         sliport_err2 = ioread32(adapter->db +
3363                                                 SLIPORT_ERROR2_OFFSET);
3364                         /* Do not log error messages if it's a FW reset */
3365                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3366                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3367                                 dev_info(dev, "Reset is in progress\n");
3368                         } else {
3369                                 dev_err(dev, "Error detected in the card\n");
3370                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3371                                         sliport_status);
3372                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3373                                         sliport_err1);
3374                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3375                                         sliport_err2);
3376                         }
3377                 }
3378         } else {
3379                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3380                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3381                 ue_lo_mask = ioread32(adapter->pcicfg +
3382                                       PCICFG_UE_STATUS_LOW_MASK);
3383                 ue_hi_mask = ioread32(adapter->pcicfg +
3384                                       PCICFG_UE_STATUS_HI_MASK);
3385
3386                 ue_lo = (ue_lo & ~ue_lo_mask);
3387                 ue_hi = (ue_hi & ~ue_hi_mask);
3388
3389                 if (ue_lo || ue_hi) {
3390                         /* On certain platforms BE3 hardware can indicate
3391                          * spurious UEs. In case of a UE in the chip,
3392                          * the POST register correctly reports either a
3393                          * FAT_LOG_START state (FW is currently dumping
3394                          * FAT log data) or an ARMFW_UE state. Check for the
3395                          * above states to ascertain if the UE is valid or not.
3396                          */
3397                         if (BE3_chip(adapter)) {
3398                                 val = be_POST_stage_get(adapter);
3399                                 if ((val & POST_STAGE_FAT_LOG_START)
3400                                      != POST_STAGE_FAT_LOG_START &&
3401                                     (val & POST_STAGE_ARMFW_UE)
3402                                      != POST_STAGE_ARMFW_UE &&
3403                                     (val & POST_STAGE_RECOVERABLE_ERR)
3404                                      != POST_STAGE_RECOVERABLE_ERR)
3405                                         return;
3406                         }
3407
3408                         dev_err(dev, "Error detected in the adapter");
3409                         be_set_error(adapter, BE_ERROR_UE);
3410
3411                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3412                                 if (ue_lo & 1)
3413                                         dev_err(dev, "UE: %s bit set\n",
3414                                                 ue_status_low_desc[i]);
3415                         }
3416                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3417                                 if (ue_hi & 1)
3418                                         dev_err(dev, "UE: %s bit set\n",
3419                                                 ue_status_hi_desc[i]);
3420                         }
3421                 }
3422         }
3423 }
3424
3425 static void be_msix_disable(struct be_adapter *adapter)
3426 {
3427         if (msix_enabled(adapter)) {
3428                 pci_disable_msix(adapter->pdev);
3429                 adapter->num_msix_vec = 0;
3430                 adapter->num_msix_roce_vec = 0;
3431         }
3432 }
3433
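/* Request MSI-X vectors for the NIC (and RoCE, if supported) event queues
 * and split the granted vectors between NIC and RoCE when applicable.
 */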
3434 static int be_msix_enable(struct be_adapter *adapter)
3435 {
3436         unsigned int i, max_roce_eqs;
3437         struct device *dev = &adapter->pdev->dev;
3438         int num_vec;
3439
3440         /* If RoCE is supported, program the max number of vectors that
3441          * could be used for NIC and RoCE, else, just program the number
3442          * we'll use initially.
3443          */
3444         if (be_roce_supported(adapter)) {
3445                 max_roce_eqs =
3446                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3447                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3448                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3449         } else {
3450                 num_vec = max(adapter->cfg_num_rx_irqs,
3451                               adapter->cfg_num_tx_irqs);
3452         }
3453
3454         for (i = 0; i < num_vec; i++)
3455                 adapter->msix_entries[i].entry = i;
3456
3457         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3458                                         MIN_MSIX_VECTORS, num_vec);
3459         if (num_vec < 0)
3460                 goto fail;
3461
3462         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3463                 adapter->num_msix_roce_vec = num_vec / 2;
3464                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3465                          adapter->num_msix_roce_vec);
3466         }
3467
3468         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3469
3470         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3471                  adapter->num_msix_vec);
3472         return 0;
3473
3474 fail:
3475         dev_warn(dev, "MSIx enable failed\n");
3476
3477         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3478         if (be_virtfn(adapter))
3479                 return num_vec;
3480         return 0;
3481 }
3482
3483 static inline int be_msix_vec_get(struct be_adapter *adapter,
3484                                   struct be_eq_obj *eqo)
3485 {
3486         return adapter->msix_entries[eqo->msix_idx].vector;
3487 }
3488
3489 static int be_msix_register(struct be_adapter *adapter)
3490 {
3491         struct net_device *netdev = adapter->netdev;
3492         struct be_eq_obj *eqo;
3493         int status, i, vec;
3494
3495         for_all_evt_queues(adapter, eqo, i) {
3496                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3497                 vec = be_msix_vec_get(adapter, eqo);
3498                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3499                 if (status)
3500                         goto err_msix;
3501
3502                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3503         }
3504
3505         return 0;
3506 err_msix:
3507         for (i--; i >= 0; i--) {
3508                 eqo = &adapter->eq_obj[i];
3509                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3510         }
3511         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3512                  status);
3513         be_msix_disable(adapter);
3514         return status;
3515 }
3516
3517 static int be_irq_register(struct be_adapter *adapter)
3518 {
3519         struct net_device *netdev = adapter->netdev;
3520         int status;
3521
3522         if (msix_enabled(adapter)) {
3523                 status = be_msix_register(adapter);
3524                 if (status == 0)
3525                         goto done;
3526                 /* INTx is not supported for VF */
3527                 if (be_virtfn(adapter))
3528                         return status;
3529         }
3530
3531         /* INTx: only the first EQ is used */
3532         netdev->irq = adapter->pdev->irq;
3533         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3534                              &adapter->eq_obj[0]);
3535         if (status) {
3536                 dev_err(&adapter->pdev->dev,
3537                         "INTx request IRQ failed - err %d\n", status);
3538                 return status;
3539         }
3540 done:
3541         adapter->isr_registered = true;
3542         return 0;
3543 }
3544
3545 static void be_irq_unregister(struct be_adapter *adapter)
3546 {
3547         struct net_device *netdev = adapter->netdev;
3548         struct be_eq_obj *eqo;
3549         int i, vec;
3550
3551         if (!adapter->isr_registered)
3552                 return;
3553
3554         /* INTx */
3555         if (!msix_enabled(adapter)) {
3556                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3557                 goto done;
3558         }
3559
3560         /* MSIx */
3561         for_all_evt_queues(adapter, eqo, i) {
3562                 vec = be_msix_vec_get(adapter, eqo);
3563                 irq_set_affinity_hint(vec, NULL);
3564                 free_irq(vec, eqo);
3565         }
3566
3567 done:
3568         adapter->isr_registered = false;
3569 }
3570
3571 static void be_rx_qs_destroy(struct be_adapter *adapter)
3572 {
3573         struct rss_info *rss = &adapter->rss_info;
3574         struct be_queue_info *q;
3575         struct be_rx_obj *rxo;
3576         int i;
3577
3578         for_all_rx_queues(adapter, rxo, i) {
3579                 q = &rxo->q;
3580                 if (q->created) {
3581                         /* If RXQs are destroyed while in an "out of buffer"
3582                          * state, there is a possibility of an HW stall on
3583                          * Lancer. So, post 64 buffers to each queue to relieve
3584                          * the "out of buffer" condition.
3585                          * Make sure there's space in the RXQ before posting.
3586                          */
3587                         if (lancer_chip(adapter)) {
3588                                 be_rx_cq_clean(rxo);
3589                                 if (atomic_read(&q->used) == 0)
3590                                         be_post_rx_frags(rxo, GFP_KERNEL,
3591                                                          MAX_RX_POST);
3592                         }
3593
3594                         be_cmd_rxq_destroy(adapter, q);
3595                         be_rx_cq_clean(rxo);
3596                         be_rxq_clean(rxo);
3597                 }
3598                 be_queue_free(adapter, q);
3599         }
3600
3601         if (rss->rss_flags) {
3602                 rss->rss_flags = RSS_ENABLE_NONE;
3603                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3604                                   128, rss->rss_hkey);
3605         }
3606 }
3607
3608 static void be_disable_if_filters(struct be_adapter *adapter)
3609 {
3610         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3611         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3612             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3613                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3614                 eth_zero_addr(adapter->dev_mac);
3615         }
3616
3617         be_clear_uc_list(adapter);
3618         be_clear_mc_list(adapter);
3619
3620         /* The IFACE flags are enabled in the open path and cleared
3621          * in the close path. When a VF gets detached from the host and
3622          * assigned to a VM the following happens:
3623          *      - VF's IFACE flags get cleared in the detach path
3624          *      - IFACE create is issued by the VF in the attach path
3625          * Due to a bug in the BE3/Skyhawk-R FW
3626          * (Lancer FW doesn't have the bug), the IFACE capability flags
3627          * specified along with the IFACE create cmd issued by a VF are not
3628          * honoured by FW.  As a consequence, if a *new* driver
3629          * (that enables/disables IFACE flags in open/close)
3630          * is loaded in the host and an *old* driver is used by a VM/VF,
3631          * the IFACE gets created *without* the needed flags.
3632          * To avoid this, disable RX-filter flags only for Lancer.
3633          */
3634         if (lancer_chip(adapter)) {
3635                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3636                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3637         }
3638 }
3639
3640 static int be_close(struct net_device *netdev)
3641 {
3642         struct be_adapter *adapter = netdev_priv(netdev);
3643         struct be_eq_obj *eqo;
3644         int i;
3645
3646         /* This protection is needed as be_close() may be called even when the
3647          * adapter is in cleared state (after eeh perm failure)
3648          */
3649         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3650                 return 0;
3651
3652         /* Before attempting cleanup ensure all the pending cmds in the
3653          * config_wq have finished execution
3654          */
3655         flush_workqueue(be_wq);
3656
3657         be_disable_if_filters(adapter);
3658
3659         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3660                 for_all_evt_queues(adapter, eqo, i) {
3661                         napi_disable(&eqo->napi);
3662                 }
3663                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3664         }
3665
3666         be_async_mcc_disable(adapter);
3667
3668         /* Wait for all pending tx completions to arrive so that
3669          * all tx skbs are freed.
3670          */
3671         netif_tx_disable(netdev);
3672         be_tx_compl_clean(adapter);
3673
3674         be_rx_qs_destroy(adapter);
3675
3676         for_all_evt_queues(adapter, eqo, i) {
3677                 if (msix_enabled(adapter))
3678                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3679                 else
3680                         synchronize_irq(netdev->irq);
3681                 be_eq_clean(eqo);
3682         }
3683
3684         be_irq_unregister(adapter);
3685
3686         return 0;
3687 }
3688
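/* Create the RX rings, program the RSS indirection table, flags and hash
 * key when multiple RSS rings exist, and post the initial RX buffers.
 */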
3689 static int be_rx_qs_create(struct be_adapter *adapter)
3690 {
3691         struct rss_info *rss = &adapter->rss_info;
3692         u8 rss_key[RSS_HASH_KEY_LEN];
3693         struct be_rx_obj *rxo;
3694         int rc, i, j;
3695
3696         for_all_rx_queues(adapter, rxo, i) {
3697                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3698                                     sizeof(struct be_eth_rx_d));
3699                 if (rc)
3700                         return rc;
3701         }
3702
3703         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3704                 rxo = default_rxo(adapter);
3705                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3706                                        rx_frag_size, adapter->if_handle,
3707                                        false, &rxo->rss_id);
3708                 if (rc)
3709                         return rc;
3710         }
3711
3712         for_all_rss_queues(adapter, rxo, i) {
3713                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3714                                        rx_frag_size, adapter->if_handle,
3715                                        true, &rxo->rss_id);
3716                 if (rc)
3717                         return rc;
3718         }
3719
3720         if (be_multi_rxq(adapter)) {
3721                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3722                         for_all_rss_queues(adapter, rxo, i) {
3723                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3724                                         break;
3725                                 rss->rsstable[j + i] = rxo->rss_id;
3726                                 rss->rss_queue[j + i] = i;
3727                         }
3728                 }
3729                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3730                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3731
3732                 if (!BEx_chip(adapter))
3733                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3734                                 RSS_ENABLE_UDP_IPV6;
3735
3736                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3737                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3738                                        RSS_INDIR_TABLE_LEN, rss_key);
3739                 if (rc) {
3740                         rss->rss_flags = RSS_ENABLE_NONE;
3741                         return rc;
3742                 }
3743
3744                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3745         } else {
3746                 /* Disable RSS, if only default RX Q is created */
3747                 rss->rss_flags = RSS_ENABLE_NONE;
3748         }
3749
3750
3751         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3752          * which is a queue empty condition
3753          */
3754         for_all_rx_queues(adapter, rxo, i)
3755                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3756
3757         return 0;
3758 }
3759
3760 static int be_enable_if_filters(struct be_adapter *adapter)
3761 {
3762         int status;
3763
3764         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3765         if (status)
3766                 return status;
3767
3768         /* Normally this condition is true as the ->dev_mac is zeroed.
3769          * But on BE3 VFs the initial MAC is pre-programmed by PF and
3770          * subsequent be_dev_mac_add() can fail (after fresh boot)
3771          */
3772         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3773                 int old_pmac_id = -1;
3774
3775                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3776                 if (!is_zero_ether_addr(adapter->dev_mac))
3777                         old_pmac_id = adapter->pmac_id[0];
3778
3779                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3780                 if (status)
3781                         return status;
3782
3783                 /* Delete the old programmed MAC as we successfully programmed
3784                  * a new MAC
3785                  */
3786                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3787                         be_dev_mac_del(adapter, old_pmac_id);
3788
3789                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3790         }
3791
3792         if (adapter->vlans_added)
3793                 be_vid_config(adapter);
3794
3795         __be_set_rx_mode(adapter);
3796
3797         return 0;
3798 }
3799
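/* ndo_open handler: create the RX rings, enable RX filters, register
 * IRQs, arm the CQs/EQs, enable NAPI and start the TX queues.
 */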
3800 static int be_open(struct net_device *netdev)
3801 {
3802         struct be_adapter *adapter = netdev_priv(netdev);
3803         struct be_eq_obj *eqo;
3804         struct be_rx_obj *rxo;
3805         struct be_tx_obj *txo;
3806         u8 link_status;
3807         int status, i;
3808
3809         status = be_rx_qs_create(adapter);
3810         if (status)
3811                 goto err;
3812
3813         status = be_enable_if_filters(adapter);
3814         if (status)
3815                 goto err;
3816
3817         status = be_irq_register(adapter);
3818         if (status)
3819                 goto err;
3820
3821         for_all_rx_queues(adapter, rxo, i)
3822                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3823
3824         for_all_tx_queues(adapter, txo, i)
3825                 be_cq_notify(adapter, txo->cq.id, true, 0);
3826
3827         be_async_mcc_enable(adapter);
3828
3829         for_all_evt_queues(adapter, eqo, i) {
3830                 napi_enable(&eqo->napi);
3831                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3832         }
3833         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3834
3835         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3836         if (!status)
3837                 be_link_status_update(adapter, link_status);
3838
3839         netif_tx_start_all_queues(netdev);
3840         if (skyhawk_chip(adapter))
3841                 udp_tunnel_get_rx_info(netdev);
3842
3843         return 0;
3844 err:
3845         be_close(adapter->netdev);
3846         return -EIO;
3847 }
3848
3849 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3850 {
3851         u32 addr;
3852
3853         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3854
3855         mac[5] = (u8)(addr & 0xFF);
3856         mac[4] = (u8)((addr >> 8) & 0xFF);
3857         mac[3] = (u8)((addr >> 16) & 0xFF);
3858         /* Use the OUI from the current MAC address */
3859         memcpy(mac, adapter->netdev->dev_addr, 3);
3860 }
3861
3862 /*
3863  * Generate a seed MAC address from the PF MAC Address using jhash.
3864  * MAC addresses for VFs are assigned incrementally starting from the seed.
3865  * These addresses are programmed in the ASIC by the PF and the VF driver
3866  * queries for the MAC address during its probe.
3867  */
3868 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3869 {
3870         u32 vf;
3871         int status = 0;
3872         u8 mac[ETH_ALEN];
3873         struct be_vf_cfg *vf_cfg;
3874
3875         be_vf_eth_addr_generate(adapter, mac);
3876
3877         for_all_vfs(adapter, vf_cfg, vf) {
3878                 if (BEx_chip(adapter))
3879                         status = be_cmd_pmac_add(adapter, mac,
3880                                                  vf_cfg->if_handle,
3881                                                  &vf_cfg->pmac_id, vf + 1);
3882                 else
3883                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3884                                                 vf + 1);
3885
3886                 if (status)
3887                         dev_err(&adapter->pdev->dev,
3888                                 "Mac address assignment failed for VF %d\n",
3889                                 vf);
3890                 else
3891                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3892
3893                 mac[5] += 1;
3894         }
3895         return status;
3896 }
3897
3898 static int be_vfs_mac_query(struct be_adapter *adapter)
3899 {
3900         int status, vf;
3901         u8 mac[ETH_ALEN];
3902         struct be_vf_cfg *vf_cfg;
3903
3904         for_all_vfs(adapter, vf_cfg, vf) {
3905                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3906                                                mac, vf_cfg->if_handle,
3907                                                false, vf+1);
3908                 if (status)
3909                         return status;
3910                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3911         }
3912         return 0;
3913 }
3914
3915 static void be_vf_clear(struct be_adapter *adapter)
3916 {
3917         struct be_vf_cfg *vf_cfg;
3918         u32 vf;
3919
3920         if (pci_vfs_assigned(adapter->pdev)) {
3921                 dev_warn(&adapter->pdev->dev,
3922                          "VFs are assigned to VMs: not disabling VFs\n");
3923                 goto done;
3924         }
3925
3926         pci_disable_sriov(adapter->pdev);
3927
3928         for_all_vfs(adapter, vf_cfg, vf) {
3929                 if (BEx_chip(adapter))
3930                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3931                                         vf_cfg->pmac_id, vf + 1);
3932                 else
3933                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3934                                        vf + 1);
3935
3936                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3937         }
3938
3939         if (BE3_chip(adapter))
3940                 be_cmd_set_hsw_config(adapter, 0, 0,
3941                                       adapter->if_handle,
3942                                       PORT_FWD_TYPE_PASSTHRU, 0);
3943 done:
3944         kfree(adapter->vf_cfg);
3945         adapter->num_vfs = 0;
3946         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3947 }
3948
3949 static void be_clear_queues(struct be_adapter *adapter)
3950 {
3951         be_mcc_queues_destroy(adapter);
3952         be_rx_cqs_destroy(adapter);
3953         be_tx_queues_destroy(adapter);
3954         be_evt_queues_destroy(adapter);
3955 }
3956
3957 static void be_cancel_worker(struct be_adapter *adapter)
3958 {
3959         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3960                 cancel_delayed_work_sync(&adapter->work);
3961                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3962         }
3963 }
3964
3965 static void be_cancel_err_detection(struct be_adapter *adapter)
3966 {
3967         struct be_error_recovery *err_rec = &adapter->error_recovery;
3968
3969         if (!be_err_recovery_workq)
3970                 return;
3971
3972         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3973                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3974                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3975         }
3976 }
3977
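/* Convert the interface to tunnel mode and program the first port on the
 * VxLAN port list into the HW so that checksum/TSO offloads apply to
 * encapsulated traffic.
 */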
3978 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3979 {
3980         struct net_device *netdev = adapter->netdev;
3981         struct device *dev = &adapter->pdev->dev;
3982         struct be_vxlan_port *vxlan_port;
3983         __be16 port;
3984         int status;
3985
3986         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3987                                       struct be_vxlan_port, list);
3988         port = vxlan_port->port;
3989
3990         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3991                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3992         if (status) {
3993                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3994                 return status;
3995         }
3996         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3997
3998         status = be_cmd_set_vxlan_port(adapter, port);
3999         if (status) {
4000                 dev_warn(dev, "Failed to add VxLAN port\n");
4001                 return status;
4002         }
4003         adapter->vxlan_port = port;
4004
4005         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
4006                                    NETIF_F_TSO | NETIF_F_TSO6 |
4007                                    NETIF_F_GSO_UDP_TUNNEL;
4008
4009         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
4010                  be16_to_cpu(port));
4011         return 0;
4012 }
4013
4014 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
4015 {
4016         struct net_device *netdev = adapter->netdev;
4017
4018         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
4019                 be_cmd_manage_iface(adapter, adapter->if_handle,
4020                                     OP_CONVERT_TUNNEL_TO_NORMAL);
4021
4022         if (adapter->vxlan_port)
4023                 be_cmd_set_vxlan_port(adapter, 0);
4024
4025         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
4026         adapter->vxlan_port = 0;
4027
4028         netdev->hw_enc_features = 0;
4029 }
4030
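/* Compute the per-VF resource template (queues, MACs, VLANs, etc.) used
 * when distributing the PF pool resources among num_vfs VFs.
 */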
4031 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
4032                                 struct be_resources *vft_res)
4033 {
4034         struct be_resources res = adapter->pool_res;
4035         u32 vf_if_cap_flags = res.vf_if_cap_flags;
4036         struct be_resources res_mod = {0};
4037         u16 num_vf_qs = 1;
4038
4039         /* Distribute the queue resources among the PF and its VFs */
4040         if (num_vfs) {
4041                 /* Divide the rx queues evenly among the VFs and the PF, capped
4042                  * at VF-EQ-count. Any remainder queues belong to the PF.
4043                  */
4044                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
4045                                 res.max_rss_qs / (num_vfs + 1));
4046
4047                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
4048                  * RSS Tables per port. Provide RSS on VFs, only if number of
4049                  * VFs requested is less than its PF Pool's RSS Tables limit.
4050                  */
4051                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
4052                         num_vf_qs = 1;
4053         }
4054
4055         /* Fields set to all '1's by the GET_PROFILE_CONFIG cmd are the
4056          * ones that are modifiable using the SET_PROFILE_CONFIG cmd.
4057          */
4058         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
4059                                   RESOURCE_MODIFIABLE, 0);
4060
4061         /* If RSS IFACE capability flags are modifiable for a VF, set the
4062          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
4063          * more than 1 RSSQ is available for a VF.
4064          * Otherwise, provision only 1 queue pair for VF.
4065          */
4066         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
4067                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4068                 if (num_vf_qs > 1) {
4069                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
4070                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
4071                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
4072                 } else {
4073                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
4074                                              BE_IF_FLAGS_DEFQ_RSS);
4075                 }
4076         } else {
4077                 num_vf_qs = 1;
4078         }
4079
4080         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
4081                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
4082                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4083         }
4084
4085         vft_res->vf_if_cap_flags = vf_if_cap_flags;
4086         vft_res->max_rx_qs = num_vf_qs;
4087         vft_res->max_rss_qs = num_vf_qs;
4088         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
4089         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
4090
4091         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
4092          * among the PF and its VFs, if the fields are changeable
4093          */
4094         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
4095                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
4096
4097         if (res_mod.max_vlans == FIELD_MODIFIABLE)
4098                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
4099
4100         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
4101                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
4102
4103         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4104                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4105 }
4106
4107 static void be_if_destroy(struct be_adapter *adapter)
4108 {
4109         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4110
4111         kfree(adapter->pmac_id);
4112         adapter->pmac_id = NULL;
4113
4114         kfree(adapter->mc_list);
4115         adapter->mc_list = NULL;
4116
4117         kfree(adapter->uc_list);
4118         adapter->uc_list = NULL;
4119 }
4120
4121 static int be_clear(struct be_adapter *adapter)
4122 {
4123         struct pci_dev *pdev = adapter->pdev;
4124         struct  be_resources vft_res = {0};
4125
4126         be_cancel_worker(adapter);
4127
4128         flush_workqueue(be_wq);
4129
4130         if (sriov_enabled(adapter))
4131                 be_vf_clear(adapter);
4132
4133         /* Re-configure FW to distribute resources evenly across max-supported
4134          * number of VFs, only when VFs are not already enabled.
4135          */
4136         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4137             !pci_vfs_assigned(pdev)) {
4138                 be_calculate_vf_res(adapter,
4139                                     pci_sriov_get_totalvfs(pdev),
4140                                     &vft_res);
4141                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4142                                         pci_sriov_get_totalvfs(pdev),
4143                                         &vft_res);
4144         }
4145
4146         be_disable_vxlan_offloads(adapter);
4147
4148         be_if_destroy(adapter);
4149
4150         be_clear_queues(adapter);
4151
4152         be_msix_disable(adapter);
4153         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4154         return 0;
4155 }
4156
4157 static int be_vfs_if_create(struct be_adapter *adapter)
4158 {
4159         struct be_resources res = {0};
4160         u32 cap_flags, en_flags, vf;
4161         struct be_vf_cfg *vf_cfg;
4162         int status;
4163
4164         /* If a FW profile exists, then cap_flags are updated */
4165         cap_flags = BE_VF_IF_EN_FLAGS;
4166
4167         for_all_vfs(adapter, vf_cfg, vf) {
4168                 if (!BE3_chip(adapter)) {
4169                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4170                                                            ACTIVE_PROFILE_TYPE,
4171                                                            RESOURCE_LIMITS,
4172                                                            vf + 1);
4173                         if (!status) {
4174                                 cap_flags = res.if_cap_flags;
4175                                 /* Prevent VFs from enabling VLAN promiscuous
4176                                  * mode
4177                                  */
4178                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4179                         }
4180                 }
4181
4182                 /* PF should enable IF flags during proxy if_create call */
4183                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4184                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4185                                           &vf_cfg->if_handle, vf + 1);
4186                 if (status)
4187                         return status;
4188         }
4189
4190         return 0;
4191 }
4192
4193 static int be_vf_setup_init(struct be_adapter *adapter)
4194 {
4195         struct be_vf_cfg *vf_cfg;
4196         int vf;
4197
4198         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4199                                   GFP_KERNEL);
4200         if (!adapter->vf_cfg)
4201                 return -ENOMEM;
4202
4203         for_all_vfs(adapter, vf_cfg, vf) {
4204                 vf_cfg->if_handle = -1;
4205                 vf_cfg->pmac_id = -1;
4206         }
4207         return 0;
4208 }
4209
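/* Provision the VFs: create or query their IFACEs and MAC addresses,
 * grant the FILTMGMT privilege, apply QoS and link-state defaults, query
 * the spoof-check setting and enable SR-IOV if not already enabled.
 */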
4210 static int be_vf_setup(struct be_adapter *adapter)
4211 {
4212         struct device *dev = &adapter->pdev->dev;
4213         struct be_vf_cfg *vf_cfg;
4214         int status, old_vfs, vf;
4215         bool spoofchk;
4216
4217         old_vfs = pci_num_vf(adapter->pdev);
4218
4219         status = be_vf_setup_init(adapter);
4220         if (status)
4221                 goto err;
4222
4223         if (old_vfs) {
4224                 for_all_vfs(adapter, vf_cfg, vf) {
4225                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4226                         if (status)
4227                                 goto err;
4228                 }
4229
4230                 status = be_vfs_mac_query(adapter);
4231                 if (status)
4232                         goto err;
4233         } else {
4234                 status = be_vfs_if_create(adapter);
4235                 if (status)
4236                         goto err;
4237
4238                 status = be_vf_eth_addr_config(adapter);
4239                 if (status)
4240                         goto err;
4241         }
4242
4243         for_all_vfs(adapter, vf_cfg, vf) {
4244                 /* Allow VFs to program MAC/VLAN filters */
4245                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4246                                                   vf + 1);
4247                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4248                         status = be_cmd_set_fn_privileges(adapter,
4249                                                           vf_cfg->privileges |
4250                                                           BE_PRIV_FILTMGMT,
4251                                                           vf + 1);
4252                         if (!status) {
4253                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4254                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4255                                          vf);
4256                         }
4257                 }
4258
4259                 /* Allow full available bandwidth */
4260                 if (!old_vfs)
4261                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4262
4263                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4264                                                vf_cfg->if_handle, NULL,
4265                                                &spoofchk);
4266                 if (!status)
4267                         vf_cfg->spoofchk = spoofchk;
4268
4269                 if (!old_vfs) {
4270                         be_cmd_enable_vf(adapter, vf + 1);
4271                         be_cmd_set_logical_link_config(adapter,
4272                                                        IFLA_VF_LINK_STATE_AUTO,
4273                                                        vf+1);
4274                 }
4275         }
4276
4277         if (!old_vfs) {
4278                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4279                 if (status) {
4280                         dev_err(dev, "SRIOV enable failed\n");
4281                         adapter->num_vfs = 0;
4282                         goto err;
4283                 }
4284         }
4285
4286         if (BE3_chip(adapter)) {
4287                 /* On BE3, enable VEB only when SRIOV is enabled */
4288                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4289                                                adapter->if_handle,
4290                                                PORT_FWD_TYPE_VEB, 0);
4291                 if (status)
4292                         goto err;
4293         }
4294
4295         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4296         return 0;
4297 err:
4298         dev_err(dev, "VF setup failed\n");
4299         be_vf_clear(adapter);
4300         return status;
4301 }
4302
4303 /* Converting function_mode bits on BE3 to SH mc_type enums */
4304
4305 static u8 be_convert_mc_type(u32 function_mode)
4306 {
4307         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4308                 return vNIC1;
4309         else if (function_mode & QNQ_MODE)
4310                 return FLEX10;
4311         else if (function_mode & VNIC_MODE)
4312                 return vNIC2;
4313         else if (function_mode & UMC_ENABLED)
4314                 return UMC;
4315         else
4316                 return MC_NONE;
4317 }
4318
4319 /* On BE2/BE3 FW does not suggest the supported limits */
4320 static void BEx_get_resources(struct be_adapter *adapter,
4321                               struct be_resources *res)
4322 {
4323         bool use_sriov = adapter->num_vfs ? 1 : 0;
4324
4325         if (be_physfn(adapter))
4326                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4327         else
4328                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4329
4330         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4331
4332         if (be_is_mc(adapter)) {
4333                 /* Assuming that there are 4 channels per port,
4334                  * when multi-channel is enabled
4335                  */
4336                 if (be_is_qnq_mode(adapter))
4337                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4338                 else
4339                         /* In a non-qnq multichannel mode, the pvid
4340                          * takes up one vlan entry
4341                          */
4342                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4343         } else {
4344                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4345         }
4346
4347         res->max_mcast_mac = BE_MAX_MC;
4348
4349         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4350          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4351          *    *only* if it is RSS-capable.
4352          */
4353         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4354             be_virtfn(adapter) ||
4355             (be_is_mc(adapter) &&
4356              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4357                 res->max_tx_qs = 1;
4358         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4359                 struct be_resources super_nic_res = {0};
4360
4361                 /* On a SuperNIC profile, the driver needs to use the
4362                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4363                  */
4364                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4365                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4366                                           0);
4367                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4368                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4369         } else {
4370                 res->max_tx_qs = BE3_MAX_TX_QS;
4371         }
4372
4373         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4374             !use_sriov && be_physfn(adapter))
4375                 res->max_rss_qs = (adapter->be3_native) ?
4376                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4377         res->max_rx_qs = res->max_rss_qs + 1;
4378
4379         if (be_physfn(adapter))
4380                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4381                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4382         else
4383                 res->max_evt_qs = 1;
4384
4385         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4386         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4387         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4388                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4389 }
4390
4391 static void be_setup_init(struct be_adapter *adapter)
4392 {
4393         adapter->vlan_prio_bmap = 0xff;
4394         adapter->phy.link_speed = -1;
4395         adapter->if_handle = -1;
4396         adapter->be3_native = false;
4397         adapter->if_flags = 0;
4398         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4399         if (be_physfn(adapter))
4400                 adapter->cmd_privileges = MAX_PRIVILEGES;
4401         else
4402                 adapter->cmd_privileges = MIN_PRIVILEGES;
4403 }
4404
4405 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4406  * However, this HW limitation is not exposed to the host via any SLI cmd.
4407  * As a result, in the case of SRIOV and in particular multi-partition configs
4408  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4409  * for distribution between the VFs. This self-imposed limit determines the
4410  * number of VFs for which RSS can be enabled.
4411  */
4412 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4413 {
4414         struct be_port_resources port_res = {0};
4415         u8 rss_tables_on_port;
4416         u16 max_vfs = be_max_vfs(adapter);
4417
4418         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4419                                   RESOURCE_LIMITS, 0);
4420
4421         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4422
4423         /* Each PF Pool's RSS Tables limit =
4424          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4425          */
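	/* Illustrative example (hypothetical numbers): with 64 total VFs on
	 * the port, 60 RSS tables left after the NIC PFs, and 32 max VFs for
	 * this PF, the pool gets 32 * 60 / 64 = 30 RSS tables (integer
	 * division).
	 */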
4426         adapter->pool_res.max_rss_tables =
4427                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4428 }
4429
4430 static int be_get_sriov_config(struct be_adapter *adapter)
4431 {
4432         struct be_resources res = {0};
4433         int max_vfs, old_vfs;
4434
4435         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4436                                   RESOURCE_LIMITS, 0);
4437
4438         /* Some old versions of BE3 FW don't report max_vfs value */
4439         if (BE3_chip(adapter) && !res.max_vfs) {
4440                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4441                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4442         }
4443
4444         adapter->pool_res = res;
4445
4446         /* If, during a previous unload of the driver, the VFs were not
4447          * disabled, then we cannot rely on the PF-pool limits for the TotalVFs
4448          * value. Instead, use the TotalVFs value stored in the pci-dev struct.
4449          */
4450         old_vfs = pci_num_vf(adapter->pdev);
4451         if (old_vfs) {
4452                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4453                          old_vfs);
4454
4455                 adapter->pool_res.max_vfs =
4456                         pci_sriov_get_totalvfs(adapter->pdev);
4457                 adapter->num_vfs = old_vfs;
4458         }
4459
4460         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4461                 be_calculate_pf_pool_rss_tables(adapter);
4462                 dev_info(&adapter->pdev->dev,
4463                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4464                          be_max_pf_pool_rss_tables(adapter));
4465         }
4466         return 0;
4467 }
4468
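/* Query the SR-IOV pool limits and, on Skyhawk PFs with no pre-existing VFs,
 * ask FW to give the entire PF-pool to the PF until VFs are actually created.
 */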
4469 static void be_alloc_sriov_res(struct be_adapter *adapter)
4470 {
4471         int old_vfs = pci_num_vf(adapter->pdev);
4472         struct be_resources vft_res = {0};
4473         int status;
4474
4475         be_get_sriov_config(adapter);
4476
4477         if (!old_vfs)
4478                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4479
4480         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4481          * resources are given to the PF during driver load, provided there are
4482          * no old VFs. This facility is not available in BE3 FW.
4483          * On Lancer chips this is done by the FW itself.
4484          */
4485         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4486                 be_calculate_vf_res(adapter, 0, &vft_res);
4487                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4488                                                  &vft_res);
4489                 if (status)
4490                         dev_err(&adapter->pdev->dev,
4491                                 "Failed to optimize SRIOV resources\n");
4492         }
4493 }
4494
4495 static int be_get_resources(struct be_adapter *adapter)
4496 {
4497         struct device *dev = &adapter->pdev->dev;
4498         struct be_resources res = {0};
4499         int status;
4500
4501         /* For Lancer, SH etc., read per-function resource limits from FW.
4502          * GET_FUNC_CONFIG returns per-function guaranteed limits.
4503          * GET_PROFILE_CONFIG returns PCI-E related limits, i.e. PF-pool limits.
4504          */
4505         if (BEx_chip(adapter)) {
4506                 BEx_get_resources(adapter, &res);
4507         } else {
4508                 status = be_cmd_get_func_config(adapter, &res);
4509                 if (status)
4510                         return status;
4511
4512                 /* If a default RXQ must be created, we'll use up one RSSQ */
4513                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4514                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4515                         res.max_rss_qs -= 1;
4516         }
4517
4518         /* If RoCE is supported stash away half the EQs for RoCE */
4519         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4520                                 res.max_evt_qs / 2 : res.max_evt_qs;
4521         adapter->res = res;
4522
4523         /* If FW supports RSS default queue, then skip creating non-RSS
4524          * queue for non-IP traffic.
4525          */
4526         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4527                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4528
4529         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4530                  be_max_txqs(adapter), be_max_rxqs(adapter),
4531                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4532                  be_max_vfs(adapter));
4533         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4534                  be_max_uc(adapter), be_max_mc(adapter),
4535                  be_max_vlans(adapter));
4536
4537         /* Ensure RX and TX queues are created in pairs at init time */
4538         adapter->cfg_num_rx_irqs =
4539                                 min_t(u16, netif_get_num_default_rss_queues(),
4540                                       be_max_qp_irqs(adapter));
4541         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4542         return 0;
4543 }
4544
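/* Read adapter-wide configuration from FW: controller attributes, FW config,
 * FAT dump length, FW log level (BEx), WoL capability (and arm PCI wake
 * accordingly), port name and the active profile id.
 */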
4545 static int be_get_config(struct be_adapter *adapter)
4546 {
4547         int status, level;
4548         u16 profile_id;
4549
4550         status = be_cmd_get_cntl_attributes(adapter);
4551         if (status)
4552                 return status;
4553
4554         status = be_cmd_query_fw_cfg(adapter);
4555         if (status)
4556                 return status;
4557
4558         if (!lancer_chip(adapter) && be_physfn(adapter))
4559                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4560
4561         if (BEx_chip(adapter)) {
4562                 level = be_cmd_get_fw_log_level(adapter);
4563                 adapter->msg_enable =
4564                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4565         }
4566
4567         be_cmd_get_acpi_wol_cap(adapter);
4568         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4569         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4570
4571         be_cmd_query_port_name(adapter);
4572
4573         if (be_physfn(adapter)) {
4574                 status = be_cmd_get_active_profile(adapter, &profile_id);
4575                 if (!status)
4576                         dev_info(&adapter->pdev->dev,
4577                                  "Using profile 0x%x\n", profile_id);
4578         }
4579
4580         return 0;
4581 }
4582
4583 static int be_mac_setup(struct be_adapter *adapter)
4584 {
4585         u8 mac[ETH_ALEN];
4586         int status;
4587
4588         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4589                 status = be_cmd_get_perm_mac(adapter, mac);
4590                 if (status)
4591                         return status;
4592
4593                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4594                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4595
4596                 /* Initial MAC for BE3 VFs is already programmed by PF */
4597                 if (BEx_chip(adapter) && be_virtfn(adapter))
4598                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4599         }
4600
4601         return 0;
4602 }
4603
4604 static void be_schedule_worker(struct be_adapter *adapter)
4605 {
4606         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4607         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4608 }
4609
4610 static void be_destroy_err_recovery_workq(void)
4611 {
4612         if (!be_err_recovery_workq)
4613                 return;
4614
4615         flush_workqueue(be_err_recovery_workq);
4616         destroy_workqueue(be_err_recovery_workq);
4617         be_err_recovery_workq = NULL;
4618 }
4619
4620 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4621 {
4622         struct be_error_recovery *err_rec = &adapter->error_recovery;
4623
4624         if (!be_err_recovery_workq)
4625                 return;
4626
4627         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4628                            msecs_to_jiffies(delay));
4629         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4630 }
4631
4632 static int be_setup_queues(struct be_adapter *adapter)
4633 {
4634         struct net_device *netdev = adapter->netdev;
4635         int status;
4636
4637         status = be_evt_queues_create(adapter);
4638         if (status)
4639                 goto err;
4640
4641         status = be_tx_qs_create(adapter);
4642         if (status)
4643                 goto err;
4644
4645         status = be_rx_cqs_create(adapter);
4646         if (status)
4647                 goto err;
4648
4649         status = be_mcc_queues_create(adapter);
4650         if (status)
4651                 goto err;
4652
4653         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4654         if (status)
4655                 goto err;
4656
4657         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4658         if (status)
4659                 goto err;
4660
4661         return 0;
4662 err:
4663         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4664         return status;
4665 }
4666
4667 static int be_if_create(struct be_adapter *adapter)
4668 {
4669         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4670         u32 cap_flags = be_if_cap_flags(adapter);
4671         int status;
4672
4673         /* alloc required memory for other filtering fields */
4674         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4675                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4676         if (!adapter->pmac_id)
4677                 return -ENOMEM;
4678
4679         adapter->mc_list = kcalloc(be_max_mc(adapter),
4680                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4681         if (!adapter->mc_list)
4682                 return -ENOMEM;
4683
4684         adapter->uc_list = kcalloc(be_max_uc(adapter),
4685                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4686         if (!adapter->uc_list)
4687                 return -ENOMEM;
4688
4689         if (adapter->cfg_num_rx_irqs == 1)
4690                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4691
4692         en_flags &= cap_flags;
4693         /* will enable all the needed filter flags in be_open() */
4694         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4695                                   &adapter->if_handle, 0);
4696
4697         if (status)
4698                 return status;
4699
4700         return 0;
4701 }
4702
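/* Destroy and re-create the interface and all queues with the updated ring
 * counts, typically invoked when the channel counts change (e.g. via
 * ethtool). Closes the netdev first if it is running and re-opens it at the
 * end.
 */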
4703 int be_update_queues(struct be_adapter *adapter)
4704 {
4705         struct net_device *netdev = adapter->netdev;
4706         int status;
4707
4708         if (netif_running(netdev))
4709                 be_close(netdev);
4710
4711         be_cancel_worker(adapter);
4712
4713         /* If any vectors have been shared with RoCE we cannot re-program
4714          * the MSIx table.
4715          */
4716         if (!adapter->num_msix_roce_vec)
4717                 be_msix_disable(adapter);
4718
4719         be_clear_queues(adapter);
4720         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4721         if (status)
4722                 return status;
4723
4724         if (!msix_enabled(adapter)) {
4725                 status = be_msix_enable(adapter);
4726                 if (status)
4727                         return status;
4728         }
4729
4730         status = be_if_create(adapter);
4731         if (status)
4732                 return status;
4733
4734         status = be_setup_queues(adapter);
4735         if (status)
4736                 return status;
4737
4738         be_schedule_worker(adapter);
4739
4740         /* The IF was destroyed and re-created. We need to clear
4741          * all promiscuous flags valid for the destroyed IF.
4742          * Without this, promisc mode is not restored during
4743          * be_open() because the driver thinks that it is
4744          * already enabled in HW.
4745          */
4746         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4747
4748         if (netif_running(netdev))
4749                 status = be_open(netdev);
4750
4751         return status;
4752 }
4753
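/* Parse the major number from a "major.minor..." FW version string
 * (e.g. "11.4.204.0" -> 11); returns 0 if the string cannot be parsed.
 */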
4754 static inline int fw_major_num(const char *fw_ver)
4755 {
4756         int fw_major = 0, i;
4757
4758         i = sscanf(fw_ver, "%d.", &fw_major);
4759         if (i != 1)
4760                 return 0;
4761
4762         return fw_major;
4763 }
4764
4765 /* During error recovery, always FLR the PF.
4766  * Otherwise, FLR the PF only if no VFs are already enabled.
4767  */
4768 static bool be_reset_required(struct be_adapter *adapter)
4769 {
4770         if (be_error_recovering(adapter))
4771                 return true;
4772         else
4773                 return pci_num_vf(adapter->pdev) == 0;
4774 }
4775
4776 /* Wait for the FW to be ready and perform the required initialization */
4777 static int be_func_init(struct be_adapter *adapter)
4778 {
4779         int status;
4780
4781         status = be_fw_wait_ready(adapter);
4782         if (status)
4783                 return status;
4784
4785         /* FW is now ready; clear errors to allow cmds/doorbell */
4786         be_clear_error(adapter, BE_CLEAR_ALL);
4787
4788         if (be_reset_required(adapter)) {
4789                 status = be_cmd_reset_function(adapter);
4790                 if (status)
4791                         return status;
4792
4793                 /* Wait for interrupts to quiesce after an FLR */
4794                 msleep(100);
4795         }
4796
4797         /* Tell FW we're ready to fire cmds */
4798         status = be_cmd_fw_init(adapter);
4799         if (status)
4800                 return status;
4801
4802         /* Allow interrupts for other ULPs running on NIC function */
4803         be_intr_set(adapter, true);
4804
4805         return 0;
4806 }
4807
4808 static int be_setup(struct be_adapter *adapter)
4809 {
4810         struct device *dev = &adapter->pdev->dev;
4811         int status;
4812
4813         status = be_func_init(adapter);
4814         if (status)
4815                 return status;
4816
4817         be_setup_init(adapter);
4818
4819         if (!lancer_chip(adapter))
4820                 be_cmd_req_native_mode(adapter);
4821
4822         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4823          * for issuing profile-related cmds.
4824          */
4825         if (!BEx_chip(adapter)) {
4826                 status = be_cmd_get_func_config(adapter, NULL);
4827                 if (status)
4828                         return status;
4829         }
4830
4831         status = be_get_config(adapter);
4832         if (status)
4833                 goto err;
4834
4835         if (!BE2_chip(adapter) && be_physfn(adapter))
4836                 be_alloc_sriov_res(adapter);
4837
4838         status = be_get_resources(adapter);
4839         if (status)
4840                 goto err;
4841
4842         status = be_msix_enable(adapter);
4843         if (status)
4844                 goto err;
4845
4846         /* will enable all the needed filter flags in be_open() */
4847         status = be_if_create(adapter);
4848         if (status)
4849                 goto err;
4850
4851         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4852         rtnl_lock();
4853         status = be_setup_queues(adapter);
4854         rtnl_unlock();
4855         if (status)
4856                 goto err;
4857
4858         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4859
4860         status = be_mac_setup(adapter);
4861         if (status)
4862                 goto err;
4863
4864         be_cmd_get_fw_ver(adapter);
4865         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4866
4867         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4868                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4869                         adapter->fw_ver);
4870                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4871         }
4872
4873         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4874                                          adapter->rx_fc);
4875         if (status)
4876                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4877                                         &adapter->rx_fc);
4878
4879         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4880                  adapter->tx_fc, adapter->rx_fc);
4881
4882         if (be_physfn(adapter))
4883                 be_cmd_set_logical_link_config(adapter,
4884                                                IFLA_VF_LINK_STATE_AUTO, 0);
4885
4886         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's vport,
4887          * confusing a Linux bridge or OVS to which it might be connected.
4888          * Set the EVB to PASSTHRU mode, which effectively disables the EVB,
4889          * when SRIOV is not enabled.
4890          */
4891         if (BE3_chip(adapter))
4892                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4893                                       PORT_FWD_TYPE_PASSTHRU, 0);
4894
4895         if (adapter->num_vfs)
4896                 be_vf_setup(adapter);
4897
4898         status = be_cmd_get_phy_info(adapter);
4899         if (!status && be_pause_supported(adapter))
4900                 adapter->phy.fc_autoneg = 1;
4901
4902         if (be_physfn(adapter) && !lancer_chip(adapter))
4903                 be_cmd_set_features(adapter);
4904
4905         be_schedule_worker(adapter);
4906         adapter->flags |= BE_FLAGS_SETUP_DONE;
4907         return 0;
4908 err:
4909         be_clear(adapter);
4910         return status;
4911 }
4912
4913 #ifdef CONFIG_NET_POLL_CONTROLLER
4914 static void be_netpoll(struct net_device *netdev)
4915 {
4916         struct be_adapter *adapter = netdev_priv(netdev);
4917         struct be_eq_obj *eqo;
4918         int i;
4919
4920         for_all_evt_queues(adapter, eqo, i) {
4921                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4922                 napi_schedule(&eqo->napi);
4923         }
4924 }
4925 #endif
4926
4927 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4928 {
4929         const struct firmware *fw;
4930         int status;
4931
4932         if (!netif_running(adapter->netdev)) {
4933                 dev_err(&adapter->pdev->dev,
4934                         "Firmware load not allowed (interface is down)\n");
4935                 return -ENETDOWN;
4936         }
4937
4938         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4939         if (status)
4940                 goto fw_exit;
4941
4942         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4943
4944         if (lancer_chip(adapter))
4945                 status = lancer_fw_download(adapter, fw);
4946         else
4947                 status = be_fw_download(adapter, fw);
4948
4949         if (!status)
4950                 be_cmd_get_fw_ver(adapter);
4951
4952 fw_exit:
4953         release_firmware(fw);
4954         return status;
4955 }
4956
4957 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4958                                  u16 flags, struct netlink_ext_ack *extack)
4959 {
4960         struct be_adapter *adapter = netdev_priv(dev);
4961         struct nlattr *attr, *br_spec;
4962         int rem;
4963         int status = 0;
4964         u16 mode = 0;
4965
4966         if (!sriov_enabled(adapter))
4967                 return -EOPNOTSUPP;
4968
4969         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4970         if (!br_spec)
4971                 return -EINVAL;
4972
4973         nla_for_each_nested(attr, br_spec, rem) {
4974                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4975                         continue;
4976
4977                 if (nla_len(attr) < sizeof(mode))
4978                         return -EINVAL;
4979
4980                 mode = nla_get_u16(attr);
4981                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4982                         return -EOPNOTSUPP;
4983
4984                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4985                         return -EINVAL;
4986
4987                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4988                                                adapter->if_handle,
4989                                                mode == BRIDGE_MODE_VEPA ?
4990                                                PORT_FWD_TYPE_VEPA :
4991                                                PORT_FWD_TYPE_VEB, 0);
4992                 if (status)
4993                         goto err;
4994
4995                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4996                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4997
4998                 return status;
4999         }
5000 err:
5001         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
5002                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
5003
5004         return status;
5005 }
5006
5007 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
5008                                  struct net_device *dev, u32 filter_mask,
5009                                  int nlflags)
5010 {
5011         struct be_adapter *adapter = netdev_priv(dev);
5012         int status = 0;
5013         u8 hsw_mode;
5014
5015         /* BE and Lancer chips support VEB mode only */
5016         if (BEx_chip(adapter) || lancer_chip(adapter)) {
5017                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
5018                 if (!pci_sriov_get_totalvfs(adapter->pdev))
5019                         return 0;
5020                 hsw_mode = PORT_FWD_TYPE_VEB;
5021         } else {
5022                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
5023                                                adapter->if_handle, &hsw_mode,
5024                                                NULL);
5025                 if (status)
5026                         return 0;
5027
5028                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
5029                         return 0;
5030         }
5031
5032         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
5033                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
5034                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
5035                                        0, 0, nlflags, filter_mask, NULL);
5036 }
5037
5038 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
5039                                          void (*func)(struct work_struct *))
5040 {
5041         struct be_cmd_work *work;
5042
5043         work = kzalloc(sizeof(*work), GFP_ATOMIC);
5044         if (!work) {
5045                 dev_err(&adapter->pdev->dev,
5046                         "be_work memory allocation failed\n");
5047                 return NULL;
5048         }
5049
5050         INIT_WORK(&work->work, func);
5051         work->adapter = adapter;
5052         return work;
5053 }
5054
5055 /* VxLAN offload Notes:
5056  *
5057  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
5058  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
5059  * is expected to work across all types of IP tunnels once exported. Skyhawk
5060  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
5061  * offloads in hw_enc_features only when a VxLAN port is added. If other
5062  * (non-VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
5063  * those other tunnels are unexported on the fly through ndo_features_check().
5064  *
5065  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
5066  * adds more than one port, disable offloads and re-enable them again when
5067  * there's only one port left. We maintain a list of ports for this purpose.
5068  */
5069 static void be_work_add_vxlan_port(struct work_struct *work)
5070 {
5071         struct be_cmd_work *cmd_work =
5072                                 container_of(work, struct be_cmd_work, work);
5073         struct be_adapter *adapter = cmd_work->adapter;
5074         struct device *dev = &adapter->pdev->dev;
5075         __be16 port = cmd_work->info.vxlan_port;
5076         struct be_vxlan_port *vxlan_port;
5077         int status;
5078
5079         /* Bump up the alias count if it is an existing port */
5080         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5081                 if (vxlan_port->port == port) {
5082                         vxlan_port->port_aliases++;
5083                         goto done;
5084                 }
5085         }
5086
5087         /* Add a new port to our list. We don't need a lock here since port
5088          * add/delete are done only in the context of a single-threaded work
5089          * queue (be_wq).
5090          */
5091         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
5092         if (!vxlan_port)
5093                 goto done;
5094
5095         vxlan_port->port = port;
5096         INIT_LIST_HEAD(&vxlan_port->list);
5097         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
5098         adapter->vxlan_port_count++;
5099
5100         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
5101                 dev_info(dev,
5102                          "Only one UDP port supported for VxLAN offloads\n");
5103                 dev_info(dev, "Disabling VxLAN offloads\n");
5104                 goto err;
5105         }
5106
5107         if (adapter->vxlan_port_count > 1)
5108                 goto done;
5109
5110         status = be_enable_vxlan_offloads(adapter);
5111         if (!status)
5112                 goto done;
5113
5114 err:
5115         be_disable_vxlan_offloads(adapter);
5116 done:
5117         kfree(cmd_work);
5118         return;
5119 }
5120
5121 static void be_work_del_vxlan_port(struct work_struct *work)
5122 {
5123         struct be_cmd_work *cmd_work =
5124                                 container_of(work, struct be_cmd_work, work);
5125         struct be_adapter *adapter = cmd_work->adapter;
5126         __be16 port = cmd_work->info.vxlan_port;
5127         struct be_vxlan_port *vxlan_port;
5128
5129         /* Nothing to be done if a port alias is being deleted */
5130         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5131                 if (vxlan_port->port == port) {
5132                         if (vxlan_port->port_aliases) {
5133                                 vxlan_port->port_aliases--;
5134                                 goto done;
5135                         }
5136                         break;
5137                 }
5138         }
5139
5140         /* No port aliases left; delete the port from the list */
5141         list_del(&vxlan_port->list);
5142         adapter->vxlan_port_count--;
5143
5144         /* Disable VxLAN offload if this is the offloaded port */
5145         if (adapter->vxlan_port == vxlan_port->port) {
5146                 WARN_ON(adapter->vxlan_port_count);
5147                 be_disable_vxlan_offloads(adapter);
5148                 dev_info(&adapter->pdev->dev,
5149                          "Disabled VxLAN offloads for UDP port %d\n",
5150                          be16_to_cpu(port));
5151                 goto out;
5152         }
5153
5154         /* If only 1 port is left, re-enable VxLAN offload */
5155         if (adapter->vxlan_port_count == 1)
5156                 be_enable_vxlan_offloads(adapter);
5157
5158 out:
5159         kfree(vxlan_port);
5160 done:
5161         kfree(cmd_work);
5162 }
5163
5164 static void be_cfg_vxlan_port(struct net_device *netdev,
5165                               struct udp_tunnel_info *ti,
5166                               void (*func)(struct work_struct *))
5167 {
5168         struct be_adapter *adapter = netdev_priv(netdev);
5169         struct be_cmd_work *cmd_work;
5170
5171         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5172                 return;
5173
5174         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5175                 return;
5176
5177         cmd_work = be_alloc_work(adapter, func);
5178         if (cmd_work) {
5179                 cmd_work->info.vxlan_port = ti->port;
5180                 queue_work(be_wq, &cmd_work->work);
5181         }
5182 }
5183
5184 static void be_del_vxlan_port(struct net_device *netdev,
5185                               struct udp_tunnel_info *ti)
5186 {
5187         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5188 }
5189
5190 static void be_add_vxlan_port(struct net_device *netdev,
5191                               struct udp_tunnel_info *ti)
5192 {
5193         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5194 }
5195
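/* Per-skb feature fixups: drop TSO6 for IPv6 extension headers on non-Skyhawk
 * chips, drop GSO on Lancer for tiny or single-segment TSO requests, and keep
 * checksum/GSO offloads for encapsulated traffic only when it is the VxLAN
 * flow that is currently offloaded.
 */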
5196 static netdev_features_t be_features_check(struct sk_buff *skb,
5197                                            struct net_device *dev,
5198                                            netdev_features_t features)
5199 {
5200         struct be_adapter *adapter = netdev_priv(dev);
5201         u8 l4_hdr = 0;
5202
5203         if (skb_is_gso(skb)) {
5204                 /* IPv6 TSO requests with extension hdrs are a problem
5205                  * to Lancer and BE3 HW. Disable TSO6 feature.
5206                  */
5207                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5208                         features &= ~NETIF_F_TSO6;
5209
5210                 /* Lancer cannot handle a packet with an MSS less than 256.
5211                  * It also can't handle a TSO packet with a single segment.
5212                  * Disable GSO support in such cases.
5213                  */
5214                 if (lancer_chip(adapter) &&
5215                     (skb_shinfo(skb)->gso_size < 256 ||
5216                      skb_shinfo(skb)->gso_segs == 1))
5217                         features &= ~NETIF_F_GSO_MASK;
5218         }
5219
5220         /* The code below restricts offload features for some tunneled and
5221          * Q-in-Q packets.
5222          * Offload features for normal (non tunnel) packets are unchanged.
5223          */
5224         features = vlan_features_check(skb, features);
5225         if (!skb->encapsulation ||
5226             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5227                 return features;
5228
5229         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5230          * should disable tunnel offload features if it's not a VxLAN packet,
5231          * as tunnel offloads have been enabled only for VxLAN. This is done to
5232          * allow other tunneled traffic like GRE to work fine while VxLAN
5233          * offloads are configured on Skyhawk-R.
5234          */
5235         switch (vlan_get_protocol(skb)) {
5236         case htons(ETH_P_IP):
5237                 l4_hdr = ip_hdr(skb)->protocol;
5238                 break;
5239         case htons(ETH_P_IPV6):
5240                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5241                 break;
5242         default:
5243                 return features;
5244         }
5245
5246         if (l4_hdr != IPPROTO_UDP ||
5247             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5248             skb->inner_protocol != htons(ETH_P_TEB) ||
5249             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5250                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5251             !adapter->vxlan_port ||
5252             udp_hdr(skb)->dest != adapter->vxlan_port)
5253                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5254
5255         return features;
5256 }
5257
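/* Build a physical port id from the HBA port number (in id[0]) followed by
 * the controller serial-number words copied in reverse order.
 */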
5258 static int be_get_phys_port_id(struct net_device *dev,
5259                                struct netdev_phys_item_id *ppid)
5260 {
5261         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5262         struct be_adapter *adapter = netdev_priv(dev);
5263         u8 *id;
5264
5265         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5266                 return -ENOSPC;
5267
5268         ppid->id[0] = adapter->hba_port_num + 1;
5269         id = &ppid->id[1];
5270         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5271              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5272                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5273
5274         ppid->id_len = id_len;
5275
5276         return 0;
5277 }
5278
5279 static void be_set_rx_mode(struct net_device *dev)
5280 {
5281         struct be_adapter *adapter = netdev_priv(dev);
5282         struct be_cmd_work *work;
5283
5284         work = be_alloc_work(adapter, be_work_set_rx_mode);
5285         if (work)
5286                 queue_work(be_wq, &work->work);
5287 }
5288
5289 static const struct net_device_ops be_netdev_ops = {
5290         .ndo_open               = be_open,
5291         .ndo_stop               = be_close,
5292         .ndo_start_xmit         = be_xmit,
5293         .ndo_set_rx_mode        = be_set_rx_mode,
5294         .ndo_set_mac_address    = be_mac_addr_set,
5295         .ndo_get_stats64        = be_get_stats64,
5296         .ndo_validate_addr      = eth_validate_addr,
5297         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5298         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5299         .ndo_set_vf_mac         = be_set_vf_mac,
5300         .ndo_set_vf_vlan        = be_set_vf_vlan,
5301         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5302         .ndo_get_vf_config      = be_get_vf_config,
5303         .ndo_set_vf_link_state  = be_set_vf_link_state,
5304         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5305         .ndo_tx_timeout         = be_tx_timeout,
5306 #ifdef CONFIG_NET_POLL_CONTROLLER
5307         .ndo_poll_controller    = be_netpoll,
5308 #endif
5309         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5310         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5311         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5312         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5313         .ndo_features_check     = be_features_check,
5314         .ndo_get_phys_port_id   = be_get_phys_port_id,
5315 };
5316
5317 static void be_netdev_init(struct net_device *netdev)
5318 {
5319         struct be_adapter *adapter = netdev_priv(netdev);
5320
5321         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5322                 NETIF_F_GSO_UDP_TUNNEL |
5323                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5324                 NETIF_F_HW_VLAN_CTAG_TX;
5325         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5326                 netdev->hw_features |= NETIF_F_RXHASH;
5327
5328         netdev->features |= netdev->hw_features |
5329                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5330
5331         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5332                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5333
5334         netdev->priv_flags |= IFF_UNICAST_FLT;
5335
5336         netdev->flags |= IFF_MULTICAST;
5337
5338         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5339
5340         netdev->netdev_ops = &be_netdev_ops;
5341
5342         netdev->ethtool_ops = &be_ethtool_ops;
5343
5344         /* MTU range: 256 - 9000 */
5345         netdev->min_mtu = BE_MIN_MTU;
5346         netdev->max_mtu = BE_MAX_MTU;
5347 }
5348
5349 static void be_cleanup(struct be_adapter *adapter)
5350 {
5351         struct net_device *netdev = adapter->netdev;
5352
5353         rtnl_lock();
5354         netif_device_detach(netdev);
5355         if (netif_running(netdev))
5356                 be_close(netdev);
5357         rtnl_unlock();
5358
5359         be_clear(adapter);
5360 }
5361
5362 static int be_resume(struct be_adapter *adapter)
5363 {
5364         struct net_device *netdev = adapter->netdev;
5365         int status;
5366
5367         status = be_setup(adapter);
5368         if (status)
5369                 return status;
5370
5371         rtnl_lock();
5372         if (netif_running(netdev))
5373                 status = be_open(netdev);
5374         rtnl_unlock();
5375
5376         if (status)
5377                 return status;
5378
5379         netif_device_attach(netdev);
5380
5381         return 0;
5382 }
5383
5384 static void be_soft_reset(struct be_adapter *adapter)
5385 {
5386         u32 val;
5387
5388         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5389         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5390         val |= SLIPORT_SOFTRESET_SR_MASK;
5391         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5392 }
5393
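/* Decide whether a TPE error can be recovered: the POST stage must report a
 * recoverable error with a non-zero code, enough time must have passed since
 * driver load and since the last recovery, and the error code must differ
 * from the previous one (no back-to-back identical TPE errors).
 */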
5394 static bool be_err_is_recoverable(struct be_adapter *adapter)
5395 {
5396         struct be_error_recovery *err_rec = &adapter->error_recovery;
5397         unsigned long initial_idle_time =
5398                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5399         unsigned long recovery_interval =
5400                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5401         u16 ue_err_code;
5402         u32 val;
5403
5404         val = be_POST_stage_get(adapter);
5405         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5406                 return false;
5407         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5408         if (ue_err_code == 0)
5409                 return false;
5410
5411         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5412                 ue_err_code);
5413
5414         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5415                 dev_err(&adapter->pdev->dev,
5416                         "Cannot recover within %lu sec from driver load\n",
5417                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5418                 return false;
5419         }
5420
5421         if (err_rec->last_recovery_time && time_before_eq(
5422                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5423                 dev_err(&adapter->pdev->dev,
5424                         "Cannot recover within %lu sec from last recovery\n",
5425                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5426                 return false;
5427         }
5428
5429         if (ue_err_code == err_rec->last_err_code) {
5430                 dev_err(&adapter->pdev->dev,
5431                         "Cannot recover from a consecutive TPE error\n");
5432                 return false;
5433         }
5434
5435         err_rec->last_recovery_time = jiffies;
5436         err_rec->last_err_code = ue_err_code;
5437         return true;
5438 }
5439
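/* TPE recovery state machine. Each call advances one state (NONE -> DETECT ->
 * RESET/PRE_POLL -> REINIT) and sets resched_delay for the next step; only
 * PF0 performs the chip soft reset. Returns -EAGAIN while recovery is still
 * in progress, 0 once the re-init step can begin, or a negative error if the
 * recovery criteria are not met.
 */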
5440 static int be_tpe_recover(struct be_adapter *adapter)
5441 {
5442         struct be_error_recovery *err_rec = &adapter->error_recovery;
5443         int status = -EAGAIN;
5444         u32 val;
5445
5446         switch (err_rec->recovery_state) {
5447         case ERR_RECOVERY_ST_NONE:
5448                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5449                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5450                 break;
5451
5452         case ERR_RECOVERY_ST_DETECT:
5453                 val = be_POST_stage_get(adapter);
5454                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5455                     POST_STAGE_RECOVERABLE_ERR) {
5456                         dev_err(&adapter->pdev->dev,
5457                                 "Unrecoverable HW error detected: 0x%x\n", val);
5458                         status = -EINVAL;
5459                         err_rec->resched_delay = 0;
5460                         break;
5461                 }
5462
5463                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5464
5465                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5466                  * milliseconds before it checks for final error status in
5467                  * SLIPORT_SEMAPHORE to determine if the recovery criteria are met.
5468                  * If they are, then PF0 initiates a Soft Reset.
5469                  */
5470                 if (adapter->pf_num == 0) {
5471                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5472                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5473                                         ERR_RECOVERY_UE_DETECT_DURATION;
5474                         break;
5475                 }
5476
5477                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5478                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5479                                         ERR_RECOVERY_UE_DETECT_DURATION;
5480                 break;
5481
5482         case ERR_RECOVERY_ST_RESET:
5483                 if (!be_err_is_recoverable(adapter)) {
5484                         dev_err(&adapter->pdev->dev,
5485                                 "Failed to meet recovery criteria\n");
5486                         status = -EIO;
5487                         err_rec->resched_delay = 0;
5488                         break;
5489                 }
5490                 be_soft_reset(adapter);
5491                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5492                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5493                                         err_rec->ue_to_reset_time;
5494                 break;
5495
5496         case ERR_RECOVERY_ST_PRE_POLL:
5497                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5498                 err_rec->resched_delay = 0;
5499                 status = 0;                     /* done */
5500                 break;
5501
5502         default:
5503                 status = -EINVAL;
5504                 err_rec->resched_delay = 0;
5505                 break;
5506         }
5507
5508         return status;
5509 }
5510
5511 static int be_err_recover(struct be_adapter *adapter)
5512 {
5513         int status;
5514
5515         if (!lancer_chip(adapter)) {
5516                 if (!adapter->error_recovery.recovery_supported ||
5517                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5518                         return -EIO;
5519                 status = be_tpe_recover(adapter);
5520                 if (status)
5521                         goto err;
5522         }
5523
5524         /* Wait for the adapter to reach a quiescent state before
5525          * destroying queues.
5526          */
5527         status = be_fw_wait_ready(adapter);
5528         if (status)
5529                 goto err;
5530
5531         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5532
5533         be_cleanup(adapter);
5534
5535         status = be_resume(adapter);
5536         if (status)
5537                 goto err;
5538
5539         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5540
5541 err:
5542         return status;
5543 }
5544
5545 static void be_err_detection_task(struct work_struct *work)
5546 {
5547         struct be_error_recovery *err_rec =
5548                         container_of(work, struct be_error_recovery,
5549                                      err_detection_work.work);
5550         struct be_adapter *adapter =
5551                         container_of(err_rec, struct be_adapter,
5552                                      error_recovery);
5553         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5554         struct device *dev = &adapter->pdev->dev;
5555         int recovery_status;
5556
5557         be_detect_error(adapter);
5558         if (!be_check_error(adapter, BE_ERROR_HW))
5559                 goto reschedule_task;
5560
5561         recovery_status = be_err_recover(adapter);
5562         if (!recovery_status) {
5563                 err_rec->recovery_retries = 0;
5564                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5565                 dev_info(dev, "Adapter recovery successful\n");
5566                 goto reschedule_task;
5567         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5568                 /* BEx/SH recovery state machine */
5569                 if (adapter->pf_num == 0 &&
5570                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5571                         dev_err(&adapter->pdev->dev,
5572                                 "Adapter recovery in progress\n");
5573                 resched_delay = err_rec->resched_delay;
5574                 goto reschedule_task;
5575         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5576                 /* For VFs, check every second whether the PF has
5577                  * allocated resources.
5578                  */
5579                 dev_err(dev, "Re-trying adapter recovery\n");
5580                 goto reschedule_task;
5581         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5582                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5583                 /* In case of another error during recovery, it takes 30 sec
5584                  * for the adapter to come out of error. Retry error recovery after
5585                  * this time interval.
5586                  */
5587                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5588                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5589                 goto reschedule_task;
5590         } else {
5591                 dev_err(dev, "Adapter recovery failed\n");
5592                 dev_err(dev, "Please reboot server to recover\n");
5593         }
5594
5595         return;
5596
5597 reschedule_task:
5598         be_schedule_err_detection(adapter, resched_delay);
5599 }
5600
5601 static void be_log_sfp_info(struct be_adapter *adapter)
5602 {
5603         int status;
5604
5605         status = be_cmd_query_sfp_info(adapter);
5606         if (!status) {
5607                 dev_err(&adapter->pdev->dev,
5608                         "Port %c: %s Vendor: %s part no: %s",
5609                         adapter->port_name,
5610                         be_misconfig_evt_port_state[adapter->phy_state],
5611                         adapter->phy.vendor_name,
5612                         adapter->phy.vendor_pn);
5613         }
5614         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5615 }
5616
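/* Periodic (1 second) housekeeping: occasionally read the die temperature
 * (PF only), fetch HW stats, replenish starved RX queues, update EQ delays on
 * pre-Skyhawk chips and log SFP info after a PHY misconfiguration event.
 */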
5617 static void be_worker(struct work_struct *work)
5618 {
5619         struct be_adapter *adapter =
5620                 container_of(work, struct be_adapter, work.work);
5621         struct be_rx_obj *rxo;
5622         int i;
5623
5624         if (be_physfn(adapter) &&
5625             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5626                 be_cmd_get_die_temperature(adapter);
5627
5628         /* when interrupts are not yet enabled, just reap any pending
5629          * mcc completions
5630          */
5631         if (!netif_running(adapter->netdev)) {
5632                 local_bh_disable();
5633                 be_process_mcc(adapter);
5634                 local_bh_enable();
5635                 goto reschedule;
5636         }
5637
5638         if (!adapter->stats_cmd_sent) {
5639                 if (lancer_chip(adapter))
5640                         lancer_cmd_get_pport_stats(adapter,
5641                                                    &adapter->stats_cmd);
5642                 else
5643                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5644         }
5645
5646         for_all_rx_queues(adapter, rxo, i) {
5647                 /* Replenish RX-queues starved due to memory
5648                  * allocation failures.
5649                  */
5650                 if (rxo->rx_post_starved)
5651                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5652         }
5653
5654         /* EQ-delay update for Skyhawk is done while notifying EQ */
5655         if (!skyhawk_chip(adapter))
5656                 be_eqd_update(adapter, false);
5657
5658         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5659                 be_log_sfp_info(adapter);
5660
5661 reschedule:
5662         adapter->work_counter++;
5663         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5664 }
5665
5666 static void be_unmap_pci_bars(struct be_adapter *adapter)
5667 {
5668         if (adapter->csr)
5669                 pci_iounmap(adapter->pdev, adapter->csr);
5670         if (adapter->db)
5671                 pci_iounmap(adapter->pdev, adapter->db);
5672         if (adapter->pcicfg && adapter->pcicfg_mapped)
5673                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5674 }
5675
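/* The doorbell registers live in BAR 0 on Lancer chips and on VFs, and in
 * BAR 4 on BEx/Skyhawk PFs.
 */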
5676 static int db_bar(struct be_adapter *adapter)
5677 {
5678         if (lancer_chip(adapter) || be_virtfn(adapter))
5679                 return 0;
5680         else
5681                 return 4;
5682 }
5683
5684 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5685 {
5686         if (skyhawk_chip(adapter)) {
5687                 adapter->roce_db.size = 4096;
5688                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5689                                                               db_bar(adapter));
5690                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5691                                                                db_bar(adapter));
5692         }
5693         return 0;
5694 }
5695
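/* Map the CSR, doorbell and PCICFG BARs used by the driver; which BARs are
 * mapped depends on the chip family and on whether this function is a PF
 * or a VF.
 */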
5696 static int be_map_pci_bars(struct be_adapter *adapter)
5697 {
5698         struct pci_dev *pdev = adapter->pdev;
5699         u8 __iomem *addr;
5700         u32 sli_intf;
5701
5702         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5703         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5704                                 SLI_INTF_FAMILY_SHIFT;
5705         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5706
5707         if (BEx_chip(adapter) && be_physfn(adapter)) {
5708                 adapter->csr = pci_iomap(pdev, 2, 0);
5709                 if (!adapter->csr)
5710                         return -ENOMEM;
5711         }
5712
5713         addr = pci_iomap(pdev, db_bar(adapter), 0);
5714         if (!addr)
5715                 goto pci_map_err;
5716         adapter->db = addr;
5717
5718         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5719                 if (be_physfn(adapter)) {
5720                         /* PCICFG is the 2nd BAR in BE2 */
5721                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5722                         if (!addr)
5723                                 goto pci_map_err;
5724                         adapter->pcicfg = addr;
5725                         adapter->pcicfg_mapped = true;
5726                 } else {
5727                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5728                         adapter->pcicfg_mapped = false;
5729                 }
5730         }
5731
5732         be_roce_map_pci_bars(adapter);
5733         return 0;
5734
5735 pci_map_err:
5736         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5737         be_unmap_pci_bars(adapter);
5738         return -ENOMEM;
5739 }
5740
5741 static void be_drv_cleanup(struct be_adapter *adapter)
5742 {
5743         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5744         struct device *dev = &adapter->pdev->dev;
5745
5746         if (mem->va)
5747                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5748
5749         mem = &adapter->rx_filter;
5750         if (mem->va)
5751                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5752
5753         mem = &adapter->stats_cmd;
5754         if (mem->va)
5755                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5756 }
5757
5758 /* Allocate and initialize various fields in be_adapter struct */
5759 static int be_drv_init(struct be_adapter *adapter)
5760 {
5761         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5762         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5763         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5764         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5765         struct device *dev = &adapter->pdev->dev;
5766         int status = 0;
5767
5768         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5769         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5770                                                  &mbox_mem_alloc->dma,
5771                                                  GFP_KERNEL);
5772         if (!mbox_mem_alloc->va)
5773                 return -ENOMEM;
5774
5775         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5776         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5777         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5778
5779         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5780         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5781                                             &rx_filter->dma, GFP_KERNEL);
5782         if (!rx_filter->va) {
5783                 status = -ENOMEM;
5784                 goto free_mbox;
5785         }
5786
5787         if (lancer_chip(adapter))
5788                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5789         else if (BE2_chip(adapter))
5790                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5791         else if (BE3_chip(adapter))
5792                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5793         else
5794                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5795         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5796                                             &stats_cmd->dma, GFP_KERNEL);
5797         if (!stats_cmd->va) {
5798                 status = -ENOMEM;
5799                 goto free_rx_filter;
5800         }
5801
5802         mutex_init(&adapter->mbox_lock);
5803         mutex_init(&adapter->mcc_lock);
5804         mutex_init(&adapter->rx_filter_lock);
5805         spin_lock_init(&adapter->mcc_cq_lock);
5806         init_completion(&adapter->et_cmd_compl);
5807
5808         pci_save_state(adapter->pdev);
5809
5810         INIT_DELAYED_WORK(&adapter->work, be_worker);
5811
5812         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5813         adapter->error_recovery.resched_delay = 0;
5814         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5815                           be_err_detection_task);
5816
5817         adapter->rx_fc = true;
5818         adapter->tx_fc = true;
5819
5820         /* Must be a power of 2 or else MODULO will BUG_ON */
5821         adapter->be_get_temp_freq = 64;
5822
5823         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5824         return 0;
5825
5826 free_rx_filter:
5827         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5828 free_mbox:
5829         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5830                           mbox_mem_alloc->dma);
5831         return status;
5832 }
5833
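/* PCI remove handler: tears down everything set up in be_probe(), roughly
 * in reverse order of initialization.
 */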
5834 static void be_remove(struct pci_dev *pdev)
5835 {
5836         struct be_adapter *adapter = pci_get_drvdata(pdev);
5837
5838         if (!adapter)
5839                 return;
5840
5841         be_roce_dev_remove(adapter);
5842         be_intr_set(adapter, false);
5843
5844         be_cancel_err_detection(adapter);
5845
5846         unregister_netdev(adapter->netdev);
5847
5848         be_clear(adapter);
5849
5850         if (!pci_vfs_assigned(adapter->pdev))
5851                 be_cmd_reset_function(adapter);
5852
5853         /* tell fw we're done with firing cmds */
5854         be_cmd_fw_clean(adapter);
5855
5856         be_unmap_pci_bars(adapter);
5857         be_drv_cleanup(adapter);
5858
5859         pci_disable_pcie_error_reporting(pdev);
5860
5861         pci_release_regions(pdev);
5862         pci_disable_device(pdev);
5863
5864         free_netdev(adapter->netdev);
5865 }
5866
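/* hwmon show handler for temp1_input: reports the last on-die temperature
 * read from the adapter (in millidegrees Celsius), or -EIO if no valid
 * reading is available. Typically read via the standard hwmon sysfs path
 * /sys/class/hwmon/hwmonN/temp1_input (N depends on registration order).
 */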
5867 static ssize_t be_hwmon_show_temp(struct device *dev,
5868                                   struct device_attribute *dev_attr,
5869                                   char *buf)
5870 {
5871         struct be_adapter *adapter = dev_get_drvdata(dev);
5872
5873         /* Unit: millidegree Celsius */
5874         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5875                 return -EIO;
5876         else
5877                 return sprintf(buf, "%u\n",
5878                                adapter->hwmon_info.be_on_die_temp * 1000);
5879 }
5880
5881 static SENSOR_DEVICE_ATTR(temp1_input, 0444,
5882                           be_hwmon_show_temp, NULL, 1);
5883
5884 static struct attribute *be_hwmon_attrs[] = {
5885         &sensor_dev_attr_temp1_input.dev_attr.attr,
5886         NULL
5887 };
5888
5889 ATTRIBUTE_GROUPS(be_hwmon);
5890
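/* Human-readable name of the multi-channel mode, used in the probe banner */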
5891 static char *mc_name(struct be_adapter *adapter)
5892 {
5893         char *str = ""; /* default */
5894
5895         switch (adapter->mc_type) {
5896         case UMC:
5897                 str = "UMC";
5898                 break;
5899         case FLEX10:
5900                 str = "FLEX10";
5901                 break;
5902         case vNIC1:
5903                 str = "vNIC-1";
5904                 break;
5905         case nPAR:
5906                 str = "nPAR";
5907                 break;
5908         case UFP:
5909                 str = "UFP";
5910                 break;
5911         case vNIC2:
5912                 str = "vNIC-2";
5913                 break;
5914         default:
5915                 str = "";
5916         }
5917
5918         return str;
5919 }
5920
5921 static inline char *func_name(struct be_adapter *adapter)
5922 {
5923         return be_physfn(adapter) ? "PF" : "VF";
5924 }
5925
5926 static inline char *nic_name(struct pci_dev *pdev)
5927 {
5928         switch (pdev->device) {
5929         case OC_DEVICE_ID1:
5930                 return OC_NAME;
5931         case OC_DEVICE_ID2:
5932                 return OC_NAME_BE;
5933         case OC_DEVICE_ID3:
5934         case OC_DEVICE_ID4:
5935                 return OC_NAME_LANCER;
5936         case BE_DEVICE_ID2:
5937                 return BE3_NAME;
5938         case OC_DEVICE_ID5:
5939         case OC_DEVICE_ID6:
5940                 return OC_NAME_SH;
5941         default:
5942                 return BE_NAME;
5943         }
5944 }
5945
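/* PCI probe handler: enables the device, maps its BARs, allocates driver
 * state, brings the adapter up and registers the net device.
 */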
5946 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5947 {
5948         struct be_adapter *adapter;
5949         struct net_device *netdev;
5950         int status = 0;
5951
5952         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5953
5954         status = pci_enable_device(pdev);
5955         if (status)
5956                 goto do_none;
5957
5958         status = pci_request_regions(pdev, DRV_NAME);
5959         if (status)
5960                 goto disable_dev;
5961         pci_set_master(pdev);
5962
5963         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5964         if (!netdev) {
5965                 status = -ENOMEM;
5966                 goto rel_reg;
5967         }
5968         adapter = netdev_priv(netdev);
5969         adapter->pdev = pdev;
5970         pci_set_drvdata(pdev, adapter);
5971         adapter->netdev = netdev;
5972         SET_NETDEV_DEV(netdev, &pdev->dev);
5973
5974         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5975         if (!status) {
5976                 netdev->features |= NETIF_F_HIGHDMA;
5977         } else {
5978                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5979                 if (status) {
5980                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5981                         goto free_netdev;
5982                 }
5983         }
5984
5985         status = pci_enable_pcie_error_reporting(pdev);
5986         if (!status)
5987                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5988
5989         status = be_map_pci_bars(adapter);
5990         if (status)
5991                 goto free_netdev;
5992
5993         status = be_drv_init(adapter);
5994         if (status)
5995                 goto unmap_bars;
5996
5997         status = be_setup(adapter);
5998         if (status)
5999                 goto drv_cleanup;
6000
6001         be_netdev_init(netdev);
6002         status = register_netdev(netdev);
6003         if (status != 0)
6004                 goto unsetup;
6005
6006         be_roce_dev_add(adapter);
6007
6008         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6009         adapter->error_recovery.probe_time = jiffies;
6010
6011         /* On-die temperature is not supported on VFs. */
6012         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
6013                 adapter->hwmon_info.hwmon_dev =
6014                         devm_hwmon_device_register_with_groups(&pdev->dev,
6015                                                                DRV_NAME,
6016                                                                adapter,
6017                                                                be_hwmon_groups);
6018                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
6019         }
6020
6021         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
6022                  func_name(adapter), mc_name(adapter), adapter->port_name);
6023
6024         return 0;
6025
6026 unsetup:
6027         be_clear(adapter);
6028 drv_cleanup:
6029         be_drv_cleanup(adapter);
6030 unmap_bars:
6031         be_unmap_pci_bars(adapter);
6032 free_netdev:
6033         free_netdev(netdev);
6034 rel_reg:
6035         pci_release_regions(pdev);
6036 disable_dev:
6037         pci_disable_device(pdev);
6038 do_none:
6039         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
6040         return status;
6041 }
6042
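/* Legacy PCI power-management hooks: be_suspend() quiesces the adapter and
 * puts the device into the requested low-power state; be_pci_resume()
 * re-enables the device and brings the adapter back up.
 */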
6043 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
6044 {
6045         struct be_adapter *adapter = pci_get_drvdata(pdev);
6046
6047         be_intr_set(adapter, false);
6048         be_cancel_err_detection(adapter);
6049
6050         be_cleanup(adapter);
6051
6052         pci_save_state(pdev);
6053         pci_disable_device(pdev);
6054         pci_set_power_state(pdev, pci_choose_state(pdev, state));
6055         return 0;
6056 }
6057
6058 static int be_pci_resume(struct pci_dev *pdev)
6059 {
6060         struct be_adapter *adapter = pci_get_drvdata(pdev);
6061         int status = 0;
6062
6063         status = pci_enable_device(pdev);
6064         if (status)
6065                 return status;
6066
6067         pci_restore_state(pdev);
6068
6069         status = be_resume(adapter);
6070         if (status)
6071                 return status;
6072
6073         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6074
6075         return 0;
6076 }
6077
6078 /*
6079  * An FLR will stop BE from DMAing any data.
6080  */
6081 static void be_shutdown(struct pci_dev *pdev)
6082 {
6083         struct be_adapter *adapter = pci_get_drvdata(pdev);
6084
6085         if (!adapter)
6086                 return;
6087
6088         be_roce_dev_shutdown(adapter);
6089         cancel_delayed_work_sync(&adapter->work);
6090         be_cancel_err_detection(adapter);
6091
6092         netif_device_detach(adapter->netdev);
6093
6094         be_cmd_reset_function(adapter);
6095
6096         pci_disable_device(pdev);
6097 }
6098
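/* EEH/AER error_detected callback: quiesces the adapter and tells the PCI
 * error-recovery core whether to attempt a slot reset or to disconnect.
 */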
6099 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
6100                                             pci_channel_state_t state)
6101 {
6102         struct be_adapter *adapter = pci_get_drvdata(pdev);
6103
6104         dev_err(&adapter->pdev->dev, "EEH error detected\n");
6105
6106         be_roce_dev_remove(adapter);
6107
6108         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6109                 be_set_error(adapter, BE_ERROR_EEH);
6110
6111                 be_cancel_err_detection(adapter);
6112
6113                 be_cleanup(adapter);
6114         }
6115
6116         if (state == pci_channel_io_perm_failure)
6117                 return PCI_ERS_RESULT_DISCONNECT;
6118
6119         pci_disable_device(pdev);
6120
6121         /* The error could cause the FW to trigger a flash debug dump.
6122          * Resetting the card while flash dump is in progress
6123          * can cause it not to recover; wait for it to finish.
6124          * Wait only for the first function, as this is needed only once
6125          * per adapter.
6126          */
6127         if (pdev->devfn == 0)
6128                 ssleep(30);
6129
6130         return PCI_ERS_RESULT_NEED_RESET;
6131 }
6132
6133 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6134 {
6135         struct be_adapter *adapter = pci_get_drvdata(pdev);
6136         int status;
6137
6138         dev_info(&adapter->pdev->dev, "EEH reset\n");
6139
6140         status = pci_enable_device(pdev);
6141         if (status)
6142                 return PCI_ERS_RESULT_DISCONNECT;
6143
6144         pci_set_master(pdev);
6145         pci_restore_state(pdev);
6146
6147         /* Check if card is ok and fw is ready */
6148         dev_info(&adapter->pdev->dev,
6149                  "Waiting for FW to be ready after EEH reset\n");
6150         status = be_fw_wait_ready(adapter);
6151         if (status)
6152                 return PCI_ERS_RESULT_DISCONNECT;
6153
6154         be_clear_error(adapter, BE_CLEAR_ALL);
6155         return PCI_ERS_RESULT_RECOVERED;
6156 }
6157
6158 static void be_eeh_resume(struct pci_dev *pdev)
6159 {
6160         int status = 0;
6161         struct be_adapter *adapter = pci_get_drvdata(pdev);
6162
6163         dev_info(&adapter->pdev->dev, "EEH resume\n");
6164
6165         pci_save_state(pdev);
6166
6167         status = be_resume(adapter);
6168         if (status)
6169                 goto err;
6170
6171         be_roce_dev_add(adapter);
6172
6173         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6174         return;
6175 err:
6176         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6177 }
6178
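/* .sriov_configure handler, invoked when user space writes the standard PCI
 * sysfs attribute sriov_numvfs, e.g. (hypothetical PF address):
 *   echo 8 > /sys/bus/pci/devices/0000:04:00.0/sriov_numvfs
 * Writing 0 disables the VFs again.
 */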
6179 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6180 {
6181         struct be_adapter *adapter = pci_get_drvdata(pdev);
6182         struct be_resources vft_res = {0};
6183         int status;
6184
6185         if (!num_vfs)
6186                 be_vf_clear(adapter);
6187
6188         adapter->num_vfs = num_vfs;
6189
6190         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6191                 dev_warn(&pdev->dev,
6192                          "Cannot disable VFs while they are assigned\n");
6193                 return -EBUSY;
6194         }
6195
6196         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6197          * resources are distributed equally across the maximum number of VFs.
6198          * The user may request that only a subset of the max VFs be enabled.
6199          * Based on num_vfs, redistribute the resources across the requested
6200          * VFs so that each VF gets access to a larger share of resources.
6201          * This facility is not available in BE3 FW; on Lancer chips the FW
6202          * performs this redistribution itself.
6203          */
6204         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6205                 be_calculate_vf_res(adapter, adapter->num_vfs,
6206                                     &vft_res);
6207                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6208                                                  adapter->num_vfs, &vft_res);
6209                 if (status)
6210                         dev_err(&pdev->dev,
6211                                 "Failed to optimize SR-IOV resources\n");
6212         }
6213
6214         status = be_get_resources(adapter);
6215         if (status)
6216                 return be_cmd_status(status);
6217
6218         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6219         rtnl_lock();
6220         status = be_update_queues(adapter);
6221         rtnl_unlock();
6222         if (status)
6223                 return be_cmd_status(status);
6224
6225         if (adapter->num_vfs)
6226                 status = be_vf_setup(adapter);
6227
6228         if (!status)
6229                 return adapter->num_vfs;
6230
6231         return 0;
6232 }
6233
6234 static const struct pci_error_handlers be_eeh_handlers = {
6235         .error_detected = be_eeh_err_detected,
6236         .slot_reset = be_eeh_reset,
6237         .resume = be_eeh_resume,
6238 };
6239
6240 static struct pci_driver be_driver = {
6241         .name = DRV_NAME,
6242         .id_table = be_dev_ids,
6243         .probe = be_probe,
6244         .remove = be_remove,
6245         .suspend = be_suspend,
6246         .resume = be_pci_resume,
6247         .shutdown = be_shutdown,
6248         .sriov_configure = be_pci_sriov_configure,
6249         .err_handler = &be_eeh_handlers
6250 };
6251
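/* Module init: validates module parameters, creates the shared workqueues
 * and registers the PCI driver. Example (hypothetical) load with a
 * non-default RX fragment size:
 *   modprobe be2net rx_frag_size=4096
 */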
6252 static int __init be_init_module(void)
6253 {
6254         int status;
6255
6256         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6257             rx_frag_size != 2048) {
6258                 printk(KERN_WARNING DRV_NAME
6259                         " : Module param rx_frag_size must be 2048/4096/8192."
6260                         " Using 2048\n");
6261                 rx_frag_size = 2048;
6262         }
6263
6264         if (num_vfs > 0) {
6265                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6266                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6267         }
6268
6269         be_wq = create_singlethread_workqueue("be_wq");
6270         if (!be_wq) {
6271                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6272                 return -ENOMEM;
6273         }
6274
6275         be_err_recovery_workq =
6276                 create_singlethread_workqueue("be_err_recover");
6277         if (!be_err_recovery_workq)
6278                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6279
6280         status = pci_register_driver(&be_driver);
6281         if (status) {
6282                 destroy_workqueue(be_wq);
6283                 be_destroy_err_recovery_workq();
6284         }
6285         return status;
6286 }
6287 module_init(be_init_module);
6288
6289 static void __exit be_exit_module(void)
6290 {
6291         pci_unregister_driver(&be_driver);
6292
6293         be_destroy_err_recovery_workq();
6294
6295         if (be_wq)
6296                 destroy_workqueue(be_wq);
6297 }
6298 module_exit(be_exit_module);