drivers/net/ethernet/emulex/benet/be_main.c
1 /*
2  * Copyright (C) 2005 - 2016 Broadcom
3  * All rights reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License version 2
7  * as published by the Free Software Foundation.  The full GNU General
8  * Public License is included in this distribution in the file called COPYING.
9  *
10  * Contact Information:
11  * linux-drivers@emulex.com
12  *
13  * Emulex
14  * 3333 Susan Street
15  * Costa Mesa, CA 92626
16  */
17
18 #include <linux/prefetch.h>
19 #include <linux/module.h>
20 #include "be.h"
21 #include "be_cmds.h"
22 #include <asm/div64.h>
23 #include <linux/aer.h>
24 #include <linux/if_bridge.h>
25 #include <net/busy_poll.h>
26 #include <net/vxlan.h>
27
28 MODULE_VERSION(DRV_VER);
29 MODULE_DESCRIPTION(DRV_DESC " " DRV_VER);
30 MODULE_AUTHOR("Emulex Corporation");
31 MODULE_LICENSE("GPL");
32
33 /* num_vfs module param is obsolete.
34  * Use sysfs method to enable/disable VFs.
35  */
36 static unsigned int num_vfs;
37 module_param(num_vfs, uint, S_IRUGO);
38 MODULE_PARM_DESC(num_vfs, "Number of PCI VFs to initialize");
39
40 static ushort rx_frag_size = 2048;
41 module_param(rx_frag_size, ushort, S_IRUGO);
42 MODULE_PARM_DESC(rx_frag_size, "Size of a fragment that holds rcvd data.");
43
44 /* Per-module error detection/recovery workq shared across all functions.
45  * Each function schedules its own work request on this shared workq.
46  */
47 static struct workqueue_struct *be_err_recovery_workq;
48
49 static const struct pci_device_id be_dev_ids[] = {
50         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID1) },
51         { PCI_DEVICE(BE_VENDOR_ID, BE_DEVICE_ID2) },
52         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID1) },
53         { PCI_DEVICE(BE_VENDOR_ID, OC_DEVICE_ID2) },
54         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID3)},
55         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID4)},
56         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID5)},
57         { PCI_DEVICE(EMULEX_VENDOR_ID, OC_DEVICE_ID6)},
58         { 0 }
59 };
60 MODULE_DEVICE_TABLE(pci, be_dev_ids);
61
62 /* Workqueue used by all functions for deferring cmd calls to the adapter */
63 static struct workqueue_struct *be_wq;
64
65 /* UE Status Low CSR */
66 static const char * const ue_status_low_desc[] = {
67         "CEV",
68         "CTX",
69         "DBUF",
70         "ERX",
71         "Host",
72         "MPU",
73         "NDMA",
74         "PTC ",
75         "RDMA ",
76         "RXF ",
77         "RXIPS ",
78         "RXULP0 ",
79         "RXULP1 ",
80         "RXULP2 ",
81         "TIM ",
82         "TPOST ",
83         "TPRE ",
84         "TXIPS ",
85         "TXULP0 ",
86         "TXULP1 ",
87         "UC ",
88         "WDMA ",
89         "TXULP2 ",
90         "HOST1 ",
91         "P0_OB_LINK ",
92         "P1_OB_LINK ",
93         "HOST_GPIO ",
94         "MBOX ",
95         "ERX2 ",
96         "SPARE ",
97         "JTAG ",
98         "MPU_INTPEND "
99 };
100
101 /* UE Status High CSR */
102 static const char * const ue_status_hi_desc[] = {
103         "LPCMEMHOST",
104         "MGMT_MAC",
105         "PCS0ONLINE",
106         "MPU_IRAM",
107         "PCS1ONLINE",
108         "PCTL0",
109         "PCTL1",
110         "PMEM",
111         "RR",
112         "TXPB",
113         "RXPP",
114         "XAUI",
115         "TXP",
116         "ARM",
117         "IPC",
118         "HOST2",
119         "HOST3",
120         "HOST4",
121         "HOST5",
122         "HOST6",
123         "HOST7",
124         "ECRC",
125         "Poison TLP",
126         "NETC",
127         "PERIPH",
128         "LLTXULP",
129         "D2P",
130         "RCON",
131         "LDMA",
132         "LLTXP",
133         "LLTXPB",
134         "Unknown"
135 };
136
137 #define BE_VF_IF_EN_FLAGS       (BE_IF_FLAGS_UNTAGGED | \
138                                  BE_IF_FLAGS_BROADCAST | \
139                                  BE_IF_FLAGS_MULTICAST | \
140                                  BE_IF_FLAGS_PASS_L3L4_ERRORS)
141
142 static void be_queue_free(struct be_adapter *adapter, struct be_queue_info *q)
143 {
144         struct be_dma_mem *mem = &q->dma_mem;
145
146         if (mem->va) {
147                 dma_free_coherent(&adapter->pdev->dev, mem->size, mem->va,
148                                   mem->dma);
149                 mem->va = NULL;
150         }
151 }
152
153 static int be_queue_alloc(struct be_adapter *adapter, struct be_queue_info *q,
154                           u16 len, u16 entry_size)
155 {
156         struct be_dma_mem *mem = &q->dma_mem;
157
158         memset(q, 0, sizeof(*q));
159         q->len = len;
160         q->entry_size = entry_size;
161         mem->size = len * entry_size;
162         mem->va = dma_zalloc_coherent(&adapter->pdev->dev, mem->size, &mem->dma,
163                                       GFP_KERNEL);
164         if (!mem->va)
165                 return -ENOMEM;
166         return 0;
167 }
168
169 static void be_reg_intr_set(struct be_adapter *adapter, bool enable)
170 {
171         u32 reg, enabled;
172
173         pci_read_config_dword(adapter->pdev, PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET,
174                               &reg);
175         enabled = reg & MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
176
177         if (!enabled && enable)
178                 reg |= MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
179         else if (enabled && !enable)
180                 reg &= ~MEMBAR_CTRL_INT_CTRL_HOSTINTR_MASK;
181         else
182                 return;
183
184         pci_write_config_dword(adapter->pdev,
185                                PCICFG_MEMBAR_CTRL_INT_CTRL_OFFSET, reg);
186 }
187
188 static void be_intr_set(struct be_adapter *adapter, bool enable)
189 {
190         int status = 0;
191
192         /* On Lancer, interrupts can't be controlled via this register */
193         if (lancer_chip(adapter))
194                 return;
195
196         if (be_check_error(adapter, BE_ERROR_EEH))
197                 return;
198
199         status = be_cmd_intr_set(adapter, enable);
200         if (status)
201                 be_reg_intr_set(adapter, enable);
202 }
203
204 static void be_rxq_notify(struct be_adapter *adapter, u16 qid, u16 posted)
205 {
206         u32 val = 0;
207
208         if (be_check_error(adapter, BE_ERROR_HW))
209                 return;
210
211         val |= qid & DB_RQ_RING_ID_MASK;
212         val |= posted << DB_RQ_NUM_POSTED_SHIFT;
213
214         wmb();
215         iowrite32(val, adapter->db + DB_RQ_OFFSET);
216 }
217
218 static void be_txq_notify(struct be_adapter *adapter, struct be_tx_obj *txo,
219                           u16 posted)
220 {
221         u32 val = 0;
222
223         if (be_check_error(adapter, BE_ERROR_HW))
224                 return;
225
226         val |= txo->q.id & DB_TXULP_RING_ID_MASK;
227         val |= (posted & DB_TXULP_NUM_POSTED_MASK) << DB_TXULP_NUM_POSTED_SHIFT;
228
229         wmb();
230         iowrite32(val, adapter->db + txo->db_offset);
231 }
232
233 static void be_eq_notify(struct be_adapter *adapter, u16 qid,
234                          bool arm, bool clear_int, u16 num_popped,
235                          u32 eq_delay_mult_enc)
236 {
237         u32 val = 0;
238
239         val |= qid & DB_EQ_RING_ID_MASK;
240         val |= ((qid & DB_EQ_RING_ID_EXT_MASK) << DB_EQ_RING_ID_EXT_MASK_SHIFT);
241
242         if (be_check_error(adapter, BE_ERROR_HW))
243                 return;
244
245         if (arm)
246                 val |= 1 << DB_EQ_REARM_SHIFT;
247         if (clear_int)
248                 val |= 1 << DB_EQ_CLR_SHIFT;
249         val |= 1 << DB_EQ_EVNT_SHIFT;
250         val |= num_popped << DB_EQ_NUM_POPPED_SHIFT;
251         val |= eq_delay_mult_enc << DB_EQ_R2I_DLY_SHIFT;
252         iowrite32(val, adapter->db + DB_EQ_OFFSET);
253 }
254
255 void be_cq_notify(struct be_adapter *adapter, u16 qid, bool arm, u16 num_popped)
256 {
257         u32 val = 0;
258
259         val |= qid & DB_CQ_RING_ID_MASK;
260         val |= ((qid & DB_CQ_RING_ID_EXT_MASK) <<
261                         DB_CQ_RING_ID_EXT_MASK_SHIFT);
262
263         if (be_check_error(adapter, BE_ERROR_HW))
264                 return;
265
266         if (arm)
267                 val |= 1 << DB_CQ_REARM_SHIFT;
268         val |= num_popped << DB_CQ_NUM_POPPED_SHIFT;
269         iowrite32(val, adapter->db + DB_CQ_OFFSET);
270 }
271
272 static int be_dev_mac_add(struct be_adapter *adapter, u8 *mac)
273 {
274         int i;
275
276         /* Check if mac has already been added as part of uc-list */
277         for (i = 0; i < adapter->uc_macs; i++) {
278                 if (ether_addr_equal(adapter->uc_list[i].mac, mac)) {
279                         /* mac already added, skip addition */
280                         adapter->pmac_id[0] = adapter->pmac_id[i + 1];
281                         return 0;
282                 }
283         }
284
285         return be_cmd_pmac_add(adapter, mac, adapter->if_handle,
286                                &adapter->pmac_id[0], 0);
287 }
288
289 static void be_dev_mac_del(struct be_adapter *adapter, int pmac_id)
290 {
291         int i;
292
293         /* Skip deletion if the programmed mac is
294          * being used in uc-list
295          */
296         for (i = 0; i < adapter->uc_macs; i++) {
297                 if (adapter->pmac_id[i + 1] == pmac_id)
298                         return;
299         }
300         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
301 }
302
303 static int be_mac_addr_set(struct net_device *netdev, void *p)
304 {
305         struct be_adapter *adapter = netdev_priv(netdev);
306         struct device *dev = &adapter->pdev->dev;
307         struct sockaddr *addr = p;
308         int status;
309         u8 mac[ETH_ALEN];
310         u32 old_pmac_id = adapter->pmac_id[0];
311
312         if (!is_valid_ether_addr(addr->sa_data))
313                 return -EADDRNOTAVAIL;
314
315         /* Proceed further only if the user-provided MAC is different
316          * from the active MAC
317          */
318         if (ether_addr_equal(addr->sa_data, adapter->dev_mac))
319                 return 0;
320
321         /* BE3 VFs without FILTMGMT privilege are not allowed to set their
322          * MAC address
323          */
324         if (BEx_chip(adapter) && be_virtfn(adapter) &&
325             !check_privilege(adapter, BE_PRIV_FILTMGMT))
326                 return -EPERM;
327
328         /* if device is not running, copy MAC to netdev->dev_addr */
329         if (!netif_running(netdev))
330                 goto done;
331
332         /* The PMAC_ADD cmd may fail if the VF doesn't have the FILTMGMT
333          * privilege or if the PF did not provision the new MAC address.
334          * On BE3, this cmd will always fail if the VF doesn't have the
335          * FILTMGMT privilege. This failure is OK only if the PF has
336          * programmed the MAC for the VF.
337          */
338         mutex_lock(&adapter->rx_filter_lock);
339         status = be_dev_mac_add(adapter, (u8 *)addr->sa_data);
340         if (!status) {
341
342                 /* Delete the old programmed MAC. This call may fail if the
343                  * old MAC was already deleted by the PF driver.
344                  */
345                 if (adapter->pmac_id[0] != old_pmac_id)
346                         be_dev_mac_del(adapter, old_pmac_id);
347         }
348
349         mutex_unlock(&adapter->rx_filter_lock);
350         /* Decide if the new MAC is successfully activated only after
351          * querying the FW
352          */
353         status = be_cmd_get_active_mac(adapter, adapter->pmac_id[0], mac,
354                                        adapter->if_handle, true, 0);
355         if (status)
356                 goto err;
357
358         /* The MAC change did not happen, either due to lack of privilege
359          * or because the PF did not pre-provision the new MAC.
360          */
361         if (!ether_addr_equal(addr->sa_data, mac)) {
362                 status = -EPERM;
363                 goto err;
364         }
365
366         /* Remember currently programmed MAC */
367         ether_addr_copy(adapter->dev_mac, addr->sa_data);
368 done:
369         ether_addr_copy(netdev->dev_addr, addr->sa_data);
370         dev_info(dev, "MAC address changed to %pM\n", addr->sa_data);
371         return 0;
372 err:
373         dev_warn(dev, "MAC address change to %pM failed\n", addr->sa_data);
374         return status;
375 }
376
377 /* BE2 supports only v0 cmd */
378 static void *hw_stats_from_cmd(struct be_adapter *adapter)
379 {
380         if (BE2_chip(adapter)) {
381                 struct be_cmd_resp_get_stats_v0 *cmd = adapter->stats_cmd.va;
382
383                 return &cmd->hw_stats;
384         } else if (BE3_chip(adapter)) {
385                 struct be_cmd_resp_get_stats_v1 *cmd = adapter->stats_cmd.va;
386
387                 return &cmd->hw_stats;
388         } else {
389                 struct be_cmd_resp_get_stats_v2 *cmd = adapter->stats_cmd.va;
390
391                 return &cmd->hw_stats;
392         }
393 }
394
395 /* BE2 supports only v0 cmd */
396 static void *be_erx_stats_from_cmd(struct be_adapter *adapter)
397 {
398         if (BE2_chip(adapter)) {
399                 struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
400
401                 return &hw_stats->erx;
402         } else if (BE3_chip(adapter)) {
403                 struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
404
405                 return &hw_stats->erx;
406         } else {
407                 struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
408
409                 return &hw_stats->erx;
410         }
411 }
412
413 static void populate_be_v0_stats(struct be_adapter *adapter)
414 {
415         struct be_hw_stats_v0 *hw_stats = hw_stats_from_cmd(adapter);
416         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
417         struct be_rxf_stats_v0 *rxf_stats = &hw_stats->rxf;
418         struct be_port_rxf_stats_v0 *port_stats =
419                                         &rxf_stats->port[adapter->port_num];
420         struct be_drv_stats *drvs = &adapter->drv_stats;
421
422         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
423         drvs->rx_pause_frames = port_stats->rx_pause_frames;
424         drvs->rx_crc_errors = port_stats->rx_crc_errors;
425         drvs->rx_control_frames = port_stats->rx_control_frames;
426         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
427         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
428         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
429         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
430         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
431         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
432         drvs->rxpp_fifo_overflow_drop = port_stats->rx_fifo_overflow;
433         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
434         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
435         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
436         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
437         drvs->rx_input_fifo_overflow_drop = port_stats->rx_input_fifo_overflow;
438         drvs->rx_dropped_header_too_small =
439                 port_stats->rx_dropped_header_too_small;
440         drvs->rx_address_filtered =
441                                         port_stats->rx_address_filtered +
442                                         port_stats->rx_vlan_filtered;
443         drvs->rx_alignment_symbol_errors =
444                 port_stats->rx_alignment_symbol_errors;
445
446         drvs->tx_pauseframes = port_stats->tx_pauseframes;
447         drvs->tx_controlframes = port_stats->tx_controlframes;
448
449         if (adapter->port_num)
450                 drvs->jabber_events = rxf_stats->port1_jabber_events;
451         else
452                 drvs->jabber_events = rxf_stats->port0_jabber_events;
453         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
454         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
455         drvs->forwarded_packets = rxf_stats->forwarded_packets;
456         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
457         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
458         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
459         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
460 }
461
462 static void populate_be_v1_stats(struct be_adapter *adapter)
463 {
464         struct be_hw_stats_v1 *hw_stats = hw_stats_from_cmd(adapter);
465         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
466         struct be_rxf_stats_v1 *rxf_stats = &hw_stats->rxf;
467         struct be_port_rxf_stats_v1 *port_stats =
468                                         &rxf_stats->port[adapter->port_num];
469         struct be_drv_stats *drvs = &adapter->drv_stats;
470
471         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
472         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
473         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
474         drvs->rx_pause_frames = port_stats->rx_pause_frames;
475         drvs->rx_crc_errors = port_stats->rx_crc_errors;
476         drvs->rx_control_frames = port_stats->rx_control_frames;
477         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
478         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
479         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
480         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
481         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
482         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
483         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
484         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
485         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
486         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
487         drvs->rx_dropped_header_too_small =
488                 port_stats->rx_dropped_header_too_small;
489         drvs->rx_input_fifo_overflow_drop =
490                 port_stats->rx_input_fifo_overflow_drop;
491         drvs->rx_address_filtered = port_stats->rx_address_filtered;
492         drvs->rx_alignment_symbol_errors =
493                 port_stats->rx_alignment_symbol_errors;
494         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
495         drvs->tx_pauseframes = port_stats->tx_pauseframes;
496         drvs->tx_controlframes = port_stats->tx_controlframes;
497         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
498         drvs->jabber_events = port_stats->jabber_events;
499         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
500         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
501         drvs->forwarded_packets = rxf_stats->forwarded_packets;
502         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
503         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
504         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
505         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
506 }
507
508 static void populate_be_v2_stats(struct be_adapter *adapter)
509 {
510         struct be_hw_stats_v2 *hw_stats = hw_stats_from_cmd(adapter);
511         struct be_pmem_stats *pmem_sts = &hw_stats->pmem;
512         struct be_rxf_stats_v2 *rxf_stats = &hw_stats->rxf;
513         struct be_port_rxf_stats_v2 *port_stats =
514                                         &rxf_stats->port[adapter->port_num];
515         struct be_drv_stats *drvs = &adapter->drv_stats;
516
517         be_dws_le_to_cpu(hw_stats, sizeof(*hw_stats));
518         drvs->pmem_fifo_overflow_drop = port_stats->pmem_fifo_overflow_drop;
519         drvs->rx_priority_pause_frames = port_stats->rx_priority_pause_frames;
520         drvs->rx_pause_frames = port_stats->rx_pause_frames;
521         drvs->rx_crc_errors = port_stats->rx_crc_errors;
522         drvs->rx_control_frames = port_stats->rx_control_frames;
523         drvs->rx_in_range_errors = port_stats->rx_in_range_errors;
524         drvs->rx_frame_too_long = port_stats->rx_frame_too_long;
525         drvs->rx_dropped_runt = port_stats->rx_dropped_runt;
526         drvs->rx_ip_checksum_errs = port_stats->rx_ip_checksum_errs;
527         drvs->rx_tcp_checksum_errs = port_stats->rx_tcp_checksum_errs;
528         drvs->rx_udp_checksum_errs = port_stats->rx_udp_checksum_errs;
529         drvs->rx_dropped_tcp_length = port_stats->rx_dropped_tcp_length;
530         drvs->rx_dropped_too_small = port_stats->rx_dropped_too_small;
531         drvs->rx_dropped_too_short = port_stats->rx_dropped_too_short;
532         drvs->rx_out_range_errors = port_stats->rx_out_range_errors;
533         drvs->rx_dropped_header_too_small =
534                 port_stats->rx_dropped_header_too_small;
535         drvs->rx_input_fifo_overflow_drop =
536                 port_stats->rx_input_fifo_overflow_drop;
537         drvs->rx_address_filtered = port_stats->rx_address_filtered;
538         drvs->rx_alignment_symbol_errors =
539                 port_stats->rx_alignment_symbol_errors;
540         drvs->rxpp_fifo_overflow_drop = port_stats->rxpp_fifo_overflow_drop;
541         drvs->tx_pauseframes = port_stats->tx_pauseframes;
542         drvs->tx_controlframes = port_stats->tx_controlframes;
543         drvs->tx_priority_pauseframes = port_stats->tx_priority_pauseframes;
544         drvs->jabber_events = port_stats->jabber_events;
545         drvs->rx_drops_no_pbuf = rxf_stats->rx_drops_no_pbuf;
546         drvs->rx_drops_no_erx_descr = rxf_stats->rx_drops_no_erx_descr;
547         drvs->forwarded_packets = rxf_stats->forwarded_packets;
548         drvs->rx_drops_mtu = rxf_stats->rx_drops_mtu;
549         drvs->rx_drops_no_tpre_descr = rxf_stats->rx_drops_no_tpre_descr;
550         drvs->rx_drops_too_many_frags = rxf_stats->rx_drops_too_many_frags;
551         adapter->drv_stats.eth_red_drops = pmem_sts->eth_red_drops;
552         if (be_roce_supported(adapter)) {
553                 drvs->rx_roce_bytes_lsd = port_stats->roce_bytes_received_lsd;
554                 drvs->rx_roce_bytes_msd = port_stats->roce_bytes_received_msd;
555                 drvs->rx_roce_frames = port_stats->roce_frames_received;
556                 drvs->roce_drops_crc = port_stats->roce_drops_crc;
557                 drvs->roce_drops_payload_len =
558                         port_stats->roce_drops_payload_len;
559         }
560 }
561
562 static void populate_lancer_stats(struct be_adapter *adapter)
563 {
564         struct be_drv_stats *drvs = &adapter->drv_stats;
565         struct lancer_pport_stats *pport_stats = pport_stats_from_cmd(adapter);
566
567         be_dws_le_to_cpu(pport_stats, sizeof(*pport_stats));
568         drvs->rx_pause_frames = pport_stats->rx_pause_frames_lo;
569         drvs->rx_crc_errors = pport_stats->rx_crc_errors_lo;
570         drvs->rx_control_frames = pport_stats->rx_control_frames_lo;
571         drvs->rx_in_range_errors = pport_stats->rx_in_range_errors;
572         drvs->rx_frame_too_long = pport_stats->rx_frames_too_long_lo;
573         drvs->rx_dropped_runt = pport_stats->rx_dropped_runt;
574         drvs->rx_ip_checksum_errs = pport_stats->rx_ip_checksum_errors;
575         drvs->rx_tcp_checksum_errs = pport_stats->rx_tcp_checksum_errors;
576         drvs->rx_udp_checksum_errs = pport_stats->rx_udp_checksum_errors;
577         drvs->rx_dropped_tcp_length =
578                                 pport_stats->rx_dropped_invalid_tcp_length;
579         drvs->rx_dropped_too_small = pport_stats->rx_dropped_too_small;
580         drvs->rx_dropped_too_short = pport_stats->rx_dropped_too_short;
581         drvs->rx_out_range_errors = pport_stats->rx_out_of_range_errors;
582         drvs->rx_dropped_header_too_small =
583                                 pport_stats->rx_dropped_header_too_small;
584         drvs->rx_input_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
585         drvs->rx_address_filtered =
586                                         pport_stats->rx_address_filtered +
587                                         pport_stats->rx_vlan_filtered;
588         drvs->rx_alignment_symbol_errors = pport_stats->rx_symbol_errors_lo;
589         drvs->rxpp_fifo_overflow_drop = pport_stats->rx_fifo_overflow;
590         drvs->tx_pauseframes = pport_stats->tx_pause_frames_lo;
591         drvs->tx_controlframes = pport_stats->tx_control_frames_lo;
592         drvs->jabber_events = pport_stats->rx_jabbers;
593         drvs->forwarded_packets = pport_stats->num_forwards_lo;
594         drvs->rx_drops_mtu = pport_stats->rx_drops_mtu_lo;
595         drvs->rx_drops_too_many_frags =
596                                 pport_stats->rx_drops_too_many_frags_lo;
597 }
598
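/* Fold a 16-bit HW counter that wraps at 65535 into a 32-bit software
 * accumulator: the high half of *acc counts completed wraps while the low
 * half mirrors the most recent HW reading (this assumes the counter
 * advances by less than 65536 between two reads). Worked example with
 * illustrative values: if *acc is 0x0001fff0 and the next reading is
 * 0x0005, the reading is smaller than the previous low half, so one wrap
 * is assumed and *acc becomes 0x00020005.
 */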
599 static void accumulate_16bit_val(u32 *acc, u16 val)
600 {
601 #define lo(x)                   (x & 0xFFFF)
602 #define hi(x)                   (x & 0xFFFF0000)
603         bool wrapped = val < lo(*acc);
604         u32 newacc = hi(*acc) + val;
605
606         if (wrapped)
607                 newacc += 65536;
608         WRITE_ONCE(*acc, newacc);
609 }
610
611 static void populate_erx_stats(struct be_adapter *adapter,
612                                struct be_rx_obj *rxo, u32 erx_stat)
613 {
614         if (!BEx_chip(adapter))
615                 rx_stats(rxo)->rx_drops_no_frags = erx_stat;
616         else
617                 /* The erx HW counter below can actually wrap around after
618                  * 65535. The driver accumulates it into a 32-bit value.
619                  */
620                 accumulate_16bit_val(&rx_stats(rxo)->rx_drops_no_frags,
621                                      (u16)erx_stat);
622 }
623
624 void be_parse_stats(struct be_adapter *adapter)
625 {
626         struct be_erx_stats_v2 *erx = be_erx_stats_from_cmd(adapter);
627         struct be_rx_obj *rxo;
628         int i;
629         u32 erx_stat;
630
631         if (lancer_chip(adapter)) {
632                 populate_lancer_stats(adapter);
633         } else {
634                 if (BE2_chip(adapter))
635                         populate_be_v0_stats(adapter);
636                 else if (BE3_chip(adapter))
637                         /* for BE3 */
638                         populate_be_v1_stats(adapter);
639                 else
640                         populate_be_v2_stats(adapter);
641
642                 /* erx_v2 is longer than v0, v1. use v2 for v0, v1 access */
643                 for_all_rx_queues(adapter, rxo, i) {
644                         erx_stat = erx->rx_drops_no_fragments[rxo->q.id];
645                         populate_erx_stats(adapter, rxo, erx_stat);
646                 }
647         }
648 }
649
650 static void be_get_stats64(struct net_device *netdev,
651                            struct rtnl_link_stats64 *stats)
652 {
653         struct be_adapter *adapter = netdev_priv(netdev);
654         struct be_drv_stats *drvs = &adapter->drv_stats;
655         struct be_rx_obj *rxo;
656         struct be_tx_obj *txo;
657         u64 pkts, bytes;
658         unsigned int start;
659         int i;
660
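        /* Per-queue SW counters are updated in the datapath under a u64_stats
         * seqcount; read them in a begin/retry loop so the 64-bit packet and
         * byte counts are snapshotted consistently, even on 32-bit hosts.
         */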
661         for_all_rx_queues(adapter, rxo, i) {
662                 const struct be_rx_stats *rx_stats = rx_stats(rxo);
663
664                 do {
665                         start = u64_stats_fetch_begin_irq(&rx_stats->sync);
666                         pkts = rx_stats(rxo)->rx_pkts;
667                         bytes = rx_stats(rxo)->rx_bytes;
668                 } while (u64_stats_fetch_retry_irq(&rx_stats->sync, start));
669                 stats->rx_packets += pkts;
670                 stats->rx_bytes += bytes;
671                 stats->multicast += rx_stats(rxo)->rx_mcast_pkts;
672                 stats->rx_dropped += rx_stats(rxo)->rx_drops_no_skbs +
673                                         rx_stats(rxo)->rx_drops_no_frags;
674         }
675
676         for_all_tx_queues(adapter, txo, i) {
677                 const struct be_tx_stats *tx_stats = tx_stats(txo);
678
679                 do {
680                         start = u64_stats_fetch_begin_irq(&tx_stats->sync);
681                         pkts = tx_stats(txo)->tx_pkts;
682                         bytes = tx_stats(txo)->tx_bytes;
683                 } while (u64_stats_fetch_retry_irq(&tx_stats->sync, start));
684                 stats->tx_packets += pkts;
685                 stats->tx_bytes += bytes;
686         }
687
688         /* bad pkts received */
689         stats->rx_errors = drvs->rx_crc_errors +
690                 drvs->rx_alignment_symbol_errors +
691                 drvs->rx_in_range_errors +
692                 drvs->rx_out_range_errors +
693                 drvs->rx_frame_too_long +
694                 drvs->rx_dropped_too_small +
695                 drvs->rx_dropped_too_short +
696                 drvs->rx_dropped_header_too_small +
697                 drvs->rx_dropped_tcp_length +
698                 drvs->rx_dropped_runt;
699
700         /* detailed rx errors */
701         stats->rx_length_errors = drvs->rx_in_range_errors +
702                 drvs->rx_out_range_errors +
703                 drvs->rx_frame_too_long;
704
705         stats->rx_crc_errors = drvs->rx_crc_errors;
706
707         /* frame alignment errors */
708         stats->rx_frame_errors = drvs->rx_alignment_symbol_errors;
709
710         /* receiver fifo overrun */
711         /* drops_no_pbuf is not per i/f, it's per BE card */
712         stats->rx_fifo_errors = drvs->rxpp_fifo_overflow_drop +
713                                 drvs->rx_input_fifo_overflow_drop +
714                                 drvs->rx_drops_no_pbuf;
715 }
716
717 void be_link_status_update(struct be_adapter *adapter, u8 link_status)
718 {
719         struct net_device *netdev = adapter->netdev;
720
721         if (!(adapter->flags & BE_FLAGS_LINK_STATUS_INIT)) {
722                 netif_carrier_off(netdev);
723                 adapter->flags |= BE_FLAGS_LINK_STATUS_INIT;
724         }
725
726         if (link_status)
727                 netif_carrier_on(netdev);
728         else
729                 netif_carrier_off(netdev);
730
731         netdev_info(netdev, "Link is %s\n", link_status ? "Up" : "Down");
732 }
733
734 static int be_gso_hdr_len(struct sk_buff *skb)
735 {
736         if (skb->encapsulation)
737                 return skb_inner_transport_offset(skb) +
738                        inner_tcp_hdrlen(skb);
739         return skb_transport_offset(skb) + tcp_hdrlen(skb);
740 }
741
742 static void be_tx_stats_update(struct be_tx_obj *txo, struct sk_buff *skb)
743 {
744         struct be_tx_stats *stats = tx_stats(txo);
745         u32 tx_pkts = skb_shinfo(skb)->gso_segs ? : 1;
746         /* Account for headers which get duplicated in TSO pkt */
747         u32 dup_hdr_len = tx_pkts > 1 ? be_gso_hdr_len(skb) * (tx_pkts - 1) : 0;
748
749         u64_stats_update_begin(&stats->sync);
750         stats->tx_reqs++;
751         stats->tx_bytes += skb->len + dup_hdr_len;
752         stats->tx_pkts += tx_pkts;
753         if (skb->encapsulation && skb->ip_summed == CHECKSUM_PARTIAL)
754                 stats->tx_vxlan_offload_pkts += tx_pkts;
755         u64_stats_update_end(&stats->sync);
756 }
757
758 /* Returns number of WRBs needed for the skb */
759 static u32 skb_wrb_cnt(struct sk_buff *skb)
760 {
761         /* +1 for the header wrb */
762         return 1 + (skb_headlen(skb) ? 1 : 0) + skb_shinfo(skb)->nr_frags;
763 }
764
765 static inline void wrb_fill(struct be_eth_wrb *wrb, u64 addr, int len)
766 {
767         wrb->frag_pa_hi = cpu_to_le32(upper_32_bits(addr));
768         wrb->frag_pa_lo = cpu_to_le32(lower_32_bits(addr));
769         wrb->frag_len = cpu_to_le32(len & ETH_WRB_FRAG_LEN_MASK);
770         wrb->rsvd0 = 0;
771 }
772
773 /* A dummy wrb is just all zeros. Using a separate routine for dummy-wrb
774  * to avoid the swap and shift/mask operations in wrb_fill().
775  */
776 static inline void wrb_fill_dummy(struct be_eth_wrb *wrb)
777 {
778         wrb->frag_pa_hi = 0;
779         wrb->frag_pa_lo = 0;
780         wrb->frag_len = 0;
781         wrb->rsvd0 = 0;
782 }
783
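/* Returns the VLAN tag to place in the TX WRB. If the 802.1p priority
 * supplied by the stack is not in the priority bitmap available to this
 * function, the priority bits are replaced with the FW-recommended value.
 */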
784 static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter,
785                                      struct sk_buff *skb)
786 {
787         u8 vlan_prio;
788         u16 vlan_tag;
789
790         vlan_tag = skb_vlan_tag_get(skb);
791         vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
792         /* If vlan priority provided by OS is NOT in available bmap */
793         if (!(adapter->vlan_prio_bmap & (1 << vlan_prio)))
794                 vlan_tag = (vlan_tag & ~VLAN_PRIO_MASK) |
795                                 adapter->recommended_prio_bits;
796
797         return vlan_tag;
798 }
799
800 /* Used only for IP tunnel packets */
801 static u16 skb_inner_ip_proto(struct sk_buff *skb)
802 {
803         return (inner_ip_hdr(skb)->version == 4) ?
804                 inner_ip_hdr(skb)->protocol : inner_ipv6_hdr(skb)->nexthdr;
805 }
806
807 static u16 skb_ip_proto(struct sk_buff *skb)
808 {
809         return (ip_hdr(skb)->version == 4) ?
810                 ip_hdr(skb)->protocol : ipv6_hdr(skb)->nexthdr;
811 }
812
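/* The TXQ is treated as full when it can no longer accommodate a maximally
 * fragmented skb (BE_MAX_TX_FRAG_COUNT WRBs).
 */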
813 static inline bool be_is_txq_full(struct be_tx_obj *txo)
814 {
815         return atomic_read(&txo->q.used) + BE_MAX_TX_FRAG_COUNT >= txo->q.len;
816 }
817
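/* Wake a stopped TXQ only after it has drained to half its length, to
 * avoid start/stop thrashing.
 */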
818 static inline bool be_can_txq_wake(struct be_tx_obj *txo)
819 {
820         return atomic_read(&txo->q.used) < txo->q.len / 2;
821 }
822
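/* TX completions are still outstanding if the queue holds more WRBs than
 * the ones that have merely been queued but not yet notified to HW.
 */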
823 static inline bool be_is_tx_compl_pending(struct be_tx_obj *txo)
824 {
825         return atomic_read(&txo->q.used) > txo->pend_wrb_cnt;
826 }
827
828 static void be_get_wrb_params_from_skb(struct be_adapter *adapter,
829                                        struct sk_buff *skb,
830                                        struct be_wrb_params *wrb_params)
831 {
832         u16 proto;
833
834         if (skb_is_gso(skb)) {
835                 BE_WRB_F_SET(wrb_params->features, LSO, 1);
836                 wrb_params->lso_mss = skb_shinfo(skb)->gso_size;
837                 if (skb_is_gso_v6(skb) && !lancer_chip(adapter))
838                         BE_WRB_F_SET(wrb_params->features, LSO6, 1);
839         } else if (skb->ip_summed == CHECKSUM_PARTIAL) {
840                 if (skb->encapsulation) {
841                         BE_WRB_F_SET(wrb_params->features, IPCS, 1);
842                         proto = skb_inner_ip_proto(skb);
843                 } else {
844                         proto = skb_ip_proto(skb);
845                 }
846                 if (proto == IPPROTO_TCP)
847                         BE_WRB_F_SET(wrb_params->features, TCPCS, 1);
848                 else if (proto == IPPROTO_UDP)
849                         BE_WRB_F_SET(wrb_params->features, UDPCS, 1);
850         }
851
852         if (skb_vlan_tag_present(skb)) {
853                 BE_WRB_F_SET(wrb_params->features, VLAN, 1);
854                 wrb_params->vlan_tag = be_get_tx_vlan_tag(adapter, skb);
855         }
856
857         BE_WRB_F_SET(wrb_params->features, CRC, 1);
858 }
859
860 static void wrb_fill_hdr(struct be_adapter *adapter,
861                          struct be_eth_hdr_wrb *hdr,
862                          struct be_wrb_params *wrb_params,
863                          struct sk_buff *skb)
864 {
865         memset(hdr, 0, sizeof(*hdr));
866
867         SET_TX_WRB_HDR_BITS(crc, hdr,
868                             BE_WRB_F_GET(wrb_params->features, CRC));
869         SET_TX_WRB_HDR_BITS(ipcs, hdr,
870                             BE_WRB_F_GET(wrb_params->features, IPCS));
871         SET_TX_WRB_HDR_BITS(tcpcs, hdr,
872                             BE_WRB_F_GET(wrb_params->features, TCPCS));
873         SET_TX_WRB_HDR_BITS(udpcs, hdr,
874                             BE_WRB_F_GET(wrb_params->features, UDPCS));
875
876         SET_TX_WRB_HDR_BITS(lso, hdr,
877                             BE_WRB_F_GET(wrb_params->features, LSO));
878         SET_TX_WRB_HDR_BITS(lso6, hdr,
879                             BE_WRB_F_GET(wrb_params->features, LSO6));
880         SET_TX_WRB_HDR_BITS(lso_mss, hdr, wrb_params->lso_mss);
881
882         /* The hack to skip HW VLAN tagging needs evt = 1, compl = 0. When
883          * this hack is not needed, the evt bit is set while ringing the DB.
884          */
885         SET_TX_WRB_HDR_BITS(event, hdr,
886                             BE_WRB_F_GET(wrb_params->features, VLAN_SKIP_HW));
887         SET_TX_WRB_HDR_BITS(vlan, hdr,
888                             BE_WRB_F_GET(wrb_params->features, VLAN));
889         SET_TX_WRB_HDR_BITS(vlan_tag, hdr, wrb_params->vlan_tag);
890
891         SET_TX_WRB_HDR_BITS(num_wrb, hdr, skb_wrb_cnt(skb));
892         SET_TX_WRB_HDR_BITS(len, hdr, skb->len);
893         SET_TX_WRB_HDR_BITS(mgmt, hdr,
894                             BE_WRB_F_GET(wrb_params->features, OS2BMC));
895 }
896
897 static void unmap_tx_frag(struct device *dev, struct be_eth_wrb *wrb,
898                           bool unmap_single)
899 {
900         dma_addr_t dma;
901         u32 frag_len = le32_to_cpu(wrb->frag_len);
902
904         dma = (u64)le32_to_cpu(wrb->frag_pa_hi) << 32 |
905                 (u64)le32_to_cpu(wrb->frag_pa_lo);
906         if (frag_len) {
907                 if (unmap_single)
908                         dma_unmap_single(dev, dma, frag_len, DMA_TO_DEVICE);
909                 else
910                         dma_unmap_page(dev, dma, frag_len, DMA_TO_DEVICE);
911         }
912 }
913
914 /* Grab a WRB header for xmit */
915 static u32 be_tx_get_wrb_hdr(struct be_tx_obj *txo)
916 {
917         u32 head = txo->q.head;
918
919         queue_head_inc(&txo->q);
920         return head;
921 }
922
923 /* Set up the WRB header for xmit */
924 static void be_tx_setup_wrb_hdr(struct be_adapter *adapter,
925                                 struct be_tx_obj *txo,
926                                 struct be_wrb_params *wrb_params,
927                                 struct sk_buff *skb, u16 head)
928 {
929         u32 num_frags = skb_wrb_cnt(skb);
930         struct be_queue_info *txq = &txo->q;
931         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, head);
932
933         wrb_fill_hdr(adapter, hdr, wrb_params, skb);
934         be_dws_cpu_to_le(hdr, sizeof(*hdr));
935
936         BUG_ON(txo->sent_skb_list[head]);
937         txo->sent_skb_list[head] = skb;
938         txo->last_req_hdr = head;
939         atomic_add(num_frags, &txq->used);
940         txo->last_req_wrb_cnt = num_frags;
941         txo->pend_wrb_cnt += num_frags;
942 }
943
944 /* Setup a WRB fragment (buffer descriptor) for xmit */
945 static void be_tx_setup_wrb_frag(struct be_tx_obj *txo, dma_addr_t busaddr,
946                                  int len)
947 {
948         struct be_eth_wrb *wrb;
949         struct be_queue_info *txq = &txo->q;
950
951         wrb = queue_head_node(txq);
952         wrb_fill(wrb, busaddr, len);
953         queue_head_inc(txq);
954 }
955
956 /* Bring the queue back to the state it was in before be_xmit_enqueue() was
957  * invoked. The producer index is restored to the previous packet and the
958  * WRBs of the current packet are unmapped. Invoked to handle tx setup errors.
959  */
960 static void be_xmit_restore(struct be_adapter *adapter,
961                             struct be_tx_obj *txo, u32 head, bool map_single,
962                             u32 copied)
963 {
964         struct device *dev;
965         struct be_eth_wrb *wrb;
966         struct be_queue_info *txq = &txo->q;
967
968         dev = &adapter->pdev->dev;
969         txq->head = head;
970
971         /* skip the first wrb (hdr); it's not mapped */
972         queue_head_inc(txq);
973         while (copied) {
974                 wrb = queue_head_node(txq);
975                 unmap_tx_frag(dev, wrb, map_single);
976                 map_single = false;
977                 copied -= le32_to_cpu(wrb->frag_len);
978                 queue_head_inc(txq);
979         }
980
981         txq->head = head;
982 }
983
984 /* Enqueue the given packet for transmit. This routine allocates WRBs for the
985  * packet, dma maps the packet buffers and sets up the WRBs. Returns the number
986  * of WRBs used up by the packet.
987  */
988 static u32 be_xmit_enqueue(struct be_adapter *adapter, struct be_tx_obj *txo,
989                            struct sk_buff *skb,
990                            struct be_wrb_params *wrb_params)
991 {
992         u32 i, copied = 0, wrb_cnt = skb_wrb_cnt(skb);
993         struct device *dev = &adapter->pdev->dev;
994         struct be_queue_info *txq = &txo->q;
995         bool map_single = false;
996         u32 head = txq->head;
997         dma_addr_t busaddr;
998         int len;
999
1000         head = be_tx_get_wrb_hdr(txo);
1001
1002         if (skb->len > skb->data_len) {
1003                 len = skb_headlen(skb);
1004
1005                 busaddr = dma_map_single(dev, skb->data, len, DMA_TO_DEVICE);
1006                 if (dma_mapping_error(dev, busaddr))
1007                         goto dma_err;
1008                 map_single = true;
1009                 be_tx_setup_wrb_frag(txo, busaddr, len);
1010                 copied += len;
1011         }
1012
1013         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1014                 const struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i];
1015                 len = skb_frag_size(frag);
1016
1017                 busaddr = skb_frag_dma_map(dev, frag, 0, len, DMA_TO_DEVICE);
1018                 if (dma_mapping_error(dev, busaddr))
1019                         goto dma_err;
1020                 be_tx_setup_wrb_frag(txo, busaddr, len);
1021                 copied += len;
1022         }
1023
1024         be_tx_setup_wrb_hdr(adapter, txo, wrb_params, skb, head);
1025
1026         be_tx_stats_update(txo, skb);
1027         return wrb_cnt;
1028
1029 dma_err:
1030         adapter->drv_stats.dma_map_errors++;
1031         be_xmit_restore(adapter, txo, head, map_single, copied);
1032         return 0;
1033 }
1034
1035 static inline int qnq_async_evt_rcvd(struct be_adapter *adapter)
1036 {
1037         return adapter->flags & BE_FLAGS_QNQ_ASYNC_EVT_RCVD;
1038 }
1039
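/* Inserts the VLAN tag (and the outer QnQ tag, if any) into the packet data
 * itself instead of relying on HW VLAN insertion. Used by the pvid/QnQ and
 * ipv6 TX workarounds and for packets forwarded to the BMC.
 */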
1040 static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter,
1041                                              struct sk_buff *skb,
1042                                              struct be_wrb_params
1043                                              *wrb_params)
1044 {
1045         u16 vlan_tag = 0;
1046
1047         skb = skb_share_check(skb, GFP_ATOMIC);
1048         if (unlikely(!skb))
1049                 return skb;
1050
1051         if (skb_vlan_tag_present(skb))
1052                 vlan_tag = be_get_tx_vlan_tag(adapter, skb);
1053
1054         if (qnq_async_evt_rcvd(adapter) && adapter->pvid) {
1055                 if (!vlan_tag)
1056                         vlan_tag = adapter->pvid;
1057                 /* f/w workaround: setting skip_hw_vlan = 1 informs the F/W
1058                  * to skip VLAN insertion
1059                  */
1060                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1061         }
1062
1063         if (vlan_tag) {
1064                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1065                                                 vlan_tag);
1066                 if (unlikely(!skb))
1067                         return skb;
1068                 skb->vlan_tci = 0;
1069         }
1070
1071         /* Insert the outer VLAN, if any */
1072         if (adapter->qnq_vid) {
1073                 vlan_tag = adapter->qnq_vid;
1074                 skb = vlan_insert_tag_set_proto(skb, htons(ETH_P_8021Q),
1075                                                 vlan_tag);
1076                 if (unlikely(!skb))
1077                         return skb;
1078                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1079         }
1080
1081         return skb;
1082 }
1083
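/* Returns true for IPv6 packets whose next header is not TCP/UDP and whose
 * option header length field reads 0xff; such packets can trigger the BE3
 * TX stall handled by the workarounds below.
 */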
1084 static bool be_ipv6_exthdr_check(struct sk_buff *skb)
1085 {
1086         struct ethhdr *eh = (struct ethhdr *)skb->data;
1087         u16 offset = ETH_HLEN;
1088
1089         if (eh->h_proto == htons(ETH_P_IPV6)) {
1090                 struct ipv6hdr *ip6h = (struct ipv6hdr *)(skb->data + offset);
1091
1092                 offset += sizeof(struct ipv6hdr);
1093                 if (ip6h->nexthdr != NEXTHDR_TCP &&
1094                     ip6h->nexthdr != NEXTHDR_UDP) {
1095                         struct ipv6_opt_hdr *ehdr =
1096                                 (struct ipv6_opt_hdr *)(skb->data + offset);
1097
1098                         /* offending pkt: 2nd byte following IPv6 hdr is 0xff */
1099                         if (ehdr->hdrlen == 0xff)
1100                                 return true;
1101                 }
1102         }
1103         return false;
1104 }
1105
1106 static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb)
1107 {
1108         return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid;
1109 }
1110
1111 static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb)
1112 {
1113         return BE3_chip(adapter) && be_ipv6_exthdr_check(skb);
1114 }
1115
1116 static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter,
1117                                                   struct sk_buff *skb,
1118                                                   struct be_wrb_params
1119                                                   *wrb_params)
1120 {
1121         struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
1122         unsigned int eth_hdr_len;
1123         struct iphdr *ip;
1124
1125         /* For padded packets, BE HW modifies the tot_len field in the IP
1126          * header incorrectly when a VLAN tag is inserted by HW.
1127          * For padded packets, Lancer computes an incorrect checksum.
1128          */
1129         eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ?
1130                                                 VLAN_ETH_HLEN : ETH_HLEN;
1131         if (skb->len <= 60 &&
1132             (lancer_chip(adapter) || skb_vlan_tag_present(skb)) &&
1133             is_ipv4_pkt(skb)) {
1134                 ip = (struct iphdr *)ip_hdr(skb);
1135                 pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len));
1136         }
1137
1138         /* If vlan tag is already inlined in the packet, skip HW VLAN
1139          * tagging in pvid-tagging mode
1140          */
1141         if (be_pvid_tagging_enabled(adapter) &&
1142             veh->h_vlan_proto == htons(ETH_P_8021Q))
1143                 BE_WRB_F_SET(wrb_params->features, VLAN_SKIP_HW, 1);
1144
1145         /* HW has a bug wherein it will calculate CSUM for VLAN
1146          * pkts even though csum offload is disabled for the pkt.
1147          * Manually insert the VLAN in the pkt.
1148          */
1149         if (skb->ip_summed != CHECKSUM_PARTIAL &&
1150             skb_vlan_tag_present(skb)) {
1151                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1152                 if (unlikely(!skb))
1153                         goto err;
1154         }
1155
1156         /* HW may lockup when VLAN HW tagging is requested on
1157          * certain ipv6 packets. Drop such pkts if the HW workaround to
1158          * skip HW tagging is not enabled by FW.
1159          */
1160         if (unlikely(be_ipv6_tx_stall_chk(adapter, skb) &&
1161                      (adapter->pvid || adapter->qnq_vid) &&
1162                      !qnq_async_evt_rcvd(adapter)))
1163                 goto tx_drop;
1164
1165         /* Manual VLAN tag insertion to prevent:
1166          * ASIC lockup when the ASIC inserts VLAN tag into
1167          * certain ipv6 packets. Insert VLAN tags in driver,
1168          * and set event, completion, vlan bits accordingly
1169          * in the Tx WRB.
1170          */
1171         if (be_ipv6_tx_stall_chk(adapter, skb) &&
1172             be_vlan_tag_tx_chk(adapter, skb)) {
1173                 skb = be_insert_vlan_in_pkt(adapter, skb, wrb_params);
1174                 if (unlikely(!skb))
1175                         goto err;
1176         }
1177
1178         return skb;
1179 tx_drop:
1180         dev_kfree_skb_any(skb);
1181 err:
1182         return NULL;
1183 }
1184
1185 static struct sk_buff *be_xmit_workarounds(struct be_adapter *adapter,
1186                                            struct sk_buff *skb,
1187                                            struct be_wrb_params *wrb_params)
1188 {
1189         int err;
1190
1191         /* Lancer, SH and BE3 in SRIOV mode have a bug wherein
1192          * packets that are 32 bytes or less may cause a transmit stall
1193          * on that port. The workaround is to pad such packets
1194          * (len <= 32 bytes) to a minimum length of 36 bytes.
1195          */
1196         if (skb->len <= 32) {
1197                 if (skb_put_padto(skb, 36))
1198                         return NULL;
1199         }
1200
1201         if (BEx_chip(adapter) || lancer_chip(adapter)) {
1202                 skb = be_lancer_xmit_workarounds(adapter, skb, wrb_params);
1203                 if (!skb)
1204                         return NULL;
1205         }
1206
1207         /* The stack can send us skbs with length greater than
1208          * what the HW can handle. Trim the extra bytes.
1209          */
1210         WARN_ON_ONCE(skb->len > BE_MAX_GSO_SIZE);
1211         err = pskb_trim(skb, BE_MAX_GSO_SIZE);
1212         WARN_ON(err);
1213
1214         return skb;
1215 }
1216
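/* Ring the TX doorbell for all WRBs queued since the last notification.
 * The last request is made eventable so a TX completion is generated, and
 * a dummy WRB is appended when the pending count is odd.
 */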
1217 static void be_xmit_flush(struct be_adapter *adapter, struct be_tx_obj *txo)
1218 {
1219         struct be_queue_info *txq = &txo->q;
1220         struct be_eth_hdr_wrb *hdr = queue_index_node(txq, txo->last_req_hdr);
1221
1222         /* Mark the last request eventable if it hasn't been marked already */
1223         if (!(hdr->dw[2] & cpu_to_le32(TX_HDR_WRB_EVT)))
1224                 hdr->dw[2] |= cpu_to_le32(TX_HDR_WRB_EVT | TX_HDR_WRB_COMPL);
1225
1226         /* compose a dummy wrb if there is an odd number of wrbs to notify */
1227         if (!lancer_chip(adapter) && (txo->pend_wrb_cnt & 1)) {
1228                 wrb_fill_dummy(queue_head_node(txq));
1229                 queue_head_inc(txq);
1230                 atomic_inc(&txq->used);
1231                 txo->pend_wrb_cnt++;
1232                 hdr->dw[2] &= ~cpu_to_le32(TX_HDR_WRB_NUM_MASK <<
1233                                            TX_HDR_WRB_NUM_SHIFT);
1234                 hdr->dw[2] |= cpu_to_le32((txo->last_req_wrb_cnt + 1) <<
1235                                           TX_HDR_WRB_NUM_SHIFT);
1236         }
1237         be_txq_notify(adapter, txo, txo->pend_wrb_cnt);
1238         txo->pend_wrb_cnt = 0;
1239 }
1240
1241 /* OS2BMC related */
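/* On adapters where the BMC (Baseboard Management Controller) shares a port
 * with the host, selected outgoing control traffic (ARP, DHCP, NetBIOS,
 * IPv6 ND/RA, broadcast/multicast) may also have to be delivered to the BMC.
 * The bmc_filt_mask reported by FW selects which classes are forwarded; the
 * helpers below test the individual filter bits.
 */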
1242
1243 #define DHCP_CLIENT_PORT        68
1244 #define DHCP_SERVER_PORT        67
1245 #define NET_BIOS_PORT1          137
1246 #define NET_BIOS_PORT2          138
1247 #define DHCPV6_RAS_PORT         547
1248
1249 #define is_mc_allowed_on_bmc(adapter, eh)       \
1250         (!is_multicast_filt_enabled(adapter) && \
1251          is_multicast_ether_addr(eh->h_dest) && \
1252          !is_broadcast_ether_addr(eh->h_dest))
1253
1254 #define is_bc_allowed_on_bmc(adapter, eh)       \
1255         (!is_broadcast_filt_enabled(adapter) && \
1256          is_broadcast_ether_addr(eh->h_dest))
1257
1258 #define is_arp_allowed_on_bmc(adapter, skb)     \
1259         (is_arp(skb) && is_arp_filt_enabled(adapter))
1260
1261 #define is_broadcast_packet(eh, adapter)        \
1262                 (is_multicast_ether_addr(eh->h_dest) && \
1263                 !compare_ether_addr(eh->h_dest, adapter->netdev->broadcast))
1264
1265 #define is_arp(skb)     (skb->protocol == htons(ETH_P_ARP))
1266
1267 #define is_arp_filt_enabled(adapter)    \
1268                 (adapter->bmc_filt_mask & (BMC_FILT_BROADCAST_ARP))
1269
1270 #define is_dhcp_client_filt_enabled(adapter)    \
1271                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_CLIENT)
1272
1273 #define is_dhcp_srvr_filt_enabled(adapter)      \
1274                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_DHCP_SERVER)
1275
1276 #define is_nbios_filt_enabled(adapter)  \
1277                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST_NET_BIOS)
1278
1279 #define is_ipv6_na_filt_enabled(adapter)        \
1280                 (adapter->bmc_filt_mask &       \
1281                         BMC_FILT_MULTICAST_IPV6_NEIGH_ADVER)
1282
1283 #define is_ipv6_ra_filt_enabled(adapter)        \
1284                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RA)
1285
1286 #define is_ipv6_ras_filt_enabled(adapter)       \
1287                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST_IPV6_RAS)
1288
1289 #define is_broadcast_filt_enabled(adapter)      \
1290                 (adapter->bmc_filt_mask & BMC_FILT_BROADCAST)
1291
1292 #define is_multicast_filt_enabled(adapter)      \
1293                 (adapter->bmc_filt_mask & BMC_FILT_MULTICAST)
1294
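/* Decides, based on the BMC filtering mask, whether the packet must also be
 * delivered to the BMC. Returns true if the caller should enqueue the packet
 * a second time with the mgmt bit set; may re-allocate *skb to inline the
 * VLAN tag.
 */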
1295 static bool be_send_pkt_to_bmc(struct be_adapter *adapter,
1296                                struct sk_buff **skb)
1297 {
1298         struct ethhdr *eh = (struct ethhdr *)(*skb)->data;
1299         bool os2bmc = false;
1300
1301         if (!be_is_os2bmc_enabled(adapter))
1302                 goto done;
1303
1304         if (!is_multicast_ether_addr(eh->h_dest))
1305                 goto done;
1306
1307         if (is_mc_allowed_on_bmc(adapter, eh) ||
1308             is_bc_allowed_on_bmc(adapter, eh) ||
1309             is_arp_allowed_on_bmc(adapter, (*skb))) {
1310                 os2bmc = true;
1311                 goto done;
1312         }
1313
1314         if ((*skb)->protocol == htons(ETH_P_IPV6)) {
1315                 struct ipv6hdr *hdr = ipv6_hdr((*skb));
1316                 u8 nexthdr = hdr->nexthdr;
1317
1318                 if (nexthdr == IPPROTO_ICMPV6) {
1319                         struct icmp6hdr *icmp6 = icmp6_hdr((*skb));
1320
1321                         switch (icmp6->icmp6_type) {
1322                         case NDISC_ROUTER_ADVERTISEMENT:
1323                                 os2bmc = is_ipv6_ra_filt_enabled(adapter);
1324                                 goto done;
1325                         case NDISC_NEIGHBOUR_ADVERTISEMENT:
1326                                 os2bmc = is_ipv6_na_filt_enabled(adapter);
1327                                 goto done;
1328                         default:
1329                                 break;
1330                         }
1331                 }
1332         }
1333
1334         if (is_udp_pkt((*skb))) {
1335                 struct udphdr *udp = udp_hdr((*skb));
1336
1337                 switch (ntohs(udp->dest)) {
1338                 case DHCP_CLIENT_PORT:
1339                         os2bmc = is_dhcp_client_filt_enabled(adapter);
1340                         goto done;
1341                 case DHCP_SERVER_PORT:
1342                         os2bmc = is_dhcp_srvr_filt_enabled(adapter);
1343                         goto done;
1344                 case NET_BIOS_PORT1:
1345                 case NET_BIOS_PORT2:
1346                         os2bmc = is_nbios_filt_enabled(adapter);
1347                         goto done;
1348                 case DHCPV6_RAS_PORT:
1349                         os2bmc = is_ipv6_ras_filt_enabled(adapter);
1350                         goto done;
1351                 default:
1352                         break;
1353                 }
1354         }
1355 done:
1356         /* For VLAN packets destined to the BMC, the ASIC expects
1357          * the VLAN tag to be inline in the packet.
1358          */
1359         if (os2bmc)
1360                 *skb = be_insert_vlan_in_pkt(adapter, *skb, NULL);
1361
1362         return os2bmc;
1363 }
1364
1365 static netdev_tx_t be_xmit(struct sk_buff *skb, struct net_device *netdev)
1366 {
1367         struct be_adapter *adapter = netdev_priv(netdev);
1368         u16 q_idx = skb_get_queue_mapping(skb);
1369         struct be_tx_obj *txo = &adapter->tx_obj[q_idx];
1370         struct be_wrb_params wrb_params = { 0 };
1371         bool flush = !skb->xmit_more;
1372         u16 wrb_cnt;
1373
1374         skb = be_xmit_workarounds(adapter, skb, &wrb_params);
1375         if (unlikely(!skb))
1376                 goto drop;
1377
1378         be_get_wrb_params_from_skb(adapter, skb, &wrb_params);
1379
1380         wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1381         if (unlikely(!wrb_cnt)) {
1382                 dev_kfree_skb_any(skb);
1383                 goto drop;
1384         }
1385
1386         /* If OS2BMC is enabled and the packet is destined to the BMC,
1387          * enqueue the packet a second time with the mgmt bit set.
1388          */
1389         if (be_send_pkt_to_bmc(adapter, &skb)) {
1390                 BE_WRB_F_SET(wrb_params.features, OS2BMC, 1);
1391                 wrb_cnt = be_xmit_enqueue(adapter, txo, skb, &wrb_params);
1392                 if (unlikely(!wrb_cnt))
1393                         goto drop;
1394                 else
1395                         skb_get(skb);
1396         }
1397
1398         if (be_is_txq_full(txo)) {
1399                 netif_stop_subqueue(netdev, q_idx);
1400                 tx_stats(txo)->tx_stops++;
1401         }
1402
1403         if (flush || __netif_subqueue_stopped(netdev, q_idx))
1404                 be_xmit_flush(adapter, txo);
1405
1406         return NETDEV_TX_OK;
1407 drop:
1408         tx_stats(txo)->tx_drv_drops++;
1409         /* Flush the already enqueued tx requests */
1410         if (flush && txo->pend_wrb_cnt)
1411                 be_xmit_flush(adapter, txo);
1412
1413         return NETDEV_TX_OK;
1414 }
1415
1416 static inline bool be_in_all_promisc(struct be_adapter *adapter)
1417 {
1418         return (adapter->if_flags & BE_IF_FLAGS_ALL_PROMISCUOUS) ==
1419                         BE_IF_FLAGS_ALL_PROMISCUOUS;
1420 }
1421
1422 static int be_set_vlan_promisc(struct be_adapter *adapter)
1423 {
1424         struct device *dev = &adapter->pdev->dev;
1425         int status;
1426
1427         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS)
1428                 return 0;
1429
1430         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, ON);
1431         if (!status) {
1432                 dev_info(dev, "Enabled VLAN promiscuous mode\n");
1433                 adapter->if_flags |= BE_IF_FLAGS_VLAN_PROMISCUOUS;
1434         } else {
1435                 dev_err(dev, "Failed to enable VLAN promiscuous mode\n");
1436         }
1437         return status;
1438 }
1439
1440 static int be_clear_vlan_promisc(struct be_adapter *adapter)
1441 {
1442         struct device *dev = &adapter->pdev->dev;
1443         int status;
1444
1445         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_VLAN_PROMISCUOUS, OFF);
1446         if (!status) {
1447                 dev_info(dev, "Disabled VLAN promiscuous mode\n");
1448                 adapter->if_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
1449         }
1450         return status;
1451 }
1452
1453 /*
1454  * A max of 64 (BE_NUM_VLANS_SUPPORTED) vlans can be configured in BE.
1455  * If the user configures more, place BE in vlan promiscuous mode.
1456  */
1457 static int be_vid_config(struct be_adapter *adapter)
1458 {
1459         struct device *dev = &adapter->pdev->dev;
1460         u16 vids[BE_NUM_VLANS_SUPPORTED];
1461         u16 num = 0, i = 0;
1462         int status = 0;
1463
1464         /* No need to change the VLAN state if the I/F is in promiscuous */
1465         if (adapter->netdev->flags & IFF_PROMISC)
1466                 return 0;
1467
1468         if (adapter->vlans_added > be_max_vlans(adapter))
1469                 return be_set_vlan_promisc(adapter);
1470
1471         if (adapter->if_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
1472                 status = be_clear_vlan_promisc(adapter);
1473                 if (status)
1474                         return status;
1475         }
1476         /* Construct VLAN Table to give to HW */
1477         for_each_set_bit(i, adapter->vids, VLAN_N_VID)
1478                 vids[num++] = cpu_to_le16(i);
1479
1480         status = be_cmd_vlan_config(adapter, adapter->if_handle, vids, num, 0);
1481         if (status) {
1482                 dev_err(dev, "Setting HW VLAN filtering failed\n");
1483                 /* Set to VLAN promisc mode as setting VLAN filter failed */
1484                 if (addl_status(status) == MCC_ADDL_STATUS_INSUFFICIENT_VLANS ||
1485                     addl_status(status) ==
1486                                 MCC_ADDL_STATUS_INSUFFICIENT_RESOURCES)
1487                         return be_set_vlan_promisc(adapter);
1488         }
1489         return status;
1490 }
1491
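/* The two ndo VLAN-filter callbacks below track the configured VIDs in
 * adapter->vids and re-program the HW VLAN table via be_vid_config().
 * VID 0 is ignored on Lancer as such packets are always received by default.
 */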
1492 static int be_vlan_add_vid(struct net_device *netdev, __be16 proto, u16 vid)
1493 {
1494         struct be_adapter *adapter = netdev_priv(netdev);
1495         int status = 0;
1496
1497         mutex_lock(&adapter->rx_filter_lock);
1498
1499         /* Packets with VID 0 are always received by Lancer by default */
1500         if (lancer_chip(adapter) && vid == 0)
1501                 goto done;
1502
1503         if (test_bit(vid, adapter->vids))
1504                 goto done;
1505
1506         set_bit(vid, adapter->vids);
1507         adapter->vlans_added++;
1508
1509         status = be_vid_config(adapter);
1510 done:
1511         mutex_unlock(&adapter->rx_filter_lock);
1512         return status;
1513 }
1514
1515 static int be_vlan_rem_vid(struct net_device *netdev, __be16 proto, u16 vid)
1516 {
1517         struct be_adapter *adapter = netdev_priv(netdev);
1518         int status = 0;
1519
1520         mutex_lock(&adapter->rx_filter_lock);
1521
1522         /* Packets with VID 0 are always received by Lancer by default */
1523         if (lancer_chip(adapter) && vid == 0)
1524                 goto done;
1525
1526         if (!test_bit(vid, adapter->vids))
1527                 goto done;
1528
1529         clear_bit(vid, adapter->vids);
1530         adapter->vlans_added--;
1531
1532         status = be_vid_config(adapter);
1533 done:
1534         mutex_unlock(&adapter->rx_filter_lock);
1535         return status;
1536 }
1537
1538 static void be_set_all_promisc(struct be_adapter *adapter)
1539 {
1540         be_cmd_rx_filter(adapter, BE_IF_FLAGS_ALL_PROMISCUOUS, ON);
1541         adapter->if_flags |= BE_IF_FLAGS_ALL_PROMISCUOUS;
1542 }
1543
1544 static void be_set_mc_promisc(struct be_adapter *adapter)
1545 {
1546         int status;
1547
1548         if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS)
1549                 return;
1550
1551         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MCAST_PROMISCUOUS, ON);
1552         if (!status)
1553                 adapter->if_flags |= BE_IF_FLAGS_MCAST_PROMISCUOUS;
1554 }
1555
1556 static void be_set_uc_promisc(struct be_adapter *adapter)
1557 {
1558         int status;
1559
1560         if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS)
1561                 return;
1562
1563         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, ON);
1564         if (!status)
1565                 adapter->if_flags |= BE_IF_FLAGS_PROMISCUOUS;
1566 }
1567
1568 static void be_clear_uc_promisc(struct be_adapter *adapter)
1569 {
1570         int status;
1571
1572         if (!(adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS))
1573                 return;
1574
1575         status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_PROMISCUOUS, OFF);
1576         if (!status)
1577                 adapter->if_flags &= ~BE_IF_FLAGS_PROMISCUOUS;
1578 }
1579
1580 /* The two functions below are the callback args for __dev_uc_sync()/__dev_mc_sync().
1581  * We use a single callback function for both sync and unsync. We do not actually
1582  * add/remove addresses through this callback; we only use it to detect changes
1583  * to the uc/mc lists. The entire uc/mc list is programmed in be_set_rx_mode().
1584  */
1585 static int be_uc_list_update(struct net_device *netdev,
1586                              const unsigned char *addr)
1587 {
1588         struct be_adapter *adapter = netdev_priv(netdev);
1589
1590         adapter->update_uc_list = true;
1591         return 0;
1592 }
1593
1594 static int be_mc_list_update(struct net_device *netdev,
1595                              const unsigned char *addr)
1596 {
1597         struct be_adapter *adapter = netdev_priv(netdev);
1598
1599         adapter->update_mc_list = true;
1600         return 0;
1601 }
1602
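/* Sync the netdev multicast list to the adapter. Multicast promiscuous mode
 * is used when IFF_ALLMULTI is set or the list exceeds be_max_mc(); otherwise
 * the list is cached in adapter->mc_list and the MULTICAST filter is
 * programmed via be_cmd_rx_filter().
 */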
1603 static void be_set_mc_list(struct be_adapter *adapter)
1604 {
1605         struct net_device *netdev = adapter->netdev;
1606         struct netdev_hw_addr *ha;
1607         bool mc_promisc = false;
1608         int status;
1609
1610         netif_addr_lock_bh(netdev);
1611         __dev_mc_sync(netdev, be_mc_list_update, be_mc_list_update);
1612
1613         if (netdev->flags & IFF_PROMISC) {
1614                 adapter->update_mc_list = false;
1615         } else if (netdev->flags & IFF_ALLMULTI ||
1616                    netdev_mc_count(netdev) > be_max_mc(adapter)) {
1617                 /* Enable multicast promisc if num configured exceeds
1618                  * what we support
1619                  */
1620                 mc_promisc = true;
1621                 adapter->update_mc_list = false;
1622         } else if (adapter->if_flags & BE_IF_FLAGS_MCAST_PROMISCUOUS) {
1623                 /* Update mc-list unconditionally if the iface was previously
1624                  * in mc-promisc mode and now is out of that mode.
1625                  */
1626                 adapter->update_mc_list = true;
1627         }
1628
1629         if (adapter->update_mc_list) {
1630                 int i = 0;
1631
1632                 /* cache the mc-list in adapter */
1633                 netdev_for_each_mc_addr(ha, netdev) {
1634                         ether_addr_copy(adapter->mc_list[i].mac, ha->addr);
1635                         i++;
1636                 }
1637                 adapter->mc_count = netdev_mc_count(netdev);
1638         }
1639         netif_addr_unlock_bh(netdev);
1640
1641         if (mc_promisc) {
1642                 be_set_mc_promisc(adapter);
1643         } else if (adapter->update_mc_list) {
1644                 status = be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, ON);
1645                 if (!status)
1646                         adapter->if_flags &= ~BE_IF_FLAGS_MCAST_PROMISCUOUS;
1647                 else
1648                         be_set_mc_promisc(adapter);
1649
1650                 adapter->update_mc_list = false;
1651         }
1652 }
1653
1654 static void be_clear_mc_list(struct be_adapter *adapter)
1655 {
1656         struct net_device *netdev = adapter->netdev;
1657
1658         __dev_mc_unsync(netdev, NULL);
1659         be_cmd_rx_filter(adapter, BE_IF_FLAGS_MULTICAST, OFF);
1660         adapter->mc_count = 0;
1661 }
1662
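/* be_uc_mac_add()/be_uc_mac_del() program/remove a single unicast MAC filter.
 * If the address matches the primary MAC (adapter->dev_mac), the already
 * programmed pmac_id[0] is reused instead of consuming another filter entry.
 */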
1663 static int be_uc_mac_add(struct be_adapter *adapter, int uc_idx)
1664 {
1665         if (ether_addr_equal(adapter->uc_list[uc_idx].mac, adapter->dev_mac)) {
1666                 adapter->pmac_id[uc_idx + 1] = adapter->pmac_id[0];
1667                 return 0;
1668         }
1669
1670         return be_cmd_pmac_add(adapter, adapter->uc_list[uc_idx].mac,
1671                                adapter->if_handle,
1672                                &adapter->pmac_id[uc_idx + 1], 0);
1673 }
1674
1675 static void be_uc_mac_del(struct be_adapter *adapter, int pmac_id)
1676 {
1677         if (pmac_id == adapter->pmac_id[0])
1678                 return;
1679
1680         be_cmd_pmac_del(adapter, adapter->if_handle, pmac_id, 0);
1681 }
1682
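/* Sync the netdev unicast list to the adapter. Unicast promiscuous mode is
 * used when the list exceeds be_max_uc() - 1 entries; otherwise the old MAC
 * filters are deleted and the current list is programmed afresh.
 */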
1683 static void be_set_uc_list(struct be_adapter *adapter)
1684 {
1685         struct net_device *netdev = adapter->netdev;
1686         struct netdev_hw_addr *ha;
1687         bool uc_promisc = false;
1688         int curr_uc_macs = 0, i;
1689
1690         netif_addr_lock_bh(netdev);
1691         __dev_uc_sync(netdev, be_uc_list_update, be_uc_list_update);
1692
1693         if (netdev->flags & IFF_PROMISC) {
1694                 adapter->update_uc_list = false;
1695         } else if (netdev_uc_count(netdev) > (be_max_uc(adapter) - 1)) {
1696                 uc_promisc = true;
1697                 adapter->update_uc_list = false;
1698         } else if (adapter->if_flags & BE_IF_FLAGS_PROMISCUOUS) {
1699                 /* Update uc-list unconditionally if the iface was previously
1700                  * in uc-promisc mode and now is out of that mode.
1701                  */
1702                 adapter->update_uc_list = true;
1703         }
1704
1705         if (adapter->update_uc_list) {
1706                 /* cache the uc-list in adapter array */
1707                 i = 0;
1708                 netdev_for_each_uc_addr(ha, netdev) {
1709                         ether_addr_copy(adapter->uc_list[i].mac, ha->addr);
1710                         i++;
1711                 }
1712                 curr_uc_macs = netdev_uc_count(netdev);
1713         }
1714         netif_addr_unlock_bh(netdev);
1715
1716         if (uc_promisc) {
1717                 be_set_uc_promisc(adapter);
1718         } else if (adapter->update_uc_list) {
1719                 be_clear_uc_promisc(adapter);
1720
1721                 for (i = 0; i < adapter->uc_macs; i++)
1722                         be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1723
1724                 for (i = 0; i < curr_uc_macs; i++)
1725                         be_uc_mac_add(adapter, i);
1726                 adapter->uc_macs = curr_uc_macs;
1727                 adapter->update_uc_list = false;
1728         }
1729 }
1730
1731 static void be_clear_uc_list(struct be_adapter *adapter)
1732 {
1733         struct net_device *netdev = adapter->netdev;
1734         int i;
1735
1736         __dev_uc_unsync(netdev, NULL);
1737         for (i = 0; i < adapter->uc_macs; i++)
1738                 be_uc_mac_del(adapter, adapter->pmac_id[i + 1]);
1739
1740         adapter->uc_macs = 0;
1741 }
1742
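/* Program the RX filters (promiscuous mode, VLAN table, uc/mc lists) to match
 * the current netdev state. rx_filter_lock is held across all filter updates.
 * Invoked from the rx-mode work item (be_work_set_rx_mode) below.
 */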
1743 static void __be_set_rx_mode(struct be_adapter *adapter)
1744 {
1745         struct net_device *netdev = adapter->netdev;
1746
1747         mutex_lock(&adapter->rx_filter_lock);
1748
1749         if (netdev->flags & IFF_PROMISC) {
1750                 if (!be_in_all_promisc(adapter))
1751                         be_set_all_promisc(adapter);
1752         } else if (be_in_all_promisc(adapter)) {
1753                 /* We need to re-program the vlan-list or clear
1754                  * vlan-promisc mode (if needed) when the interface
1755                  * comes out of promisc mode.
1756                  */
1757                 be_vid_config(adapter);
1758         }
1759
1760         be_set_uc_list(adapter);
1761         be_set_mc_list(adapter);
1762
1763         mutex_unlock(&adapter->rx_filter_lock);
1764 }
1765
1766 static void be_work_set_rx_mode(struct work_struct *work)
1767 {
1768         struct be_cmd_work *cmd_work =
1769                                 container_of(work, struct be_cmd_work, work);
1770
1771         __be_set_rx_mode(cmd_work->adapter);
1772         kfree(cmd_work);
1773 }
1774
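/* ndo_set_vf_mac() handler: program a new MAC address for the given VF.
 * On BEx chips the old pmac entry is deleted and a new one is added; on
 * newer chips be_cmd_set_mac() updates the MAC directly.
 */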
1775 static int be_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
1776 {
1777         struct be_adapter *adapter = netdev_priv(netdev);
1778         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1779         int status;
1780
1781         if (!sriov_enabled(adapter))
1782                 return -EPERM;
1783
1784         if (!is_valid_ether_addr(mac) || vf >= adapter->num_vfs)
1785                 return -EINVAL;
1786
1787         /* Proceed further only if the user-provided MAC differs
1788          * from the active MAC
1789          */
1790         if (ether_addr_equal(mac, vf_cfg->mac_addr))
1791                 return 0;
1792
1793         if (BEx_chip(adapter)) {
1794                 be_cmd_pmac_del(adapter, vf_cfg->if_handle, vf_cfg->pmac_id,
1795                                 vf + 1);
1796
1797                 status = be_cmd_pmac_add(adapter, mac, vf_cfg->if_handle,
1798                                          &vf_cfg->pmac_id, vf + 1);
1799         } else {
1800                 status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
1801                                         vf + 1);
1802         }
1803
1804         if (status) {
1805                 dev_err(&adapter->pdev->dev, "MAC %pM set on VF %d failed: %#x\n",
1806                         mac, vf, status);
1807                 return be_cmd_status(status);
1808         }
1809
1810         ether_addr_copy(vf_cfg->mac_addr, mac);
1811
1812         return 0;
1813 }
1814
1815 static int be_get_vf_config(struct net_device *netdev, int vf,
1816                             struct ifla_vf_info *vi)
1817 {
1818         struct be_adapter *adapter = netdev_priv(netdev);
1819         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1820
1821         if (!sriov_enabled(adapter))
1822                 return -EPERM;
1823
1824         if (vf >= adapter->num_vfs)
1825                 return -EINVAL;
1826
1827         vi->vf = vf;
1828         vi->max_tx_rate = vf_cfg->tx_rate;
1829         vi->min_tx_rate = 0;
1830         vi->vlan = vf_cfg->vlan_tag & VLAN_VID_MASK;
1831         vi->qos = vf_cfg->vlan_tag >> VLAN_PRIO_SHIFT;
1832         memcpy(&vi->mac, vf_cfg->mac_addr, ETH_ALEN);
1833         vi->linkstate = adapter->vf_cfg[vf].plink_tracking;
1834         vi->spoofchk = adapter->vf_cfg[vf].spoofchk;
1835
1836         return 0;
1837 }
1838
1839 static int be_set_vf_tvt(struct be_adapter *adapter, int vf, u16 vlan)
1840 {
1841         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1842         u16 vids[BE_NUM_VLANS_SUPPORTED];
1843         int vf_if_id = vf_cfg->if_handle;
1844         int status;
1845
1846         /* Enable Transparent VLAN Tagging */
1847         status = be_cmd_set_hsw_config(adapter, vlan, vf + 1, vf_if_id, 0, 0);
1848         if (status)
1849                 return status;
1850
1851         /* With TVT enabled, clear any pre-programmed VLAN filters on the VF */
1852         vids[0] = 0;
1853         status = be_cmd_vlan_config(adapter, vf_if_id, vids, 1, vf + 1);
1854         if (!status)
1855                 dev_info(&adapter->pdev->dev,
1856                          "Cleared guest VLANs on VF%d", vf);
1857
1858         /* After TVT is enabled, disallow VFs to program VLAN filters */
1859         if (vf_cfg->privileges & BE_PRIV_FILTMGMT) {
1860                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges &
1861                                                   ~BE_PRIV_FILTMGMT, vf + 1);
1862                 if (!status)
1863                         vf_cfg->privileges &= ~BE_PRIV_FILTMGMT;
1864         }
1865         return 0;
1866 }
1867
1868 static int be_clear_vf_tvt(struct be_adapter *adapter, int vf)
1869 {
1870         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1871         struct device *dev = &adapter->pdev->dev;
1872         int status;
1873
1874         /* Reset Transparent VLAN Tagging. */
1875         status = be_cmd_set_hsw_config(adapter, BE_RESET_VLAN_TAG_ID, vf + 1,
1876                                        vf_cfg->if_handle, 0, 0);
1877         if (status)
1878                 return status;
1879
1880         /* Allow VFs to program VLAN filtering */
1881         if (!(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
1882                 status = be_cmd_set_fn_privileges(adapter, vf_cfg->privileges |
1883                                                   BE_PRIV_FILTMGMT, vf + 1);
1884                 if (!status) {
1885                         vf_cfg->privileges |= BE_PRIV_FILTMGMT;
1886                         dev_info(dev, "VF%d: FILTMGMT priv enabled", vf);
1887                 }
1888         }
1889
1890         dev_info(dev,
1891                  "Disable/re-enable i/f in VM to clear Transparent VLAN tag");
1892         return 0;
1893 }
1894
1895 static int be_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
1896                           __be16 vlan_proto)
1897 {
1898         struct be_adapter *adapter = netdev_priv(netdev);
1899         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
1900         int status;
1901
1902         if (!sriov_enabled(adapter))
1903                 return -EPERM;
1904
1905         if (vf >= adapter->num_vfs || vlan > 4095 || qos > 7)
1906                 return -EINVAL;
1907
1908         if (vlan_proto != htons(ETH_P_8021Q))
1909                 return -EPROTONOSUPPORT;
1910
1911         if (vlan || qos) {
1912                 vlan |= qos << VLAN_PRIO_SHIFT;
1913                 status = be_set_vf_tvt(adapter, vf, vlan);
1914         } else {
1915                 status = be_clear_vf_tvt(adapter, vf);
1916         }
1917
1918         if (status) {
1919                 dev_err(&adapter->pdev->dev,
1920                         "VLAN %d config on VF %d failed : %#x\n", vlan, vf,
1921                         status);
1922                 return be_cmd_status(status);
1923         }
1924
1925         vf_cfg->vlan_tag = vlan;
1926         return 0;
1927 }
1928
1929 static int be_set_vf_tx_rate(struct net_device *netdev, int vf,
1930                              int min_tx_rate, int max_tx_rate)
1931 {
1932         struct be_adapter *adapter = netdev_priv(netdev);
1933         struct device *dev = &adapter->pdev->dev;
1934         int percent_rate, status = 0;
1935         u16 link_speed = 0;
1936         u8 link_status;
1937
1938         if (!sriov_enabled(adapter))
1939                 return -EPERM;
1940
1941         if (vf >= adapter->num_vfs)
1942                 return -EINVAL;
1943
1944         if (min_tx_rate)
1945                 return -EINVAL;
1946
1947         if (!max_tx_rate)
1948                 goto config_qos;
1949
1950         status = be_cmd_link_status_query(adapter, &link_speed,
1951                                           &link_status, 0);
1952         if (status)
1953                 goto err;
1954
1955         if (!link_status) {
1956                 dev_err(dev, "TX-rate setting not allowed when link is down\n");
1957                 status = -ENETDOWN;
1958                 goto err;
1959         }
1960
1961         if (max_tx_rate < 100 || max_tx_rate > link_speed) {
1962                 dev_err(dev, "TX-rate must be between 100 and %d Mbps\n",
1963                         link_speed);
1964                 status = -EINVAL;
1965                 goto err;
1966         }
1967
1968         /* On Skyhawk the QOS setting must be done only as a % value */
1969         percent_rate = link_speed / 100;
1970         if (skyhawk_chip(adapter) && (max_tx_rate % percent_rate)) {
1971                 dev_err(dev, "TX-rate must be a multiple of %d Mbps\n",
1972                         percent_rate);
1973                 status = -EINVAL;
1974                 goto err;
1975         }
1976
1977 config_qos:
1978         status = be_cmd_config_qos(adapter, max_tx_rate, link_speed, vf + 1);
1979         if (status)
1980                 goto err;
1981
1982         adapter->vf_cfg[vf].tx_rate = max_tx_rate;
1983         return 0;
1984
1985 err:
1986         dev_err(dev, "TX-rate setting of %dMbps on VF%d failed\n",
1987                 max_tx_rate, vf);
1988         return be_cmd_status(status);
1989 }
1990
1991 static int be_set_vf_link_state(struct net_device *netdev, int vf,
1992                                 int link_state)
1993 {
1994         struct be_adapter *adapter = netdev_priv(netdev);
1995         int status;
1996
1997         if (!sriov_enabled(adapter))
1998                 return -EPERM;
1999
2000         if (vf >= adapter->num_vfs)
2001                 return -EINVAL;
2002
2003         status = be_cmd_set_logical_link_config(adapter, link_state, vf+1);
2004         if (status) {
2005                 dev_err(&adapter->pdev->dev,
2006                         "Link state change on VF %d failed: %#x\n", vf, status);
2007                 return be_cmd_status(status);
2008         }
2009
2010         adapter->vf_cfg[vf].plink_tracking = link_state;
2011
2012         return 0;
2013 }
2014
2015 static int be_set_vf_spoofchk(struct net_device *netdev, int vf, bool enable)
2016 {
2017         struct be_adapter *adapter = netdev_priv(netdev);
2018         struct be_vf_cfg *vf_cfg = &adapter->vf_cfg[vf];
2019         u8 spoofchk;
2020         int status;
2021
2022         if (!sriov_enabled(adapter))
2023                 return -EPERM;
2024
2025         if (vf >= adapter->num_vfs)
2026                 return -EINVAL;
2027
2028         if (BEx_chip(adapter))
2029                 return -EOPNOTSUPP;
2030
2031         if (enable == vf_cfg->spoofchk)
2032                 return 0;
2033
2034         spoofchk = enable ? ENABLE_MAC_SPOOFCHK : DISABLE_MAC_SPOOFCHK;
2035
2036         status = be_cmd_set_hsw_config(adapter, 0, vf + 1, vf_cfg->if_handle,
2037                                        0, spoofchk);
2038         if (status) {
2039                 dev_err(&adapter->pdev->dev,
2040                         "Spoofchk change on VF %d failed: %#x\n", vf, status);
2041                 return be_cmd_status(status);
2042         }
2043
2044         vf_cfg->spoofchk = enable;
2045         return 0;
2046 }
2047
2048 static void be_aic_update(struct be_aic_obj *aic, u64 rx_pkts, u64 tx_pkts,
2049                           ulong now)
2050 {
2051         aic->rx_pkts_prev = rx_pkts;
2052         aic->tx_reqs_prev = tx_pkts;
2053         aic->jiffies = now;
2054 }
2055
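/* Compute a new EQ delay (interrupt coalescing) value for the EQ from the
 * combined RX + TX packet rate seen since the last update:
 *   eqd = (pps / 15000) << 2; values below 8 (roughly under 30K pkts/s) are
 *   zeroed, and the result is clamped to [aic->min_eqd, aic->max_eqd].
 * For example, ~600K pkts/s gives (600000 / 15000) << 2 = 160 before clamping.
 * When adaptive mode is off, the statically configured aic->et_eqd is
 * returned instead.
 */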
2056 static int be_get_new_eqd(struct be_eq_obj *eqo)
2057 {
2058         struct be_adapter *adapter = eqo->adapter;
2059         int eqd, start;
2060         struct be_aic_obj *aic;
2061         struct be_rx_obj *rxo;
2062         struct be_tx_obj *txo;
2063         u64 rx_pkts = 0, tx_pkts = 0;
2064         ulong now;
2065         u32 pps, delta;
2066         int i;
2067
2068         aic = &adapter->aic_obj[eqo->idx];
2069         if (!aic->enable) {
2070                 if (aic->jiffies)
2071                         aic->jiffies = 0;
2072                 eqd = aic->et_eqd;
2073                 return eqd;
2074         }
2075
2076         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
2077                 do {
2078                         start = u64_stats_fetch_begin_irq(&rxo->stats.sync);
2079                         rx_pkts += rxo->stats.rx_pkts;
2080                 } while (u64_stats_fetch_retry_irq(&rxo->stats.sync, start));
2081         }
2082
2083         for_all_tx_queues_on_eq(adapter, eqo, txo, i) {
2084                 do {
2085                         start = u64_stats_fetch_begin_irq(&txo->stats.sync);
2086                         tx_pkts += txo->stats.tx_reqs;
2087                 } while (u64_stats_fetch_retry_irq(&txo->stats.sync, start));
2088         }
2089
2090         /* Skip if jiffies or the packet counters wrapped around, or on the first calculation */
2091         now = jiffies;
2092         if (!aic->jiffies || time_before(now, aic->jiffies) ||
2093             rx_pkts < aic->rx_pkts_prev ||
2094             tx_pkts < aic->tx_reqs_prev) {
2095                 be_aic_update(aic, rx_pkts, tx_pkts, now);
2096                 return aic->prev_eqd;
2097         }
2098
2099         delta = jiffies_to_msecs(now - aic->jiffies);
2100         if (delta == 0)
2101                 return aic->prev_eqd;
2102
2103         pps = (((u32)(rx_pkts - aic->rx_pkts_prev) * 1000) / delta) +
2104                 (((u32)(tx_pkts - aic->tx_reqs_prev) * 1000) / delta);
2105         eqd = (pps / 15000) << 2;
2106
2107         if (eqd < 8)
2108                 eqd = 0;
2109         eqd = min_t(u32, eqd, aic->max_eqd);
2110         eqd = max_t(u32, eqd, aic->min_eqd);
2111
2112         be_aic_update(aic, rx_pkts, tx_pkts, now);
2113
2114         return eqd;
2115 }
2116
2117 /* For Skyhawk-R only */
2118 static u32 be_get_eq_delay_mult_enc(struct be_eq_obj *eqo)
2119 {
2120         struct be_adapter *adapter = eqo->adapter;
2121         struct be_aic_obj *aic = &adapter->aic_obj[eqo->idx];
2122         ulong now = jiffies;
2123         int eqd;
2124         u32 mult_enc;
2125
2126         if (!aic->enable)
2127                 return 0;
2128
2129         if (jiffies_to_msecs(now - aic->jiffies) < 1)
2130                 eqd = aic->prev_eqd;
2131         else
2132                 eqd = be_get_new_eqd(eqo);
2133
2134         if (eqd > 100)
2135                 mult_enc = R2I_DLY_ENC_1;
2136         else if (eqd > 60)
2137                 mult_enc = R2I_DLY_ENC_2;
2138         else if (eqd > 20)
2139                 mult_enc = R2I_DLY_ENC_3;
2140         else
2141                 mult_enc = R2I_DLY_ENC_0;
2142
2143         aic->prev_eqd = eqd;
2144
2145         return mult_enc;
2146 }
2147
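/* Recompute the EQ delay for every event queue and issue a single
 * be_cmd_modify_eqd() for the queues whose value changed (or for all of them
 * when force_update is set). The value sent to FW is a delay multiplier,
 * (eqd * 65) / 100.
 */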
2148 void be_eqd_update(struct be_adapter *adapter, bool force_update)
2149 {
2150         struct be_set_eqd set_eqd[MAX_EVT_QS];
2151         struct be_aic_obj *aic;
2152         struct be_eq_obj *eqo;
2153         int i, num = 0, eqd;
2154
2155         for_all_evt_queues(adapter, eqo, i) {
2156                 aic = &adapter->aic_obj[eqo->idx];
2157                 eqd = be_get_new_eqd(eqo);
2158                 if (force_update || eqd != aic->prev_eqd) {
2159                         set_eqd[num].delay_multiplier = (eqd * 65)/100;
2160                         set_eqd[num].eq_id = eqo->q.id;
2161                         aic->prev_eqd = eqd;
2162                         num++;
2163                 }
2164         }
2165
2166         if (num)
2167                 be_cmd_modify_eqd(adapter, set_eqd, num);
2168 }
2169
2170 static void be_rx_stats_update(struct be_rx_obj *rxo,
2171                                struct be_rx_compl_info *rxcp)
2172 {
2173         struct be_rx_stats *stats = rx_stats(rxo);
2174
2175         u64_stats_update_begin(&stats->sync);
2176         stats->rx_compl++;
2177         stats->rx_bytes += rxcp->pkt_size;
2178         stats->rx_pkts++;
2179         if (rxcp->tunneled)
2180                 stats->rx_vxlan_offload_pkts++;
2181         if (rxcp->pkt_type == BE_MULTICAST_PACKET)
2182                 stats->rx_mcast_pkts++;
2183         if (rxcp->err)
2184                 stats->rx_compl_err++;
2185         u64_stats_update_end(&stats->sync);
2186 }
2187
2188 static inline bool csum_passed(struct be_rx_compl_info *rxcp)
2189 {
2190         /* L4 checksum is not reliable for non-TCP/UDP packets.
2191          * Also ignore ipcksm for IPv6 packets.
2192          */
2193         return (rxcp->tcpf || rxcp->udpf) && rxcp->l4_csum &&
2194                 (rxcp->ip_csum || rxcp->ipv6) && !rxcp->err;
2195 }
2196
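/* Pop the page-info entry at the RXQ tail and release its DMA mapping:
 * the whole page is unmapped only on its last fragment; otherwise just this
 * fragment is synced for CPU access.
 */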
2197 static struct be_rx_page_info *get_rx_page_info(struct be_rx_obj *rxo)
2198 {
2199         struct be_adapter *adapter = rxo->adapter;
2200         struct be_rx_page_info *rx_page_info;
2201         struct be_queue_info *rxq = &rxo->q;
2202         u32 frag_idx = rxq->tail;
2203
2204         rx_page_info = &rxo->page_info_tbl[frag_idx];
2205         BUG_ON(!rx_page_info->page);
2206
2207         if (rx_page_info->last_frag) {
2208                 dma_unmap_page(&adapter->pdev->dev,
2209                                dma_unmap_addr(rx_page_info, bus),
2210                                adapter->big_page_size, DMA_FROM_DEVICE);
2211                 rx_page_info->last_frag = false;
2212         } else {
2213                 dma_sync_single_for_cpu(&adapter->pdev->dev,
2214                                         dma_unmap_addr(rx_page_info, bus),
2215                                         rx_frag_size, DMA_FROM_DEVICE);
2216         }
2217
2218         queue_tail_inc(rxq);
2219         atomic_dec(&rxq->used);
2220         return rx_page_info;
2221 }
2222
2223 /* Throw away the data in the Rx completion */
2224 static void be_rx_compl_discard(struct be_rx_obj *rxo,
2225                                 struct be_rx_compl_info *rxcp)
2226 {
2227         struct be_rx_page_info *page_info;
2228         u16 i, num_rcvd = rxcp->num_rcvd;
2229
2230         for (i = 0; i < num_rcvd; i++) {
2231                 page_info = get_rx_page_info(rxo);
2232                 put_page(page_info->page);
2233                 memset(page_info, 0, sizeof(*page_info));
2234         }
2235 }
2236
2237 /*
2238  * skb_fill_rx_data forms a complete skb for the Ethernet frame
2239  * indicated by rxcp.
2240  */
2241 static void skb_fill_rx_data(struct be_rx_obj *rxo, struct sk_buff *skb,
2242                              struct be_rx_compl_info *rxcp)
2243 {
2244         struct be_rx_page_info *page_info;
2245         u16 i, j;
2246         u16 hdr_len, curr_frag_len, remaining;
2247         u8 *start;
2248
2249         page_info = get_rx_page_info(rxo);
2250         start = page_address(page_info->page) + page_info->page_offset;
2251         prefetch(start);
2252
2253         /* Copy data in the first descriptor of this completion */
2254         curr_frag_len = min(rxcp->pkt_size, rx_frag_size);
2255
2256         skb->len = curr_frag_len;
2257         if (curr_frag_len <= BE_HDR_LEN) { /* tiny packet */
2258                 memcpy(skb->data, start, curr_frag_len);
2259                 /* Complete packet has now been moved to data */
2260                 put_page(page_info->page);
2261                 skb->data_len = 0;
2262                 skb->tail += curr_frag_len;
2263         } else {
2264                 hdr_len = ETH_HLEN;
2265                 memcpy(skb->data, start, hdr_len);
2266                 skb_shinfo(skb)->nr_frags = 1;
2267                 skb_frag_set_page(skb, 0, page_info->page);
2268                 skb_shinfo(skb)->frags[0].page_offset =
2269                                         page_info->page_offset + hdr_len;
2270                 skb_frag_size_set(&skb_shinfo(skb)->frags[0],
2271                                   curr_frag_len - hdr_len);
2272                 skb->data_len = curr_frag_len - hdr_len;
2273                 skb->truesize += rx_frag_size;
2274                 skb->tail += hdr_len;
2275         }
2276         page_info->page = NULL;
2277
2278         if (rxcp->pkt_size <= rx_frag_size) {
2279                 BUG_ON(rxcp->num_rcvd != 1);
2280                 return;
2281         }
2282
2283         /* More frags present for this completion */
2284         remaining = rxcp->pkt_size - curr_frag_len;
2285         for (i = 1, j = 0; i < rxcp->num_rcvd; i++) {
2286                 page_info = get_rx_page_info(rxo);
2287                 curr_frag_len = min(remaining, rx_frag_size);
2288
2289                 /* Coalesce all frags from the same physical page in one slot */
2290                 if (page_info->page_offset == 0) {
2291                         /* Fresh page */
2292                         j++;
2293                         skb_frag_set_page(skb, j, page_info->page);
2294                         skb_shinfo(skb)->frags[j].page_offset =
2295                                                         page_info->page_offset;
2296                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2297                         skb_shinfo(skb)->nr_frags++;
2298                 } else {
2299                         put_page(page_info->page);
2300                 }
2301
2302                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2303                 skb->len += curr_frag_len;
2304                 skb->data_len += curr_frag_len;
2305                 skb->truesize += rx_frag_size;
2306                 remaining -= curr_frag_len;
2307                 page_info->page = NULL;
2308         }
2309         BUG_ON(j > MAX_SKB_FRAGS);
2310 }
2311
2312 /* Process the RX completion indicated by rxcp when GRO is disabled */
2313 static void be_rx_compl_process(struct be_rx_obj *rxo, struct napi_struct *napi,
2314                                 struct be_rx_compl_info *rxcp)
2315 {
2316         struct be_adapter *adapter = rxo->adapter;
2317         struct net_device *netdev = adapter->netdev;
2318         struct sk_buff *skb;
2319
2320         skb = netdev_alloc_skb_ip_align(netdev, BE_RX_SKB_ALLOC_SIZE);
2321         if (unlikely(!skb)) {
2322                 rx_stats(rxo)->rx_drops_no_skbs++;
2323                 be_rx_compl_discard(rxo, rxcp);
2324                 return;
2325         }
2326
2327         skb_fill_rx_data(rxo, skb, rxcp);
2328
2329         if (likely((netdev->features & NETIF_F_RXCSUM) && csum_passed(rxcp)))
2330                 skb->ip_summed = CHECKSUM_UNNECESSARY;
2331         else
2332                 skb_checksum_none_assert(skb);
2333
2334         skb->protocol = eth_type_trans(skb, netdev);
2335         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2336         if (netdev->features & NETIF_F_RXHASH)
2337                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2338
2339         skb->csum_level = rxcp->tunneled;
2340         skb_mark_napi_id(skb, napi);
2341
2342         if (rxcp->vlanf)
2343                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2344
2345         netif_receive_skb(skb);
2346 }
2347
2348 /* Process the RX completion indicated by rxcp when GRO is enabled */
2349 static void be_rx_compl_process_gro(struct be_rx_obj *rxo,
2350                                     struct napi_struct *napi,
2351                                     struct be_rx_compl_info *rxcp)
2352 {
2353         struct be_adapter *adapter = rxo->adapter;
2354         struct be_rx_page_info *page_info;
2355         struct sk_buff *skb = NULL;
2356         u16 remaining, curr_frag_len;
2357         u16 i, j;
2358
2359         skb = napi_get_frags(napi);
2360         if (!skb) {
2361                 be_rx_compl_discard(rxo, rxcp);
2362                 return;
2363         }
2364
2365         remaining = rxcp->pkt_size;
2366         for (i = 0, j = -1; i < rxcp->num_rcvd; i++) {
2367                 page_info = get_rx_page_info(rxo);
2368
2369                 curr_frag_len = min(remaining, rx_frag_size);
2370
2371                 /* Coalesce all frags from the same physical page in one slot */
2372                 if (i == 0 || page_info->page_offset == 0) {
2373                         /* First frag or Fresh page */
2374                         j++;
2375                         skb_frag_set_page(skb, j, page_info->page);
2376                         skb_shinfo(skb)->frags[j].page_offset =
2377                                                         page_info->page_offset;
2378                         skb_frag_size_set(&skb_shinfo(skb)->frags[j], 0);
2379                 } else {
2380                         put_page(page_info->page);
2381                 }
2382                 skb_frag_size_add(&skb_shinfo(skb)->frags[j], curr_frag_len);
2383                 skb->truesize += rx_frag_size;
2384                 remaining -= curr_frag_len;
2385                 memset(page_info, 0, sizeof(*page_info));
2386         }
2387         BUG_ON(j > MAX_SKB_FRAGS);
2388
2389         skb_shinfo(skb)->nr_frags = j + 1;
2390         skb->len = rxcp->pkt_size;
2391         skb->data_len = rxcp->pkt_size;
2392         skb->ip_summed = CHECKSUM_UNNECESSARY;
2393         skb_record_rx_queue(skb, rxo - &adapter->rx_obj[0]);
2394         if (adapter->netdev->features & NETIF_F_RXHASH)
2395                 skb_set_hash(skb, rxcp->rss_hash, PKT_HASH_TYPE_L3);
2396
2397         skb->csum_level = rxcp->tunneled;
2398
2399         if (rxcp->vlanf)
2400                 __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), rxcp->vlan_tag);
2401
2402         napi_gro_frags(napi);
2403 }
2404
2405 static void be_parse_rx_compl_v1(struct be_eth_rx_compl *compl,
2406                                  struct be_rx_compl_info *rxcp)
2407 {
2408         rxcp->pkt_size = GET_RX_COMPL_V1_BITS(pktsize, compl);
2409         rxcp->vlanf = GET_RX_COMPL_V1_BITS(vtp, compl);
2410         rxcp->err = GET_RX_COMPL_V1_BITS(err, compl);
2411         rxcp->tcpf = GET_RX_COMPL_V1_BITS(tcpf, compl);
2412         rxcp->udpf = GET_RX_COMPL_V1_BITS(udpf, compl);
2413         rxcp->ip_csum = GET_RX_COMPL_V1_BITS(ipcksm, compl);
2414         rxcp->l4_csum = GET_RX_COMPL_V1_BITS(l4_cksm, compl);
2415         rxcp->ipv6 = GET_RX_COMPL_V1_BITS(ip_version, compl);
2416         rxcp->num_rcvd = GET_RX_COMPL_V1_BITS(numfrags, compl);
2417         rxcp->pkt_type = GET_RX_COMPL_V1_BITS(cast_enc, compl);
2418         rxcp->rss_hash = GET_RX_COMPL_V1_BITS(rsshash, compl);
2419         if (rxcp->vlanf) {
2420                 rxcp->qnq = GET_RX_COMPL_V1_BITS(qnq, compl);
2421                 rxcp->vlan_tag = GET_RX_COMPL_V1_BITS(vlan_tag, compl);
2422         }
2423         rxcp->port = GET_RX_COMPL_V1_BITS(port, compl);
2424         rxcp->tunneled =
2425                 GET_RX_COMPL_V1_BITS(tunneled, compl);
2426 }
2427
2428 static void be_parse_rx_compl_v0(struct be_eth_rx_compl *compl,
2429                                  struct be_rx_compl_info *rxcp)
2430 {
2431         rxcp->pkt_size = GET_RX_COMPL_V0_BITS(pktsize, compl);
2432         rxcp->vlanf = GET_RX_COMPL_V0_BITS(vtp, compl);
2433         rxcp->err = GET_RX_COMPL_V0_BITS(err, compl);
2434         rxcp->tcpf = GET_RX_COMPL_V0_BITS(tcpf, compl);
2435         rxcp->udpf = GET_RX_COMPL_V0_BITS(udpf, compl);
2436         rxcp->ip_csum = GET_RX_COMPL_V0_BITS(ipcksm, compl);
2437         rxcp->l4_csum = GET_RX_COMPL_V0_BITS(l4_cksm, compl);
2438         rxcp->ipv6 = GET_RX_COMPL_V0_BITS(ip_version, compl);
2439         rxcp->num_rcvd = GET_RX_COMPL_V0_BITS(numfrags, compl);
2440         rxcp->pkt_type = GET_RX_COMPL_V0_BITS(cast_enc, compl);
2441         rxcp->rss_hash = GET_RX_COMPL_V0_BITS(rsshash, compl);
2442         if (rxcp->vlanf) {
2443                 rxcp->qnq = GET_RX_COMPL_V0_BITS(qnq, compl);
2444                 rxcp->vlan_tag = GET_RX_COMPL_V0_BITS(vlan_tag, compl);
2445         }
2446         rxcp->port = GET_RX_COMPL_V0_BITS(port, compl);
2447         rxcp->ip_frag = GET_RX_COMPL_V0_BITS(ip_frag, compl);
2448 }
2449
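/* Return the next valid RX completion from the RX CQ, or NULL if none is
 * pending. The completion is converted to CPU endianness, parsed into
 * rxo->rxcp (v1 or v0 layout depending on be3_native) and its valid bit is
 * cleared so it is not processed twice.
 */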
2450 static struct be_rx_compl_info *be_rx_compl_get(struct be_rx_obj *rxo)
2451 {
2452         struct be_eth_rx_compl *compl = queue_tail_node(&rxo->cq);
2453         struct be_rx_compl_info *rxcp = &rxo->rxcp;
2454         struct be_adapter *adapter = rxo->adapter;
2455
2456         /* For checking the valid bit it is OK to use either definition, as the
2457          * valid bit is at the same position in both v0 and v1 Rx compls */
2458         if (compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] == 0)
2459                 return NULL;
2460
2461         rmb();
2462         be_dws_le_to_cpu(compl, sizeof(*compl));
2463
2464         if (adapter->be3_native)
2465                 be_parse_rx_compl_v1(compl, rxcp);
2466         else
2467                 be_parse_rx_compl_v0(compl, rxcp);
2468
2469         if (rxcp->ip_frag)
2470                 rxcp->l4_csum = 0;
2471
2472         if (rxcp->vlanf) {
2473                 /* In QNQ modes, if qnq bit is not set, then the packet was
2474                  * tagged only with the transparent outer vlan-tag and must
2475                  * not be treated as a vlan packet by host
2476                  */
2477                 if (be_is_qnq_mode(adapter) && !rxcp->qnq)
2478                         rxcp->vlanf = 0;
2479
2480                 if (!lancer_chip(adapter))
2481                         rxcp->vlan_tag = swab16(rxcp->vlan_tag);
2482
2483                 if (adapter->pvid == (rxcp->vlan_tag & VLAN_VID_MASK) &&
2484                     !test_bit(rxcp->vlan_tag, adapter->vids))
2485                         rxcp->vlanf = 0;
2486         }
2487
2488         /* As the compl has been parsed, reset it; we won't touch it again */
2489         compl->dw[offsetof(struct amap_eth_rx_compl_v1, valid) / 32] = 0;
2490
2491         queue_tail_inc(&rxo->cq);
2492         return rxcp;
2493 }
2494
2495 static inline struct page *be_alloc_pages(u32 size, gfp_t gfp)
2496 {
2497         u32 order = get_order(size);
2498
2499         if (order > 0)
2500                 gfp |= __GFP_COMP;
2501         return  alloc_pages(gfp, order);
2502 }
2503
2504 /*
2505  * Allocate a page, split it into fragments of size rx_frag_size and post them
2506  * as receive buffers to BE
2507  */
2508 static void be_post_rx_frags(struct be_rx_obj *rxo, gfp_t gfp, u32 frags_needed)
2509 {
2510         struct be_adapter *adapter = rxo->adapter;
2511         struct be_rx_page_info *page_info = NULL, *prev_page_info = NULL;
2512         struct be_queue_info *rxq = &rxo->q;
2513         struct page *pagep = NULL;
2514         struct device *dev = &adapter->pdev->dev;
2515         struct be_eth_rx_d *rxd;
2516         u64 page_dmaaddr = 0, frag_dmaaddr;
2517         u32 posted, page_offset = 0, notify = 0;
2518
2519         page_info = &rxo->page_info_tbl[rxq->head];
2520         for (posted = 0; posted < frags_needed && !page_info->page; posted++) {
2521                 if (!pagep) {
2522                         pagep = be_alloc_pages(adapter->big_page_size, gfp);
2523                         if (unlikely(!pagep)) {
2524                                 rx_stats(rxo)->rx_post_fail++;
2525                                 break;
2526                         }
2527                         page_dmaaddr = dma_map_page(dev, pagep, 0,
2528                                                     adapter->big_page_size,
2529                                                     DMA_FROM_DEVICE);
2530                         if (dma_mapping_error(dev, page_dmaaddr)) {
2531                                 put_page(pagep);
2532                                 pagep = NULL;
2533                                 adapter->drv_stats.dma_map_errors++;
2534                                 break;
2535                         }
2536                         page_offset = 0;
2537                 } else {
2538                         get_page(pagep);
2539                         page_offset += rx_frag_size;
2540                 }
2541                 page_info->page_offset = page_offset;
2542                 page_info->page = pagep;
2543
2544                 rxd = queue_head_node(rxq);
2545                 frag_dmaaddr = page_dmaaddr + page_info->page_offset;
2546                 rxd->fragpa_lo = cpu_to_le32(frag_dmaaddr & 0xFFFFFFFF);
2547                 rxd->fragpa_hi = cpu_to_le32(upper_32_bits(frag_dmaaddr));
2548
2549                 /* Any space left in the current big page for another frag? */
2550                 if ((page_offset + rx_frag_size + rx_frag_size) >
2551                                         adapter->big_page_size) {
2552                         pagep = NULL;
2553                         page_info->last_frag = true;
2554                         dma_unmap_addr_set(page_info, bus, page_dmaaddr);
2555                 } else {
2556                         dma_unmap_addr_set(page_info, bus, frag_dmaaddr);
2557                 }
2558
2559                 prev_page_info = page_info;
2560                 queue_head_inc(rxq);
2561                 page_info = &rxo->page_info_tbl[rxq->head];
2562         }
2563
2564         /* Mark the last frag of a page when we break out of the above loop
2565          * with no more slots available in the RXQ
2566          */
2567         if (pagep) {
2568                 prev_page_info->last_frag = true;
2569                 dma_unmap_addr_set(prev_page_info, bus, page_dmaaddr);
2570         }
2571
2572         if (posted) {
2573                 atomic_add(posted, &rxq->used);
2574                 if (rxo->rx_post_starved)
2575                         rxo->rx_post_starved = false;
2576                 do {
2577                         notify = min(MAX_NUM_POST_ERX_DB, posted);
2578                         be_rxq_notify(adapter, rxq->id, notify);
2579                         posted -= notify;
2580                 } while (posted);
2581         } else if (atomic_read(&rxq->used) == 0) {
2582                 /* Let be_worker replenish when memory is available */
2583                 rxo->rx_post_starved = true;
2584         }
2585 }
2586
2587 static struct be_tx_compl_info *be_tx_compl_get(struct be_tx_obj *txo)
2588 {
2589         struct be_queue_info *tx_cq = &txo->cq;
2590         struct be_tx_compl_info *txcp = &txo->txcp;
2591         struct be_eth_tx_compl *compl = queue_tail_node(tx_cq);
2592
2593         if (compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] == 0)
2594                 return NULL;
2595
2596         /* Ensure load ordering of valid bit dword and other dwords below */
2597         rmb();
2598         be_dws_le_to_cpu(compl, sizeof(*compl));
2599
2600         txcp->status = GET_TX_COMPL_BITS(status, compl);
2601         txcp->end_index = GET_TX_COMPL_BITS(wrb_index, compl);
2602
2603         compl->dw[offsetof(struct amap_eth_tx_compl, valid) / 32] = 0;
2604         queue_tail_inc(tx_cq);
2605         return txcp;
2606 }
2607
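/* Walk the TX queue from its tail up to last_index, unmapping each WRB and
 * freeing the completed skbs. Returns the number of WRBs processed so the
 * caller can decrement the queue's used count.
 */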
2608 static u16 be_tx_compl_process(struct be_adapter *adapter,
2609                                struct be_tx_obj *txo, u16 last_index)
2610 {
2611         struct sk_buff **sent_skbs = txo->sent_skb_list;
2612         struct be_queue_info *txq = &txo->q;
2613         struct sk_buff *skb = NULL;
2614         bool unmap_skb_hdr = false;
2615         struct be_eth_wrb *wrb;
2616         u16 num_wrbs = 0;
2617         u32 frag_index;
2618
2619         do {
2620                 if (sent_skbs[txq->tail]) {
2621                         /* Free skb from prev req */
2622                         if (skb)
2623                                 dev_consume_skb_any(skb);
2624                         skb = sent_skbs[txq->tail];
2625                         sent_skbs[txq->tail] = NULL;
2626                         queue_tail_inc(txq);  /* skip hdr wrb */
2627                         num_wrbs++;
2628                         unmap_skb_hdr = true;
2629                 }
2630                 wrb = queue_tail_node(txq);
2631                 frag_index = txq->tail;
2632                 unmap_tx_frag(&adapter->pdev->dev, wrb,
2633                               (unmap_skb_hdr && skb_headlen(skb)));
2634                 unmap_skb_hdr = false;
2635                 queue_tail_inc(txq);
2636                 num_wrbs++;
2637         } while (frag_index != last_index);
2638         dev_consume_skb_any(skb);
2639
2640         return num_wrbs;
2641 }
2642
2643 /* Return the number of events in the event queue */
2644 static inline int events_get(struct be_eq_obj *eqo)
2645 {
2646         struct be_eq_entry *eqe;
2647         int num = 0;
2648
2649         do {
2650                 eqe = queue_tail_node(&eqo->q);
2651                 if (eqe->evt == 0)
2652                         break;
2653
2654                 rmb();
2655                 eqe->evt = 0;
2656                 num++;
2657                 queue_tail_inc(&eqo->q);
2658         } while (true);
2659
2660         return num;
2661 }
2662
2663 /* Leaves the EQ in disarmed state */
2664 static void be_eq_clean(struct be_eq_obj *eqo)
2665 {
2666         int num = events_get(eqo);
2667
2668         be_eq_notify(eqo->adapter, eqo->q.id, false, true, num, 0);
2669 }
2670
2671 /* Free posted rx buffers that were not used */
2672 static void be_rxq_clean(struct be_rx_obj *rxo)
2673 {
2674         struct be_queue_info *rxq = &rxo->q;
2675         struct be_rx_page_info *page_info;
2676
2677         while (atomic_read(&rxq->used) > 0) {
2678                 page_info = get_rx_page_info(rxo);
2679                 put_page(page_info->page);
2680                 memset(page_info, 0, sizeof(*page_info));
2681         }
2682         BUG_ON(atomic_read(&rxq->used));
2683         rxq->tail = 0;
2684         rxq->head = 0;
2685 }
2686
2687 static void be_rx_cq_clean(struct be_rx_obj *rxo)
2688 {
2689         struct be_queue_info *rx_cq = &rxo->cq;
2690         struct be_rx_compl_info *rxcp;
2691         struct be_adapter *adapter = rxo->adapter;
2692         int flush_wait = 0;
2693
2694         /* Consume pending rx completions.
2695          * Wait for the flush completion (identified by zero num_rcvd)
2696          * to arrive. Notify CQ even when there are no more CQ entries
2697          * for HW to flush partially coalesced CQ entries.
2698          * In Lancer, there is no need to wait for flush compl.
2699          */
2700         for (;;) {
2701                 rxcp = be_rx_compl_get(rxo);
2702                 if (!rxcp) {
2703                         if (lancer_chip(adapter))
2704                                 break;
2705
2706                         if (flush_wait++ > 50 ||
2707                             be_check_error(adapter,
2708                                            BE_ERROR_HW)) {
2709                                 dev_warn(&adapter->pdev->dev,
2710                                          "did not receive flush compl\n");
2711                                 break;
2712                         }
2713                         be_cq_notify(adapter, rx_cq->id, true, 0);
2714                         mdelay(1);
2715                 } else {
2716                         be_rx_compl_discard(rxo, rxcp);
2717                         be_cq_notify(adapter, rx_cq->id, false, 1);
2718                         if (rxcp->num_rcvd == 0)
2719                                 break;
2720                 }
2721         }
2722
2723         /* After cleanup, leave the CQ in unarmed state */
2724         be_cq_notify(adapter, rx_cq->id, false, 0);
2725 }
2726
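/* Drain TX completions during teardown: poll each TX CQ until HW has been
 * silent for ~10ms (or an HW error is detected), then free any WRBs that
 * were enqueued but never notified to HW and reset the TXQ indices.
 */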
2727 static void be_tx_compl_clean(struct be_adapter *adapter)
2728 {
2729         struct device *dev = &adapter->pdev->dev;
2730         u16 cmpl = 0, timeo = 0, num_wrbs = 0;
2731         struct be_tx_compl_info *txcp;
2732         struct be_queue_info *txq;
2733         u32 end_idx, notified_idx;
2734         struct be_tx_obj *txo;
2735         int i, pending_txqs;
2736
2737         /* Stop polling for compls when HW has been silent for 10ms */
2738         do {
2739                 pending_txqs = adapter->num_tx_qs;
2740
2741                 for_all_tx_queues(adapter, txo, i) {
2742                         cmpl = 0;
2743                         num_wrbs = 0;
2744                         txq = &txo->q;
2745                         while ((txcp = be_tx_compl_get(txo))) {
2746                                 num_wrbs +=
2747                                         be_tx_compl_process(adapter, txo,
2748                                                             txcp->end_index);
2749                                 cmpl++;
2750                         }
2751                         if (cmpl) {
2752                                 be_cq_notify(adapter, txo->cq.id, false, cmpl);
2753                                 atomic_sub(num_wrbs, &txq->used);
2754                                 timeo = 0;
2755                         }
2756                         if (!be_is_tx_compl_pending(txo))
2757                                 pending_txqs--;
2758                 }
2759
2760                 if (pending_txqs == 0 || ++timeo > 10 ||
2761                     be_check_error(adapter, BE_ERROR_HW))
2762                         break;
2763
2764                 mdelay(1);
2765         } while (true);
2766
2767         /* Free enqueued TX requests that were never notified to HW */
2768         for_all_tx_queues(adapter, txo, i) {
2769                 txq = &txo->q;
2770
2771                 if (atomic_read(&txq->used)) {
2772                         dev_info(dev, "txq%d: cleaning %d pending tx-wrbs\n",
2773                                  i, atomic_read(&txq->used));
2774                         notified_idx = txq->tail;
2775                         end_idx = txq->tail;
2776                         index_adv(&end_idx, atomic_read(&txq->used) - 1,
2777                                   txq->len);
2778                         /* Use the tx-compl process logic to handle requests
2779                          * that were not sent to the HW.
2780                          */
2781                         num_wrbs = be_tx_compl_process(adapter, txo, end_idx);
2782                         atomic_sub(num_wrbs, &txq->used);
2783                         BUG_ON(atomic_read(&txq->used));
2784                         txo->pend_wrb_cnt = 0;
2785                         /* Since hw was never notified of these requests,
2786                          * reset TXQ indices
2787                          */
2788                         txq->head = notified_idx;
2789                         txq->tail = notified_idx;
2790                 }
2791         }
2792 }
2793
2794 static void be_evt_queues_destroy(struct be_adapter *adapter)
2795 {
2796         struct be_eq_obj *eqo;
2797         int i;
2798
2799         for_all_evt_queues(adapter, eqo, i) {
2800                 if (eqo->q.created) {
2801                         be_eq_clean(eqo);
2802                         be_cmd_q_destroy(adapter, &eqo->q, QTYPE_EQ);
2803                         netif_napi_del(&eqo->napi);
2804                         free_cpumask_var(eqo->affinity_mask);
2805                 }
2806                 be_queue_free(adapter, &eqo->q);
2807         }
2808 }
2809
2810 static int be_evt_queues_create(struct be_adapter *adapter)
2811 {
2812         struct be_queue_info *eq;
2813         struct be_eq_obj *eqo;
2814         struct be_aic_obj *aic;
2815         int i, rc;
2816
2817         /* need enough EQs to service both RX and TX queues */
2818         adapter->num_evt_qs = min_t(u16, num_irqs(adapter),
2819                                     max(adapter->cfg_num_rx_irqs,
2820                                         adapter->cfg_num_tx_irqs));
2821
2822         for_all_evt_queues(adapter, eqo, i) {
2823                 int numa_node = dev_to_node(&adapter->pdev->dev);
2824
2825                 aic = &adapter->aic_obj[i];
2826                 eqo->adapter = adapter;
2827                 eqo->idx = i;
2828                 aic->max_eqd = BE_MAX_EQD;
2829                 aic->enable = true;
2830
2831                 eq = &eqo->q;
2832                 rc = be_queue_alloc(adapter, eq, EVNT_Q_LEN,
2833                                     sizeof(struct be_eq_entry));
2834                 if (rc)
2835                         return rc;
2836
2837                 rc = be_cmd_eq_create(adapter, eqo);
2838                 if (rc)
2839                         return rc;
2840
2841                 if (!zalloc_cpumask_var(&eqo->affinity_mask, GFP_KERNEL))
2842                         return -ENOMEM;
2843                 cpumask_set_cpu(cpumask_local_spread(i, numa_node),
2844                                 eqo->affinity_mask);
2845                 netif_napi_add(adapter->netdev, &eqo->napi, be_poll,
2846                                BE_NAPI_WEIGHT);
2847         }
2848         return 0;
2849 }
2850
2851 static void be_mcc_queues_destroy(struct be_adapter *adapter)
2852 {
2853         struct be_queue_info *q;
2854
2855         q = &adapter->mcc_obj.q;
2856         if (q->created)
2857                 be_cmd_q_destroy(adapter, q, QTYPE_MCCQ);
2858         be_queue_free(adapter, q);
2859
2860         q = &adapter->mcc_obj.cq;
2861         if (q->created)
2862                 be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2863         be_queue_free(adapter, q);
2864 }
2865
2866 /* Must be called only after TX qs are created as MCC shares TX EQ */
2867 static int be_mcc_queues_create(struct be_adapter *adapter)
2868 {
2869         struct be_queue_info *q, *cq;
2870
2871         cq = &adapter->mcc_obj.cq;
2872         if (be_queue_alloc(adapter, cq, MCC_CQ_LEN,
2873                            sizeof(struct be_mcc_compl)))
2874                 goto err;
2875
2876         /* Use the default EQ for MCC completions */
2877         if (be_cmd_cq_create(adapter, cq, &mcc_eqo(adapter)->q, true, 0))
2878                 goto mcc_cq_free;
2879
2880         q = &adapter->mcc_obj.q;
2881         if (be_queue_alloc(adapter, q, MCC_Q_LEN, sizeof(struct be_mcc_wrb)))
2882                 goto mcc_cq_destroy;
2883
2884         if (be_cmd_mccq_create(adapter, q, cq))
2885                 goto mcc_q_free;
2886
2887         return 0;
2888
2889 mcc_q_free:
2890         be_queue_free(adapter, q);
2891 mcc_cq_destroy:
2892         be_cmd_q_destroy(adapter, cq, QTYPE_CQ);
2893 mcc_cq_free:
2894         be_queue_free(adapter, cq);
2895 err:
2896         return -1;
2897 }
2898
2899 static void be_tx_queues_destroy(struct be_adapter *adapter)
2900 {
2901         struct be_queue_info *q;
2902         struct be_tx_obj *txo;
2903         u8 i;
2904
2905         for_all_tx_queues(adapter, txo, i) {
2906                 q = &txo->q;
2907                 if (q->created)
2908                         be_cmd_q_destroy(adapter, q, QTYPE_TXQ);
2909                 be_queue_free(adapter, q);
2910
2911                 q = &txo->cq;
2912                 if (q->created)
2913                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2914                 be_queue_free(adapter, q);
2915         }
2916 }
2917
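/* Create a CQ and a TXQ for each TX queue. TXQs are bound to EQs in a
 * round-robin fashion (an EQ may be shared by multiple TXQs) and the EQ's
 * affinity mask is used to set up XPS for the corresponding netdev queue.
 */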
2918 static int be_tx_qs_create(struct be_adapter *adapter)
2919 {
2920         struct be_queue_info *cq;
2921         struct be_tx_obj *txo;
2922         struct be_eq_obj *eqo;
2923         int status, i;
2924
2925         adapter->num_tx_qs = min(adapter->num_evt_qs, adapter->cfg_num_tx_irqs);
2926
2927         for_all_tx_queues(adapter, txo, i) {
2928                 cq = &txo->cq;
2929                 status = be_queue_alloc(adapter, cq, TX_CQ_LEN,
2930                                         sizeof(struct be_eth_tx_compl));
2931                 if (status)
2932                         return status;
2933
2934                 u64_stats_init(&txo->stats.sync);
2935                 u64_stats_init(&txo->stats.sync_compl);
2936
2937                 /* If num_evt_qs is less than num_tx_qs, then more than
2938                  * one txq shares an eq
2939                  */
2940                 eqo = &adapter->eq_obj[i % adapter->num_evt_qs];
2941                 status = be_cmd_cq_create(adapter, cq, &eqo->q, false, 3);
2942                 if (status)
2943                         return status;
2944
2945                 status = be_queue_alloc(adapter, &txo->q, TX_Q_LEN,
2946                                         sizeof(struct be_eth_wrb));
2947                 if (status)
2948                         return status;
2949
2950                 status = be_cmd_txq_create(adapter, txo);
2951                 if (status)
2952                         return status;
2953
2954                 netif_set_xps_queue(adapter->netdev, eqo->affinity_mask,
2955                                     eqo->idx);
2956         }
2957
2958         dev_info(&adapter->pdev->dev, "created %d TX queue(s)\n",
2959                  adapter->num_tx_qs);
2960         return 0;
2961 }
2962
2963 static void be_rx_cqs_destroy(struct be_adapter *adapter)
2964 {
2965         struct be_queue_info *q;
2966         struct be_rx_obj *rxo;
2967         int i;
2968
2969         for_all_rx_queues(adapter, rxo, i) {
2970                 q = &rxo->cq;
2971                 if (q->created)
2972                         be_cmd_q_destroy(adapter, q, QTYPE_CQ);
2973                 be_queue_free(adapter, q);
2974         }
2975 }
2976
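/* Work out how many RSS and default RX queues to use (RSS needs at least
 * two rings) and create a completion queue for each, bound to an EQ in a
 * round-robin fashion. The RXQs themselves are created in be_rx_qs_create().
 */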
2977 static int be_rx_cqs_create(struct be_adapter *adapter)
2978 {
2979         struct be_queue_info *eq, *cq;
2980         struct be_rx_obj *rxo;
2981         int rc, i;
2982
2983         adapter->num_rss_qs =
2984                         min(adapter->num_evt_qs, adapter->cfg_num_rx_irqs);
2985
2986         /* We'll use RSS only if at least 2 RSS rings are supported. */
2987         if (adapter->num_rss_qs < 2)
2988                 adapter->num_rss_qs = 0;
2989
2990         adapter->num_rx_qs = adapter->num_rss_qs + adapter->need_def_rxq;
2991
2992         /* When the interface is not capable of RSS rings (and there is no
2993          * need to create a default RXQ) we'll still need one RXQ
2994          */
2995         if (adapter->num_rx_qs == 0)
2996                 adapter->num_rx_qs = 1;
2997
2998         adapter->big_page_size = (1 << get_order(rx_frag_size)) * PAGE_SIZE;
2999         for_all_rx_queues(adapter, rxo, i) {
3000                 rxo->adapter = adapter;
3001                 cq = &rxo->cq;
3002                 rc = be_queue_alloc(adapter, cq, RX_CQ_LEN,
3003                                     sizeof(struct be_eth_rx_compl));
3004                 if (rc)
3005                         return rc;
3006
3007                 u64_stats_init(&rxo->stats.sync);
3008                 eq = &adapter->eq_obj[i % adapter->num_evt_qs].q;
3009                 rc = be_cmd_cq_create(adapter, cq, eq, false, 3);
3010                 if (rc)
3011                         return rc;
3012         }
3013
3014         dev_info(&adapter->pdev->dev,
3015                  "created %d RX queue(s)\n", adapter->num_rx_qs);
3016         return 0;
3017 }
3018
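/* Legacy INTx interrupt handler: if NAPI is not already scheduled, count the
 * pending EQ events and schedule NAPI; spurious interrupts are tolerated as
 * described in the comments below.
 */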
3019 static irqreturn_t be_intx(int irq, void *dev)
3020 {
3021         struct be_eq_obj *eqo = dev;
3022         struct be_adapter *adapter = eqo->adapter;
3023         int num_evts = 0;
3024
3025         /* IRQ is not expected when NAPI is scheduled as the EQ
3026          * will not be armed.
3027          * But this can happen on Lancer INTx, where it takes
3028          * a while to de-assert INTx, or on BE2, where occasionally
3029          * an interrupt may be raised even when the EQ is unarmed.
3030          * If NAPI is already scheduled, then counting & notifying
3031          * events will orphan them.
3032          */
3033         if (napi_schedule_prep(&eqo->napi)) {
3034                 num_evts = events_get(eqo);
3035                 __napi_schedule(&eqo->napi);
3036                 if (num_evts)
3037                         eqo->spurious_intr = 0;
3038         }
3039         be_eq_notify(adapter, eqo->q.id, false, true, num_evts, 0);
3040
3041         /* Return IRQ_HANDLED only for the first spurious intr
3042          * after a valid intr to stop the kernel from branding
3043          * this irq as a bad one!
3044          */
3045         if (num_evts || eqo->spurious_intr++ == 0)
3046                 return IRQ_HANDLED;
3047         else
3048                 return IRQ_NONE;
3049 }
3050
3051 static irqreturn_t be_msix(int irq, void *dev)
3052 {
3053         struct be_eq_obj *eqo = dev;
3054
3055         be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
3056         napi_schedule(&eqo->napi);
3057         return IRQ_HANDLED;
3058 }
3059
3060 static inline bool do_gro(struct be_rx_compl_info *rxcp)
3061 {
3062         return (rxcp->tcpf && !rxcp->err && rxcp->l4_csum) ? true : false;
3063 }
3064
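/* NAPI RX processing: consume up to budget RX completions, dropping flush
 * and erroneous completions, and pass packets up via GRO or the regular
 * receive path. Refill the RXQ with fragments when it runs low.
 */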
3065 static int be_process_rx(struct be_rx_obj *rxo, struct napi_struct *napi,
3066                          int budget)
3067 {
3068         struct be_adapter *adapter = rxo->adapter;
3069         struct be_queue_info *rx_cq = &rxo->cq;
3070         struct be_rx_compl_info *rxcp;
3071         u32 work_done;
3072         u32 frags_consumed = 0;
3073
3074         for (work_done = 0; work_done < budget; work_done++) {
3075                 rxcp = be_rx_compl_get(rxo);
3076                 if (!rxcp)
3077                         break;
3078
3079                 /* Is it a flush compl that has no data */
3080                 if (unlikely(rxcp->num_rcvd == 0))
3081                         goto loop_continue;
3082
3083                 /* Discard compl with partial DMA Lancer B0 */
3084                 if (unlikely(!rxcp->pkt_size)) {
3085                         be_rx_compl_discard(rxo, rxcp);
3086                         goto loop_continue;
3087                 }
3088
3089                 /* On BE, drop pkts that arrive due to imperfect filtering in
3090                  * promiscuous mode on some SKUs
3091                  */
3092                 if (unlikely(rxcp->port != adapter->port_num &&
3093                              !lancer_chip(adapter))) {
3094                         be_rx_compl_discard(rxo, rxcp);
3095                         goto loop_continue;
3096                 }
3097
3098                 if (do_gro(rxcp))
3099                         be_rx_compl_process_gro(rxo, napi, rxcp);
3100                 else
3101                         be_rx_compl_process(rxo, napi, rxcp);
3102
3103 loop_continue:
3104                 frags_consumed += rxcp->num_rcvd;
3105                 be_rx_stats_update(rxo, rxcp);
3106         }
3107
3108         if (work_done) {
3109                 be_cq_notify(adapter, rx_cq->id, true, work_done);
3110
3111                 /* When an rx-obj gets into post_starved state, just
3112                  * let be_worker do the posting.
3113                  */
3114                 if (atomic_read(&rxo->q.used) < RX_FRAGS_REFILL_WM &&
3115                     !rxo->rx_post_starved)
3116                         be_post_rx_frags(rxo, GFP_ATOMIC,
3117                                          max_t(u32, MAX_RX_POST,
3118                                                frags_consumed));
3119         }
3120
3121         return work_done;
3122 }
3123
3124 static inline void be_update_tx_err(struct be_tx_obj *txo, u8 status)
3125 {
3126         switch (status) {
3127         case BE_TX_COMP_HDR_PARSE_ERR:
3128                 tx_stats(txo)->tx_hdr_parse_err++;
3129                 break;
3130         case BE_TX_COMP_NDMA_ERR:
3131                 tx_stats(txo)->tx_dma_err++;
3132                 break;
3133         case BE_TX_COMP_ACL_ERR:
3134                 tx_stats(txo)->tx_spoof_check_err++;
3135                 break;
3136         }
3137 }
3138
3139 static inline void lancer_update_tx_err(struct be_tx_obj *txo, u8 status)
3140 {
3141         switch (status) {
3142         case LANCER_TX_COMP_LSO_ERR:
3143                 tx_stats(txo)->tx_tso_err++;
3144                 break;
3145         case LANCER_TX_COMP_HSW_DROP_MAC_ERR:
3146         case LANCER_TX_COMP_HSW_DROP_VLAN_ERR:
3147                 tx_stats(txo)->tx_spoof_check_err++;
3148                 break;
3149         case LANCER_TX_COMP_QINQ_ERR:
3150                 tx_stats(txo)->tx_qinq_err++;
3151                 break;
3152         case LANCER_TX_COMP_PARITY_ERR:
3153                 tx_stats(txo)->tx_internal_parity_err++;
3154                 break;
3155         case LANCER_TX_COMP_DMA_ERR:
3156                 tx_stats(txo)->tx_dma_err++;
3157                 break;
3158         }
3159 }
3160
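/* Reap TX completions on a queue: free the completed wrbs, account TX errors
 * and wake the netdev subqueue if it was stopped for lack of wrbs.
 */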
3161 static void be_process_tx(struct be_adapter *adapter, struct be_tx_obj *txo,
3162                           int idx)
3163 {
3164         int num_wrbs = 0, work_done = 0;
3165         struct be_tx_compl_info *txcp;
3166
3167         while ((txcp = be_tx_compl_get(txo))) {
3168                 num_wrbs += be_tx_compl_process(adapter, txo, txcp->end_index);
3169                 work_done++;
3170
3171                 if (txcp->status) {
3172                         if (lancer_chip(adapter))
3173                                 lancer_update_tx_err(txo, txcp->status);
3174                         else
3175                                 be_update_tx_err(txo, txcp->status);
3176                 }
3177         }
3178
3179         if (work_done) {
3180                 be_cq_notify(adapter, txo->cq.id, true, work_done);
3181                 atomic_sub(num_wrbs, &txo->q.used);
3182
3183                 /* As Tx wrbs have been freed up, wake up netdev queue
3184                  * if it was stopped due to lack of tx wrbs.  */
3185                 if (__netif_subqueue_stopped(adapter->netdev, idx) &&
3186                     be_can_txq_wake(txo)) {
3187                         netif_wake_subqueue(adapter->netdev, idx);
3188                 }
3189
3190                 u64_stats_update_begin(&tx_stats(txo)->sync_compl);
3191                 tx_stats(txo)->tx_compl += work_done;
3192                 u64_stats_update_end(&tx_stats(txo)->sync_compl);
3193         }
3194 }
3195
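/* NAPI poll handler for an EQ: process TX and RX completions of all queues
 * bound to this EQ (and MCC completions on the MCC EQ). If the budget is not
 * exhausted, complete NAPI and re-arm the EQ; otherwise stay in polling mode.
 */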
3196 int be_poll(struct napi_struct *napi, int budget)
3197 {
3198         struct be_eq_obj *eqo = container_of(napi, struct be_eq_obj, napi);
3199         struct be_adapter *adapter = eqo->adapter;
3200         int max_work = 0, work, i, num_evts;
3201         struct be_rx_obj *rxo;
3202         struct be_tx_obj *txo;
3203         u32 mult_enc = 0;
3204
3205         num_evts = events_get(eqo);
3206
3207         for_all_tx_queues_on_eq(adapter, eqo, txo, i)
3208                 be_process_tx(adapter, txo, i);
3209
3210         /* This loop iterates twice for EQ0, on which completions of
3211          * the last RXQ (the default one) are also processed.
3212          * For other EQs the loop iterates only once.
3213          */
3214         for_all_rx_queues_on_eq(adapter, eqo, rxo, i) {
3215                 work = be_process_rx(rxo, napi, budget);
3216                 max_work = max(work, max_work);
3217         }
3218
3219         if (is_mcc_eqo(eqo))
3220                 be_process_mcc(adapter);
3221
3222         if (max_work < budget) {
3223                 napi_complete_done(napi, max_work);
3224
3225                 /* Skyhawk EQ_DB has a provision to set the rearm to interrupt
3226                  * delay via a delay multiplier encoding value
3227                  */
3228                 if (skyhawk_chip(adapter))
3229                         mult_enc = be_get_eq_delay_mult_enc(eqo);
3230
3231                 be_eq_notify(adapter, eqo->q.id, true, false, num_evts,
3232                              mult_enc);
3233         } else {
3234                 /* As we'll continue in polling mode, count and clear events */
3235                 be_eq_notify(adapter, eqo->q.id, false, false, num_evts, 0);
3236         }
3237         return max_work;
3238 }
3239
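/* Check the adapter for unrecoverable errors: on Lancer read the SLIPORT
 * status/error registers; on BEx read the UE status registers and log the
 * names of any unmasked UE bits that are set.
 */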
3240 void be_detect_error(struct be_adapter *adapter)
3241 {
3242         u32 ue_lo = 0, ue_hi = 0, ue_lo_mask = 0, ue_hi_mask = 0;
3243         u32 sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
3244         struct device *dev = &adapter->pdev->dev;
3245         u16 val;
3246         u32 i;
3247
3248         if (be_check_error(adapter, BE_ERROR_HW))
3249                 return;
3250
3251         if (lancer_chip(adapter)) {
3252                 sliport_status = ioread32(adapter->db + SLIPORT_STATUS_OFFSET);
3253                 if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
3254                         be_set_error(adapter, BE_ERROR_UE);
3255                         sliport_err1 = ioread32(adapter->db +
3256                                                 SLIPORT_ERROR1_OFFSET);
3257                         sliport_err2 = ioread32(adapter->db +
3258                                                 SLIPORT_ERROR2_OFFSET);
3259                         /* Do not log error messages if it's a FW reset */
3260                         if (sliport_err1 == SLIPORT_ERROR_FW_RESET1 &&
3261                             sliport_err2 == SLIPORT_ERROR_FW_RESET2) {
3262                                 dev_info(dev, "Firmware update in progress\n");
3263                         } else {
3264                                 dev_err(dev, "Error detected in the card\n");
3265                                 dev_err(dev, "ERR: sliport status 0x%x\n",
3266                                         sliport_status);
3267                                 dev_err(dev, "ERR: sliport error1 0x%x\n",
3268                                         sliport_err1);
3269                                 dev_err(dev, "ERR: sliport error2 0x%x\n",
3270                                         sliport_err2);
3271                         }
3272                 }
3273         } else {
3274                 ue_lo = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_LOW);
3275                 ue_hi = ioread32(adapter->pcicfg + PCICFG_UE_STATUS_HIGH);
3276                 ue_lo_mask = ioread32(adapter->pcicfg +
3277                                       PCICFG_UE_STATUS_LOW_MASK);
3278                 ue_hi_mask = ioread32(adapter->pcicfg +
3279                                       PCICFG_UE_STATUS_HI_MASK);
3280
3281                 ue_lo = (ue_lo & ~ue_lo_mask);
3282                 ue_hi = (ue_hi & ~ue_hi_mask);
3283
3284                 if (ue_lo || ue_hi) {
3285                         /* On certain platforms BE3 hardware can indicate
3286                          * spurious UEs. In case of a UE in the chip,
3287                          * the POST register correctly reports either a
3288                          * FAT_LOG_START state (FW is currently dumping
3289                          * FAT log data) or an ARMFW_UE state. Check for the
3290                          * above states to ascertain if the UE is valid or not.
3291                          */
3292                         if (BE3_chip(adapter)) {
3293                                 val = be_POST_stage_get(adapter);
3294                                 if ((val & POST_STAGE_FAT_LOG_START)
3295                                      != POST_STAGE_FAT_LOG_START &&
3296                                     (val & POST_STAGE_ARMFW_UE)
3297                                      != POST_STAGE_ARMFW_UE)
3298                                         return;
3299                         }
3300
3301                         dev_err(dev, "Error detected in the adapter\n");
3302                         be_set_error(adapter, BE_ERROR_UE);
3303
3304                         for (i = 0; ue_lo; ue_lo >>= 1, i++) {
3305                                 if (ue_lo & 1)
3306                                         dev_err(dev, "UE: %s bit set\n",
3307                                                 ue_status_low_desc[i]);
3308                         }
3309                         for (i = 0; ue_hi; ue_hi >>= 1, i++) {
3310                                 if (ue_hi & 1)
3311                                         dev_err(dev, "UE: %s bit set\n",
3312                                                 ue_status_hi_desc[i]);
3313                         }
3314                 }
3315         }
3316 }
3317
3318 static void be_msix_disable(struct be_adapter *adapter)
3319 {
3320         if (msix_enabled(adapter)) {
3321                 pci_disable_msix(adapter->pdev);
3322                 adapter->num_msix_vec = 0;
3323                 adapter->num_msix_roce_vec = 0;
3324         }
3325 }
3326
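/* Work out how many MSI-X vectors to request (reserving vectors for RoCE
 * when supported) and enable them. On failure, PFs fall back to INTx; VFs
 * have no INTx support, so the error is propagated to fail the probe.
 */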
3327 static int be_msix_enable(struct be_adapter *adapter)
3328 {
3329         unsigned int i, max_roce_eqs;
3330         struct device *dev = &adapter->pdev->dev;
3331         int num_vec;
3332
3333         /* If RoCE is supported, program the max number of vectors that
3334          * could be used for NIC and RoCE; otherwise, just program the number
3335          * we'll use initially.
3336          */
3337         if (be_roce_supported(adapter)) {
3338                 max_roce_eqs =
3339                         be_max_func_eqs(adapter) - be_max_nic_eqs(adapter);
3340                 max_roce_eqs = min(max_roce_eqs, num_online_cpus());
3341                 num_vec = be_max_any_irqs(adapter) + max_roce_eqs;
3342         } else {
3343                 num_vec = max(adapter->cfg_num_rx_irqs,
3344                               adapter->cfg_num_tx_irqs);
3345         }
3346
3347         for (i = 0; i < num_vec; i++)
3348                 adapter->msix_entries[i].entry = i;
3349
3350         num_vec = pci_enable_msix_range(adapter->pdev, adapter->msix_entries,
3351                                         MIN_MSIX_VECTORS, num_vec);
3352         if (num_vec < 0)
3353                 goto fail;
3354
3355         if (be_roce_supported(adapter) && num_vec > MIN_MSIX_VECTORS) {
3356                 adapter->num_msix_roce_vec = num_vec / 2;
3357                 dev_info(dev, "enabled %d MSI-x vector(s) for RoCE\n",
3358                          adapter->num_msix_roce_vec);
3359         }
3360
3361         adapter->num_msix_vec = num_vec - adapter->num_msix_roce_vec;
3362
3363         dev_info(dev, "enabled %d MSI-x vector(s) for NIC\n",
3364                  adapter->num_msix_vec);
3365         return 0;
3366
3367 fail:
3368         dev_warn(dev, "MSIx enable failed\n");
3369
3370         /* INTx is not supported in VFs, so fail probe if enable_msix fails */
3371         if (be_virtfn(adapter))
3372                 return num_vec;
3373         return 0;
3374 }
3375
3376 static inline int be_msix_vec_get(struct be_adapter *adapter,
3377                                   struct be_eq_obj *eqo)
3378 {
3379         return adapter->msix_entries[eqo->msix_idx].vector;
3380 }
3381
3382 static int be_msix_register(struct be_adapter *adapter)
3383 {
3384         struct net_device *netdev = adapter->netdev;
3385         struct be_eq_obj *eqo;
3386         int status, i, vec;
3387
3388         for_all_evt_queues(adapter, eqo, i) {
3389                 sprintf(eqo->desc, "%s-q%d", netdev->name, i);
3390                 vec = be_msix_vec_get(adapter, eqo);
3391                 status = request_irq(vec, be_msix, 0, eqo->desc, eqo);
3392                 if (status)
3393                         goto err_msix;
3394
3395                 irq_set_affinity_hint(vec, eqo->affinity_mask);
3396         }
3397
3398         return 0;
3399 err_msix:
3400         for (i--; i >= 0; i--) {
3401                 eqo = &adapter->eq_obj[i];
3402                 free_irq(be_msix_vec_get(adapter, eqo), eqo);
3403         }
3404         dev_warn(&adapter->pdev->dev, "MSIX Request IRQ failed - err %d\n",
3405                  status);
3406         be_msix_disable(adapter);
3407         return status;
3408 }
3409
3410 static int be_irq_register(struct be_adapter *adapter)
3411 {
3412         struct net_device *netdev = adapter->netdev;
3413         int status;
3414
3415         if (msix_enabled(adapter)) {
3416                 status = be_msix_register(adapter);
3417                 if (status == 0)
3418                         goto done;
3419                 /* INTx is not supported for VF */
3420                 if (be_virtfn(adapter))
3421                         return status;
3422         }
3423
3424         /* INTx: only the first EQ is used */
3425         netdev->irq = adapter->pdev->irq;
3426         status = request_irq(netdev->irq, be_intx, IRQF_SHARED, netdev->name,
3427                              &adapter->eq_obj[0]);
3428         if (status) {
3429                 dev_err(&adapter->pdev->dev,
3430                         "INTx request IRQ failed - err %d\n", status);
3431                 return status;
3432         }
3433 done:
3434         adapter->isr_registered = true;
3435         return 0;
3436 }
3437
3438 static void be_irq_unregister(struct be_adapter *adapter)
3439 {
3440         struct net_device *netdev = adapter->netdev;
3441         struct be_eq_obj *eqo;
3442         int i, vec;
3443
3444         if (!adapter->isr_registered)
3445                 return;
3446
3447         /* INTx */
3448         if (!msix_enabled(adapter)) {
3449                 free_irq(netdev->irq, &adapter->eq_obj[0]);
3450                 goto done;
3451         }
3452
3453         /* MSIx */
3454         for_all_evt_queues(adapter, eqo, i) {
3455                 vec = be_msix_vec_get(adapter, eqo);
3456                 irq_set_affinity_hint(vec, NULL);
3457                 free_irq(vec, eqo);
3458         }
3459
3460 done:
3461         adapter->isr_registered = false;
3462 }
3463
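/* Destroy all RX queues. On Lancer, buffers are re-posted first to avoid an
 * HW stall when a queue is destroyed in an out-of-buffer state. RSS is
 * disabled once the queues are gone.
 */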
3464 static void be_rx_qs_destroy(struct be_adapter *adapter)
3465 {
3466         struct rss_info *rss = &adapter->rss_info;
3467         struct be_queue_info *q;
3468         struct be_rx_obj *rxo;
3469         int i;
3470
3471         for_all_rx_queues(adapter, rxo, i) {
3472                 q = &rxo->q;
3473                 if (q->created) {
3474                         /* If RXQs are destroyed while in an "out of buffer"
3475                          * state, there is a possibility of an HW stall on
3476                          * Lancer. So, post 64 buffers to each queue to relieve
3477                          * the "out of buffer" condition.
3478                          * Make sure there's space in the RXQ before posting.
3479                          */
3480                         if (lancer_chip(adapter)) {
3481                                 be_rx_cq_clean(rxo);
3482                                 if (atomic_read(&q->used) == 0)
3483                                         be_post_rx_frags(rxo, GFP_KERNEL,
3484                                                          MAX_RX_POST);
3485                         }
3486
3487                         be_cmd_rxq_destroy(adapter, q);
3488                         be_rx_cq_clean(rxo);
3489                         be_rxq_clean(rxo);
3490                 }
3491                 be_queue_free(adapter, q);
3492         }
3493
3494         if (rss->rss_flags) {
3495                 rss->rss_flags = RSS_ENABLE_NONE;
3496                 be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3497                                   128, rss->rss_hkey);
3498         }
3499 }
3500
3501 static void be_disable_if_filters(struct be_adapter *adapter)
3502 {
3503         /* Don't delete MAC on BE3 VFs without FILTMGMT privilege  */
3504         if (!BEx_chip(adapter) || !be_virtfn(adapter) ||
3505             check_privilege(adapter, BE_PRIV_FILTMGMT)) {
3506                 be_dev_mac_del(adapter, adapter->pmac_id[0]);
3507                 eth_zero_addr(adapter->dev_mac);
3508         }
3509
3510         be_clear_uc_list(adapter);
3511         be_clear_mc_list(adapter);
3512
3513         /* The IFACE flags are enabled in the open path and cleared
3514          * in the close path. When a VF gets detached from the host and
3515          * assigned to a VM the following happens:
3516          *      - VF's IFACE flags get cleared in the detach path
3517          *      - IFACE create is issued by the VF in the attach path
3518          * Due to a bug in the BE3/Skyhawk-R FW
3519          * (Lancer FW doesn't have the bug), the IFACE capability flags
3520          * specified along with the IFACE create cmd issued by a VF are not
3521          * honoured by FW.  As a consequence, if a *new* driver
3522          * (that enables/disables IFACE flags in open/close)
3523          * is loaded in the host and an *old* driver is used by a VM/VF,
3524          * the IFACE gets created *without* the needed flags.
3525          * To avoid this, disable RX-filter flags only for Lancer.
3526          */
3527         if (lancer_chip(adapter)) {
3528                 be_cmd_rx_filter(adapter, BE_IF_ALL_FILT_FLAGS, OFF);
3529                 adapter->if_flags &= ~BE_IF_ALL_FILT_FLAGS;
3530         }
3531 }
3532
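/* ndo_stop handler: flush pending adapter cmds, disable filters, NAPI and
 * async MCC processing, drain TX completions, destroy the RX queues and
 * finally unregister the IRQs.
 */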
3533 static int be_close(struct net_device *netdev)
3534 {
3535         struct be_adapter *adapter = netdev_priv(netdev);
3536         struct be_eq_obj *eqo;
3537         int i;
3538
3539         /* This protection is needed as be_close() may be called even when the
3540          * adapter is in a cleared state (after an EEH permanent failure)
3541          */
3542         if (!(adapter->flags & BE_FLAGS_SETUP_DONE))
3543                 return 0;
3544
3545         /* Before attempting cleanup ensure all the pending cmds in the
3546          * config_wq have finished execution
3547          */
3548         flush_workqueue(be_wq);
3549
3550         be_disable_if_filters(adapter);
3551
3552         if (adapter->flags & BE_FLAGS_NAPI_ENABLED) {
3553                 for_all_evt_queues(adapter, eqo, i) {
3554                         napi_disable(&eqo->napi);
3555                 }
3556                 adapter->flags &= ~BE_FLAGS_NAPI_ENABLED;
3557         }
3558
3559         be_async_mcc_disable(adapter);
3560
3561         /* Wait for all pending tx completions to arrive so that
3562          * all tx skbs are freed.
3563          */
3564         netif_tx_disable(netdev);
3565         be_tx_compl_clean(adapter);
3566
3567         be_rx_qs_destroy(adapter);
3568
3569         for_all_evt_queues(adapter, eqo, i) {
3570                 if (msix_enabled(adapter))
3571                         synchronize_irq(be_msix_vec_get(adapter, eqo));
3572                 else
3573                         synchronize_irq(netdev->irq);
3574                 be_eq_clean(eqo);
3575         }
3576
3577         be_irq_unregister(adapter);
3578
3579         return 0;
3580 }
3581
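/* Create the default and RSS RX queues, program the RSS indirection table
 * and hash key (or disable RSS when only the default RXQ exists) and post
 * receive buffers to every queue.
 */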
3582 static int be_rx_qs_create(struct be_adapter *adapter)
3583 {
3584         struct rss_info *rss = &adapter->rss_info;
3585         u8 rss_key[RSS_HASH_KEY_LEN];
3586         struct be_rx_obj *rxo;
3587         int rc, i, j;
3588
3589         for_all_rx_queues(adapter, rxo, i) {
3590                 rc = be_queue_alloc(adapter, &rxo->q, RX_Q_LEN,
3591                                     sizeof(struct be_eth_rx_d));
3592                 if (rc)
3593                         return rc;
3594         }
3595
3596         if (adapter->need_def_rxq || !adapter->num_rss_qs) {
3597                 rxo = default_rxo(adapter);
3598                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3599                                        rx_frag_size, adapter->if_handle,
3600                                        false, &rxo->rss_id);
3601                 if (rc)
3602                         return rc;
3603         }
3604
3605         for_all_rss_queues(adapter, rxo, i) {
3606                 rc = be_cmd_rxq_create(adapter, &rxo->q, rxo->cq.id,
3607                                        rx_frag_size, adapter->if_handle,
3608                                        true, &rxo->rss_id);
3609                 if (rc)
3610                         return rc;
3611         }
3612
3613         if (be_multi_rxq(adapter)) {
3614                 for (j = 0; j < RSS_INDIR_TABLE_LEN; j += adapter->num_rss_qs) {
3615                         for_all_rss_queues(adapter, rxo, i) {
3616                                 if ((j + i) >= RSS_INDIR_TABLE_LEN)
3617                                         break;
3618                                 rss->rsstable[j + i] = rxo->rss_id;
3619                                 rss->rss_queue[j + i] = i;
3620                         }
3621                 }
3622                 rss->rss_flags = RSS_ENABLE_TCP_IPV4 | RSS_ENABLE_IPV4 |
3623                         RSS_ENABLE_TCP_IPV6 | RSS_ENABLE_IPV6;
3624
3625                 if (!BEx_chip(adapter))
3626                         rss->rss_flags |= RSS_ENABLE_UDP_IPV4 |
3627                                 RSS_ENABLE_UDP_IPV6;
3628
3629                 netdev_rss_key_fill(rss_key, RSS_HASH_KEY_LEN);
3630                 rc = be_cmd_rss_config(adapter, rss->rsstable, rss->rss_flags,
3631                                        RSS_INDIR_TABLE_LEN, rss_key);
3632                 if (rc) {
3633                         rss->rss_flags = RSS_ENABLE_NONE;
3634                         return rc;
3635                 }
3636
3637                 memcpy(rss->rss_hkey, rss_key, RSS_HASH_KEY_LEN);
3638         } else {
3639                 /* Disable RSS, if only default RX Q is created */
3640                 rss->rss_flags = RSS_ENABLE_NONE;
3641         }
3642
3643
3644         /* Post 1 less than RXQ-len to avoid head being equal to tail,
3645          * which is a queue empty condition
3646          */
3647         for_all_rx_queues(adapter, rxo, i)
3648                 be_post_rx_frags(rxo, GFP_KERNEL, RX_Q_LEN - 1);
3649
3650         return 0;
3651 }
3652
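/* Re-enable the basic RX filter flags, program the interface MAC address if
 * it differs from the netdev address, and restore VLAN filters and RX mode.
 */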
3653 static int be_enable_if_filters(struct be_adapter *adapter)
3654 {
3655         int status;
3656
3657         status = be_cmd_rx_filter(adapter, BE_IF_FILT_FLAGS_BASIC, ON);
3658         if (status)
3659                 return status;
3660
3661         /* This condition is usually true as ->dev_mac is zeroed.
3662          * But on BE3 VFs the initial MAC is pre-programmed by the PF and
3663          * a subsequent be_dev_mac_add() can fail (after a fresh boot)
3664          */
3665         if (!ether_addr_equal(adapter->dev_mac, adapter->netdev->dev_addr)) {
3666                 int old_pmac_id = -1;
3667
3668                 /* Remember old programmed MAC if any - can happen on BE3 VF */
3669                 if (!is_zero_ether_addr(adapter->dev_mac))
3670                         old_pmac_id = adapter->pmac_id[0];
3671
3672                 status = be_dev_mac_add(adapter, adapter->netdev->dev_addr);
3673                 if (status)
3674                         return status;
3675
3676                 /* Delete the old programmed MAC as we successfully programmed
3677                  * a new MAC
3678                  */
3679                 if (old_pmac_id >= 0 && old_pmac_id != adapter->pmac_id[0])
3680                         be_dev_mac_del(adapter, old_pmac_id);
3681
3682                 ether_addr_copy(adapter->dev_mac, adapter->netdev->dev_addr);
3683         }
3684
3685         if (adapter->vlans_added)
3686                 be_vid_config(adapter);
3687
3688         __be_set_rx_mode(adapter);
3689
3690         return 0;
3691 }
3692
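/* ndo_open handler: create RX queues, enable filters, register IRQs, arm the
 * CQs and EQs, enable NAPI and async MCC processing, report link status and
 * start the TX queues.
 */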
3693 static int be_open(struct net_device *netdev)
3694 {
3695         struct be_adapter *adapter = netdev_priv(netdev);
3696         struct be_eq_obj *eqo;
3697         struct be_rx_obj *rxo;
3698         struct be_tx_obj *txo;
3699         u8 link_status;
3700         int status, i;
3701
3702         status = be_rx_qs_create(adapter);
3703         if (status)
3704                 goto err;
3705
3706         status = be_enable_if_filters(adapter);
3707         if (status)
3708                 goto err;
3709
3710         status = be_irq_register(adapter);
3711         if (status)
3712                 goto err;
3713
3714         for_all_rx_queues(adapter, rxo, i)
3715                 be_cq_notify(adapter, rxo->cq.id, true, 0);
3716
3717         for_all_tx_queues(adapter, txo, i)
3718                 be_cq_notify(adapter, txo->cq.id, true, 0);
3719
3720         be_async_mcc_enable(adapter);
3721
3722         for_all_evt_queues(adapter, eqo, i) {
3723                 napi_enable(&eqo->napi);
3724                 be_eq_notify(adapter, eqo->q.id, true, true, 0, 0);
3725         }
3726         adapter->flags |= BE_FLAGS_NAPI_ENABLED;
3727
3728         status = be_cmd_link_status_query(adapter, NULL, &link_status, 0);
3729         if (!status)
3730                 be_link_status_update(adapter, link_status);
3731
3732         netif_tx_start_all_queues(netdev);
3733         if (skyhawk_chip(adapter))
3734                 udp_tunnel_get_rx_info(netdev);
3735
3736         return 0;
3737 err:
3738         be_close(adapter->netdev);
3739         return -EIO;
3740 }
3741
3742 static void be_vf_eth_addr_generate(struct be_adapter *adapter, u8 *mac)
3743 {
3744         u32 addr;
3745
3746         addr = jhash(adapter->netdev->dev_addr, ETH_ALEN, 0);
3747
3748         mac[5] = (u8)(addr & 0xFF);
3749         mac[4] = (u8)((addr >> 8) & 0xFF);
3750         mac[3] = (u8)((addr >> 16) & 0xFF);
3751         /* Use the OUI from the current MAC address */
3752         memcpy(mac, adapter->netdev->dev_addr, 3);
3753 }
3754
3755 /*
3756  * Generate a seed MAC address from the PF MAC Address using jhash.
3757  * MAC addresses for VFs are assigned incrementally, starting from the seed.
3758  * These addresses are programmed in the ASIC by the PF and the VF driver
3759  * queries for the MAC address during its probe.
3760  */
3761 static int be_vf_eth_addr_config(struct be_adapter *adapter)
3762 {
3763         u32 vf;
3764         int status = 0;
3765         u8 mac[ETH_ALEN];
3766         struct be_vf_cfg *vf_cfg;
3767
3768         be_vf_eth_addr_generate(adapter, mac);
3769
3770         for_all_vfs(adapter, vf_cfg, vf) {
3771                 if (BEx_chip(adapter))
3772                         status = be_cmd_pmac_add(adapter, mac,
3773                                                  vf_cfg->if_handle,
3774                                                  &vf_cfg->pmac_id, vf + 1);
3775                 else
3776                         status = be_cmd_set_mac(adapter, mac, vf_cfg->if_handle,
3777                                                 vf + 1);
3778
3779                 if (status)
3780                         dev_err(&adapter->pdev->dev,
3781                                 "Mac address assignment failed for VF %d\n",
3782                                 vf);
3783                 else
3784                         memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3785
3786                 mac[5] += 1;
3787         }
3788         return status;
3789 }
3790
3791 static int be_vfs_mac_query(struct be_adapter *adapter)
3792 {
3793         int status, vf;
3794         u8 mac[ETH_ALEN];
3795         struct be_vf_cfg *vf_cfg;
3796
3797         for_all_vfs(adapter, vf_cfg, vf) {
3798                 status = be_cmd_get_active_mac(adapter, vf_cfg->pmac_id,
3799                                                mac, vf_cfg->if_handle,
3800                                                false, vf+1);
3801                 if (status)
3802                         return status;
3803                 memcpy(vf_cfg->mac_addr, mac, ETH_ALEN);
3804         }
3805         return 0;
3806 }
3807
3808 static void be_vf_clear(struct be_adapter *adapter)
3809 {
3810         struct be_vf_cfg *vf_cfg;
3811         u32 vf;
3812
3813         if (pci_vfs_assigned(adapter->pdev)) {
3814                 dev_warn(&adapter->pdev->dev,
3815                          "VFs are assigned to VMs: not disabling VFs\n");
3816                 goto done;
3817         }
3818
3819         pci_disable_sriov(adapter->pdev);
3820
3821         for_all_vfs(adapter, vf_cfg, vf) {
3822                 if (BEx_chip(adapter))
3823                         be_cmd_pmac_del(adapter, vf_cfg->if_handle,
3824                                         vf_cfg->pmac_id, vf + 1);
3825                 else
3826                         be_cmd_set_mac(adapter, NULL, vf_cfg->if_handle,
3827                                        vf + 1);
3828
3829                 be_cmd_if_destroy(adapter, vf_cfg->if_handle, vf + 1);
3830         }
3831
3832         if (BE3_chip(adapter))
3833                 be_cmd_set_hsw_config(adapter, 0, 0,
3834                                       adapter->if_handle,
3835                                       PORT_FWD_TYPE_PASSTHRU, 0);
3836 done:
3837         kfree(adapter->vf_cfg);
3838         adapter->num_vfs = 0;
3839         adapter->flags &= ~BE_FLAGS_SRIOV_ENABLED;
3840 }
3841
3842 static void be_clear_queues(struct be_adapter *adapter)
3843 {
3844         be_mcc_queues_destroy(adapter);
3845         be_rx_cqs_destroy(adapter);
3846         be_tx_queues_destroy(adapter);
3847         be_evt_queues_destroy(adapter);
3848 }
3849
3850 static void be_cancel_worker(struct be_adapter *adapter)
3851 {
3852         if (adapter->flags & BE_FLAGS_WORKER_SCHEDULED) {
3853                 cancel_delayed_work_sync(&adapter->work);
3854                 adapter->flags &= ~BE_FLAGS_WORKER_SCHEDULED;
3855         }
3856 }
3857
3858 static void be_cancel_err_detection(struct be_adapter *adapter)
3859 {
3860         struct be_error_recovery *err_rec = &adapter->error_recovery;
3861
3862         if (!be_err_recovery_workq)
3863                 return;
3864
3865         if (adapter->flags & BE_FLAGS_ERR_DETECTION_SCHEDULED) {
3866                 cancel_delayed_work_sync(&err_rec->err_detection_work);
3867                 adapter->flags &= ~BE_FLAGS_ERR_DETECTION_SCHEDULED;
3868         }
3869 }
3870
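/* Convert the interface to tunnel mode, program the first VxLAN UDP port on
 * the adapter's list into HW and advertise UDP-tunnel offload features on
 * the netdev.
 */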
3871 static int be_enable_vxlan_offloads(struct be_adapter *adapter)
3872 {
3873         struct net_device *netdev = adapter->netdev;
3874         struct device *dev = &adapter->pdev->dev;
3875         struct be_vxlan_port *vxlan_port;
3876         __be16 port;
3877         int status;
3878
3879         vxlan_port = list_first_entry(&adapter->vxlan_port_list,
3880                                       struct be_vxlan_port, list);
3881         port = vxlan_port->port;
3882
3883         status = be_cmd_manage_iface(adapter, adapter->if_handle,
3884                                      OP_CONVERT_NORMAL_TO_TUNNEL);
3885         if (status) {
3886                 dev_warn(dev, "Failed to convert normal interface to tunnel\n");
3887                 return status;
3888         }
3889         adapter->flags |= BE_FLAGS_VXLAN_OFFLOADS;
3890
3891         status = be_cmd_set_vxlan_port(adapter, port);
3892         if (status) {
3893                 dev_warn(dev, "Failed to add VxLAN port\n");
3894                 return status;
3895         }
3896         adapter->vxlan_port = port;
3897
3898         netdev->hw_enc_features |= NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
3899                                    NETIF_F_TSO | NETIF_F_TSO6 |
3900                                    NETIF_F_GSO_UDP_TUNNEL;
3901         netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL;
3902         netdev->features |= NETIF_F_GSO_UDP_TUNNEL;
3903
3904         dev_info(dev, "Enabled VxLAN offloads for UDP port %d\n",
3905                  be16_to_cpu(port));
3906         return 0;
3907 }
3908
3909 static void be_disable_vxlan_offloads(struct be_adapter *adapter)
3910 {
3911         struct net_device *netdev = adapter->netdev;
3912
3913         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS)
3914                 be_cmd_manage_iface(adapter, adapter->if_handle,
3915                                     OP_CONVERT_TUNNEL_TO_NORMAL);
3916
3917         if (adapter->vxlan_port)
3918                 be_cmd_set_vxlan_port(adapter, 0);
3919
3920         adapter->flags &= ~BE_FLAGS_VXLAN_OFFLOADS;
3921         adapter->vxlan_port = 0;
3922
3923         netdev->hw_enc_features = 0;
3924         netdev->hw_features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3925         netdev->features &= ~(NETIF_F_GSO_UDP_TUNNEL);
3926 }
3927
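/* Compute the per-VF resource limits (queues, MACs, VLANs, interfaces, MCC
 * count) by dividing the PF pool among the PF and num_vfs VFs, subject to
 * the RSS-table limit and to which fields the FW reports as modifiable.
 */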
3928 static void be_calculate_vf_res(struct be_adapter *adapter, u16 num_vfs,
3929                                 struct be_resources *vft_res)
3930 {
3931         struct be_resources res = adapter->pool_res;
3932         u32 vf_if_cap_flags = res.vf_if_cap_flags;
3933         struct be_resources res_mod = {0};
3934         u16 num_vf_qs = 1;
3935
3936         /* Distribute the queue resources among the PF and its VFs */
3937         if (num_vfs) {
3938                 /* Divide the rx queues evenly among the VFs and the PF, capped
3939                  * at VF-EQ-count. Any remainder queues belong to the PF.
3940                  */
3941                 num_vf_qs = min(SH_VF_MAX_NIC_EQS,
3942                                 res.max_rss_qs / (num_vfs + 1));
3943
3944                 /* Skyhawk-R chip supports only MAX_PORT_RSS_TABLES
3945                  * RSS Tables per port. Provide RSS on VFs only if the number of
3946                  * VFs requested is less than its PF pool's RSS Tables limit.
3947                  */
3948                 if (num_vfs >= be_max_pf_pool_rss_tables(adapter))
3949                         num_vf_qs = 1;
3950         }
3951
3952         /* GET_PROFILE_CONFIG reports fields set to all 1's for resources
3953          * that are modifiable using the SET_PROFILE_CONFIG cmd.
3954          */
3955         be_cmd_get_profile_config(adapter, &res_mod, NULL, ACTIVE_PROFILE_TYPE,
3956                                   RESOURCE_MODIFIABLE, 0);
3957
3958         /* If RSS IFACE capability flags are modifiable for a VF, set the
3959          * capability flag as valid and set RSS and DEFQ_RSS IFACE flags if
3960          * more than 1 RSSQ is available for a VF.
3961          * Otherwise, provision only 1 queue pair for VF.
3962          */
3963         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_RSS) {
3964                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3965                 if (num_vf_qs > 1) {
3966                         vf_if_cap_flags |= BE_IF_FLAGS_RSS;
3967                         if (res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS)
3968                                 vf_if_cap_flags |= BE_IF_FLAGS_DEFQ_RSS;
3969                 } else {
3970                         vf_if_cap_flags &= ~(BE_IF_FLAGS_RSS |
3971                                              BE_IF_FLAGS_DEFQ_RSS);
3972                 }
3973         } else {
3974                 num_vf_qs = 1;
3975         }
3976
3977         if (res_mod.vf_if_cap_flags & BE_IF_FLAGS_VLAN_PROMISCUOUS) {
3978                 vft_res->flags |= BIT(IF_CAPS_FLAGS_VALID_SHIFT);
3979                 vf_if_cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
3980         }
3981
3982         vft_res->vf_if_cap_flags = vf_if_cap_flags;
3983         vft_res->max_rx_qs = num_vf_qs;
3984         vft_res->max_rss_qs = num_vf_qs;
3985         vft_res->max_tx_qs = res.max_tx_qs / (num_vfs + 1);
3986         vft_res->max_cq_count = res.max_cq_count / (num_vfs + 1);
3987
3988         /* Distribute unicast MACs, VLANs, IFACE count and MCCQ count equally
3989          * among the PF and its VFs, if the fields are changeable
3990          */
3991         if (res_mod.max_uc_mac == FIELD_MODIFIABLE)
3992                 vft_res->max_uc_mac = res.max_uc_mac / (num_vfs + 1);
3993
3994         if (res_mod.max_vlans == FIELD_MODIFIABLE)
3995                 vft_res->max_vlans = res.max_vlans / (num_vfs + 1);
3996
3997         if (res_mod.max_iface_count == FIELD_MODIFIABLE)
3998                 vft_res->max_iface_count = res.max_iface_count / (num_vfs + 1);
3999
4000         if (res_mod.max_mcc_count == FIELD_MODIFIABLE)
4001                 vft_res->max_mcc_count = res.max_mcc_count / (num_vfs + 1);
4002 }
4003
4004 static void be_if_destroy(struct be_adapter *adapter)
4005 {
4006         be_cmd_if_destroy(adapter, adapter->if_handle,  0);
4007
4008         kfree(adapter->pmac_id);
4009         adapter->pmac_id = NULL;
4010
4011         kfree(adapter->mc_list);
4012         adapter->mc_list = NULL;
4013
4014         kfree(adapter->uc_list);
4015         adapter->uc_list = NULL;
4016 }
4017
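/* Undo be_setup(): clear VFs, redistribute SR-IOV resources across the max
 * supported VFs, disable VxLAN offloads, destroy the interface and all
 * queues, and disable MSI-X.
 */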
4018 static int be_clear(struct be_adapter *adapter)
4019 {
4020         struct pci_dev *pdev = adapter->pdev;
4021         struct  be_resources vft_res = {0};
4022
4023         be_cancel_worker(adapter);
4024
4025         flush_workqueue(be_wq);
4026
4027         if (sriov_enabled(adapter))
4028                 be_vf_clear(adapter);
4029
4030         /* Re-configure FW to distribute resources evenly across max-supported
4031          * number of VFs, only when VFs are not already enabled.
4032          */
4033         if (skyhawk_chip(adapter) && be_physfn(adapter) &&
4034             !pci_vfs_assigned(pdev)) {
4035                 be_calculate_vf_res(adapter,
4036                                     pci_sriov_get_totalvfs(pdev),
4037                                     &vft_res);
4038                 be_cmd_set_sriov_config(adapter, adapter->pool_res,
4039                                         pci_sriov_get_totalvfs(pdev),
4040                                         &vft_res);
4041         }
4042
4043         be_disable_vxlan_offloads(adapter);
4044
4045         be_if_destroy(adapter);
4046
4047         be_clear_queues(adapter);
4048
4049         be_msix_disable(adapter);
4050         adapter->flags &= ~BE_FLAGS_SETUP_DONE;
4051         return 0;
4052 }
4053
4054 static int be_vfs_if_create(struct be_adapter *adapter)
4055 {
4056         struct be_resources res = {0};
4057         u32 cap_flags, en_flags, vf;
4058         struct be_vf_cfg *vf_cfg;
4059         int status;
4060
4061         /* If a FW profile exists, then cap_flags are updated */
4062         cap_flags = BE_VF_IF_EN_FLAGS;
4063
4064         for_all_vfs(adapter, vf_cfg, vf) {
4065                 if (!BE3_chip(adapter)) {
4066                         status = be_cmd_get_profile_config(adapter, &res, NULL,
4067                                                            ACTIVE_PROFILE_TYPE,
4068                                                            RESOURCE_LIMITS,
4069                                                            vf + 1);
4070                         if (!status) {
4071                                 cap_flags = res.if_cap_flags;
4072                                 /* Prevent VFs from enabling VLAN promiscuous
4073                                  * mode
4074                                  */
4075                                 cap_flags &= ~BE_IF_FLAGS_VLAN_PROMISCUOUS;
4076                         }
4077                 }
4078
4079                 /* PF should enable IF flags during proxy if_create call */
4080                 en_flags = cap_flags & BE_VF_IF_EN_FLAGS;
4081                 status = be_cmd_if_create(adapter, cap_flags, en_flags,
4082                                           &vf_cfg->if_handle, vf + 1);
4083                 if (status)
4084                         return status;
4085         }
4086
4087         return 0;
4088 }
4089
4090 static int be_vf_setup_init(struct be_adapter *adapter)
4091 {
4092         struct be_vf_cfg *vf_cfg;
4093         int vf;
4094
4095         adapter->vf_cfg = kcalloc(adapter->num_vfs, sizeof(*vf_cfg),
4096                                   GFP_KERNEL);
4097         if (!adapter->vf_cfg)
4098                 return -ENOMEM;
4099
4100         for_all_vfs(adapter, vf_cfg, vf) {
4101                 vf_cfg->if_handle = -1;
4102                 vf_cfg->pmac_id = -1;
4103         }
4104         return 0;
4105 }
4106
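/* SR-IOV VF setup: create (or query) each VF's interface and MAC, grant the
 * FILTMGMT privilege, configure QoS, spoof-check and link state, and finally
 * enable SR-IOV (plus VEB forwarding on BE3).
 */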
4107 static int be_vf_setup(struct be_adapter *adapter)
4108 {
4109         struct device *dev = &adapter->pdev->dev;
4110         struct be_vf_cfg *vf_cfg;
4111         int status, old_vfs, vf;
4112         bool spoofchk;
4113
4114         old_vfs = pci_num_vf(adapter->pdev);
4115
4116         status = be_vf_setup_init(adapter);
4117         if (status)
4118                 goto err;
4119
4120         if (old_vfs) {
4121                 for_all_vfs(adapter, vf_cfg, vf) {
4122                         status = be_cmd_get_if_id(adapter, vf_cfg, vf);
4123                         if (status)
4124                                 goto err;
4125                 }
4126
4127                 status = be_vfs_mac_query(adapter);
4128                 if (status)
4129                         goto err;
4130         } else {
4131                 status = be_vfs_if_create(adapter);
4132                 if (status)
4133                         goto err;
4134
4135                 status = be_vf_eth_addr_config(adapter);
4136                 if (status)
4137                         goto err;
4138         }
4139
4140         for_all_vfs(adapter, vf_cfg, vf) {
4141                 /* Allow VFs to program MAC/VLAN filters */
4142                 status = be_cmd_get_fn_privileges(adapter, &vf_cfg->privileges,
4143                                                   vf + 1);
4144                 if (!status && !(vf_cfg->privileges & BE_PRIV_FILTMGMT)) {
4145                         status = be_cmd_set_fn_privileges(adapter,
4146                                                           vf_cfg->privileges |
4147                                                           BE_PRIV_FILTMGMT,
4148                                                           vf + 1);
4149                         if (!status) {
4150                                 vf_cfg->privileges |= BE_PRIV_FILTMGMT;
4151                                 dev_info(dev, "VF%d has FILTMGMT privilege\n",
4152                                          vf);
4153                         }
4154                 }
4155
4156                 /* Allow full available bandwidth */
4157                 if (!old_vfs)
4158                         be_cmd_config_qos(adapter, 0, 0, vf + 1);
4159
4160                 status = be_cmd_get_hsw_config(adapter, NULL, vf + 1,
4161                                                vf_cfg->if_handle, NULL,
4162                                                &spoofchk);
4163                 if (!status)
4164                         vf_cfg->spoofchk = spoofchk;
4165
4166                 if (!old_vfs) {
4167                         be_cmd_enable_vf(adapter, vf + 1);
4168                         be_cmd_set_logical_link_config(adapter,
4169                                                        IFLA_VF_LINK_STATE_AUTO,
4170                                                        vf+1);
4171                 }
4172         }
4173
4174         if (!old_vfs) {
4175                 status = pci_enable_sriov(adapter->pdev, adapter->num_vfs);
4176                 if (status) {
4177                         dev_err(dev, "SRIOV enable failed\n");
4178                         adapter->num_vfs = 0;
4179                         goto err;
4180                 }
4181         }
4182
4183         if (BE3_chip(adapter)) {
4184                 /* On BE3, enable VEB only when SRIOV is enabled */
4185                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4186                                                adapter->if_handle,
4187                                                PORT_FWD_TYPE_VEB, 0);
4188                 if (status)
4189                         goto err;
4190         }
4191
4192         adapter->flags |= BE_FLAGS_SRIOV_ENABLED;
4193         return 0;
4194 err:
4195         dev_err(dev, "VF setup failed\n");
4196         be_vf_clear(adapter);
4197         return status;
4198 }
4199
4200 /* Converting function_mode bits on BE3 to SH mc_type enums */
4201
4202 static u8 be_convert_mc_type(u32 function_mode)
4203 {
4204         if (function_mode & VNIC_MODE && function_mode & QNQ_MODE)
4205                 return vNIC1;
4206         else if (function_mode & QNQ_MODE)
4207                 return FLEX10;
4208         else if (function_mode & VNIC_MODE)
4209                 return vNIC2;
4210         else if (function_mode & UMC_ENABLED)
4211                 return UMC;
4212         else
4213                 return MC_NONE;
4214 }
4215
4216 /* On BE2/BE3 the FW does not report the supported limits */
4217 static void BEx_get_resources(struct be_adapter *adapter,
4218                               struct be_resources *res)
4219 {
4220         bool use_sriov = adapter->num_vfs ? 1 : 0;
4221
4222         if (be_physfn(adapter))
4223                 res->max_uc_mac = BE_UC_PMAC_COUNT;
4224         else
4225                 res->max_uc_mac = BE_VF_UC_PMAC_COUNT;
4226
4227         adapter->mc_type = be_convert_mc_type(adapter->function_mode);
4228
4229         if (be_is_mc(adapter)) {
4230                 /* Assuming that there are 4 channels per port,
4231                  * when multi-channel is enabled
4232                  */
4233                 if (be_is_qnq_mode(adapter))
4234                         res->max_vlans = BE_NUM_VLANS_SUPPORTED/8;
4235                 else
4236                         /* In a non-qnq multichannel mode, the pvid
4237                          * takes up one vlan entry
4238                          */
4239                         res->max_vlans = (BE_NUM_VLANS_SUPPORTED / 4) - 1;
4240         } else {
4241                 res->max_vlans = BE_NUM_VLANS_SUPPORTED;
4242         }
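
        /* Worked example of the limits above (values assumed for illustration;
         * the real BE_NUM_VLANS_SUPPORTED comes from the driver headers): with
         * 64 supported VLAN entries, a qnq multi-channel function gets
         * 64 / 8 = 8 entries, a non-qnq multi-channel function gets
         * 64 / 4 - 1 = 15 (the pvid consumes one entry), and a
         * non-multi-channel function keeps all 64.
         */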
4243
4244         res->max_mcast_mac = BE_MAX_MC;
4245
4246         /* 1) For BE3 1Gb ports, FW does not support multiple TXQs
4247          * 2) Create multiple TX rings on a BE3-R multi-channel interface
4248          *    *only* if it is RSS-capable.
4249          */
4250         if (BE2_chip(adapter) || use_sriov || (adapter->port_num > 1) ||
4251             be_virtfn(adapter) ||
4252             (be_is_mc(adapter) &&
4253              !(adapter->function_caps & BE_FUNCTION_CAPS_RSS))) {
4254                 res->max_tx_qs = 1;
4255         } else if (adapter->function_caps & BE_FUNCTION_CAPS_SUPER_NIC) {
4256                 struct be_resources super_nic_res = {0};
4257
4258                 /* On a SuperNIC profile, the driver needs to use the
4259                  * GET_PROFILE_CONFIG cmd to query the per-function TXQ limits
4260                  */
4261                 be_cmd_get_profile_config(adapter, &super_nic_res, NULL,
4262                                           ACTIVE_PROFILE_TYPE, RESOURCE_LIMITS,
4263                                           0);
4264                 /* Some old versions of BE3 FW don't report max_tx_qs value */
4265                 res->max_tx_qs = super_nic_res.max_tx_qs ? : BE3_MAX_TX_QS;
4266         } else {
4267                 res->max_tx_qs = BE3_MAX_TX_QS;
4268         }
4269
4270         if ((adapter->function_caps & BE_FUNCTION_CAPS_RSS) &&
4271             !use_sriov && be_physfn(adapter))
4272                 res->max_rss_qs = (adapter->be3_native) ?
4273                                            BE3_MAX_RSS_QS : BE2_MAX_RSS_QS;
4274         res->max_rx_qs = res->max_rss_qs + 1;
4275
4276         if (be_physfn(adapter))
4277                 res->max_evt_qs = (be_max_vfs(adapter) > 0) ?
4278                                         BE3_SRIOV_MAX_EVT_QS : BE3_MAX_EVT_QS;
4279         else
4280                 res->max_evt_qs = 1;
4281
4282         res->if_cap_flags = BE_IF_CAP_FLAGS_WANT;
4283         res->if_cap_flags &= ~BE_IF_FLAGS_DEFQ_RSS;
4284         if (!(adapter->function_caps & BE_FUNCTION_CAPS_RSS))
4285                 res->if_cap_flags &= ~BE_IF_FLAGS_RSS;
4286 }
4287
4288 static void be_setup_init(struct be_adapter *adapter)
4289 {
4290         adapter->vlan_prio_bmap = 0xff;
4291         adapter->phy.link_speed = -1;
4292         adapter->if_handle = -1;
4293         adapter->be3_native = false;
4294         adapter->if_flags = 0;
4295         adapter->phy_state = BE_UNKNOWN_PHY_STATE;
4296         if (be_physfn(adapter))
4297                 adapter->cmd_privileges = MAX_PRIVILEGES;
4298         else
4299                 adapter->cmd_privileges = MIN_PRIVILEGES;
4300 }
4301
4302 /* HW supports only MAX_PORT_RSS_TABLES RSS Policy Tables per port.
4303  * However, this HW limitation is not exposed to the host via any SLI cmd.
4304  * As a result, in the case of SRIOV and in particular multi-partition configs
4305  * the driver needs to calculate a proportional share of RSS Tables per PF-pool
4306  * for distribution between the VFs. This self-imposed limit will determine the
4307  * number of VFs for which RSS can be enabled.
4308  */
4309 static void be_calculate_pf_pool_rss_tables(struct be_adapter *adapter)
4310 {
4311         struct be_port_resources port_res = {0};
4312         u8 rss_tables_on_port;
4313         u16 max_vfs = be_max_vfs(adapter);
4314
4315         be_cmd_get_profile_config(adapter, NULL, &port_res, SAVED_PROFILE_TYPE,
4316                                   RESOURCE_LIMITS, 0);
4317
4318         rss_tables_on_port = MAX_PORT_RSS_TABLES - port_res.nic_pfs;
4319
4320         /* Each PF Pool's RSS Tables limit =
4321          * PF's Max VFs / Total_Max_VFs on Port * RSS Tables on Port
4322          */
4323         adapter->pool_res.max_rss_tables =
4324                 max_vfs * rss_tables_on_port / port_res.max_vfs;
4325 }
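
/* Worked example of the calculation above (values assumed for illustration,
 * not taken from this file): with MAX_PORT_RSS_TABLES = 16 and 2 NIC PFs
 * provisioned on the port, rss_tables_on_port = 14; a PF whose pool owns 32
 * of the port's 64 VFs then gets 32 * 14 / 64 = 7 RSS tables (integer
 * division).
 */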
4326
4327 static int be_get_sriov_config(struct be_adapter *adapter)
4328 {
4329         struct be_resources res = {0};
4330         int max_vfs, old_vfs;
4331
4332         be_cmd_get_profile_config(adapter, &res, NULL, ACTIVE_PROFILE_TYPE,
4333                                   RESOURCE_LIMITS, 0);
4334
4335         /* Some old versions of BE3 FW don't report max_vfs value */
4336         if (BE3_chip(adapter) && !res.max_vfs) {
4337                 max_vfs = pci_sriov_get_totalvfs(adapter->pdev);
4338                 res.max_vfs = max_vfs > 0 ? min(MAX_VFS, max_vfs) : 0;
4339         }
4340
4341         adapter->pool_res = res;
4342
4343         /* If the VFs were not disabled during the previous unload of the
4344          * driver, then we cannot rely on the PF-pool limits for the TotalVFs
4345          * value. Instead, use the TotalVFs value stored in the pci-dev struct.
4346          */
4347         old_vfs = pci_num_vf(adapter->pdev);
4348         if (old_vfs) {
4349                 dev_info(&adapter->pdev->dev, "%d VFs are already enabled\n",
4350                          old_vfs);
4351
4352                 adapter->pool_res.max_vfs =
4353                         pci_sriov_get_totalvfs(adapter->pdev);
4354                 adapter->num_vfs = old_vfs;
4355         }
4356
4357         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4358                 be_calculate_pf_pool_rss_tables(adapter);
4359                 dev_info(&adapter->pdev->dev,
4360                          "RSS can be enabled for all VFs if num_vfs <= %d\n",
4361                          be_max_pf_pool_rss_tables(adapter));
4362         }
4363         return 0;
4364 }
4365
4366 static void be_alloc_sriov_res(struct be_adapter *adapter)
4367 {
4368         int old_vfs = pci_num_vf(adapter->pdev);
4369         struct be_resources vft_res = {0};
4370         int status;
4371
4372         be_get_sriov_config(adapter);
4373
4374         if (!old_vfs)
4375                 pci_sriov_set_totalvfs(adapter->pdev, be_max_vfs(adapter));
4376
4377         /* When the HW is in an SRIOV-capable configuration, the PF-pool
4378          * resources are given to the PF during driver load, if there are no
4379          * old VFs. This facility is not available in BE3 FW.
4380          * Also, this is done by the FW on the Lancer chip.
4381          */
4382         if (skyhawk_chip(adapter) && be_max_vfs(adapter) && !old_vfs) {
4383                 be_calculate_vf_res(adapter, 0, &vft_res);
4384                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res, 0,
4385                                                  &vft_res);
4386                 if (status)
4387                         dev_err(&adapter->pdev->dev,
4388                                 "Failed to optimize SRIOV resources\n");
4389         }
4390 }
4391
4392 static int be_get_resources(struct be_adapter *adapter)
4393 {
4394         struct device *dev = &adapter->pdev->dev;
4395         struct be_resources res = {0};
4396         int status;
4397
4398         /* For Lancer, SH etc read per-function resource limits from FW.
4399          * GET_FUNC_CONFIG returns per function guaranteed limits.
4400          * GET_PROFILE_CONFIG returns PCI-E related limits and PF-pool limits
4401          */
4402         if (BEx_chip(adapter)) {
4403                 BEx_get_resources(adapter, &res);
4404         } else {
4405                 status = be_cmd_get_func_config(adapter, &res);
4406                 if (status)
4407                         return status;
4408
4409                 /* If a default RXQ must be created, we'll use up one RSSQ */
4410                 if (res.max_rss_qs && res.max_rss_qs == res.max_rx_qs &&
4411                     !(res.if_cap_flags & BE_IF_FLAGS_DEFQ_RSS))
4412                         res.max_rss_qs -= 1;
4413         }
4414
4415         /* If RoCE is supported stash away half the EQs for RoCE */
4416         res.max_nic_evt_qs = be_roce_supported(adapter) ?
4417                                 res.max_evt_qs / 2 : res.max_evt_qs;
4418         adapter->res = res;
4419
4420         /* If FW supports RSS default queue, then skip creating non-RSS
4421          * queue for non-IP traffic.
4422          */
4423         adapter->need_def_rxq = (be_if_cap_flags(adapter) &
4424                                  BE_IF_FLAGS_DEFQ_RSS) ? 0 : 1;
4425
4426         dev_info(dev, "Max: txqs %d, rxqs %d, rss %d, eqs %d, vfs %d\n",
4427                  be_max_txqs(adapter), be_max_rxqs(adapter),
4428                  be_max_rss(adapter), be_max_nic_eqs(adapter),
4429                  be_max_vfs(adapter));
4430         dev_info(dev, "Max: uc-macs %d, mc-macs %d, vlans %d\n",
4431                  be_max_uc(adapter), be_max_mc(adapter),
4432                  be_max_vlans(adapter));
4433
4434         /* Ensure RX and TX queues are created in pairs at init time */
4435         adapter->cfg_num_rx_irqs =
4436                                 min_t(u16, netif_get_num_default_rss_queues(),
4437                                       be_max_qp_irqs(adapter));
4438         adapter->cfg_num_tx_irqs = adapter->cfg_num_rx_irqs;
4439         return 0;
4440 }
4441
4442 static int be_get_config(struct be_adapter *adapter)
4443 {
4444         int status, level;
4445         u16 profile_id;
4446
4447         status = be_cmd_get_cntl_attributes(adapter);
4448         if (status)
4449                 return status;
4450
4451         status = be_cmd_query_fw_cfg(adapter);
4452         if (status)
4453                 return status;
4454
4455         if (!lancer_chip(adapter) && be_physfn(adapter))
4456                 be_cmd_get_fat_dump_len(adapter, &adapter->fat_dump_len);
4457
4458         if (BEx_chip(adapter)) {
4459                 level = be_cmd_get_fw_log_level(adapter);
4460                 adapter->msg_enable =
4461                         level <= FW_LOG_LEVEL_DEFAULT ? NETIF_MSG_HW : 0;
4462         }
4463
4464         be_cmd_get_acpi_wol_cap(adapter);
4465         pci_enable_wake(adapter->pdev, PCI_D3hot, adapter->wol_en);
4466         pci_enable_wake(adapter->pdev, PCI_D3cold, adapter->wol_en);
4467
4468         be_cmd_query_port_name(adapter);
4469
4470         if (be_physfn(adapter)) {
4471                 status = be_cmd_get_active_profile(adapter, &profile_id);
4472                 if (!status)
4473                         dev_info(&adapter->pdev->dev,
4474                                  "Using profile 0x%x\n", profile_id);
4475         }
4476
4477         return 0;
4478 }
4479
4480 static int be_mac_setup(struct be_adapter *adapter)
4481 {
4482         u8 mac[ETH_ALEN];
4483         int status;
4484
4485         if (is_zero_ether_addr(adapter->netdev->dev_addr)) {
4486                 status = be_cmd_get_perm_mac(adapter, mac);
4487                 if (status)
4488                         return status;
4489
4490                 memcpy(adapter->netdev->dev_addr, mac, ETH_ALEN);
4491                 memcpy(adapter->netdev->perm_addr, mac, ETH_ALEN);
4492
4493                 /* Initial MAC for BE3 VFs is already programmed by PF */
4494                 if (BEx_chip(adapter) && be_virtfn(adapter))
4495                         memcpy(adapter->dev_mac, mac, ETH_ALEN);
4496         }
4497
4498         return 0;
4499 }
4500
4501 static void be_schedule_worker(struct be_adapter *adapter)
4502 {
4503         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
4504         adapter->flags |= BE_FLAGS_WORKER_SCHEDULED;
4505 }
4506
4507 static void be_destroy_err_recovery_workq(void)
4508 {
4509         if (!be_err_recovery_workq)
4510                 return;
4511
4512         flush_workqueue(be_err_recovery_workq);
4513         destroy_workqueue(be_err_recovery_workq);
4514         be_err_recovery_workq = NULL;
4515 }
4516
4517 static void be_schedule_err_detection(struct be_adapter *adapter, u32 delay)
4518 {
4519         struct be_error_recovery *err_rec = &adapter->error_recovery;
4520
4521         if (!be_err_recovery_workq)
4522                 return;
4523
4524         queue_delayed_work(be_err_recovery_workq, &err_rec->err_detection_work,
4525                            msecs_to_jiffies(delay));
4526         adapter->flags |= BE_FLAGS_ERR_DETECTION_SCHEDULED;
4527 }
4528
4529 static int be_setup_queues(struct be_adapter *adapter)
4530 {
4531         struct net_device *netdev = adapter->netdev;
4532         int status;
4533
4534         status = be_evt_queues_create(adapter);
4535         if (status)
4536                 goto err;
4537
4538         status = be_tx_qs_create(adapter);
4539         if (status)
4540                 goto err;
4541
4542         status = be_rx_cqs_create(adapter);
4543         if (status)
4544                 goto err;
4545
4546         status = be_mcc_queues_create(adapter);
4547         if (status)
4548                 goto err;
4549
4550         status = netif_set_real_num_rx_queues(netdev, adapter->num_rx_qs);
4551         if (status)
4552                 goto err;
4553
4554         status = netif_set_real_num_tx_queues(netdev, adapter->num_tx_qs);
4555         if (status)
4556                 goto err;
4557
4558         return 0;
4559 err:
4560         dev_err(&adapter->pdev->dev, "queue_setup failed\n");
4561         return status;
4562 }
4563
4564 static int be_if_create(struct be_adapter *adapter)
4565 {
4566         u32 en_flags = BE_IF_FLAGS_RSS | BE_IF_FLAGS_DEFQ_RSS;
4567         u32 cap_flags = be_if_cap_flags(adapter);
4568         int status;
4569
4570         /* alloc required memory for other filtering fields */
4571         adapter->pmac_id = kcalloc(be_max_uc(adapter),
4572                                    sizeof(*adapter->pmac_id), GFP_KERNEL);
4573         if (!adapter->pmac_id)
4574                 return -ENOMEM;
4575
4576         adapter->mc_list = kcalloc(be_max_mc(adapter),
4577                                    sizeof(*adapter->mc_list), GFP_KERNEL);
4578         if (!adapter->mc_list)
4579                 return -ENOMEM;
4580
4581         adapter->uc_list = kcalloc(be_max_uc(adapter),
4582                                    sizeof(*adapter->uc_list), GFP_KERNEL);
4583         if (!adapter->uc_list)
4584                 return -ENOMEM;
4585
4586         if (adapter->cfg_num_rx_irqs == 1)
4587                 cap_flags &= ~(BE_IF_FLAGS_DEFQ_RSS | BE_IF_FLAGS_RSS);
4588
4589         en_flags &= cap_flags;
4590         /* will enable all the needed filter flags in be_open() */
4591         status = be_cmd_if_create(adapter, be_if_cap_flags(adapter), en_flags,
4592                                   &adapter->if_handle, 0);
4593
4594         if (status)
4595                 return status;
4596
4597         return 0;
4598 }
4599
4600 int be_update_queues(struct be_adapter *adapter)
4601 {
4602         struct net_device *netdev = adapter->netdev;
4603         int status;
4604
4605         if (netif_running(netdev))
4606                 be_close(netdev);
4607
4608         be_cancel_worker(adapter);
4609
4610         /* If any vectors have been shared with RoCE we cannot re-program
4611          * the MSIx table.
4612          */
4613         if (!adapter->num_msix_roce_vec)
4614                 be_msix_disable(adapter);
4615
4616         be_clear_queues(adapter);
4617         status = be_cmd_if_destroy(adapter, adapter->if_handle, 0);
4618         if (status)
4619                 return status;
4620
4621         if (!msix_enabled(adapter)) {
4622                 status = be_msix_enable(adapter);
4623                 if (status)
4624                         return status;
4625         }
4626
4627         status = be_if_create(adapter);
4628         if (status)
4629                 return status;
4630
4631         status = be_setup_queues(adapter);
4632         if (status)
4633                 return status;
4634
4635         be_schedule_worker(adapter);
4636
4637         /*
4638          * The IF was destroyed and re-created. We need to clear
4639          * all promiscuous flags valid for the destroyed IF.
4640          * Without this, promisc mode is not restored during
4641          * be_open() because the driver thinks that it is
4642          * already enabled in HW.
4643          */
4644         adapter->if_flags &= ~BE_IF_FLAGS_ALL_PROMISCUOUS;
4645
4646         if (netif_running(netdev))
4647                 status = be_open(netdev);
4648
4649         return status;
4650 }
4651
4652 static inline int fw_major_num(const char *fw_ver)
4653 {
4654         int fw_major = 0, i;
4655
4656         i = sscanf(fw_ver, "%d.", &fw_major);
4657         if (i != 1)
4658                 return 0;
4659
4660         return fw_major;
4661 }
4662
4663 /* During error recovery, always FLR the PF.
4664  * Otherwise, FLR the PF only if no VFs are already enabled.
4665  */
4666 static bool be_reset_required(struct be_adapter *adapter)
4667 {
4668         if (be_error_recovering(adapter))
4669                 return true;
4670         else
4671                 return pci_num_vf(adapter->pdev) == 0;
4672 }
4673
4674 /* Wait for the FW to be ready and perform the required initialization */
4675 static int be_func_init(struct be_adapter *adapter)
4676 {
4677         int status;
4678
4679         status = be_fw_wait_ready(adapter);
4680         if (status)
4681                 return status;
4682
4683         /* FW is now ready; clear errors to allow cmds/doorbell */
4684         be_clear_error(adapter, BE_CLEAR_ALL);
4685
4686         if (be_reset_required(adapter)) {
4687                 status = be_cmd_reset_function(adapter);
4688                 if (status)
4689                         return status;
4690
4691                 /* Wait for interrupts to quiesce after an FLR */
4692                 msleep(100);
4693         }
4694
4695         /* Tell FW we're ready to fire cmds */
4696         status = be_cmd_fw_init(adapter);
4697         if (status)
4698                 return status;
4699
4700         /* Allow interrupts for other ULPs running on NIC function */
4701         be_intr_set(adapter, true);
4702
4703         return 0;
4704 }
4705
4706 static int be_setup(struct be_adapter *adapter)
4707 {
4708         struct device *dev = &adapter->pdev->dev;
4709         int status;
4710
4711         status = be_func_init(adapter);
4712         if (status)
4713                 return status;
4714
4715         be_setup_init(adapter);
4716
4717         if (!lancer_chip(adapter))
4718                 be_cmd_req_native_mode(adapter);
4719
4720         /* Invoke this cmd first to get pf_num and vf_num, which are needed
4721          * for issuing profile-related cmds.
4722          */
4723         if (!BEx_chip(adapter)) {
4724                 status = be_cmd_get_func_config(adapter, NULL);
4725                 if (status)
4726                         return status;
4727         }
4728
4729         status = be_get_config(adapter);
4730         if (status)
4731                 goto err;
4732
4733         if (!BE2_chip(adapter) && be_physfn(adapter))
4734                 be_alloc_sriov_res(adapter);
4735
4736         status = be_get_resources(adapter);
4737         if (status)
4738                 goto err;
4739
4740         status = be_msix_enable(adapter);
4741         if (status)
4742                 goto err;
4743
4744         /* will enable all the needed filter flags in be_open() */
4745         status = be_if_create(adapter);
4746         if (status)
4747                 goto err;
4748
4749         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
4750         rtnl_lock();
4751         status = be_setup_queues(adapter);
4752         rtnl_unlock();
4753         if (status)
4754                 goto err;
4755
4756         be_cmd_get_fn_privileges(adapter, &adapter->cmd_privileges, 0);
4757
4758         status = be_mac_setup(adapter);
4759         if (status)
4760                 goto err;
4761
4762         be_cmd_get_fw_ver(adapter);
4763         dev_info(dev, "FW version is %s\n", adapter->fw_ver);
4764
4765         if (BE2_chip(adapter) && fw_major_num(adapter->fw_ver) < 4) {
4766                 dev_err(dev, "Firmware on card is old (%s), IRQs may not work\n",
4767                         adapter->fw_ver);
4768                 dev_err(dev, "Please upgrade firmware to version >= 4.0\n");
4769         }
4770
4771         status = be_cmd_set_flow_control(adapter, adapter->tx_fc,
4772                                          adapter->rx_fc);
4773         if (status)
4774                 be_cmd_get_flow_control(adapter, &adapter->tx_fc,
4775                                         &adapter->rx_fc);
4776
4777         dev_info(&adapter->pdev->dev, "HW Flow control - TX:%d RX:%d\n",
4778                  adapter->tx_fc, adapter->rx_fc);
4779
4780         if (be_physfn(adapter))
4781                 be_cmd_set_logical_link_config(adapter,
4782                                                IFLA_VF_LINK_STATE_AUTO, 0);
4783
4784         /* The BE3 EVB echoes broadcast/multicast packets back to the PF's
4785          * vport, confusing any Linux bridge or OVS it might be connected to.
4786          * When SRIOV is not enabled, set the EVB to PASSTHRU mode, which
4787          * effectively disables the EVB.
4788          */
4789         if (BE3_chip(adapter))
4790                 be_cmd_set_hsw_config(adapter, 0, 0, adapter->if_handle,
4791                                       PORT_FWD_TYPE_PASSTHRU, 0);
4792
4793         if (adapter->num_vfs)
4794                 be_vf_setup(adapter);
4795
4796         status = be_cmd_get_phy_info(adapter);
4797         if (!status && be_pause_supported(adapter))
4798                 adapter->phy.fc_autoneg = 1;
4799
4800         if (be_physfn(adapter) && !lancer_chip(adapter))
4801                 be_cmd_set_features(adapter);
4802
4803         be_schedule_worker(adapter);
4804         adapter->flags |= BE_FLAGS_SETUP_DONE;
4805         return 0;
4806 err:
4807         be_clear(adapter);
4808         return status;
4809 }
4810
4811 #ifdef CONFIG_NET_POLL_CONTROLLER
4812 static void be_netpoll(struct net_device *netdev)
4813 {
4814         struct be_adapter *adapter = netdev_priv(netdev);
4815         struct be_eq_obj *eqo;
4816         int i;
4817
4818         for_all_evt_queues(adapter, eqo, i) {
4819                 be_eq_notify(eqo->adapter, eqo->q.id, false, true, 0, 0);
4820                 napi_schedule(&eqo->napi);
4821         }
4822 }
4823 #endif
4824
4825 int be_load_fw(struct be_adapter *adapter, u8 *fw_file)
4826 {
4827         const struct firmware *fw;
4828         int status;
4829
4830         if (!netif_running(adapter->netdev)) {
4831                 dev_err(&adapter->pdev->dev,
4832                         "Firmware load not allowed (interface is down)\n");
4833                 return -ENETDOWN;
4834         }
4835
4836         status = request_firmware(&fw, fw_file, &adapter->pdev->dev);
4837         if (status)
4838                 goto fw_exit;
4839
4840         dev_info(&adapter->pdev->dev, "Flashing firmware file %s\n", fw_file);
4841
4842         if (lancer_chip(adapter))
4843                 status = lancer_fw_download(adapter, fw);
4844         else
4845                 status = be_fw_download(adapter, fw);
4846
4847         if (!status)
4848                 be_cmd_get_fw_ver(adapter);
4849
4850 fw_exit:
4851         release_firmware(fw);
4852         return status;
4853 }
4854
4855 static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
4856                                  u16 flags)
4857 {
4858         struct be_adapter *adapter = netdev_priv(dev);
4859         struct nlattr *attr, *br_spec;
4860         int rem;
4861         int status = 0;
4862         u16 mode = 0;
4863
4864         if (!sriov_enabled(adapter))
4865                 return -EOPNOTSUPP;
4866
4867         br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC);
4868         if (!br_spec)
4869                 return -EINVAL;
4870
4871         nla_for_each_nested(attr, br_spec, rem) {
4872                 if (nla_type(attr) != IFLA_BRIDGE_MODE)
4873                         continue;
4874
4875                 if (nla_len(attr) < sizeof(mode))
4876                         return -EINVAL;
4877
4878                 mode = nla_get_u16(attr);
4879                 if (BE3_chip(adapter) && mode == BRIDGE_MODE_VEPA)
4880                         return -EOPNOTSUPP;
4881
4882                 if (mode != BRIDGE_MODE_VEPA && mode != BRIDGE_MODE_VEB)
4883                         return -EINVAL;
4884
4885                 status = be_cmd_set_hsw_config(adapter, 0, 0,
4886                                                adapter->if_handle,
4887                                                mode == BRIDGE_MODE_VEPA ?
4888                                                PORT_FWD_TYPE_VEPA :
4889                                                PORT_FWD_TYPE_VEB, 0);
4890                 if (status)
4891                         goto err;
4892
4893                 dev_info(&adapter->pdev->dev, "enabled switch mode: %s\n",
4894                          mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4895
4896                 return status;
4897         }
4898 err:
4899         dev_err(&adapter->pdev->dev, "Failed to set switch mode %s\n",
4900                 mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB");
4901
4902         return status;
4903 }
4904
4905 static int be_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq,
4906                                  struct net_device *dev, u32 filter_mask,
4907                                  int nlflags)
4908 {
4909         struct be_adapter *adapter = netdev_priv(dev);
4910         int status = 0;
4911         u8 hsw_mode;
4912
4913         /* BE and Lancer chips support VEB mode only */
4914         if (BEx_chip(adapter) || lancer_chip(adapter)) {
4915                 /* VEB is disabled in non-SR-IOV profiles on BE3/Lancer */
4916                 if (!pci_sriov_get_totalvfs(adapter->pdev))
4917                         return 0;
4918                 hsw_mode = PORT_FWD_TYPE_VEB;
4919         } else {
4920                 status = be_cmd_get_hsw_config(adapter, NULL, 0,
4921                                                adapter->if_handle, &hsw_mode,
4922                                                NULL);
4923                 if (status)
4924                         return 0;
4925
4926                 if (hsw_mode == PORT_FWD_TYPE_PASSTHRU)
4927                         return 0;
4928         }
4929
4930         return ndo_dflt_bridge_getlink(skb, pid, seq, dev,
4931                                        hsw_mode == PORT_FWD_TYPE_VEPA ?
4932                                        BRIDGE_MODE_VEPA : BRIDGE_MODE_VEB,
4933                                        0, 0, nlflags, filter_mask, NULL);
4934 }
4935
4936 static struct be_cmd_work *be_alloc_work(struct be_adapter *adapter,
4937                                          void (*func)(struct work_struct *))
4938 {
4939         struct be_cmd_work *work;
4940
4941         work = kzalloc(sizeof(*work), GFP_ATOMIC);
4942         if (!work) {
4943                 dev_err(&adapter->pdev->dev,
4944                         "be_work memory allocation failed\n");
4945                 return NULL;
4946         }
4947
4948         INIT_WORK(&work->work, func);
4949         work->adapter = adapter;
4950         return work;
4951 }
4952
4953 /* VxLAN offload Notes:
4954  *
4955  * The stack defines tunnel offload flags (hw_enc_features) for IP and doesn't
4956  * distinguish various types of transports (VxLAN, GRE, NVGRE ..). So, offload
4957  * is expected to work across all types of IP tunnels once exported. Skyhawk
4958  * supports offloads for either VxLAN or NVGRE, exclusively. So we export VxLAN
4959  * offloads in hw_enc_features only when a VxLAN port is added. If other (non
4960  * VxLAN) tunnels are configured while VxLAN offloads are enabled, offloads for
4961  * those other tunnels are unexported on the fly through ndo_features_check().
4962  *
4963  * Skyhawk supports VxLAN offloads only for one UDP dport. So, if the stack
4964  * adds more than one port, disable offloads and re-enable them again when
4965  * there's only one port left. We maintain a list of ports for this purpose.
4966  */
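
/* Book-keeping sketch for the handlers below: adding a port already present on
 * vxlan_port_list only bumps its port_aliases count; adding a second distinct
 * port disables VxLAN offloads, and they are re-enabled once deletions bring
 * the list back down to a single port.
 */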
4967 static void be_work_add_vxlan_port(struct work_struct *work)
4968 {
4969         struct be_cmd_work *cmd_work =
4970                                 container_of(work, struct be_cmd_work, work);
4971         struct be_adapter *adapter = cmd_work->adapter;
4972         struct device *dev = &adapter->pdev->dev;
4973         __be16 port = cmd_work->info.vxlan_port;
4974         struct be_vxlan_port *vxlan_port;
4975         int status;
4976
4977         /* Bump up the alias count if it is an existing port */
4978         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
4979                 if (vxlan_port->port == port) {
4980                         vxlan_port->port_aliases++;
4981                         goto done;
4982                 }
4983         }
4984
4985         /* Add a new port to our list. We don't need a lock here since port
4986          * add/delete are done only in the context of a single-threaded work
4987          * queue (be_wq).
4988          */
4989         vxlan_port = kzalloc(sizeof(*vxlan_port), GFP_KERNEL);
4990         if (!vxlan_port)
4991                 goto done;
4992
4993         vxlan_port->port = port;
4994         INIT_LIST_HEAD(&vxlan_port->list);
4995         list_add_tail(&vxlan_port->list, &adapter->vxlan_port_list);
4996         adapter->vxlan_port_count++;
4997
4998         if (adapter->flags & BE_FLAGS_VXLAN_OFFLOADS) {
4999                 dev_info(dev,
5000                          "Only one UDP port supported for VxLAN offloads\n");
5001                 dev_info(dev, "Disabling VxLAN offloads\n");
5002                 goto err;
5003         }
5004
5005         if (adapter->vxlan_port_count > 1)
5006                 goto done;
5007
5008         status = be_enable_vxlan_offloads(adapter);
5009         if (!status)
5010                 goto done;
5011
5012 err:
5013         be_disable_vxlan_offloads(adapter);
5014 done:
5015         kfree(cmd_work);
5016         return;
5017 }
5018
5019 static void be_work_del_vxlan_port(struct work_struct *work)
5020 {
5021         struct be_cmd_work *cmd_work =
5022                                 container_of(work, struct be_cmd_work, work);
5023         struct be_adapter *adapter = cmd_work->adapter;
5024         __be16 port = cmd_work->info.vxlan_port;
5025         struct be_vxlan_port *vxlan_port;
5026
5027         /* Nothing to be done if a port alias is being deleted */
5028         list_for_each_entry(vxlan_port, &adapter->vxlan_port_list, list) {
5029                 if (vxlan_port->port == port) {
5030                         if (vxlan_port->port_aliases) {
5031                                 vxlan_port->port_aliases--;
5032                                 goto done;
5033                         }
5034                         break;
5035                 }
5036         }
5037
5038         /* No port aliases left; delete the port from the list */
5039         list_del(&vxlan_port->list);
5040         adapter->vxlan_port_count--;
5041
5042         /* Disable VxLAN offload if this is the offloaded port */
5043         if (adapter->vxlan_port == vxlan_port->port) {
5044                 WARN_ON(adapter->vxlan_port_count);
5045                 be_disable_vxlan_offloads(adapter);
5046                 dev_info(&adapter->pdev->dev,
5047                          "Disabled VxLAN offloads for UDP port %d\n",
5048                          be16_to_cpu(port));
5049                 goto out;
5050         }
5051
5052         /* If only 1 port is left, re-enable VxLAN offload */
5053         if (adapter->vxlan_port_count == 1)
5054                 be_enable_vxlan_offloads(adapter);
5055
5056 out:
5057         kfree(vxlan_port);
5058 done:
5059         kfree(cmd_work);
5060 }
5061
5062 static void be_cfg_vxlan_port(struct net_device *netdev,
5063                               struct udp_tunnel_info *ti,
5064                               void (*func)(struct work_struct *))
5065 {
5066         struct be_adapter *adapter = netdev_priv(netdev);
5067         struct be_cmd_work *cmd_work;
5068
5069         if (ti->type != UDP_TUNNEL_TYPE_VXLAN)
5070                 return;
5071
5072         if (lancer_chip(adapter) || BEx_chip(adapter) || be_is_mc(adapter))
5073                 return;
5074
5075         cmd_work = be_alloc_work(adapter, func);
5076         if (cmd_work) {
5077                 cmd_work->info.vxlan_port = ti->port;
5078                 queue_work(be_wq, &cmd_work->work);
5079         }
5080 }
5081
5082 static void be_del_vxlan_port(struct net_device *netdev,
5083                               struct udp_tunnel_info *ti)
5084 {
5085         be_cfg_vxlan_port(netdev, ti, be_work_del_vxlan_port);
5086 }
5087
5088 static void be_add_vxlan_port(struct net_device *netdev,
5089                               struct udp_tunnel_info *ti)
5090 {
5091         be_cfg_vxlan_port(netdev, ti, be_work_add_vxlan_port);
5092 }
5093
5094 static netdev_features_t be_features_check(struct sk_buff *skb,
5095                                            struct net_device *dev,
5096                                            netdev_features_t features)
5097 {
5098         struct be_adapter *adapter = netdev_priv(dev);
5099         u8 l4_hdr = 0;
5100
5101         if (skb_is_gso(skb)) {
5102                 /* IPv6 TSO requests with extension hdrs are a problem
5103                  * for Lancer and BE3 HW. Disable the TSO6 feature.
5104                  */
5105                 if (!skyhawk_chip(adapter) && is_ipv6_ext_hdr(skb))
5106                         features &= ~NETIF_F_TSO6;
5107
5108                 /* Lancer cannot handle packets with an MSS less than 256.
5109                  * Disable GSO support in such cases.
5110                  */
5111                 if (lancer_chip(adapter) && skb_shinfo(skb)->gso_size < 256)
5112                         features &= ~NETIF_F_GSO_MASK;
5113         }
5114
5115         /* The code below restricts offload features for some tunneled and
5116          * Q-in-Q packets.
5117          * Offload features for normal (non-tunnel) packets are unchanged.
5118          */
5119         features = vlan_features_check(skb, features);
5120         if (!skb->encapsulation ||
5121             !(adapter->flags & BE_FLAGS_VXLAN_OFFLOADS))
5122                 return features;
5123
5124         /* It's an encapsulated packet and VxLAN offloads are enabled. We
5125          * should disable tunnel offload features if it's not a VxLAN packet,
5126          * as tunnel offloads have been enabled only for VxLAN. This is done so
5127          * that other tunneled traffic like GRE keeps working while VxLAN
5128          * offloads are configured on Skyhawk-R.
5129          */
5130         switch (vlan_get_protocol(skb)) {
5131         case htons(ETH_P_IP):
5132                 l4_hdr = ip_hdr(skb)->protocol;
5133                 break;
5134         case htons(ETH_P_IPV6):
5135                 l4_hdr = ipv6_hdr(skb)->nexthdr;
5136                 break;
5137         default:
5138                 return features;
5139         }
5140
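        /* Keep tunnel offloads only for frames that match the offloaded VxLAN
         * tunnel: the outer L4 protocol must be UDP, the inner frame must be
         * Ethernet (ETH_P_TEB) at the expected UDP + VxLAN header offset, and
         * the outer UDP dport must match the configured vxlan_port. Anything
         * else loses checksum and GSO offloads.
         */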
5141         if (l4_hdr != IPPROTO_UDP ||
5142             skb->inner_protocol_type != ENCAP_TYPE_ETHER ||
5143             skb->inner_protocol != htons(ETH_P_TEB) ||
5144             skb_inner_mac_header(skb) - skb_transport_header(skb) !=
5145                 sizeof(struct udphdr) + sizeof(struct vxlanhdr) ||
5146             !adapter->vxlan_port ||
5147             udp_hdr(skb)->dest != adapter->vxlan_port)
5148                 return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
5149
5150         return features;
5151 }
5152
5153 static int be_get_phys_port_id(struct net_device *dev,
5154                                struct netdev_phys_item_id *ppid)
5155 {
5156         int i, id_len = CNTL_SERIAL_NUM_WORDS * CNTL_SERIAL_NUM_WORD_SZ + 1;
5157         struct be_adapter *adapter = netdev_priv(dev);
5158         u8 *id;
5159
5160         if (MAX_PHYS_ITEM_ID_LEN < id_len)
5161                 return -ENOSPC;
5162
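        /* ID layout built below: byte 0 carries the 1-based HBA port number,
         * followed by the controller serial number words copied in reverse
         * word order.
         */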
5163         ppid->id[0] = adapter->hba_port_num + 1;
5164         id = &ppid->id[1];
5165         for (i = CNTL_SERIAL_NUM_WORDS - 1; i >= 0;
5166              i--, id += CNTL_SERIAL_NUM_WORD_SZ)
5167                 memcpy(id, &adapter->serial_num[i], CNTL_SERIAL_NUM_WORD_SZ);
5168
5169         ppid->id_len = id_len;
5170
5171         return 0;
5172 }
5173
5174 static void be_set_rx_mode(struct net_device *dev)
5175 {
5176         struct be_adapter *adapter = netdev_priv(dev);
5177         struct be_cmd_work *work;
5178
5179         work = be_alloc_work(adapter, be_work_set_rx_mode);
5180         if (work)
5181                 queue_work(be_wq, &work->work);
5182 }
5183
5184 static const struct net_device_ops be_netdev_ops = {
5185         .ndo_open               = be_open,
5186         .ndo_stop               = be_close,
5187         .ndo_start_xmit         = be_xmit,
5188         .ndo_set_rx_mode        = be_set_rx_mode,
5189         .ndo_set_mac_address    = be_mac_addr_set,
5190         .ndo_get_stats64        = be_get_stats64,
5191         .ndo_validate_addr      = eth_validate_addr,
5192         .ndo_vlan_rx_add_vid    = be_vlan_add_vid,
5193         .ndo_vlan_rx_kill_vid   = be_vlan_rem_vid,
5194         .ndo_set_vf_mac         = be_set_vf_mac,
5195         .ndo_set_vf_vlan        = be_set_vf_vlan,
5196         .ndo_set_vf_rate        = be_set_vf_tx_rate,
5197         .ndo_get_vf_config      = be_get_vf_config,
5198         .ndo_set_vf_link_state  = be_set_vf_link_state,
5199         .ndo_set_vf_spoofchk    = be_set_vf_spoofchk,
5200 #ifdef CONFIG_NET_POLL_CONTROLLER
5201         .ndo_poll_controller    = be_netpoll,
5202 #endif
5203         .ndo_bridge_setlink     = be_ndo_bridge_setlink,
5204         .ndo_bridge_getlink     = be_ndo_bridge_getlink,
5205         .ndo_udp_tunnel_add     = be_add_vxlan_port,
5206         .ndo_udp_tunnel_del     = be_del_vxlan_port,
5207         .ndo_features_check     = be_features_check,
5208         .ndo_get_phys_port_id   = be_get_phys_port_id,
5209 };
5210
5211 static void be_netdev_init(struct net_device *netdev)
5212 {
5213         struct be_adapter *adapter = netdev_priv(netdev);
5214
5215         netdev->hw_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5216                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_RXCSUM |
5217                 NETIF_F_HW_VLAN_CTAG_TX;
5218         if ((be_if_cap_flags(adapter) & BE_IF_FLAGS_RSS))
5219                 netdev->hw_features |= NETIF_F_RXHASH;
5220
5221         netdev->features |= netdev->hw_features |
5222                 NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_HW_VLAN_CTAG_FILTER;
5223
5224         netdev->vlan_features |= NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 |
5225                 NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
5226
5227         netdev->priv_flags |= IFF_UNICAST_FLT;
5228
5229         netdev->flags |= IFF_MULTICAST;
5230
5231         netif_set_gso_max_size(netdev, BE_MAX_GSO_SIZE - ETH_HLEN);
5232
5233         netdev->netdev_ops = &be_netdev_ops;
5234
5235         netdev->ethtool_ops = &be_ethtool_ops;
5236
5237         /* MTU range: 256 - 9000 */
5238         netdev->min_mtu = BE_MIN_MTU;
5239         netdev->max_mtu = BE_MAX_MTU;
5240 }
5241
5242 static void be_cleanup(struct be_adapter *adapter)
5243 {
5244         struct net_device *netdev = adapter->netdev;
5245
5246         rtnl_lock();
5247         netif_device_detach(netdev);
5248         if (netif_running(netdev))
5249                 be_close(netdev);
5250         rtnl_unlock();
5251
5252         be_clear(adapter);
5253 }
5254
5255 static int be_resume(struct be_adapter *adapter)
5256 {
5257         struct net_device *netdev = adapter->netdev;
5258         int status;
5259
5260         status = be_setup(adapter);
5261         if (status)
5262                 return status;
5263
5264         rtnl_lock();
5265         if (netif_running(netdev))
5266                 status = be_open(netdev);
5267         rtnl_unlock();
5268
5269         if (status)
5270                 return status;
5271
5272         netif_device_attach(netdev);
5273
5274         return 0;
5275 }
5276
5277 static void be_soft_reset(struct be_adapter *adapter)
5278 {
5279         u32 val;
5280
5281         dev_info(&adapter->pdev->dev, "Initiating chip soft reset\n");
5282         val = ioread32(adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5283         val |= SLIPORT_SOFTRESET_SR_MASK;
5284         iowrite32(val, adapter->pcicfg + SLIPORT_SOFTRESET_OFFSET);
5285 }
5286
5287 static bool be_err_is_recoverable(struct be_adapter *adapter)
5288 {
5289         struct be_error_recovery *err_rec = &adapter->error_recovery;
5290         unsigned long initial_idle_time =
5291                 msecs_to_jiffies(ERR_RECOVERY_IDLE_TIME);
5292         unsigned long recovery_interval =
5293                 msecs_to_jiffies(ERR_RECOVERY_INTERVAL);
5294         u16 ue_err_code;
5295         u32 val;
5296
5297         val = be_POST_stage_get(adapter);
5298         if ((val & POST_STAGE_RECOVERABLE_ERR) != POST_STAGE_RECOVERABLE_ERR)
5299                 return false;
5300         ue_err_code = val & POST_ERR_RECOVERY_CODE_MASK;
5301         if (ue_err_code == 0)
5302                 return false;
5303
5304         dev_err(&adapter->pdev->dev, "Recoverable HW error code: 0x%x\n",
5305                 ue_err_code);
5306
5307         if (time_before_eq(jiffies - err_rec->probe_time, initial_idle_time)) {
5308                 dev_err(&adapter->pdev->dev,
5309                         "Cannot recover within %lu sec from driver load\n",
5310                         jiffies_to_msecs(initial_idle_time) / MSEC_PER_SEC);
5311                 return false;
5312         }
5313
5314         if (err_rec->last_recovery_time && time_before_eq(
5315                 jiffies - err_rec->last_recovery_time, recovery_interval)) {
5316                 dev_err(&adapter->pdev->dev,
5317                         "Cannot recover within %lu sec from last recovery\n",
5318                         jiffies_to_msecs(recovery_interval) / MSEC_PER_SEC);
5319                 return false;
5320         }
5321
5322         if (ue_err_code == err_rec->last_err_code) {
5323                 dev_err(&adapter->pdev->dev,
5324                         "Cannot recover from a consecutive TPE error\n");
5325                 return false;
5326         }
5327
5328         err_rec->last_recovery_time = jiffies;
5329         err_rec->last_err_code = ue_err_code;
5330         return true;
5331 }
5332
5333 static int be_tpe_recover(struct be_adapter *adapter)
5334 {
5335         struct be_error_recovery *err_rec = &adapter->error_recovery;
5336         int status = -EAGAIN;
5337         u32 val;
5338
5339         switch (err_rec->recovery_state) {
5340         case ERR_RECOVERY_ST_NONE:
5341                 err_rec->recovery_state = ERR_RECOVERY_ST_DETECT;
5342                 err_rec->resched_delay = ERR_RECOVERY_UE_DETECT_DURATION;
5343                 break;
5344
5345         case ERR_RECOVERY_ST_DETECT:
5346                 val = be_POST_stage_get(adapter);
5347                 if ((val & POST_STAGE_RECOVERABLE_ERR) !=
5348                     POST_STAGE_RECOVERABLE_ERR) {
5349                         dev_err(&adapter->pdev->dev,
5350                                 "Unrecoverable HW error detected: 0x%x\n", val);
5351                         status = -EINVAL;
5352                         err_rec->resched_delay = 0;
5353                         break;
5354                 }
5355
5356                 dev_err(&adapter->pdev->dev, "Recoverable HW error detected\n");
5357
5358                 /* Only PF0 initiates Chip Soft Reset. But PF0 must wait UE2SR
5359                  * milliseconds before it checks for final error status in
5360                  * SLIPORT_SEMAPHORE to determine if recovery criteria is met.
5361                  * If it does, then PF0 initiates a Soft Reset.
5362                  */
5363                 if (adapter->pf_num == 0) {
5364                         err_rec->recovery_state = ERR_RECOVERY_ST_RESET;
5365                         err_rec->resched_delay = err_rec->ue_to_reset_time -
5366                                         ERR_RECOVERY_UE_DETECT_DURATION;
5367                         break;
5368                 }
5369
5370                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5371                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5372                                         ERR_RECOVERY_UE_DETECT_DURATION;
5373                 break;
5374
5375         case ERR_RECOVERY_ST_RESET:
5376                 if (!be_err_is_recoverable(adapter)) {
5377                         dev_err(&adapter->pdev->dev,
5378                                 "Failed to meet recovery criteria\n");
5379                         status = -EIO;
5380                         err_rec->resched_delay = 0;
5381                         break;
5382                 }
5383                 be_soft_reset(adapter);
5384                 err_rec->recovery_state = ERR_RECOVERY_ST_PRE_POLL;
5385                 err_rec->resched_delay = err_rec->ue_to_poll_time -
5386                                         err_rec->ue_to_reset_time;
5387                 break;
5388
5389         case ERR_RECOVERY_ST_PRE_POLL:
5390                 err_rec->recovery_state = ERR_RECOVERY_ST_REINIT;
5391                 err_rec->resched_delay = 0;
5392                 status = 0;                     /* done */
5393                 break;
5394
5395         default:
5396                 status = -EINVAL;
5397                 err_rec->resched_delay = 0;
5398                 break;
5399         }
5400
5401         return status;
5402 }
5403
5404 static int be_err_recover(struct be_adapter *adapter)
5405 {
5406         int status;
5407
5408         if (!lancer_chip(adapter)) {
5409                 if (!adapter->error_recovery.recovery_supported ||
5410                     adapter->priv_flags & BE_DISABLE_TPE_RECOVERY)
5411                         return -EIO;
5412                 status = be_tpe_recover(adapter);
5413                 if (status)
5414                         goto err;
5415         }
5416
5417         /* Wait for adapter to reach quiescent state before
5418          * destroying queues
5419          */
5420         status = be_fw_wait_ready(adapter);
5421         if (status)
5422                 goto err;
5423
5424         adapter->flags |= BE_FLAGS_TRY_RECOVERY;
5425
5426         be_cleanup(adapter);
5427
5428         status = be_resume(adapter);
5429         if (status)
5430                 goto err;
5431
5432         adapter->flags &= ~BE_FLAGS_TRY_RECOVERY;
5433
5434 err:
5435         return status;
5436 }
5437
5438 static void be_err_detection_task(struct work_struct *work)
5439 {
5440         struct be_error_recovery *err_rec =
5441                         container_of(work, struct be_error_recovery,
5442                                      err_detection_work.work);
5443         struct be_adapter *adapter =
5444                         container_of(err_rec, struct be_adapter,
5445                                      error_recovery);
5446         u32 resched_delay = ERR_RECOVERY_DETECTION_DELAY;
5447         struct device *dev = &adapter->pdev->dev;
5448         int recovery_status;
5449
5450         be_detect_error(adapter);
5451         if (!be_check_error(adapter, BE_ERROR_HW))
5452                 goto reschedule_task;
5453
5454         recovery_status = be_err_recover(adapter);
5455         if (!recovery_status) {
5456                 err_rec->recovery_retries = 0;
5457                 err_rec->recovery_state = ERR_RECOVERY_ST_NONE;
5458                 dev_info(dev, "Adapter recovery successful\n");
5459                 goto reschedule_task;
5460         } else if (!lancer_chip(adapter) && err_rec->resched_delay) {
5461                 /* BEx/SH recovery state machine */
5462                 if (adapter->pf_num == 0 &&
5463                     err_rec->recovery_state > ERR_RECOVERY_ST_DETECT)
5464                         dev_err(&adapter->pdev->dev,
5465                                 "Adapter recovery in progress\n");
5466                 resched_delay = err_rec->resched_delay;
5467                 goto reschedule_task;
5468         } else if (lancer_chip(adapter) && be_virtfn(adapter)) {
5469                 /* For VFs, check every second whether the PF has
5470                  * allocated resources.
5471                  */
5472                 dev_err(dev, "Re-trying adapter recovery\n");
5473                 goto reschedule_task;
5474         } else if (lancer_chip(adapter) && err_rec->recovery_retries++ <
5475                    ERR_RECOVERY_MAX_RETRY_COUNT) {
5476                 /* In case of another error during recovery, it takes 30 sec
5477                  * for the adapter to come out of error. Retry error recovery
5478                  * after this time interval.
5479                  */
5480                 dev_err(&adapter->pdev->dev, "Re-trying adapter recovery\n");
5481                 resched_delay = ERR_RECOVERY_RETRY_DELAY;
5482                 goto reschedule_task;
5483         } else {
5484                 dev_err(dev, "Adapter recovery failed\n");
5485                 dev_err(dev, "Please reboot server to recover\n");
5486         }
5487
5488         return;
5489
5490 reschedule_task:
5491         be_schedule_err_detection(adapter, resched_delay);
5492 }
5493
5494 static void be_log_sfp_info(struct be_adapter *adapter)
5495 {
5496         int status;
5497
5498         status = be_cmd_query_sfp_info(adapter);
5499         if (!status) {
5500                 dev_err(&adapter->pdev->dev,
5501                         "Port %c: %s Vendor: %s part no: %s",
5502                         adapter->port_name,
5503                         be_misconfig_evt_port_state[adapter->phy_state],
5504                         adapter->phy.vendor_name,
5505                         adapter->phy.vendor_pn);
5506         }
5507         adapter->flags &= ~BE_FLAGS_PHY_MISCONFIGURED;
5508 }
5509
5510 static void be_worker(struct work_struct *work)
5511 {
5512         struct be_adapter *adapter =
5513                 container_of(work, struct be_adapter, work.work);
5514         struct be_rx_obj *rxo;
5515         int i;
5516
5517         if (be_physfn(adapter) &&
5518             MODULO(adapter->work_counter, adapter->be_get_temp_freq) == 0)
5519                 be_cmd_get_die_temperature(adapter);
5520
5521         /* when interrupts are not yet enabled, just reap any pending
5522          * mcc completions
5523          */
5524         if (!netif_running(adapter->netdev)) {
5525                 local_bh_disable();
5526                 be_process_mcc(adapter);
5527                 local_bh_enable();
5528                 goto reschedule;
5529         }
5530
5531         if (!adapter->stats_cmd_sent) {
5532                 if (lancer_chip(adapter))
5533                         lancer_cmd_get_pport_stats(adapter,
5534                                                    &adapter->stats_cmd);
5535                 else
5536                         be_cmd_get_stats(adapter, &adapter->stats_cmd);
5537         }
5538
5539         for_all_rx_queues(adapter, rxo, i) {
5540                 /* Replenish RX-queues starved due to memory
5541                  * allocation failures.
5542                  */
5543                 if (rxo->rx_post_starved)
5544                         be_post_rx_frags(rxo, GFP_KERNEL, MAX_RX_POST);
5545         }
5546
5547         /* EQ-delay update for Skyhawk is done while notifying EQ */
5548         if (!skyhawk_chip(adapter))
5549                 be_eqd_update(adapter, false);
5550
5551         if (adapter->flags & BE_FLAGS_PHY_MISCONFIGURED)
5552                 be_log_sfp_info(adapter);
5553
5554 reschedule:
5555         adapter->work_counter++;
5556         queue_delayed_work(be_wq, &adapter->work, msecs_to_jiffies(1000));
5557 }
5558
5559 static void be_unmap_pci_bars(struct be_adapter *adapter)
5560 {
5561         if (adapter->csr)
5562                 pci_iounmap(adapter->pdev, adapter->csr);
5563         if (adapter->db)
5564                 pci_iounmap(adapter->pdev, adapter->db);
5565         if (adapter->pcicfg && adapter->pcicfg_mapped)
5566                 pci_iounmap(adapter->pdev, adapter->pcicfg);
5567 }
5568
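/* Doorbell BAR selection: BAR 0 on Lancer and on VFs, BAR 4 otherwise */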
5569 static int db_bar(struct be_adapter *adapter)
5570 {
5571         if (lancer_chip(adapter) || be_virtfn(adapter))
5572                 return 0;
5573         else
5574                 return 4;
5575 }
5576
5577 static int be_roce_map_pci_bars(struct be_adapter *adapter)
5578 {
5579         if (skyhawk_chip(adapter)) {
5580                 adapter->roce_db.size = 4096;
5581                 adapter->roce_db.io_addr = pci_resource_start(adapter->pdev,
5582                                                               db_bar(adapter));
5583                 adapter->roce_db.total_size = pci_resource_len(adapter->pdev,
5584                                                                db_bar(adapter));
5585         }
5586         return 0;
5587 }
5588
5589 static int be_map_pci_bars(struct be_adapter *adapter)
5590 {
5591         struct pci_dev *pdev = adapter->pdev;
5592         u8 __iomem *addr;
5593         u32 sli_intf;
5594
5595         pci_read_config_dword(adapter->pdev, SLI_INTF_REG_OFFSET, &sli_intf);
5596         adapter->sli_family = (sli_intf & SLI_INTF_FAMILY_MASK) >>
5597                                 SLI_INTF_FAMILY_SHIFT;
5598         adapter->virtfn = (sli_intf & SLI_INTF_FT_MASK) ? 1 : 0;
5599
5600         if (BEx_chip(adapter) && be_physfn(adapter)) {
5601                 adapter->csr = pci_iomap(pdev, 2, 0);
5602                 if (!adapter->csr)
5603                         return -ENOMEM;
5604         }
5605
5606         addr = pci_iomap(pdev, db_bar(adapter), 0);
5607         if (!addr)
5608                 goto pci_map_err;
5609         adapter->db = addr;
5610
5611         if (skyhawk_chip(adapter) || BEx_chip(adapter)) {
5612                 if (be_physfn(adapter)) {
5613                         /* PCICFG is the 2nd BAR in BE2 */
5614                         addr = pci_iomap(pdev, BE2_chip(adapter) ? 1 : 0, 0);
5615                         if (!addr)
5616                                 goto pci_map_err;
5617                         adapter->pcicfg = addr;
5618                         adapter->pcicfg_mapped = true;
5619                 } else {
5620                         adapter->pcicfg = adapter->db + SRIOV_VF_PCICFG_OFFSET;
5621                         adapter->pcicfg_mapped = false;
5622                 }
5623         }
5624
5625         be_roce_map_pci_bars(adapter);
5626         return 0;
5627
5628 pci_map_err:
5629         dev_err(&pdev->dev, "Error in mapping PCI BARs\n");
5630         be_unmap_pci_bars(adapter);
5631         return -ENOMEM;
5632 }
5633
5634 static void be_drv_cleanup(struct be_adapter *adapter)
5635 {
5636         struct be_dma_mem *mem = &adapter->mbox_mem_alloced;
5637         struct device *dev = &adapter->pdev->dev;
5638
5639         if (mem->va)
5640                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5641
5642         mem = &adapter->rx_filter;
5643         if (mem->va)
5644                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5645
5646         mem = &adapter->stats_cmd;
5647         if (mem->va)
5648                 dma_free_coherent(dev, mem->size, mem->va, mem->dma);
5649 }
5650
5651 /* Allocate and initialize various fields in be_adapter struct */
5652 static int be_drv_init(struct be_adapter *adapter)
5653 {
5654         struct be_dma_mem *mbox_mem_alloc = &adapter->mbox_mem_alloced;
5655         struct be_dma_mem *mbox_mem_align = &adapter->mbox_mem;
5656         struct be_dma_mem *rx_filter = &adapter->rx_filter;
5657         struct be_dma_mem *stats_cmd = &adapter->stats_cmd;
5658         struct device *dev = &adapter->pdev->dev;
5659         int status = 0;
5660
5661         mbox_mem_alloc->size = sizeof(struct be_mcc_mailbox) + 16;
5662         mbox_mem_alloc->va = dma_zalloc_coherent(dev, mbox_mem_alloc->size,
5663                                                  &mbox_mem_alloc->dma,
5664                                                  GFP_KERNEL);
5665         if (!mbox_mem_alloc->va)
5666                 return -ENOMEM;
5667
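        /* The mailbox buffer was over-allocated by 16 bytes above so that the
         * virtual and DMA addresses actually used for mailbox cmds can be
         * rounded up to a 16-byte boundary here.
         */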
5668         mbox_mem_align->size = sizeof(struct be_mcc_mailbox);
5669         mbox_mem_align->va = PTR_ALIGN(mbox_mem_alloc->va, 16);
5670         mbox_mem_align->dma = PTR_ALIGN(mbox_mem_alloc->dma, 16);
5671
5672         rx_filter->size = sizeof(struct be_cmd_req_rx_filter);
5673         rx_filter->va = dma_zalloc_coherent(dev, rx_filter->size,
5674                                             &rx_filter->dma, GFP_KERNEL);
5675         if (!rx_filter->va) {
5676                 status = -ENOMEM;
5677                 goto free_mbox;
5678         }
5679
5680         if (lancer_chip(adapter))
5681                 stats_cmd->size = sizeof(struct lancer_cmd_req_pport_stats);
5682         else if (BE2_chip(adapter))
5683                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v0);
5684         else if (BE3_chip(adapter))
5685                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v1);
5686         else
5687                 stats_cmd->size = sizeof(struct be_cmd_req_get_stats_v2);
5688         stats_cmd->va = dma_zalloc_coherent(dev, stats_cmd->size,
5689                                             &stats_cmd->dma, GFP_KERNEL);
5690         if (!stats_cmd->va) {
5691                 status = -ENOMEM;
5692                 goto free_rx_filter;
5693         }
5694
5695         mutex_init(&adapter->mbox_lock);
5696         mutex_init(&adapter->mcc_lock);
5697         mutex_init(&adapter->rx_filter_lock);
5698         spin_lock_init(&adapter->mcc_cq_lock);
5699         init_completion(&adapter->et_cmd_compl);
5700
5701         pci_save_state(adapter->pdev);
5702
5703         INIT_DELAYED_WORK(&adapter->work, be_worker);
5704
5705         adapter->error_recovery.recovery_state = ERR_RECOVERY_ST_NONE;
5706         adapter->error_recovery.resched_delay = 0;
5707         INIT_DELAYED_WORK(&adapter->error_recovery.err_detection_work,
5708                           be_err_detection_task);
5709
5710         adapter->rx_fc = true;
5711         adapter->tx_fc = true;
5712
5713         /* Must be a power of 2 or else MODULO will BUG_ON */
5714         adapter->be_get_temp_freq = 64;
5715
5716         INIT_LIST_HEAD(&adapter->vxlan_port_list);
5717         return 0;
5718
5719 free_rx_filter:
5720         dma_free_coherent(dev, rx_filter->size, rx_filter->va, rx_filter->dma);
5721 free_mbox:
5722         dma_free_coherent(dev, mbox_mem_alloc->size, mbox_mem_alloc->va,
5723                           mbox_mem_alloc->dma);
5724         return status;
5725 }
5726
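/* Annotation: teardown mirrors probe in reverse - detach RoCE, disable
 * interrupts, cancel error detection, unregister the netdev, then be_clear().
 * The function-level FW reset is skipped while VFs are still assigned to
 * guests, and be_cmd_fw_clean() tells FW that no more commands will follow.
 */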
5727 static void be_remove(struct pci_dev *pdev)
5728 {
5729         struct be_adapter *adapter = pci_get_drvdata(pdev);
5730
5731         if (!adapter)
5732                 return;
5733
5734         be_roce_dev_remove(adapter);
5735         be_intr_set(adapter, false);
5736
5737         be_cancel_err_detection(adapter);
5738
5739         unregister_netdev(adapter->netdev);
5740
5741         be_clear(adapter);
5742
5743         if (!pci_vfs_assigned(adapter->pdev))
5744                 be_cmd_reset_function(adapter);
5745
5746         /* Tell FW we're done issuing cmds */
5747         be_cmd_fw_clean(adapter);
5748
5749         be_unmap_pci_bars(adapter);
5750         be_drv_cleanup(adapter);
5751
5752         pci_disable_pcie_error_reporting(pdev);
5753
5754         pci_release_regions(pdev);
5755         pci_disable_device(pdev);
5756
5757         free_netdev(adapter->netdev);
5758 }
5759
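/* hwmon "temp1_input" show callback: reports the cached on-die temperature
 * in millidegrees Celsius, or -EIO while no valid reading is available.
 */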
5760 static ssize_t be_hwmon_show_temp(struct device *dev,
5761                                   struct device_attribute *dev_attr,
5762                                   char *buf)
5763 {
5764         struct be_adapter *adapter = dev_get_drvdata(dev);
5765
5766         /* Unit: millidegree Celsius */
5767         if (adapter->hwmon_info.be_on_die_temp == BE_INVALID_DIE_TEMP)
5768                 return -EIO;
5769         else
5770                 return sprintf(buf, "%u\n",
5771                                adapter->hwmon_info.be_on_die_temp * 1000);
5772 }
5773
5774 static SENSOR_DEVICE_ATTR(temp1_input, S_IRUGO,
5775                           be_hwmon_show_temp, NULL, 1);
5776
5777 static struct attribute *be_hwmon_attrs[] = {
5778         &sensor_dev_attr_temp1_input.dev_attr.attr,
5779         NULL
5780 };
5781
5782 ATTRIBUTE_GROUPS(be_hwmon);
5783
5784 static char *mc_name(struct be_adapter *adapter)
5785 {
5786         char *str = ""; /* default */
5787
5788         switch (adapter->mc_type) {
5789         case UMC:
5790                 str = "UMC";
5791                 break;
5792         case FLEX10:
5793                 str = "FLEX10";
5794                 break;
5795         case vNIC1:
5796                 str = "vNIC-1";
5797                 break;
5798         case nPAR:
5799                 str = "nPAR";
5800                 break;
5801         case UFP:
5802                 str = "UFP";
5803                 break;
5804         case vNIC2:
5805                 str = "vNIC-2";
5806                 break;
5807         default:
5808                 str = "";
5809         }
5810
5811         return str;
5812 }
5813
5814 static inline char *func_name(struct be_adapter *adapter)
5815 {
5816         return be_physfn(adapter) ? "PF" : "VF";
5817 }
5818
5819 static inline char *nic_name(struct pci_dev *pdev)
5820 {
5821         switch (pdev->device) {
5822         case OC_DEVICE_ID1:
5823                 return OC_NAME;
5824         case OC_DEVICE_ID2:
5825                 return OC_NAME_BE;
5826         case OC_DEVICE_ID3:
5827         case OC_DEVICE_ID4:
5828                 return OC_NAME_LANCER;
5829         case BE_DEVICE_ID2:
5830                 return BE3_NAME;
5831         case OC_DEVICE_ID5:
5832         case OC_DEVICE_ID6:
5833                 return OC_NAME_SH;
5834         default:
5835                 return BE_NAME;
5836         }
5837 }
5838
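/* Annotation: probe sequence - enable the PCI device, claim its regions,
 * allocate the netdev/adapter, set a 64-bit DMA mask (falling back to
 * 32-bit), map BARs, allocate driver DMA buffers (be_drv_init), bring up
 * the function (be_setup), register the netdev, and finally start error
 * detection and, on PFs, hwmon temperature reporting.
 */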
5839 static int be_probe(struct pci_dev *pdev, const struct pci_device_id *pdev_id)
5840 {
5841         struct be_adapter *adapter;
5842         struct net_device *netdev;
5843         int status = 0;
5844
5845         dev_info(&pdev->dev, "%s version is %s\n", DRV_NAME, DRV_VER);
5846
5847         status = pci_enable_device(pdev);
5848         if (status)
5849                 goto do_none;
5850
5851         status = pci_request_regions(pdev, DRV_NAME);
5852         if (status)
5853                 goto disable_dev;
5854         pci_set_master(pdev);
5855
5856         netdev = alloc_etherdev_mqs(sizeof(*adapter), MAX_TX_QS, MAX_RX_QS);
5857         if (!netdev) {
5858                 status = -ENOMEM;
5859                 goto rel_reg;
5860         }
5861         adapter = netdev_priv(netdev);
5862         adapter->pdev = pdev;
5863         pci_set_drvdata(pdev, adapter);
5864         adapter->netdev = netdev;
5865         SET_NETDEV_DEV(netdev, &pdev->dev);
5866
5867         status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
5868         if (!status) {
5869                 netdev->features |= NETIF_F_HIGHDMA;
5870         } else {
5871                 status = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
5872                 if (status) {
5873                         dev_err(&pdev->dev, "Could not set PCI DMA Mask\n");
5874                         goto free_netdev;
5875                 }
5876         }
5877
5878         status = pci_enable_pcie_error_reporting(pdev);
5879         if (!status)
5880                 dev_info(&pdev->dev, "PCIe error reporting enabled\n");
5881
5882         status = be_map_pci_bars(adapter);
5883         if (status)
5884                 goto free_netdev;
5885
5886         status = be_drv_init(adapter);
5887         if (status)
5888                 goto unmap_bars;
5889
5890         status = be_setup(adapter);
5891         if (status)
5892                 goto drv_cleanup;
5893
5894         be_netdev_init(netdev);
5895         status = register_netdev(netdev);
5896         if (status != 0)
5897                 goto unsetup;
5898
5899         be_roce_dev_add(adapter);
5900
5901         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5902         adapter->error_recovery.probe_time = jiffies;
5903
5904         /* On-die temperature is not supported for VFs. */
5905         if (be_physfn(adapter) && IS_ENABLED(CONFIG_BE2NET_HWMON)) {
5906                 adapter->hwmon_info.hwmon_dev =
5907                         devm_hwmon_device_register_with_groups(&pdev->dev,
5908                                                                DRV_NAME,
5909                                                                adapter,
5910                                                                be_hwmon_groups);
5911                 adapter->hwmon_info.be_on_die_temp = BE_INVALID_DIE_TEMP;
5912         }
5913
5914         dev_info(&pdev->dev, "%s: %s %s port %c\n", nic_name(pdev),
5915                  func_name(adapter), mc_name(adapter), adapter->port_name);
5916
5917         return 0;
5918
5919 unsetup:
5920         be_clear(adapter);
5921 drv_cleanup:
5922         be_drv_cleanup(adapter);
5923 unmap_bars:
5924         be_unmap_pci_bars(adapter);
5925 free_netdev:
5926         free_netdev(netdev);
5927 rel_reg:
5928         pci_release_regions(pdev);
5929 disable_dev:
5930         pci_disable_device(pdev);
5931 do_none:
5932         dev_err(&pdev->dev, "%s initialization failed\n", nic_name(pdev));
5933         return status;
5934 }
5935
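/* Legacy PCI power-management hooks: suspend quiesces the function via
 * be_cleanup() and powers the device down; resume re-enables it, restores
 * config space and rebuilds driver state via be_resume().
 */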
5936 static int be_suspend(struct pci_dev *pdev, pm_message_t state)
5937 {
5938         struct be_adapter *adapter = pci_get_drvdata(pdev);
5939
5940         be_intr_set(adapter, false);
5941         be_cancel_err_detection(adapter);
5942
5943         be_cleanup(adapter);
5944
5945         pci_save_state(pdev);
5946         pci_disable_device(pdev);
5947         pci_set_power_state(pdev, pci_choose_state(pdev, state));
5948         return 0;
5949 }
5950
5951 static int be_pci_resume(struct pci_dev *pdev)
5952 {
5953         struct be_adapter *adapter = pci_get_drvdata(pdev);
5954         int status = 0;
5955
5956         status = pci_enable_device(pdev);
5957         if (status)
5958                 return status;
5959
5960         pci_restore_state(pdev);
5961
5962         status = be_resume(adapter);
5963         if (status)
5964                 return status;
5965
5966         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
5967
5968         return 0;
5969 }
5970
5971 /*
5972  * An FLR will stop BE from DMAing any data.
5973  */
5974 static void be_shutdown(struct pci_dev *pdev)
5975 {
5976         struct be_adapter *adapter = pci_get_drvdata(pdev);
5977
5978         if (!adapter)
5979                 return;
5980
5981         be_roce_dev_shutdown(adapter);
5982         cancel_delayed_work_sync(&adapter->work);
5983         be_cancel_err_detection(adapter);
5984
5985         netif_device_detach(adapter->netdev);
5986
5987         be_cmd_reset_function(adapter);
5988
5989         pci_disable_device(pdev);
5990 }
5991
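/* AER/EEH flow: error_detected() quiesces the function and asks for a slot
 * reset (or disconnects on permanent failure), slot_reset() re-enables the
 * device and waits for FW readiness, and resume() rebuilds the function and
 * restarts error detection.
 */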
5992 static pci_ers_result_t be_eeh_err_detected(struct pci_dev *pdev,
5993                                             pci_channel_state_t state)
5994 {
5995         struct be_adapter *adapter = pci_get_drvdata(pdev);
5996
5997         dev_err(&adapter->pdev->dev, "EEH error detected\n");
5998
5999         be_roce_dev_remove(adapter);
6000
6001         if (!be_check_error(adapter, BE_ERROR_EEH)) {
6002                 be_set_error(adapter, BE_ERROR_EEH);
6003
6004                 be_cancel_err_detection(adapter);
6005
6006                 be_cleanup(adapter);
6007         }
6008
6009         if (state == pci_channel_io_perm_failure)
6010                 return PCI_ERS_RESULT_DISCONNECT;
6011
6012         pci_disable_device(pdev);
6013
6014         /* The error could cause the FW to trigger a flash debug dump.
6015          * Resetting the card while a flash dump is in progress can cause
6016          * it not to recover; wait for the dump to finish.
6017          * Wait only on the first function, as this is needed only once
6018          * per adapter.
6019          */
6020         if (pdev->devfn == 0)
6021                 ssleep(30);
6022
6023         return PCI_ERS_RESULT_NEED_RESET;
6024 }
6025
6026 static pci_ers_result_t be_eeh_reset(struct pci_dev *pdev)
6027 {
6028         struct be_adapter *adapter = pci_get_drvdata(pdev);
6029         int status;
6030
6031         dev_info(&adapter->pdev->dev, "EEH reset\n");
6032
6033         status = pci_enable_device(pdev);
6034         if (status)
6035                 return PCI_ERS_RESULT_DISCONNECT;
6036
6037         pci_set_master(pdev);
6038         pci_restore_state(pdev);
6039
6040         /* Check if card is ok and fw is ready */
6041         dev_info(&adapter->pdev->dev,
6042                  "Waiting for FW to be ready after EEH reset\n");
6043         status = be_fw_wait_ready(adapter);
6044         if (status)
6045                 return PCI_ERS_RESULT_DISCONNECT;
6046
6047         pci_cleanup_aer_uncorrect_error_status(pdev);
6048         be_clear_error(adapter, BE_CLEAR_ALL);
6049         return PCI_ERS_RESULT_RECOVERED;
6050 }
6051
6052 static void be_eeh_resume(struct pci_dev *pdev)
6053 {
6054         int status = 0;
6055         struct be_adapter *adapter = pci_get_drvdata(pdev);
6056
6057         dev_info(&adapter->pdev->dev, "EEH resume\n");
6058
6059         pci_save_state(pdev);
6060
6061         status = be_resume(adapter);
6062         if (status)
6063                 goto err;
6064
6065         be_roce_dev_add(adapter);
6066
6067         be_schedule_err_detection(adapter, ERR_DETECTION_DELAY);
6068         return;
6069 err:
6070         dev_err(&adapter->pdev->dev, "EEH resume failed\n");
6071 }
6072
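/* sriov_configure hook (sysfs sriov_numvfs): num_vfs == 0 tears down VFs;
 * a non-zero value re-sizes the VF resource pools (Skyhawk only), refreshes
 * resources, updates the queue counts under rtnl_lock and then creates the
 * VFs. Returns the number of VFs enabled on success.
 */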
6073 static int be_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
6074 {
6075         struct be_adapter *adapter = pci_get_drvdata(pdev);
6076         struct be_resources vft_res = {0};
6077         int status;
6078
6079         if (!num_vfs)
6080                 be_vf_clear(adapter);
6081
6082         adapter->num_vfs = num_vfs;
6083
6084         if (adapter->num_vfs == 0 && pci_vfs_assigned(pdev)) {
6085                 dev_warn(&pdev->dev,
6086                          "Cannot disable VFs while they are assigned\n");
6087                 return -EBUSY;
6088         }
6089
6090         /* When the HW is in an SR-IOV capable configuration, the PF-pool
6091          * resources are equally distributed across the maximum number of
6092          * VFs. The user may request that only a subset of the max VFs be
6093          * enabled. Based on num_vfs, redistribute the resources across
6094          * num_vfs so that each VF gets access to more resources.
6095          * BE3 FW does not provide this facility; on Lancer chips the FW
6096          * performs this redistribution itself.
6097          */
6098         if (skyhawk_chip(adapter) && !pci_num_vf(pdev)) {
6099                 be_calculate_vf_res(adapter, adapter->num_vfs,
6100                                     &vft_res);
6101                 status = be_cmd_set_sriov_config(adapter, adapter->pool_res,
6102                                                  adapter->num_vfs, &vft_res);
6103                 if (status)
6104                         dev_err(&pdev->dev,
6105                                 "Failed to optimize SR-IOV resources\n");
6106         }
6107
6108         status = be_get_resources(adapter);
6109         if (status)
6110                 return be_cmd_status(status);
6111
6112         /* Updating real_num_tx/rx_queues() requires rtnl_lock() */
6113         rtnl_lock();
6114         status = be_update_queues(adapter);
6115         rtnl_unlock();
6116         if (status)
6117                 return be_cmd_status(status);
6118
6119         if (adapter->num_vfs)
6120                 status = be_vf_setup(adapter);
6121
6122         if (!status)
6123                 return adapter->num_vfs;
6124
6125         return 0;
6126 }
6127
6128 static const struct pci_error_handlers be_eeh_handlers = {
6129         .error_detected = be_eeh_err_detected,
6130         .slot_reset = be_eeh_reset,
6131         .resume = be_eeh_resume,
6132 };
6133
6134 static struct pci_driver be_driver = {
6135         .name = DRV_NAME,
6136         .id_table = be_dev_ids,
6137         .probe = be_probe,
6138         .remove = be_remove,
6139         .suspend = be_suspend,
6140         .resume = be_pci_resume,
6141         .shutdown = be_shutdown,
6142         .sriov_configure = be_pci_sriov_configure,
6143         .err_handler = &be_eeh_handlers
6144 };
6145
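/* Module init: validate rx_frag_size, warn about the obsolete num_vfs
 * parameter, create the shared command and error-recovery workqueues and
 * register the PCI driver; module exit undoes this in reverse order.
 */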
6146 static int __init be_init_module(void)
6147 {
6148         int status;
6149
6150         if (rx_frag_size != 8192 && rx_frag_size != 4096 &&
6151             rx_frag_size != 2048) {
6152                 printk(KERN_WARNING DRV_NAME
6153                         " : Module param rx_frag_size must be 2048/4096/8192."
6154                         " Using 2048\n");
6155                 rx_frag_size = 2048;
6156         }
6157
6158         if (num_vfs > 0) {
6159                 pr_info(DRV_NAME " : Module param num_vfs is obsolete.\n");
6160                 pr_info(DRV_NAME " : Use sysfs method to enable VFs\n");
6161         }
6162
6163         be_wq = create_singlethread_workqueue("be_wq");
6164         if (!be_wq) {
6165                 pr_warn(DRV_NAME " : workqueue creation failed\n");
6166                 return -ENOMEM;
6167         }
6168
6169         be_err_recovery_workq =
6170                 create_singlethread_workqueue("be_err_recover");
6171         if (!be_err_recovery_workq)
6172                 pr_warn(DRV_NAME " : Could not create error recovery workqueue\n");
6173
6174         status = pci_register_driver(&be_driver);
6175         if (status) {
6176                 destroy_workqueue(be_wq);
6177                 be_destroy_err_recovery_workq();
6178         }
6179         return status;
6180 }
6181 module_init(be_init_module);
6182
6183 static void __exit be_exit_module(void)
6184 {
6185         pci_unregister_driver(&be_driver);
6186
6187         be_destroy_err_recovery_workq();
6188
6189         if (be_wq)
6190                 destroy_workqueue(be_wq);
6191 }
6192 module_exit(be_exit_module);