From 1738cd3ed342294360d6a74d4e58800004bff854 Mon Sep 17 00:00:00 2001 From: Netanel Belgazal Date: Wed, 10 Aug 2016 14:03:22 +0300 Subject: [PATCH 1/1] net: ena: Add a driver for Amazon Elastic Network Adapters (ENA) This is a driver for the ENA family of networking devices. Signed-off-by: Netanel Belgazal Signed-off-by: David S. Miller --- Documentation/networking/00-INDEX | 2 + Documentation/networking/ena.txt | 305 ++ MAINTAINERS | 9 + drivers/net/ethernet/Kconfig | 1 + drivers/net/ethernet/Makefile | 1 + drivers/net/ethernet/amazon/Kconfig | 27 + drivers/net/ethernet/amazon/Makefile | 5 + drivers/net/ethernet/amazon/ena/Makefile | 7 + .../net/ethernet/amazon/ena/ena_admin_defs.h | 973 +++++ drivers/net/ethernet/amazon/ena/ena_com.c | 2666 ++++++++++++++ drivers/net/ethernet/amazon/ena/ena_com.h | 1038 ++++++ .../net/ethernet/amazon/ena/ena_common_defs.h | 48 + drivers/net/ethernet/amazon/ena/ena_eth_com.c | 501 +++ drivers/net/ethernet/amazon/ena/ena_eth_com.h | 160 + .../net/ethernet/amazon/ena/ena_eth_io_defs.h | 416 +++ drivers/net/ethernet/amazon/ena/ena_ethtool.c | 895 +++++ drivers/net/ethernet/amazon/ena/ena_netdev.c | 3280 +++++++++++++++++ drivers/net/ethernet/amazon/ena/ena_netdev.h | 324 ++ .../net/ethernet/amazon/ena/ena_pci_id_tbl.h | 67 + .../net/ethernet/amazon/ena/ena_regs_defs.h | 133 + 20 files changed, 10858 insertions(+) create mode 100644 Documentation/networking/ena.txt create mode 100644 drivers/net/ethernet/amazon/Kconfig create mode 100644 drivers/net/ethernet/amazon/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/Makefile create mode 100644 drivers/net/ethernet/amazon/ena/ena_admin_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_common_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_com.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_ethtool.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.c create mode 100644 drivers/net/ethernet/amazon/ena/ena_netdev.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h create mode 100644 drivers/net/ethernet/amazon/ena/ena_regs_defs.h diff --git a/Documentation/networking/00-INDEX b/Documentation/networking/00-INDEX index 415154a487d0..a7697783ac4c 100644 --- a/Documentation/networking/00-INDEX +++ b/Documentation/networking/00-INDEX @@ -74,6 +74,8 @@ dns_resolver.txt - The DNS resolver module allows kernel servies to make DNS queries. driver.txt - Softnet driver issues. +ena.txt + - info on Amazon's Elastic Network Adapter (ENA) e100.txt - info on Intel's EtherExpress PRO/100 line of 10/100 boards e1000.txt diff --git a/Documentation/networking/ena.txt b/Documentation/networking/ena.txt new file mode 100644 index 000000000000..2b4b6f57e549 --- /dev/null +++ b/Documentation/networking/ena.txt @@ -0,0 +1,305 @@ +Linux kernel driver for Elastic Network Adapter (ENA) family: +============================================================= + +Overview: +========= +ENA is a networking interface designed to make good use of modern CPU +features and system architectures. + +The ENA device exposes a lightweight management interface with a +minimal set of memory mapped registers and extendable command set +through an Admin Queue. + +The driver supports a range of ENA devices, is link-speed independent +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has +a negotiated and extendable feature set. + +Some ENA devices support SR-IOV. This driver is used for both the +SR-IOV Physical Function (PF) and Virtual Function (VF) devices. + +ENA devices enable high speed and low overhead network traffic +processing by providing multiple Tx/Rx queue pairs (the maximum number +is advertised by the device via the Admin Queue), a dedicated MSI-X +interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, +and CPU cacheline optimized data placement. + +The ENA driver supports industry standard TCP/IP offload features such +as checksum offload and TCP transmit segmentation offload (TSO). +Receive-side scaling (RSS) is supported for multi-core scaling. + +The ENA driver and its corresponding devices implement health +monitoring mechanisms such as watchdog, enabling the device and driver +to recover in a manner transparent to the application, as well as +debug logs. + +Some of the ENA devices support a working mode called Low-latency +Queue (LLQ), which saves several more microseconds. + +Supported PCI vendor ID/device IDs: +=================================== +1d0f:0ec2 - ENA PF +1d0f:1ec2 - ENA PF with LLQ support +1d0f:ec20 - ENA VF +1d0f:ec21 - ENA VF with LLQ support + +ENA Source Code Directory Structure: +==================================== +ena_com.[ch] - Management communication layer. This layer is + responsible for the handling all the management + (admin) communication between the device and the + driver. +ena_eth_com.[ch] - Tx/Rx data path. +ena_admin_defs.h - Definition of ENA management interface. +ena_eth_io_defs.h - Definition of ENA data path interface. +ena_common_defs.h - Common definitions for ena_com layer. +ena_regs_defs.h - Definition of ENA PCI memory-mapped (MMIO) registers. +ena_netdev.[ch] - Main Linux kernel driver. +ena_syfsfs.[ch] - Sysfs files. +ena_ethtool.c - ethtool callbacks. +ena_pci_id_tbl.h - Supported device IDs. + +Management Interface: +===================== +ENA management interface is exposed by means of: +- PCIe Configuration Space +- Device Registers +- Admin Queue (AQ) and Admin Completion Queue (ACQ) +- Asynchronous Event Notification Queue (AENQ) + +ENA device MMIO Registers are accessed only during driver +initialization and are not involved in further normal device +operation. + +AQ is used for submitting management commands, and the +results/responses are reported asynchronously through ACQ. + +ENA introduces a very small set of management commands with room for +vendor-specific extensions. Most of the management operations are +framed in a generic Get/Set feature command. + +The following admin queue commands are supported: +- Create I/O submission queue +- Create I/O completion queue +- Destroy I/O submission queue +- Destroy I/O completion queue +- Get feature +- Set feature +- Configure AENQ +- Get statistics + +Refer to ena_admin_defs.h for the list of supported Get/Set Feature +properties. + +The Asynchronous Event Notification Queue (AENQ) is a uni-directional +queue used by the ENA device to send to the driver events that cannot +be reported using ACQ. AENQ events are subdivided into groups. Each +group may have multiple syndromes, as shown below + +The events are: + Group Syndrome + Link state change - X - + Fatal error - X - + Notification Suspend traffic + Notification Resume traffic + Keep-Alive - X - + +ACQ and AENQ share the same MSI-X vector. + +Keep-Alive is a special mechanism that allows monitoring of the +device's health. The driver maintains a watchdog (WD) handler which, +if fired, logs the current state and statistics then resets and +restarts the ENA device and driver. A Keep-Alive event is delivered by +the device every second. The driver re-arms the WD upon reception of a +Keep-Alive event. A missed Keep-Alive event causes the WD handler to +fire. + +Data Path Interface: +==================== +I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx +SQ correspondingly). Each SQ has a completion queue (CQ) associated +with it. + +The SQs and CQs are implemented as descriptor rings in contiguous +physical memory. + +The ENA driver supports two Queue Operation modes for Tx SQs: +- Regular mode + * In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. +- Low Latency Queue (LLQ) mode or "push-mode". + * In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. + +The Rx SQs support only the regular mode. + +Note: Not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. + +The driver supports multi-queue for both Tx and Rx. This has various +benefits: +- Reduced CPU/thread/process contention on a given Ethernet interface. +- Cache miss rate on completion is reduced, particularly for data + cache lines that hold the sk_buff structures. +- Increased process-level parallelism when handling received packets. +- Increased data cache hit rate, by steering kernel processing of + packets to the CPU, where the application thread consuming the + packet is running. +- In hardware interrupt re-direction. + +Interrupt Modes: +================ +The driver assigns a single MSI-X vector per queue pair (for both Tx +and Rx directions). The driver assigns an additional dedicated MSI-X vector +for management (for ACQ and AENQ). + +Management interrupt registration is performed when the Linux kernel +probes the adapter, and it is de-registered when the adapter is +removed. I/O queue interrupt registration is performed when the Linux +interface of the adapter is opened, and it is de-registered when the +interface is closed. + +The management interrupt is named: + ena-mgmnt@pci: +and for each queue pair, an interrupt is named: + -Tx-Rx- + +The ENA device operates in auto-mask and auto-clear interrupt +modes. That is, once MSI-X is delivered to the host, its Cause bit is +automatically cleared and the interrupt is masked. The interrupt is +unmasked by the driver after NAPI processing is complete. + +Interrupt Moderation: +===================== +ENA driver and device can operate in conventional or adaptive interrupt +moderation mode. + +In conventional mode the driver instructs device to postpone interrupt +posting according to static interrupt delay value. The interrupt delay +value can be configured through ethtool(8). The following ethtool +parameters are supported by the driver: tx-usecs, rx-usecs + +In adaptive interrupt moderation mode the interrupt delay value is +updated by the driver dynamically and adjusted every NAPI cycle +according to the traffic nature. + +By default ENA driver applies adaptive coalescing on Rx traffic and +conventional coalescing on Tx traffic. + +Adaptive coalescing can be switched on/off through ethtool(8) +adaptive_rx on|off parameter. + +The driver chooses interrupt delay value according to the number of +bytes and packets received between interrupt unmasking and interrupt +posting. The driver uses interrupt delay table that subdivides the +range of received bytes/packets into 5 levels and assigns interrupt +delay value to each level. + +The user can enable/disable adaptive moderation, modify the interrupt +delay table and restore its default values through sysfs. + +The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK +and can be configured by the ETHTOOL_STUNABLE command of the +SIOCETHTOOL ioctl. + +SKB: +The driver-allocated SKB for frames received from Rx handling using +NAPI context. The allocation method depends on the size of the packet. +If the frame length is larger than rx_copybreak, napi_get_frags() +is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer +content is copied (by CPU) to the SKB, and the buffer is recycled. + +Statistics: +=========== +The user can obtain ENA device and driver statistics using ethtool. +The driver can collect regular or extended statistics (including +per-queue stats) from the device. + +In addition the driver logs the stats to syslog upon device reset. + +MTU: +==== +The driver supports an arbitrarily large MTU with a maximum that is +negotiated with the device. The driver configures MTU using the +SetFeature command (ENA_ADMIN_MTU property). The user can change MTU +via ip(8) and similar legacy tools. + +Stateless Offloads: +=================== +The ENA driver supports: +- TSO over IPv4/IPv6 +- TSO with ECN +- IPv4 header checksum offload +- TCP/UDP over IPv4/IPv6 checksum offloads + +RSS: +==== +- The ENA device supports RSS that allows flexible Rx traffic + steering. +- Toeplitz and CRC32 hash functions are supported. +- Different combinations of L2/L3/L4 fields can be configured as + inputs for hash functions. +- The driver configures RSS settings using the AQ SetFeature command + (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties). +- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash + function delivered in the Rx CQ descriptor is set in the received + SKB. +- The user can provide a hash key, hash function, and configure the + indirection table through ethtool(8). + +DATA PATH: +========== +Tx: +--- +end_start_xmit() is called by the stack. This function does the following: +- Maps data buffers (skb->data and frags). +- Populates ena_buf for the push buffer (if the driver and device are + in push mode.) +- Prepares ENA bufs for the remaining frags. +- Allocates a new request ID from the empty req_id ring. The request + ID is the index of the packet in the Tx info. This is used for + out-of-order TX completions. +- Adds the packet to the proper place in the Tx ring. +- Calls ena_com_prepare_tx(), an ENA communication layer that converts + the ena_bufs to ENA descriptors (and adds meta ENA descriptors as + needed.) + * This function also copies the ENA descriptors and the push buffer + to the Device memory space (if in push mode.) +- Writes doorbell to the ENA device. +- When the ENA device finishes sending the packet, a completion + interrupt is raised. +- The interrupt handler schedules NAPI. +- The ena_clean_tx_irq() function is called. This function handles the + completion descriptors generated by the ENA, with a single + completion descriptor per completed packet. + * req_id is retrieved from the completion descriptor. The tx_info of + the packet is retrieved via the req_id. The data buffers are + unmapped and req_id is returned to the empty req_id ring. + * The function stops when the completion descriptors are completed or + the budget is reached. + +Rx: +--- +- When a packet is received from the ENA device. +- The interrupt handler schedules NAPI. +- The ena_clean_rx_irq() function is called. This function calls + ena_rx_pkt(), an ENA communication layer function, which returns the + number of descriptors used for a new unhandled packet, and zero if + no new packet is found. +- Then it calls the ena_clean_rx_irq() function. +- ena_eth_rx_skb() checks packet length: + * If the packet is small (len < rx_copybreak), the driver allocates + a SKB for the new packet, and copies the packet payload into the + SKB data buffer. + - In this way the original data buffer is not passed to the stack + and is reused for future Rx packets. + * Otherwise the function unmaps the Rx buffer, then allocates the + new SKB structure and hooks the Rx buffer to the SKB frags. +- The new SKB is updated with the necessary information (protocol, + checksum hw verify result, etc.), and then passed to the network + stack, using the NAPI interface function napi_gro_receive(). diff --git a/MAINTAINERS b/MAINTAINERS index 429fc61bee81..50ce33dc62f6 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -636,6 +636,15 @@ F: drivers/tty/serial/altera_jtaguart.c F: include/linux/altera_uart.h F: include/linux/altera_jtaguart.h +AMAZON ETHERNET DRIVERS +M: Netanel Belgazal +R: Saeed Bishara +R: Zorik Machulsky +L: netdev@vger.kernel.org +S: Supported +F: Documentation/networking/ena.txt +F: drivers/net/ethernet/amazon/ + AMD CRYPTOGRAPHIC COPROCESSOR (CCP) DRIVER M: Tom Lendacky M: Gary Hook diff --git a/drivers/net/ethernet/Kconfig b/drivers/net/ethernet/Kconfig index 2ffd63463299..8cc7467b6c1f 100644 --- a/drivers/net/ethernet/Kconfig +++ b/drivers/net/ethernet/Kconfig @@ -24,6 +24,7 @@ source "drivers/net/ethernet/agere/Kconfig" source "drivers/net/ethernet/allwinner/Kconfig" source "drivers/net/ethernet/alteon/Kconfig" source "drivers/net/ethernet/altera/Kconfig" +source "drivers/net/ethernet/amazon/Kconfig" source "drivers/net/ethernet/amd/Kconfig" source "drivers/net/ethernet/apm/Kconfig" source "drivers/net/ethernet/apple/Kconfig" diff --git a/drivers/net/ethernet/Makefile b/drivers/net/ethernet/Makefile index 1d349e9aa9a6..a09423df83f2 100644 --- a/drivers/net/ethernet/Makefile +++ b/drivers/net/ethernet/Makefile @@ -10,6 +10,7 @@ obj-$(CONFIG_NET_VENDOR_AGERE) += agere/ obj-$(CONFIG_NET_VENDOR_ALLWINNER) += allwinner/ obj-$(CONFIG_NET_VENDOR_ALTEON) += alteon/ obj-$(CONFIG_ALTERA_TSE) += altera/ +obj-$(CONFIG_NET_VENDOR_AMAZON) += amazon/ obj-$(CONFIG_NET_VENDOR_AMD) += amd/ obj-$(CONFIG_NET_XGENE) += apm/ obj-$(CONFIG_NET_VENDOR_APPLE) += apple/ diff --git a/drivers/net/ethernet/amazon/Kconfig b/drivers/net/ethernet/amazon/Kconfig new file mode 100644 index 000000000000..99b30353541a --- /dev/null +++ b/drivers/net/ethernet/amazon/Kconfig @@ -0,0 +1,27 @@ +# +# Amazon network device configuration +# + +config NET_VENDOR_AMAZON + bool "Amazon Devices" + default y + ---help--- + If you have a network (Ethernet) device belonging to this class, say Y. + + Note that the answer to this question doesn't directly affect the + kernel: saying N will just cause the configurator to skip all + the questions about Amazon devices. If you say Y, you will be asked + for your specific device in the following questions. + +if NET_VENDOR_AMAZON + +config ENA_ETHERNET + tristate "Elastic Network Adapter (ENA) support" + depends on (PCI_MSI && X86) + ---help--- + This driver supports Elastic Network Adapter (ENA)" + + To compile this driver as a module, choose M here. + The module will be called ena. + +endif #NET_VENDOR_AMAZON diff --git a/drivers/net/ethernet/amazon/Makefile b/drivers/net/ethernet/amazon/Makefile new file mode 100644 index 000000000000..8e0b73f60d51 --- /dev/null +++ b/drivers/net/ethernet/amazon/Makefile @@ -0,0 +1,5 @@ +# +# Makefile for the Amazon network device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena/ diff --git a/drivers/net/ethernet/amazon/ena/Makefile b/drivers/net/ethernet/amazon/ena/Makefile new file mode 100644 index 000000000000..eaeeae06c5d9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/Makefile @@ -0,0 +1,7 @@ +# +# Makefile for the Elastic Network Adapter (ENA) device drivers. +# + +obj-$(CONFIG_ENA_ETHERNET) += ena.o + +ena-y := ena_netdev.o ena_com.o ena_eth_com.o ena_ethtool.o diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h new file mode 100644 index 000000000000..a46e749bf226 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h @@ -0,0 +1,973 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ADMIN_H_ +#define _ENA_ADMIN_H_ + +enum ena_admin_aq_opcode { + ENA_ADMIN_CREATE_SQ = 1, + + ENA_ADMIN_DESTROY_SQ = 2, + + ENA_ADMIN_CREATE_CQ = 3, + + ENA_ADMIN_DESTROY_CQ = 4, + + ENA_ADMIN_GET_FEATURE = 8, + + ENA_ADMIN_SET_FEATURE = 9, + + ENA_ADMIN_GET_STATS = 11, +}; + +enum ena_admin_aq_completion_status { + ENA_ADMIN_SUCCESS = 0, + + ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE = 1, + + ENA_ADMIN_BAD_OPCODE = 2, + + ENA_ADMIN_UNSUPPORTED_OPCODE = 3, + + ENA_ADMIN_MALFORMED_REQUEST = 4, + + /* Additional status is provided in ACQ entry extended_status */ + ENA_ADMIN_ILLEGAL_PARAMETER = 5, + + ENA_ADMIN_UNKNOWN_ERROR = 6, +}; + +enum ena_admin_aq_feature_id { + ENA_ADMIN_DEVICE_ATTRIBUTES = 1, + + ENA_ADMIN_MAX_QUEUES_NUM = 2, + + ENA_ADMIN_RSS_HASH_FUNCTION = 10, + + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG = 11, + + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG = 12, + + ENA_ADMIN_MTU = 14, + + ENA_ADMIN_RSS_HASH_INPUT = 18, + + ENA_ADMIN_INTERRUPT_MODERATION = 20, + + ENA_ADMIN_AENQ_CONFIG = 26, + + ENA_ADMIN_LINK_CONFIG = 27, + + ENA_ADMIN_HOST_ATTR_CONFIG = 28, + + ENA_ADMIN_FEATURES_OPCODE_NUM = 32, +}; + +enum ena_admin_placement_policy_type { + /* descriptors and headers are in host memory */ + ENA_ADMIN_PLACEMENT_POLICY_HOST = 1, + + /* descriptors and headers are in device memory (a.k.a Low Latency + * Queue) + */ + ENA_ADMIN_PLACEMENT_POLICY_DEV = 3, +}; + +enum ena_admin_link_types { + ENA_ADMIN_LINK_SPEED_1G = 0x1, + + ENA_ADMIN_LINK_SPEED_2_HALF_G = 0x2, + + ENA_ADMIN_LINK_SPEED_5G = 0x4, + + ENA_ADMIN_LINK_SPEED_10G = 0x8, + + ENA_ADMIN_LINK_SPEED_25G = 0x10, + + ENA_ADMIN_LINK_SPEED_40G = 0x20, + + ENA_ADMIN_LINK_SPEED_50G = 0x40, + + ENA_ADMIN_LINK_SPEED_100G = 0x80, + + ENA_ADMIN_LINK_SPEED_200G = 0x100, + + ENA_ADMIN_LINK_SPEED_400G = 0x200, +}; + +enum ena_admin_completion_policy_type { + /* completion queue entry for each sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC = 0, + + /* completion queue entry upon request in sq descriptor */ + ENA_ADMIN_COMPLETION_POLICY_DESC_ON_DEMAND = 1, + + /* current queue head pointer is updated in OS memory upon sq + * descriptor request + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD_ON_DEMAND = 2, + + /* current queue head pointer is updated in OS memory for each sq + * descriptor + */ + ENA_ADMIN_COMPLETION_POLICY_HEAD = 3, +}; + +/* basic stats return ena_admin_basic_stats while extanded stats return a + * buffer (string format) with additional statistics per queue and per + * device id + */ +enum ena_admin_get_stats_type { + ENA_ADMIN_GET_STATS_TYPE_BASIC = 0, + + ENA_ADMIN_GET_STATS_TYPE_EXTENDED = 1, +}; + +enum ena_admin_get_stats_scope { + ENA_ADMIN_SPECIFIC_QUEUE = 0, + + ENA_ADMIN_ETH_TRAFFIC = 1, +}; + +struct ena_admin_aq_common_desc { + /* 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command_id; + + /* as appears in ena_admin_aq_opcode */ + u8 opcode; + + /* 0 : phase + * 1 : ctrl_data - control buffer address valid + * 2 : ctrl_data_indirect - control buffer address + * points to list of pages with addresses of control + * buffers + * 7:3 : reserved3 + */ + u8 flags; +}; + +/* used in ena_admin_aq_entry. Can point directly to control data, or to a + * page list chunk. Used also at the end of indirect mode page list chunks, + * for chaining. + */ +struct ena_admin_ctrl_buff_info { + u32 length; + + struct ena_common_mem_addr address; +}; + +struct ena_admin_sq { + u16 sq_idx; + + /* 4:0 : reserved + * 7:5 : sq_direction - 0x1 - Tx; 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved1; +}; + +struct ena_admin_aq_entry { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + u32 inline_data_w4[12]; +}; + +struct ena_admin_acq_common_desc { + /* command identifier to associate it with the aq descriptor + * 11:0 : command_id + * 15:12 : reserved12 + */ + u16 command; + + u8 status; + + /* 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 extended_status; + + /* serves as a hint what AQ entries can be revoked */ + u16 sq_head_indx; +}; + +struct ena_admin_acq_entry { + struct ena_admin_acq_common_desc acq_common_descriptor; + + u32 response_specific_data[14]; +}; + +struct ena_admin_aq_create_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved0_w1 + * 7:5 : sq_direction - 0x1 - Tx, 0x2 - Rx + */ + u8 sq_identity; + + u8 reserved8_w1; + + /* 3:0 : placement_policy - Describing where the SQ + * descriptor ring and the SQ packet headers reside: + * 0x1 - descriptors and headers are in OS memory, + * 0x3 - descriptors and headers in device memory + * (a.k.a Low Latency Queue) + * 6:4 : completion_policy - Describing what policy + * to use for generation completion entry (cqe) in + * the CQ associated with this SQ: 0x0 - cqe for each + * sq descriptor, 0x1 - cqe upon request in sq + * descriptor, 0x2 - current queue head pointer is + * updated in OS memory upon sq descriptor request + * 0x3 - current queue head pointer is updated in OS + * memory for each sq descriptor + * 7 : reserved15_w1 + */ + u8 sq_caps_2; + + /* 0 : is_physically_contiguous - Described if the + * queue ring memory is allocated in physical + * contiguous pages or split. + * 7:1 : reserved17_w1 + */ + u8 sq_caps_3; + + /* associated completion queue id. This CQ must be created prior to + * SQ creation + */ + u16 cq_idx; + + /* submission queue depth in entries */ + u16 sq_depth; + + /* SQ physical base address in OS memory. This field should not be + * used for Low Latency queues. Has to be page aligned. + */ + struct ena_common_mem_addr sq_ba; + + /* specifies queue head writeback location in OS memory. Valid if + * completion_policy is set to completion_policy_head_on_demand or + * completion_policy_head. Has to be cache aligned + */ + struct ena_common_mem_addr sq_head_writeback; + + u32 reserved0_w7; + + u32 reserved0_w8; +}; + +enum ena_admin_sq_direction { + ENA_ADMIN_SQ_DIRECTION_TX = 1, + + ENA_ADMIN_SQ_DIRECTION_RX = 2, +}; + +struct ena_admin_acq_create_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 sq_idx; + + u16 reserved; + + /* queue doorbell address as an offset to PCIe MMIO REG BAR */ + u32 sq_doorbell_offset; + + /* low latency queue ring base address as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_descriptors_offset; + + /* low latency queue headers' memory as an offset to PCIe MMIO + * LLQ_MEM BAR + */ + u32 llq_headers_offset; +}; + +struct ena_admin_aq_destroy_sq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_sq sq; +}; + +struct ena_admin_acq_destroy_sq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +struct ena_admin_aq_create_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + /* 4:0 : reserved5 + * 5 : interrupt_mode_enabled - if set, cq operates + * in interrupt mode, otherwise - polling + * 7:6 : reserved6 + */ + u8 cq_caps_1; + + /* 4:0 : cq_entry_size_words - size of CQ entry in + * 32-bit words, valid values: 4, 8. + * 7:5 : reserved7 + */ + u8 cq_caps_2; + + /* completion queue depth in # of entries. must be power of 2 */ + u16 cq_depth; + + /* msix vector assigned to this cq */ + u32 msix_vector; + + /* cq physical base address in OS memory. CQ must be physically + * contiguous + */ + struct ena_common_mem_addr cq_ba; +}; + +struct ena_admin_acq_create_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; + + u16 cq_idx; + + /* actual cq depth in number of entries */ + u16 cq_actual_depth; + + u32 numa_node_register_offset; + + u32 cq_head_db_register_offset; + + u32 cq_interrupt_unmask_register_offset; +}; + +struct ena_admin_aq_destroy_cq_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + u16 cq_idx; + + u16 reserved1; +}; + +struct ena_admin_acq_destroy_cq_resp_desc { + struct ena_admin_acq_common_desc acq_common_desc; +}; + +/* ENA AQ Get Statistics command. Extended statistics are placed in control + * buffer pointed by AQ entry + */ +struct ena_admin_aq_get_stats_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + union { + /* command specific inline data */ + u32 inline_data_w1[3]; + + struct ena_admin_ctrl_buff_info control_buffer; + } u; + + /* stats type as defined in enum ena_admin_get_stats_type */ + u8 type; + + /* stats scope defined in enum ena_admin_get_stats_scope */ + u8 scope; + + u16 reserved3; + + /* queue id. used when scope is specific_queue */ + u16 queue_idx; + + /* device id, value 0xFFFF means mine. only privileged device can get + * stats of other device + */ + u16 device_id; +}; + +/* Basic Statistics Command. */ +struct ena_admin_basic_stats { + u32 tx_bytes_low; + + u32 tx_bytes_high; + + u32 tx_pkts_low; + + u32 tx_pkts_high; + + u32 rx_bytes_low; + + u32 rx_bytes_high; + + u32 rx_pkts_low; + + u32 rx_pkts_high; + + u32 rx_drops_low; + + u32 rx_drops_high; +}; + +struct ena_admin_acq_get_stats_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + struct ena_admin_basic_stats basic_stats; +}; + +struct ena_admin_get_set_feature_common_desc { + /* 1:0 : select - 0x1 - current value; 0x3 - default + * value + * 7:3 : reserved3 + */ + u8 flags; + + /* as appears in ena_admin_aq_feature_id */ + u8 feature_id; + + u16 reserved16; +}; + +struct ena_admin_device_attr_feature_desc { + u32 impl_id; + + u32 device_version; + + /* bitmap of ena_admin_aq_feature_id */ + u32 supported_features; + + u32 reserved3; + + /* Indicates how many bits are used physical address access. */ + u32 phys_addr_width; + + /* Indicates how many bits are used virtual address access. */ + u32 virt_addr_width; + + /* unicast MAC address (in Network byte order) */ + u8 mac_addr[6]; + + u8 reserved7[2]; + + u32 max_mtu; +}; + +struct ena_admin_queue_feature_desc { + /* including LLQs */ + u32 max_sq_num; + + u32 max_sq_depth; + + u32 max_cq_num; + + u32 max_cq_depth; + + u32 max_llq_num; + + u32 max_llq_depth; + + u32 max_header_size; + + /* Maximum Descriptors number, including meta descriptor, allowed for + * a single Tx packet + */ + u16 max_packet_tx_descs; + + /* Maximum Descriptors number allowed for a single Rx packet */ + u16 max_packet_rx_descs; +}; + +struct ena_admin_set_feature_mtu_desc { + /* exclude L2 */ + u32 mtu; +}; + +struct ena_admin_set_feature_host_attr_desc { + /* host OS info base address in OS memory. host info is 4KB of + * physically contiguous + */ + struct ena_common_mem_addr os_info_ba; + + /* host debug area base address in OS memory. debug area must be + * physically contiguous + */ + struct ena_common_mem_addr debug_ba; + + /* debug area size */ + u32 debug_area_size; +}; + +struct ena_admin_feature_intr_moder_desc { + /* interrupt delay granularity in usec */ + u16 intr_delay_resolution; + + u16 reserved; +}; + +struct ena_admin_get_feature_link_desc { + /* Link speed in Mb */ + u32 speed; + + /* bit field of enum ena_admin_link types */ + u32 supported; + + /* 0 : autoneg + * 1 : duplex - Full Duplex + * 31:2 : reserved2 + */ + u32 flags; +}; + +struct ena_admin_feature_aenq_desc { + /* bitmask for AENQ groups the device can report */ + u32 supported_groups; + + /* bitmask for AENQ groups to report */ + u32 enabled_groups; +}; + +struct ena_admin_feature_offload_desc { + /* 0 : TX_L3_csum_ipv4 + * 1 : TX_L4_ipv4_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 2 : TX_L4_ipv4_csum_full + * 3 : TX_L4_ipv6_csum_part - The checksum field + * should be initialized with pseudo header checksum + * 4 : TX_L4_ipv6_csum_full + * 5 : tso_ipv4 + * 6 : tso_ipv6 + * 7 : tso_ecn + */ + u32 tx; + + /* Receive side supported stateless offload + * 0 : RX_L3_csum_ipv4 - IPv4 checksum + * 1 : RX_L4_ipv4_csum - TCP/UDP/IPv4 checksum + * 2 : RX_L4_ipv6_csum - TCP/UDP/IPv6 checksum + * 3 : RX_hash - Hash calculation + */ + u32 rx_supported; + + u32 rx_enabled; +}; + +enum ena_admin_hash_functions { + ENA_ADMIN_TOEPLITZ = 1, + + ENA_ADMIN_CRC32 = 2, +}; + +struct ena_admin_feature_rss_flow_hash_control { + u32 keys_num; + + u32 reserved; + + u32 key[10]; +}; + +struct ena_admin_feature_rss_flow_hash_function { + /* 7:0 : funcs - bitmask of ena_admin_hash_functions */ + u32 supported_func; + + /* 7:0 : selected_func - bitmask of + * ena_admin_hash_functions + */ + u32 selected_func; + + /* initial value */ + u32 init_val; +}; + +/* RSS flow hash protocols */ +enum ena_admin_flow_hash_proto { + ENA_ADMIN_RSS_TCP4 = 0, + + ENA_ADMIN_RSS_UDP4 = 1, + + ENA_ADMIN_RSS_TCP6 = 2, + + ENA_ADMIN_RSS_UDP6 = 3, + + ENA_ADMIN_RSS_IP4 = 4, + + ENA_ADMIN_RSS_IP6 = 5, + + ENA_ADMIN_RSS_IP4_FRAG = 6, + + ENA_ADMIN_RSS_NOT_IP = 7, + + ENA_ADMIN_RSS_PROTO_NUM = 16, +}; + +/* RSS flow hash fields */ +enum ena_admin_flow_hash_fields { + /* Ethernet Dest Addr */ + ENA_ADMIN_RSS_L2_DA = 0, + + /* Ethernet Src Addr */ + ENA_ADMIN_RSS_L2_SA = 1, + + /* ipv4/6 Dest Addr */ + ENA_ADMIN_RSS_L3_DA = 2, + + /* ipv4/6 Src Addr */ + ENA_ADMIN_RSS_L3_SA = 5, + + /* tcp/udp Dest Port */ + ENA_ADMIN_RSS_L4_DP = 6, + + /* tcp/udp Src Port */ + ENA_ADMIN_RSS_L4_SP = 7, +}; + +struct ena_admin_proto_input { + /* flow hash fields (bitwise according to ena_admin_flow_hash_fields) */ + u16 fields; + + u16 reserved2; +}; + +struct ena_admin_feature_rss_hash_control { + struct ena_admin_proto_input supported_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input selected_fields[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved2[ENA_ADMIN_RSS_PROTO_NUM]; + + struct ena_admin_proto_input reserved3[ENA_ADMIN_RSS_PROTO_NUM]; +}; + +struct ena_admin_feature_rss_flow_hash_input { + /* supported hash input sorting + * 1 : L3_sort - support swap L3 addresses if DA is + * smaller than SA + * 2 : L4_sort - support swap L4 ports if DP smaller + * SP + */ + u16 supported_input_sort; + + /* enabled hash input sorting + * 1 : enable_L3_sort - enable swap L3 addresses if + * DA smaller than SA + * 2 : enable_L4_sort - enable swap L4 ports if DP + * smaller than SP + */ + u16 enabled_input_sort; +}; + +enum ena_admin_os_type { + ENA_ADMIN_OS_LINUX = 1, + + ENA_ADMIN_OS_WIN = 2, + + ENA_ADMIN_OS_DPDK = 3, + + ENA_ADMIN_OS_FREEBSD = 4, + + ENA_ADMIN_OS_IPXE = 5, +}; + +struct ena_admin_host_info { + /* defined in enum ena_admin_os_type */ + u32 os_type; + + /* os distribution string format */ + u8 os_dist_str[128]; + + /* OS distribution numeric format */ + u32 os_dist; + + /* kernel version string format */ + u8 kernel_ver_str[32]; + + /* Kernel version numeric format */ + u32 kernel_ver; + + /* 7:0 : major + * 15:8 : minor + * 23:16 : sub_minor + */ + u32 driver_version; + + /* features bitmap */ + u32 supported_network_features[4]; +}; + +struct ena_admin_rss_ind_table_entry { + u16 cq_idx; + + u16 reserved; +}; + +struct ena_admin_feature_rss_ind_table { + /* min supported table size (2^min_size) */ + u16 min_size; + + /* max supported table size (2^max_size) */ + u16 max_size; + + /* table size (2^size) */ + u16 size; + + u16 reserved; + + /* index of the inline entry. 0xFFFFFFFF means invalid */ + u32 inline_index; + + /* used for updating single entry, ignored when setting the entire + * table through the control buffer. + */ + struct ena_admin_rss_ind_table_entry inline_entry; +}; + +struct ena_admin_get_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + u32 raw[11]; +}; + +struct ena_admin_get_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + + struct ena_admin_device_attr_feature_desc dev_attr; + + struct ena_admin_queue_feature_desc max_queue; + + struct ena_admin_feature_aenq_desc aenq; + + struct ena_admin_get_feature_link_desc link; + + struct ena_admin_feature_offload_desc offload; + + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + struct ena_admin_feature_rss_ind_table ind_table; + + struct ena_admin_feature_intr_moder_desc intr_moderation; + } u; +}; + +struct ena_admin_set_feat_cmd { + struct ena_admin_aq_common_desc aq_common_descriptor; + + struct ena_admin_ctrl_buff_info control_buffer; + + struct ena_admin_get_set_feature_common_desc feat_common; + + union { + u32 raw[11]; + + /* mtu size */ + struct ena_admin_set_feature_mtu_desc mtu; + + /* host attributes */ + struct ena_admin_set_feature_host_attr_desc host_attr; + + /* AENQ configuration */ + struct ena_admin_feature_aenq_desc aenq; + + /* rss flow hash function */ + struct ena_admin_feature_rss_flow_hash_function flow_hash_func; + + /* rss flow hash input */ + struct ena_admin_feature_rss_flow_hash_input flow_hash_input; + + /* rss indirection table */ + struct ena_admin_feature_rss_ind_table ind_table; + } u; +}; + +struct ena_admin_set_feat_resp { + struct ena_admin_acq_common_desc acq_common_desc; + + union { + u32 raw[14]; + } u; +}; + +struct ena_admin_aenq_common_desc { + u16 group; + + u16 syndrom; + + /* 0 : phase */ + u8 flags; + + u8 reserved1[3]; + + u32 timestamp_low; + + u32 timestamp_high; +}; + +/* asynchronous event notification groups */ +enum ena_admin_aenq_group { + ENA_ADMIN_LINK_CHANGE = 0, + + ENA_ADMIN_FATAL_ERROR = 1, + + ENA_ADMIN_WARNING = 2, + + ENA_ADMIN_NOTIFICATION = 3, + + ENA_ADMIN_KEEP_ALIVE = 4, + + ENA_ADMIN_AENQ_GROUPS_NUM = 5, +}; + +enum ena_admin_aenq_notification_syndrom { + ENA_ADMIN_SUSPEND = 0, + + ENA_ADMIN_RESUME = 1, +}; + +struct ena_admin_aenq_entry { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* command specific inline data */ + u32 inline_data_w4[12]; +}; + +struct ena_admin_aenq_link_change_desc { + struct ena_admin_aenq_common_desc aenq_common_desc; + + /* 0 : link_status */ + u32 flags; +}; + +struct ena_admin_ena_mmio_req_read_less_resp { + u16 req_id; + + u16 reg_off; + + /* value is valid when poll is cleared */ + u32 reg_val; +}; + +/* aq_common_desc */ +#define ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK BIT(0) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_SHIFT 1 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_MASK BIT(1) +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_SHIFT 2 +#define ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK BIT(2) + +/* sq */ +#define ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_SQ_SQ_DIRECTION_MASK GENMASK(7, 5) + +/* acq_common_desc */ +#define ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK GENMASK(11, 0) +#define ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aq_create_sq_cmd */ +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK GENMASK(7, 5) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK GENMASK(3, 0) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT 4 +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK GENMASK(6, 4) +#define ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK BIT(0) + +/* aq_create_cq_cmd */ +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_SHIFT 5 +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK BIT(5) +#define ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK GENMASK(4, 0) + +/* get_set_feature_common_desc */ +#define ENA_ADMIN_GET_SET_FEATURE_COMMON_DESC_SELECT_MASK GENMASK(1, 0) + +/* get_feature_link_desc */ +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK BIT(0) +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_SHIFT 1 +#define ENA_ADMIN_GET_FEATURE_LINK_DESC_DUPLEX_MASK BIT(1) + +/* feature_offload_desc */ +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_FULL_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK BIT(3) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_SHIFT 4 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_FULL_MASK BIT(4) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_SHIFT 5 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK BIT(5) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_SHIFT 6 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK BIT(6) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_SHIFT 7 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK BIT(7) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L3_CSUM_IPV4_MASK BIT(0) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_SHIFT 1 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK BIT(1) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_SHIFT 2 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK BIT(2) +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_SHIFT 3 +#define ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_HASH_MASK BIT(3) + +/* feature_rss_flow_hash_function */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_FUNCS_MASK GENMASK(7, 0) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_FUNCTION_SELECTED_FUNC_MASK GENMASK(7, 0) + +/* feature_rss_flow_hash_input */ +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK BIT(2) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_SHIFT 1 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L3_SORT_MASK BIT(1) +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_SHIFT 2 +#define ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_ENABLE_L4_SORT_MASK BIT(2) + +/* host_info */ +#define ENA_ADMIN_HOST_INFO_MAJOR_MASK GENMASK(7, 0) +#define ENA_ADMIN_HOST_INFO_MINOR_SHIFT 8 +#define ENA_ADMIN_HOST_INFO_MINOR_MASK GENMASK(15, 8) +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT 16 +#define ENA_ADMIN_HOST_INFO_SUB_MINOR_MASK GENMASK(23, 16) + +/* aenq_common_desc */ +#define ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK BIT(0) + +/* aenq_link_change_desc */ +#define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK BIT(0) + +#endif /*_ENA_ADMIN_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c new file mode 100644 index 000000000000..3066d9c99984 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.c @@ -0,0 +1,2666 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_com.h" + +/*****************************************************************************/ +/*****************************************************************************/ + +/* Timeout in micro-sec */ +#define ADMIN_CMD_TIMEOUT_US (1000000) + +#define ENA_ASYNC_QUEUE_DEPTH 4 +#define ENA_ADMIN_QUEUE_DEPTH 32 + +#define MIN_ENA_VER (((ENA_COMMON_SPEC_VERSION_MAJOR) << \ + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT) \ + | (ENA_COMMON_SPEC_VERSION_MINOR)) + +#define ENA_CTRL_MAJOR 0 +#define ENA_CTRL_MINOR 0 +#define ENA_CTRL_SUB_MINOR 1 + +#define MIN_ENA_CTRL_VER \ + (((ENA_CTRL_MAJOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT)) | \ + ((ENA_CTRL_MINOR) << \ + (ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT)) | \ + (ENA_CTRL_SUB_MINOR)) + +#define ENA_DMA_ADDR_TO_UINT32_LOW(x) ((u32)((u64)(x))) +#define ENA_DMA_ADDR_TO_UINT32_HIGH(x) ((u32)(((u64)(x)) >> 32)) + +#define ENA_MMIO_READ_TIMEOUT 0xFFFFFFFF + +/*****************************************************************************/ +/*****************************************************************************/ +/*****************************************************************************/ + +enum ena_cmd_status { + ENA_CMD_SUBMITTED, + ENA_CMD_COMPLETED, + /* Abort - canceled by the driver */ + ENA_CMD_ABORTED, +}; + +struct ena_comp_ctx { + struct completion wait_event; + struct ena_admin_acq_entry *user_cqe; + u32 comp_size; + enum ena_cmd_status status; + /* status from the device */ + u8 comp_status; + u8 cmd_opcode; + bool occupied; +}; + +struct ena_com_stats_ctx { + struct ena_admin_aq_get_stats_cmd get_cmd; + struct ena_admin_acq_get_stats_resp get_resp; +}; + +static inline int ena_com_mem_addr_set(struct ena_com_dev *ena_dev, + struct ena_common_mem_addr *ena_addr, + dma_addr_t addr) +{ + if ((addr & GENMASK_ULL(ena_dev->dma_addr_bits - 1, 0)) != addr) { + pr_err("dma address has more bits that the device supports\n"); + return -EINVAL; + } + + ena_addr->mem_addr_low = (u32)addr; + ena_addr->mem_addr_high = (u64)addr >> 32; + + return 0; +} + +static int ena_com_admin_init_sq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_sq *sq = &queue->sq; + u16 size = ADMIN_SQ_SIZE(queue->q_depth); + + sq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &sq->dma_addr, + GFP_KERNEL); + + if (!sq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + sq->head = 0; + sq->tail = 0; + sq->phase = 1; + + sq->db_addr = NULL; + + return 0; +} + +static int ena_com_admin_init_cq(struct ena_com_admin_queue *queue) +{ + struct ena_com_admin_cq *cq = &queue->cq; + u16 size = ADMIN_CQ_SIZE(queue->q_depth); + + cq->entries = dma_zalloc_coherent(queue->q_dmadev, size, &cq->dma_addr, + GFP_KERNEL); + + if (!cq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + cq->head = 0; + cq->phase = 1; + + return 0; +} + +static int ena_com_admin_init_aenq(struct ena_com_dev *dev, + struct ena_aenq_handlers *aenq_handlers) +{ + struct ena_com_aenq *aenq = &dev->aenq; + u32 addr_low, addr_high, aenq_caps; + u16 size; + + dev->aenq.q_depth = ENA_ASYNC_QUEUE_DEPTH; + size = ADMIN_AENQ_SIZE(ENA_ASYNC_QUEUE_DEPTH); + aenq->entries = dma_zalloc_coherent(dev->dmadev, size, &aenq->dma_addr, + GFP_KERNEL); + + if (!aenq->entries) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + aenq->head = aenq->q_depth; + aenq->phase = 1; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(aenq->dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(aenq->dma_addr); + + writel(addr_low, dev->reg_bar + ENA_REGS_AENQ_BASE_LO_OFF); + writel(addr_high, dev->reg_bar + ENA_REGS_AENQ_BASE_HI_OFF); + + aenq_caps = 0; + aenq_caps |= dev->aenq.q_depth & ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK; + aenq_caps |= (sizeof(struct ena_admin_aenq_entry) + << ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK; + writel(aenq_caps, dev->reg_bar + ENA_REGS_AENQ_CAPS_OFF); + + if (unlikely(!aenq_handlers)) { + pr_err("aenq handlers pointer is NULL\n"); + return -EINVAL; + } + + aenq->aenq_handlers = aenq_handlers; + + return 0; +} + +static inline void comp_ctxt_release(struct ena_com_admin_queue *queue, + struct ena_comp_ctx *comp_ctx) +{ + comp_ctx->occupied = false; + atomic_dec(&queue->outstanding_cmds); +} + +static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue, + u16 command_id, bool capture) +{ + if (unlikely(command_id >= queue->q_depth)) { + pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n", + command_id, queue->q_depth); + return NULL; + } + + if (unlikely(queue->comp_ctx[command_id].occupied && capture)) { + pr_err("Completion context is occupied\n"); + return NULL; + } + + if (capture) { + atomic_inc(&queue->outstanding_cmds); + queue->comp_ctx[command_id].occupied = true; + } + + return &queue->comp_ctx[command_id]; +} + +static struct ena_comp_ctx *__ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + struct ena_comp_ctx *comp_ctx; + u16 tail_masked, cmd_id; + u16 queue_size_mask; + u16 cnt; + + queue_size_mask = admin_queue->q_depth - 1; + + tail_masked = admin_queue->sq.tail & queue_size_mask; + + /* In case of queue FULL */ + cnt = admin_queue->sq.tail - admin_queue->sq.head; + if (cnt >= admin_queue->q_depth) { + pr_debug("admin queue is FULL (tail %d head %d depth: %d)\n", + admin_queue->sq.tail, admin_queue->sq.head, + admin_queue->q_depth); + admin_queue->stats.out_of_space++; + return ERR_PTR(-ENOSPC); + } + + cmd_id = admin_queue->curr_cmd_id; + + cmd->aq_common_descriptor.flags |= admin_queue->sq.phase & + ENA_ADMIN_AQ_COMMON_DESC_PHASE_MASK; + + cmd->aq_common_descriptor.command_id |= cmd_id & + ENA_ADMIN_AQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, true); + if (unlikely(!comp_ctx)) + return ERR_PTR(-EINVAL); + + comp_ctx->status = ENA_CMD_SUBMITTED; + comp_ctx->comp_size = (u32)comp_size_in_bytes; + comp_ctx->user_cqe = comp; + comp_ctx->cmd_opcode = cmd->aq_common_descriptor.opcode; + + reinit_completion(&comp_ctx->wait_event); + + memcpy(&admin_queue->sq.entries[tail_masked], cmd, cmd_size_in_bytes); + + admin_queue->curr_cmd_id = (admin_queue->curr_cmd_id + 1) & + queue_size_mask; + + admin_queue->sq.tail++; + admin_queue->stats.submitted_cmd++; + + if (unlikely((admin_queue->sq.tail & queue_size_mask) == 0)) + admin_queue->sq.phase = !admin_queue->sq.phase; + + writel(admin_queue->sq.tail, admin_queue->sq.db_addr); + + return comp_ctx; +} + +static inline int ena_com_init_comp_ctxt(struct ena_com_admin_queue *queue) +{ + size_t size = queue->q_depth * sizeof(struct ena_comp_ctx); + struct ena_comp_ctx *comp_ctx; + u16 i; + + queue->comp_ctx = devm_kzalloc(queue->q_dmadev, size, GFP_KERNEL); + if (unlikely(!queue->comp_ctx)) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + for (i = 0; i < queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(queue, i, false); + if (comp_ctx) + init_completion(&comp_ctx->wait_event); + } + + return 0; +} + +static struct ena_comp_ctx *ena_com_submit_admin_cmd(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size_in_bytes, + struct ena_admin_acq_entry *comp, + size_t comp_size_in_bytes) +{ + unsigned long flags; + struct ena_comp_ctx *comp_ctx; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + if (unlikely(!admin_queue->running_state)) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + return ERR_PTR(-ENODEV); + } + comp_ctx = __ena_com_submit_admin_cmd(admin_queue, cmd, + cmd_size_in_bytes, + comp, + comp_size_in_bytes); + if (unlikely(IS_ERR(comp_ctx))) + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + return comp_ctx; +} + +static int ena_com_init_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_sq *io_sq) +{ + size_t size; + int dev_node = 0; + + memset(&io_sq->desc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + io_sq->desc_entry_size = + (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_desc) : + sizeof(struct ena_eth_io_rx_desc); + + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_sq->desc_addr.phys_addr, + GFP_KERNEL); + } + } else { + dev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, dev_node); + if (!io_sq->desc_addr.virt_addr) { + io_sq->desc_addr.virt_addr = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + } + } + + if (!io_sq->desc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_sq->tail = 0; + io_sq->next_to_comp = 0; + io_sq->phase = 1; + + return 0; +} + +static int ena_com_init_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx, + struct ena_com_io_cq *io_cq) +{ + size_t size; + int prev_node = 0; + + memset(&io_cq->cdesc_addr, 0x0, sizeof(struct ena_com_io_desc_addr)); + + /* Use the basic completion descriptor for Rx */ + io_cq->cdesc_entry_size_in_bytes = + (io_cq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) ? + sizeof(struct ena_eth_io_tx_cdesc) : + sizeof(struct ena_eth_io_rx_cdesc_base); + + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + prev_node = dev_to_node(ena_dev->dmadev); + set_dev_node(ena_dev->dmadev, ctx->numa_node); + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, GFP_KERNEL); + set_dev_node(ena_dev->dmadev, prev_node); + if (!io_cq->cdesc_addr.virt_addr) { + io_cq->cdesc_addr.virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, size, + &io_cq->cdesc_addr.phys_addr, + GFP_KERNEL); + } + + if (!io_cq->cdesc_addr.virt_addr) { + pr_err("memory allocation failed"); + return -ENOMEM; + } + + io_cq->phase = 1; + io_cq->head = 0; + + return 0; +} + +static void ena_com_handle_single_admin_completion(struct ena_com_admin_queue *admin_queue, + struct ena_admin_acq_entry *cqe) +{ + struct ena_comp_ctx *comp_ctx; + u16 cmd_id; + + cmd_id = cqe->acq_common_descriptor.command & + ENA_ADMIN_ACQ_COMMON_DESC_COMMAND_ID_MASK; + + comp_ctx = get_comp_ctxt(admin_queue, cmd_id, false); + if (unlikely(!comp_ctx)) { + pr_err("comp_ctx is NULL. Changing the admin queue running state\n"); + admin_queue->running_state = false; + return; + } + + comp_ctx->status = ENA_CMD_COMPLETED; + comp_ctx->comp_status = cqe->acq_common_descriptor.status; + + if (comp_ctx->user_cqe) + memcpy(comp_ctx->user_cqe, (void *)cqe, comp_ctx->comp_size); + + if (!admin_queue->polling) + complete(&comp_ctx->wait_event); +} + +static void ena_com_handle_admin_completion(struct ena_com_admin_queue *admin_queue) +{ + struct ena_admin_acq_entry *cqe = NULL; + u16 comp_num = 0; + u16 head_masked; + u8 phase; + + head_masked = admin_queue->cq.head & (admin_queue->q_depth - 1); + phase = admin_queue->cq.phase; + + cqe = &admin_queue->cq.entries[head_masked]; + + /* Go over all the completions */ + while ((cqe->acq_common_descriptor.flags & + ENA_ADMIN_ACQ_COMMON_DESC_PHASE_MASK) == phase) { + /* Do not read the rest of the completion entry before the + * phase bit was validated + */ + rmb(); + ena_com_handle_single_admin_completion(admin_queue, cqe); + + head_masked++; + comp_num++; + if (unlikely(head_masked == admin_queue->q_depth)) { + head_masked = 0; + phase = !phase; + } + + cqe = &admin_queue->cq.entries[head_masked]; + } + + admin_queue->cq.head += comp_num; + admin_queue->cq.phase = phase; + admin_queue->sq.head += comp_num; + admin_queue->stats.completed_cmd += comp_num; +} + +static int ena_com_comp_status_to_errno(u8 comp_status) +{ + if (unlikely(comp_status != 0)) + pr_err("admin command failed[%u]\n", comp_status); + + if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR)) + return -EINVAL; + + switch (comp_status) { + case ENA_ADMIN_SUCCESS: + return 0; + case ENA_ADMIN_RESOURCE_ALLOCATION_FAILURE: + return -ENOMEM; + case ENA_ADMIN_UNSUPPORTED_OPCODE: + return -EPERM; + case ENA_ADMIN_BAD_OPCODE: + case ENA_ADMIN_MALFORMED_REQUEST: + case ENA_ADMIN_ILLEGAL_PARAMETER: + case ENA_ADMIN_UNKNOWN_ERROR: + return -EINVAL; + } + + return 0; +} + +static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + u32 start_time; + int ret; + + start_time = ((u32)jiffies_to_usecs(jiffies)); + + while (comp_ctx->status == ENA_CMD_SUBMITTED) { + if ((((u32)jiffies_to_usecs(jiffies)) - start_time) > + ADMIN_CMD_TIMEOUT_US) { + pr_err("Wait for completion (polling) timeout\n"); + /* ENA didn't have any completion */ + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.no_completion++; + admin_queue->running_state = false; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + ret = -ETIME; + goto err; + } + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + msleep(100); + } + + if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) { + pr_err("Command was aborted\n"); + spin_lock_irqsave(&admin_queue->q_lock, flags); + admin_queue->stats.aborted_cmd++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + ret = -ENODEV; + goto err; + } + + WARN(comp_ctx->status != ENA_CMD_COMPLETED, "Invalid comp status %d\n", + comp_ctx->status); + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + unsigned long flags; + int ret; + + wait_for_completion_timeout(&comp_ctx->wait_event, + usecs_to_jiffies(ADMIN_CMD_TIMEOUT_US)); + + /* In case the command wasn't completed find out the root cause. + * There might be 2 kinds of errors + * 1) No completion (timeout reached) + * 2) There is completion but the device didn't get any msi-x interrupt. + */ + if (unlikely(comp_ctx->status == ENA_CMD_SUBMITTED)) { + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_com_handle_admin_completion(admin_queue); + admin_queue->stats.no_completion++; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + + if (comp_ctx->status == ENA_CMD_COMPLETED) + pr_err("The ena device have completion but the driver didn't receive any MSI-X interrupt (cmd %d)\n", + comp_ctx->cmd_opcode); + else + pr_err("The ena device doesn't send any completion for the admin cmd %d status %d\n", + comp_ctx->cmd_opcode, comp_ctx->status); + + admin_queue->running_state = false; + ret = -ETIME; + goto err; + } + + ret = ena_com_comp_status_to_errno(comp_ctx->comp_status); +err: + comp_ctxt_release(admin_queue, comp_ctx); + return ret; +} + +/* This method read the hardware device register through posting writes + * and waiting for response + * On timeout the function will return ENA_MMIO_READ_TIMEOUT + */ +static u32 ena_com_reg_bar_read32(struct ena_com_dev *ena_dev, u16 offset) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + volatile struct ena_admin_ena_mmio_req_read_less_resp *read_resp = + mmio_read->read_resp; + u32 mmio_read_reg, ret; + unsigned long flags; + int i; + + might_sleep(); + + /* If readless is disabled, perform regular read */ + if (!mmio_read->readless_supported) + return readl(ena_dev->reg_bar + offset); + + spin_lock_irqsave(&mmio_read->lock, flags); + mmio_read->seq_num++; + + read_resp->req_id = mmio_read->seq_num + 0xDEAD; + mmio_read_reg = (offset << ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT) & + ENA_REGS_MMIO_REG_READ_REG_OFF_MASK; + mmio_read_reg |= mmio_read->seq_num & + ENA_REGS_MMIO_REG_READ_REQ_ID_MASK; + + /* make sure read_resp->req_id get updated before the hw can write + * there + */ + wmb(); + + writel(mmio_read_reg, ena_dev->reg_bar + ENA_REGS_MMIO_REG_READ_OFF); + + for (i = 0; i < ENA_REG_READ_TIMEOUT; i++) { + if (read_resp->req_id == mmio_read->seq_num) + break; + + udelay(1); + } + + if (unlikely(i == ENA_REG_READ_TIMEOUT)) { + pr_err("reading reg failed for timeout. expected: req id[%hu] offset[%hu] actual: req id[%hu] offset[%hu]\n", + mmio_read->seq_num, offset, read_resp->req_id, + read_resp->reg_off); + ret = ENA_MMIO_READ_TIMEOUT; + goto err; + } + + if (read_resp->reg_off != offset) { + pr_err("Read failure: wrong offset provided"); + ret = ENA_MMIO_READ_TIMEOUT; + } else { + ret = read_resp->reg_val; + } +err: + spin_unlock_irqrestore(&mmio_read->lock, flags); + + return ret; +} + +/* There are two types to wait for completion. + * Polling mode - wait until the completion is available. + * Async mode - wait on wait queue until the completion is ready + * (or the timeout expired). + * It is expected that the IRQ called ena_com_handle_admin_completion + * to mark the completions. + */ +static int ena_com_wait_and_process_admin_cq(struct ena_comp_ctx *comp_ctx, + struct ena_com_admin_queue *admin_queue) +{ + if (admin_queue->polling) + return ena_com_wait_and_process_admin_cq_polling(comp_ctx, + admin_queue); + + return ena_com_wait_and_process_admin_cq_interrupts(comp_ctx, + admin_queue); +} + +static int ena_com_destroy_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_sq_cmd destroy_cmd; + struct ena_admin_acq_destroy_sq_resp_desc destroy_resp; + u8 direction; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + destroy_cmd.sq.sq_identity |= (direction << + ENA_ADMIN_SQ_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_SQ_SQ_DIRECTION_MASK; + + destroy_cmd.sq.sq_idx = io_sq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_SQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("failed to destroy io sq error: %d\n", ret); + + return ret; +} + +static void ena_com_io_queue_free(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, + struct ena_com_io_cq *io_cq) +{ + size_t size; + + if (io_cq->cdesc_addr.virt_addr) { + size = io_cq->cdesc_entry_size_in_bytes * io_cq->q_depth; + + dma_free_coherent(ena_dev->dmadev, size, + io_cq->cdesc_addr.virt_addr, + io_cq->cdesc_addr.phys_addr); + + io_cq->cdesc_addr.virt_addr = NULL; + } + + if (io_sq->desc_addr.virt_addr) { + size = io_sq->desc_entry_size * io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + dma_free_coherent(ena_dev->dmadev, size, + io_sq->desc_addr.virt_addr, + io_sq->desc_addr.phys_addr); + else + devm_kfree(ena_dev->dmadev, io_sq->desc_addr.virt_addr); + + io_sq->desc_addr.virt_addr = NULL; + } +} + +static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout, + u16 exp_state) +{ + u32 val, i; + + for (i = 0; i < timeout; i++) { + val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if ((val & ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK) == + exp_state) + return 0; + + /* The resolution of the timeout is 100ms */ + msleep(100); + } + + return -ETIME; +} + +static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev, + enum ena_admin_aq_feature_id feature_id) +{ + u32 feature_mask = 1 << feature_id; + + /* Device attributes is always supported */ + if ((feature_id != ENA_ADMIN_DEVICE_ATTRIBUTES) && + !(ena_dev->supported_features & feature_mask)) + return false; + + return true; +} + +static int ena_com_get_feature_ex(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id, + dma_addr_t control_buf_dma_addr, + u32 control_buff_size) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_get_feat_cmd get_cmd; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, feature_id)) { + pr_info("Feature %d isn't supported\n", feature_id); + return -EPERM; + } + + memset(&get_cmd, 0x0, sizeof(get_cmd)); + admin_queue = &ena_dev->admin_queue; + + get_cmd.aq_common_descriptor.opcode = ENA_ADMIN_GET_FEATURE; + + if (control_buff_size) + get_cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + else + get_cmd.aq_common_descriptor.flags = 0; + + ret = ena_com_mem_addr_set(ena_dev, + &get_cmd.control_buffer.address, + control_buf_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + get_cmd.control_buffer.length = control_buff_size; + + get_cmd.feat_common.feature_id = feature_id; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *) + &get_cmd, + sizeof(get_cmd), + (struct ena_admin_acq_entry *) + get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to submit get_feature command %d error: %d\n", + feature_id, ret); + + return ret; +} + +static int ena_com_get_feature(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *get_resp, + enum ena_admin_aq_feature_id feature_id) +{ + return ena_com_get_feature_ex(ena_dev, + get_resp, + feature_id, + 0, + 0); +} + +static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_key = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + &rss->hash_key_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_key)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_key_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_key) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_key), + rss->hash_key, rss->hash_key_dma_addr); + rss->hash_key = NULL; +} + +static int ena_com_hash_ctrl_init(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + rss->hash_ctrl = + dma_zalloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + &rss->hash_ctrl_dma_addr, GFP_KERNEL); + + if (unlikely(!rss->hash_ctrl)) + return -ENOMEM; + + return 0; +} + +static void ena_com_hash_ctrl_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (rss->hash_ctrl) + dma_free_coherent(ena_dev->dmadev, sizeof(*rss->hash_ctrl), + rss->hash_ctrl, rss->hash_ctrl_dma_addr); + rss->hash_ctrl = NULL; +} + +static int ena_com_indirect_table_allocate(struct ena_com_dev *ena_dev, + u16 log_size) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + size_t tbl_size; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + if (unlikely(ret)) + return ret; + + if ((get_resp.u.ind_table.min_size > log_size) || + (get_resp.u.ind_table.max_size < log_size)) { + pr_err("indirect table size doesn't fit. requested size: %d while min is:%d and max %d\n", + 1 << log_size, 1 << get_resp.u.ind_table.min_size, + 1 << get_resp.u.ind_table.max_size); + return -EINVAL; + } + + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rss->rss_ind_tbl = + dma_zalloc_coherent(ena_dev->dmadev, tbl_size, + &rss->rss_ind_tbl_dma_addr, GFP_KERNEL); + if (unlikely(!rss->rss_ind_tbl)) + goto mem_err1; + + tbl_size = (1ULL << log_size) * sizeof(u16); + rss->host_rss_ind_tbl = + devm_kzalloc(ena_dev->dmadev, tbl_size, GFP_KERNEL); + if (unlikely(!rss->host_rss_ind_tbl)) + goto mem_err2; + + rss->tbl_log_size = log_size; + + return 0; + +mem_err2: + tbl_size = (1ULL << log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; +mem_err1: + rss->tbl_log_size = 0; + return -ENOMEM; +} + +static void ena_com_indirect_table_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + size_t tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + if (rss->rss_ind_tbl) + dma_free_coherent(ena_dev->dmadev, tbl_size, rss->rss_ind_tbl, + rss->rss_ind_tbl_dma_addr); + rss->rss_ind_tbl = NULL; + + if (rss->host_rss_ind_tbl) + devm_kfree(ena_dev->dmadev, rss->host_rss_ind_tbl); + rss->host_rss_ind_tbl = NULL; +} + +static int ena_com_create_io_sq(struct ena_com_dev *ena_dev, + struct ena_com_io_sq *io_sq, u16 cq_idx) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_sq_cmd create_cmd; + struct ena_admin_acq_create_sq_resp_desc cmd_completion; + u8 direction; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_sq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_SQ; + + if (io_sq->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + direction = ENA_ADMIN_SQ_DIRECTION_TX; + else + direction = ENA_ADMIN_SQ_DIRECTION_RX; + + create_cmd.sq_identity |= (direction << + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_SQ_DIRECTION_MASK; + + create_cmd.sq_caps_2 |= io_sq->mem_queue_type & + ENA_ADMIN_AQ_CREATE_SQ_CMD_PLACEMENT_POLICY_MASK; + + create_cmd.sq_caps_2 |= (ENA_ADMIN_COMPLETION_POLICY_DESC << + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_SHIFT) & + ENA_ADMIN_AQ_CREATE_SQ_CMD_COMPLETION_POLICY_MASK; + + create_cmd.sq_caps_3 |= + ENA_ADMIN_AQ_CREATE_SQ_CMD_IS_PHYSICALLY_CONTIGUOUS_MASK; + + create_cmd.cq_idx = cq_idx; + create_cmd.sq_depth = io_sq->q_depth; + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) { + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.sq_ba, + io_sq->desc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO SQ. error: %d\n", ret); + return ret; + } + + io_sq->idx = cmd_completion.sq_idx; + + io_sq->db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + (uintptr_t)cmd_completion.sq_doorbell_offset); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq->header_addr = (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_headers_offset); + + io_sq->desc_addr.pbuf_dev_addr = + (u8 __iomem *)((uintptr_t)ena_dev->mem_bar + + cmd_completion.llq_descriptors_offset); + } + + pr_debug("created sq[%u], depth[%u]\n", io_sq->idx, io_sq->q_depth); + + return ret; +} + +static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_com_io_sq *io_sq; + u16 qid; + int i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + qid = rss->host_rss_ind_tbl[i]; + if (qid >= ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + io_sq = &ena_dev->io_sq_queues[qid]; + + if (io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX) + return -EINVAL; + + rss->rss_ind_tbl[i].cq_idx = io_sq->idx; + } + + return 0; +} + +static int ena_com_ind_tbl_convert_from_device(struct ena_com_dev *ena_dev) +{ + u16 dev_idx_to_host_tbl[ENA_TOTAL_NUM_QUEUES] = { (u16)-1 }; + struct ena_rss *rss = &ena_dev->rss; + u8 idx; + u16 i; + + for (i = 0; i < ENA_TOTAL_NUM_QUEUES; i++) + dev_idx_to_host_tbl[ena_dev->io_sq_queues[i].idx] = i; + + for (i = 0; i < 1 << rss->tbl_log_size; i++) { + if (rss->rss_ind_tbl[i].cq_idx > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + idx = (u8)rss->rss_ind_tbl[i].cq_idx; + + if (dev_idx_to_host_tbl[idx] > ENA_TOTAL_NUM_QUEUES) + return -EINVAL; + + rss->host_rss_ind_tbl[i] = dev_idx_to_host_tbl[idx]; + } + + return 0; +} + +static int ena_com_init_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + size_t size; + + size = sizeof(struct ena_intr_moder_entry) * ENA_INTR_MAX_NUM_OF_LEVELS; + + ena_dev->intr_moder_tbl = + devm_kzalloc(ena_dev->dmadev, size, GFP_KERNEL); + if (!ena_dev->intr_moder_tbl) + return -ENOMEM; + + ena_com_config_default_interrupt_moderation_table(ena_dev); + + return 0; +} + +static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev, + u16 intr_delay_resolution) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int i; + + if (!intr_delay_resolution) { + pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n"); + intr_delay_resolution = 1; + } + ena_dev->intr_delay_resolution = intr_delay_resolution; + + /* update Rx */ + for (i = 0; i < ENA_INTR_MAX_NUM_OF_LEVELS; i++) + intr_moder_tbl[i].intr_moder_interval /= intr_delay_resolution; + + /* update Tx */ + ena_dev->intr_moder_tx_interval /= intr_delay_resolution; +} + +/*****************************************************************************/ +/******************************* API ******************************/ +/*****************************************************************************/ + +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *comp, + size_t comp_size) +{ + struct ena_comp_ctx *comp_ctx; + int ret; + + comp_ctx = ena_com_submit_admin_cmd(admin_queue, cmd, cmd_size, + comp, comp_size); + if (unlikely(IS_ERR(comp_ctx))) { + pr_err("Failed to submit command [%ld]\n", PTR_ERR(comp_ctx)); + return PTR_ERR(comp_ctx); + } + + ret = ena_com_wait_and_process_admin_cq(comp_ctx, admin_queue); + if (unlikely(ret)) { + if (admin_queue->running_state) + pr_err("Failed to process command. ret = %d\n", ret); + else + pr_debug("Failed to process command. ret = %d\n", ret); + } + return ret; +} + +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_create_cq_cmd create_cmd; + struct ena_admin_acq_create_cq_resp_desc cmd_completion; + int ret; + + memset(&create_cmd, 0x0, sizeof(struct ena_admin_aq_create_cq_cmd)); + + create_cmd.aq_common_descriptor.opcode = ENA_ADMIN_CREATE_CQ; + + create_cmd.cq_caps_2 |= (io_cq->cdesc_entry_size_in_bytes / 4) & + ENA_ADMIN_AQ_CREATE_CQ_CMD_CQ_ENTRY_SIZE_WORDS_MASK; + create_cmd.cq_caps_1 |= + ENA_ADMIN_AQ_CREATE_CQ_CMD_INTERRUPT_MODE_ENABLED_MASK; + + create_cmd.msix_vector = io_cq->msix_vector; + create_cmd.cq_depth = io_cq->q_depth; + + ret = ena_com_mem_addr_set(ena_dev, + &create_cmd.cq_ba, + io_cq->cdesc_addr.phys_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&create_cmd, + sizeof(create_cmd), + (struct ena_admin_acq_entry *)&cmd_completion, + sizeof(cmd_completion)); + if (unlikely(ret)) { + pr_err("Failed to create IO CQ. error: %d\n", ret); + return ret; + } + + io_cq->idx = cmd_completion.cq_idx; + + io_cq->unmask_reg = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_interrupt_unmask_register_offset); + + if (cmd_completion.cq_head_db_register_offset) + io_cq->cq_head_db_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.cq_head_db_register_offset); + + if (cmd_completion.numa_node_register_offset) + io_cq->numa_node_cfg_reg = + (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + cmd_completion.numa_node_register_offset); + + pr_debug("created cq[%u], depth[%u]\n", io_cq->idx, io_cq->q_depth); + + return ret; +} + +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq) +{ + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Invalid queue number %d but the max is %d\n", qid, + ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + *io_sq = &ena_dev->io_sq_queues[qid]; + *io_cq = &ena_dev->io_cq_queues[qid]; + + return 0; +} + +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_comp_ctx *comp_ctx; + u16 i; + + if (!admin_queue->comp_ctx) + return; + + for (i = 0; i < admin_queue->q_depth; i++) { + comp_ctx = get_comp_ctxt(admin_queue, i, false); + if (unlikely(!comp_ctx)) + break; + + comp_ctx->status = ENA_CMD_ABORTED; + + complete(&comp_ctx->wait_event); + } +} + +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + while (atomic_read(&admin_queue->outstanding_cmds) != 0) { + spin_unlock_irqrestore(&admin_queue->q_lock, flags); + msleep(20); + spin_lock_irqsave(&admin_queue->q_lock, flags); + } + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_admin_aq_destroy_cq_cmd destroy_cmd; + struct ena_admin_acq_destroy_cq_resp_desc destroy_resp; + int ret; + + memset(&destroy_cmd, 0x0, sizeof(struct ena_admin_aq_destroy_sq_cmd)); + + destroy_cmd.cq_idx = io_cq->idx; + destroy_cmd.aq_common_descriptor.opcode = ENA_ADMIN_DESTROY_CQ; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&destroy_cmd, + sizeof(destroy_cmd), + (struct ena_admin_acq_entry *)&destroy_resp, + sizeof(destroy_resp)); + + if (unlikely(ret && (ret != -ENODEV))) + pr_err("Failed to destroy IO CQ. error: %d\n", ret); + + return ret; +} + +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev) +{ + return ena_dev->admin_queue.running_state; +} + +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + unsigned long flags; + + spin_lock_irqsave(&admin_queue->q_lock, flags); + ena_dev->admin_queue.running_state = state; + spin_unlock_irqrestore(&admin_queue->q_lock, flags); +} + +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev) +{ + u16 depth = ena_dev->aenq.q_depth; + + WARN(ena_dev->aenq.head != depth, "Invalid AENQ state\n"); + + /* Init head_db to mark that all entries in the queue + * are initially available + */ + writel(depth, ena_dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + ret = ena_com_get_feature(ena_dev, &get_resp, ENA_ADMIN_AENQ_CONFIG); + if (ret) { + pr_info("Can't get aenq configuration\n"); + return ret; + } + + if ((get_resp.u.aenq.supported_groups & groups_flag) != groups_flag) { + pr_warn("Trying to set unsupported aenq events. supported flag: %x asked flag: %x\n", + get_resp.u.aenq.supported_groups, groups_flag); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_AENQ_CONFIG; + cmd.u.aenq.enabled_groups = groups_flag; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to config AENQ ret: %d\n", ret); + + return ret; +} + +int ena_com_get_dma_width(struct ena_com_dev *ena_dev) +{ + u32 caps = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + int width; + + if (unlikely(caps == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + width = (caps & ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK) >> + ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT; + + pr_debug("ENA dma width: %d\n", width); + + if ((width < 32) || width > ENA_MAX_PHYS_ADDR_SIZE_BITS) { + pr_err("DMA width illegal value: %d\n", width); + return -EINVAL; + } + + ena_dev->dma_addr_bits = width; + + return width; +} + +int ena_com_validate_version(struct ena_com_dev *ena_dev) +{ + u32 ver; + u32 ctrl_ver; + u32 ctrl_ver_masked; + + /* Make sure the ENA version and the controller version are at least + * as the driver expects + */ + ver = ena_com_reg_bar_read32(ena_dev, ENA_REGS_VERSION_OFF); + ctrl_ver = ena_com_reg_bar_read32(ena_dev, + ENA_REGS_CONTROLLER_VERSION_OFF); + + if (unlikely((ver == ENA_MMIO_READ_TIMEOUT) || + (ctrl_ver == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + pr_info("ena device version: %d.%d\n", + (ver & ENA_REGS_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_VERSION_MAJOR_VERSION_SHIFT, + ver & ENA_REGS_VERSION_MINOR_VERSION_MASK); + + if (ver < MIN_ENA_VER) { + pr_err("ENA version is lower than the minimal version the driver supports\n"); + return -1; + } + + pr_info("ena controller version: %d.%d.%d implementation version %d\n", + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) >> + ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT, + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK), + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK) >> + ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT); + + ctrl_ver_masked = + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK) | + (ctrl_ver & ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK); + + /* Validate the ctrl version without the implementation ID */ + if (ctrl_ver_masked < MIN_ENA_CTRL_VER) { + pr_err("ENA ctrl version is lower than the minimal ctrl version the driver supports\n"); + return -1; + } + + return 0; +} + +void ena_com_admin_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_com_admin_cq *cq = &admin_queue->cq; + struct ena_com_admin_sq *sq = &admin_queue->sq; + struct ena_com_aenq *aenq = &ena_dev->aenq; + u16 size; + + if (admin_queue->comp_ctx) + devm_kfree(ena_dev->dmadev, admin_queue->comp_ctx); + admin_queue->comp_ctx = NULL; + size = ADMIN_SQ_SIZE(admin_queue->q_depth); + if (sq->entries) + dma_free_coherent(ena_dev->dmadev, size, sq->entries, + sq->dma_addr); + sq->entries = NULL; + + size = ADMIN_CQ_SIZE(admin_queue->q_depth); + if (cq->entries) + dma_free_coherent(ena_dev->dmadev, size, cq->entries, + cq->dma_addr); + cq->entries = NULL; + + size = ADMIN_AENQ_SIZE(aenq->q_depth); + if (ena_dev->aenq.entries) + dma_free_coherent(ena_dev->dmadev, size, aenq->entries, + aenq->dma_addr); + aenq->entries = NULL; +} + +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling) +{ + ena_dev->admin_queue.polling = polling; +} + +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + spin_lock_init(&mmio_read->lock); + mmio_read->read_resp = + dma_zalloc_coherent(ena_dev->dmadev, + sizeof(*mmio_read->read_resp), + &mmio_read->read_resp_dma_addr, GFP_KERNEL); + if (unlikely(!mmio_read->read_resp)) + return -ENOMEM; + + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + mmio_read->read_resp->req_id = 0x0; + mmio_read->seq_num = 0x0; + mmio_read->readless_supported = true; + + return 0; +} + +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, bool readless_supported) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + mmio_read->readless_supported = readless_supported; +} + +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(0x0, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); + + dma_free_coherent(ena_dev->dmadev, sizeof(*mmio_read->read_resp), + mmio_read->read_resp, mmio_read->read_resp_dma_addr); + + mmio_read->read_resp = NULL; +} + +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev) +{ + struct ena_com_mmio_read *mmio_read = &ena_dev->mmio_read; + u32 addr_low, addr_high; + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(mmio_read->read_resp_dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(mmio_read->read_resp_dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_MMIO_RESP_HI_OFF); +} + +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + u32 aq_caps, acq_caps, dev_sts, addr_low, addr_high; + int ret; + + dev_sts = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + + if (unlikely(dev_sts == ENA_MMIO_READ_TIMEOUT)) { + pr_err("Reg read timeout occurred\n"); + return -ETIME; + } + + if (!(dev_sts & ENA_REGS_DEV_STS_READY_MASK)) { + pr_err("Device isn't ready, abort com init\n"); + return -ENODEV; + } + + admin_queue->q_depth = ENA_ADMIN_QUEUE_DEPTH; + + admin_queue->q_dmadev = ena_dev->dmadev; + admin_queue->polling = false; + admin_queue->curr_cmd_id = 0; + + atomic_set(&admin_queue->outstanding_cmds, 0); + + if (init_spinlock) + spin_lock_init(&admin_queue->q_lock); + + ret = ena_com_init_comp_ctxt(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_sq(admin_queue); + if (ret) + goto error; + + ret = ena_com_admin_init_cq(admin_queue); + if (ret) + goto error; + + admin_queue->sq.db_addr = (u32 __iomem *)((uintptr_t)ena_dev->reg_bar + + ENA_REGS_AQ_DB_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->sq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->sq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_AQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_AQ_BASE_HI_OFF); + + addr_low = ENA_DMA_ADDR_TO_UINT32_LOW(admin_queue->cq.dma_addr); + addr_high = ENA_DMA_ADDR_TO_UINT32_HIGH(admin_queue->cq.dma_addr); + + writel(addr_low, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_LO_OFF); + writel(addr_high, ena_dev->reg_bar + ENA_REGS_ACQ_BASE_HI_OFF); + + aq_caps = 0; + aq_caps |= admin_queue->q_depth & ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK; + aq_caps |= (sizeof(struct ena_admin_aq_entry) << + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK; + + acq_caps = 0; + acq_caps |= admin_queue->q_depth & ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK; + acq_caps |= (sizeof(struct ena_admin_acq_entry) << + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT) & + ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK; + + writel(aq_caps, ena_dev->reg_bar + ENA_REGS_AQ_CAPS_OFF); + writel(acq_caps, ena_dev->reg_bar + ENA_REGS_ACQ_CAPS_OFF); + ret = ena_com_admin_init_aenq(ena_dev, aenq_handlers); + if (ret) + goto error; + + admin_queue->running_state = true; + + return 0; +error: + ena_com_admin_destroy(ena_dev); + + return ret; +} + +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + int ret; + + if (ctx->qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", + ctx->qid, ENA_TOTAL_NUM_QUEUES); + return -EINVAL; + } + + io_sq = &ena_dev->io_sq_queues[ctx->qid]; + io_cq = &ena_dev->io_cq_queues[ctx->qid]; + + memset(io_sq, 0x0, sizeof(struct ena_com_io_sq)); + memset(io_cq, 0x0, sizeof(struct ena_com_io_cq)); + + /* Init CQ */ + io_cq->q_depth = ctx->queue_size; + io_cq->direction = ctx->direction; + io_cq->qid = ctx->qid; + + io_cq->msix_vector = ctx->msix_vector; + + io_sq->q_depth = ctx->queue_size; + io_sq->direction = ctx->direction; + io_sq->qid = ctx->qid; + + io_sq->mem_queue_type = ctx->mem_queue_type; + + if (ctx->direction == ENA_COM_IO_QUEUE_DIRECTION_TX) + /* header length is limited to 8 bits */ + io_sq->tx_max_header_size = + min_t(u32, ena_dev->tx_max_header_size, SZ_256); + + ret = ena_com_init_io_sq(ena_dev, ctx, io_sq); + if (ret) + goto error; + ret = ena_com_init_io_cq(ena_dev, ctx, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_cq(ena_dev, io_cq); + if (ret) + goto error; + + ret = ena_com_create_io_sq(ena_dev, io_sq, io_cq->idx); + if (ret) + goto destroy_io_cq; + + return 0; + +destroy_io_cq: + ena_com_destroy_io_cq(ena_dev, io_cq); +error: + ena_com_io_queue_free(ena_dev, io_sq, io_cq); + return ret; +} + +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid) +{ + struct ena_com_io_sq *io_sq; + struct ena_com_io_cq *io_cq; + + if (qid >= ENA_TOTAL_NUM_QUEUES) { + pr_err("Qid (%d) is bigger than max num of queues (%d)\n", qid, + ENA_TOTAL_NUM_QUEUES); + return; + } + + io_sq = &ena_dev->io_sq_queues[qid]; + io_cq = &ena_dev->io_cq_queues[qid]; + + ena_com_destroy_io_sq(ena_dev, io_sq); + ena_com_destroy_io_cq(ena_dev, io_cq); + + ena_com_io_queue_free(ena_dev, io_sq, io_cq); +} + +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp) +{ + return ena_com_get_feature(ena_dev, resp, ENA_ADMIN_LINK_CONFIG); +} + +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_DEVICE_ATTRIBUTES); + if (rc) + return rc; + + memcpy(&get_feat_ctx->dev_attr, &get_resp.u.dev_attr, + sizeof(get_resp.u.dev_attr)); + ena_dev->supported_features = get_resp.u.dev_attr.supported_features; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_MAX_QUEUES_NUM); + if (rc) + return rc; + + memcpy(&get_feat_ctx->max_queues, &get_resp.u.max_queue, + sizeof(get_resp.u.max_queue)); + ena_dev->tx_max_header_size = get_resp.u.max_queue.max_header_size; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_AENQ_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->aenq, &get_resp.u.aenq, + sizeof(get_resp.u.aenq)); + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (rc) + return rc; + + memcpy(&get_feat_ctx->offload, &get_resp.u.offload, + sizeof(get_resp.u.offload)); + + return 0; +} + +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev) +{ + ena_com_handle_admin_completion(&ena_dev->admin_queue); +} + +/* ena_handle_specific_aenq_event: + * return the handler that is relevant to the specific event group + */ +static ena_aenq_handler ena_com_get_specific_aenq_cb(struct ena_com_dev *dev, + u16 group) +{ + struct ena_aenq_handlers *aenq_handlers = dev->aenq.aenq_handlers; + + if ((group < ENA_MAX_HANDLERS) && aenq_handlers->handlers[group]) + return aenq_handlers->handlers[group]; + + return aenq_handlers->unimplemented_handler; +} + +/* ena_aenq_intr_handler: + * handles the aenq incoming events. + * pop events from the queue and apply the specific handler + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data) +{ + struct ena_admin_aenq_entry *aenq_e; + struct ena_admin_aenq_common_desc *aenq_common; + struct ena_com_aenq *aenq = &dev->aenq; + ena_aenq_handler handler_cb; + u16 masked_head, processed = 0; + u8 phase; + + masked_head = aenq->head & (aenq->q_depth - 1); + phase = aenq->phase; + aenq_e = &aenq->entries[masked_head]; /* Get first entry */ + aenq_common = &aenq_e->aenq_common_desc; + + /* Go over all the events */ + while ((aenq_common->flags & ENA_ADMIN_AENQ_COMMON_DESC_PHASE_MASK) == + phase) { + pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n", + aenq_common->group, aenq_common->syndrom, + (u64)aenq_common->timestamp_low + + ((u64)aenq_common->timestamp_high << 32)); + + /* Handle specific event*/ + handler_cb = ena_com_get_specific_aenq_cb(dev, + aenq_common->group); + handler_cb(data, aenq_e); /* call the actual event handler*/ + + /* Get next event entry */ + masked_head++; + processed++; + + if (unlikely(masked_head == aenq->q_depth)) { + masked_head = 0; + phase = !phase; + } + aenq_e = &aenq->entries[masked_head]; + aenq_common = &aenq_e->aenq_common_desc; + } + + aenq->head += processed; + aenq->phase = phase; + + /* Don't update aenq doorbell if there weren't any processed events */ + if (!processed) + return; + + /* write the aenq doorbell after all AENQ descriptors were read */ + mb(); + writel((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF); +} + +int ena_com_dev_reset(struct ena_com_dev *ena_dev) +{ + u32 stat, timeout, cap, reset_val; + int rc; + + stat = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF); + cap = ena_com_reg_bar_read32(ena_dev, ENA_REGS_CAPS_OFF); + + if (unlikely((stat == ENA_MMIO_READ_TIMEOUT) || + (cap == ENA_MMIO_READ_TIMEOUT))) { + pr_err("Reg read32 timeout occurred\n"); + return -ETIME; + } + + if ((stat & ENA_REGS_DEV_STS_READY_MASK) == 0) { + pr_err("Device isn't ready, can't reset device\n"); + return -EINVAL; + } + + timeout = (cap & ENA_REGS_CAPS_RESET_TIMEOUT_MASK) >> + ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT; + if (timeout == 0) { + pr_err("Invalid timeout value\n"); + return -EINVAL; + } + + /* start reset */ + reset_val = ENA_REGS_DEV_CTL_DEV_RESET_MASK; + writel(reset_val, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + + /* Write again the MMIO read request address */ + ena_com_mmio_reg_read_request_write_dev_addr(ena_dev); + + rc = wait_for_reset_state(ena_dev, timeout, + ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK); + if (rc != 0) { + pr_err("Reset indication didn't turn on\n"); + return rc; + } + + /* reset done */ + writel(0, ena_dev->reg_bar + ENA_REGS_DEV_CTL_OFF); + rc = wait_for_reset_state(ena_dev, timeout, 0); + if (rc != 0) { + pr_err("Reset indication didn't turn off\n"); + return rc; + } + + return 0; +} + +static int ena_get_dev_stats(struct ena_com_dev *ena_dev, + struct ena_com_stats_ctx *ctx, + enum ena_admin_get_stats_type type) +{ + struct ena_admin_aq_get_stats_cmd *get_cmd = &ctx->get_cmd; + struct ena_admin_acq_get_stats_resp *get_resp = &ctx->get_resp; + struct ena_com_admin_queue *admin_queue; + int ret; + + admin_queue = &ena_dev->admin_queue; + + get_cmd->aq_common_descriptor.opcode = ENA_ADMIN_GET_STATS; + get_cmd->aq_common_descriptor.flags = 0; + get_cmd->type = type; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)get_cmd, + sizeof(*get_cmd), + (struct ena_admin_acq_entry *)get_resp, + sizeof(*get_resp)); + + if (unlikely(ret)) + pr_err("Failed to get stats. error: %d\n", ret); + + return ret; +} + +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats) +{ + struct ena_com_stats_ctx ctx; + int ret; + + memset(&ctx, 0x0, sizeof(ctx)); + ret = ena_get_dev_stats(ena_dev, &ctx, ENA_ADMIN_GET_STATS_TYPE_BASIC); + if (likely(ret == 0)) + memcpy(stats, &ctx.get_resp.basic_stats, + sizeof(ctx.get_resp.basic_stats)); + + return ret; +} + +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu) +{ + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, ENA_ADMIN_MTU)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_MTU); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = 0; + cmd.feat_common.feature_id = ENA_ADMIN_MTU; + cmd.u.mtu.mtu = mtu; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set mtu %d. error: %d\n", mtu, ret); + + return ret; +} + +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload) +{ + int ret; + struct ena_admin_get_feat_resp resp; + + ret = ena_com_get_feature(ena_dev, &resp, + ENA_ADMIN_STATELESS_OFFLOAD_CONFIG); + if (unlikely(ret)) { + pr_err("Failed to get offload capabilities %d\n", ret); + return ret; + } + + memcpy(offload, &resp.u.offload, sizeof(resp.u.offload)); + + return 0; +} + +int ena_com_set_hash_function(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + struct ena_admin_get_feat_resp get_resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_FUNCTION)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_HASH_FUNCTION); + return -EPERM; + } + + /* Validate hash function is supported */ + ret = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION); + if (unlikely(ret)) + return ret; + + if (get_resp.u.flow_hash_func.supported_func & (1 << rss->hash_func)) { + pr_err("Func hash %d isn't supported by device, abort\n", + rss->hash_func); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_FUNCTION; + cmd.u.flow_hash_func.init_val = rss->hash_init_val; + cmd.u.flow_hash_func.selected_func = 1 << rss->hash_func; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_key_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = sizeof(*rss->hash_key); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) { + pr_err("Failed to set hash function %d. error: %d\n", + rss->hash_func, ret); + return -EINVAL; + } + + return 0; +} + +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + /* Make sure size is a mult of DWs */ + if (unlikely(key_len & 0x3)) + return -EINVAL; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) { + pr_err("Flow hash function %d isn't supported\n", func); + return -EPERM; + } + + switch (func) { + case ENA_ADMIN_TOEPLITZ: + if (key_len > sizeof(hash_key->key)) { + pr_err("key len (%hu) is bigger than the max supported (%zu)\n", + key_len, sizeof(hash_key->key)); + return -EINVAL; + } + + memcpy(hash_key->key, key, key_len); + rss->hash_init_val = init_val; + hash_key->keys_num = key_len >> 2; + break; + case ENA_ADMIN_CRC32: + rss->hash_init_val = init_val; + break; + default: + pr_err("Invalid hash function (%d)\n", func); + return -EINVAL; + } + + rc = ena_com_set_hash_function(ena_dev); + + /* Restore the old function */ + if (unlikely(rc)) + ena_com_get_hash_function(ena_dev, NULL, NULL); + + return rc; +} + +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + struct ena_admin_feature_rss_flow_hash_control *hash_key = + rss->hash_key; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_FUNCTION, + rss->hash_key_dma_addr, + sizeof(*rss->hash_key)); + if (unlikely(rc)) + return rc; + + rss->hash_func = get_resp.u.flow_hash_func.selected_func; + if (func) + *func = rss->hash_func; + + if (key) + memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2); + + return 0; +} + +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + int rc; + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_HASH_INPUT, + rss->hash_ctrl_dma_addr, + sizeof(*rss->hash_ctrl)); + if (unlikely(rc)) + return rc; + + if (fields) + *fields = rss->hash_ctrl->selected_fields[proto].fields; + + return 0; +} + +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_RSS_HASH_INPUT)) { + pr_info("Feature %d isn't supported\n", ENA_ADMIN_RSS_HASH_INPUT); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_HASH_INPUT; + cmd.u.flow_hash_input.enabled_input_sort = + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L3_SORT_MASK | + ENA_ADMIN_FEATURE_RSS_FLOW_HASH_INPUT_L4_SORT_MASK; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->hash_ctrl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + cmd.control_buffer.length = sizeof(*hash_ctrl); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + if (unlikely(ret)) + pr_err("Failed to set hash input. error: %d\n", ret); + + return ret; +} + +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = + rss->hash_ctrl; + u16 available_fields = 0; + int rc, i; + + /* Get the supported hash input */ + rc = ena_com_get_hash_ctrl(ena_dev, 0, NULL); + if (unlikely(rc)) + return rc; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_TCP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_UDP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA | + ENA_ADMIN_RSS_L4_DP | ENA_ADMIN_RSS_L4_SP; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP6].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L3_SA | ENA_ADMIN_RSS_L3_DA; + + hash_ctrl->selected_fields[ENA_ADMIN_RSS_IP4_FRAG].fields = + ENA_ADMIN_RSS_L2_DA | ENA_ADMIN_RSS_L2_SA; + + for (i = 0; i < ENA_ADMIN_RSS_PROTO_NUM; i++) { + available_fields = hash_ctrl->selected_fields[i].fields & + hash_ctrl->supported_fields[i].fields; + if (available_fields != hash_ctrl->selected_fields[i].fields) { + pr_err("hash control doesn't support all the desire configuration. proto %x supported %x selected %x\n", + i, hash_ctrl->supported_fields[i].fields, + hash_ctrl->selected_fields[i].fields); + return -EPERM; + } + } + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return rc; +} + +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_feature_rss_hash_control *hash_ctrl = rss->hash_ctrl; + u16 supported_fields; + int rc; + + if (proto >= ENA_ADMIN_RSS_PROTO_NUM) { + pr_err("Invalid proto num (%u)\n", proto); + return -EINVAL; + } + + /* Get the ctrl table */ + rc = ena_com_get_hash_ctrl(ena_dev, proto, NULL); + if (unlikely(rc)) + return rc; + + /* Make sure all the fields are supported */ + supported_fields = hash_ctrl->supported_fields[proto].fields; + if ((hash_fields & supported_fields) != hash_fields) { + pr_err("proto %d doesn't support the required fields %x. supports only: %x\n", + proto, hash_fields, supported_fields); + } + + hash_ctrl->selected_fields[proto].fields = hash_fields; + + rc = ena_com_set_hash_ctrl(ena_dev); + + /* In case of failure, restore the old hash ctrl */ + if (unlikely(rc)) + ena_com_get_hash_ctrl(ena_dev, 0, NULL); + + return 0; +} + +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value) +{ + struct ena_rss *rss = &ena_dev->rss; + + if (unlikely(entry_idx >= (1 << rss->tbl_log_size))) + return -EINVAL; + + if (unlikely((entry_value > ENA_TOTAL_NUM_QUEUES))) + return -EINVAL; + + rss->host_rss_ind_tbl[entry_idx] = entry_value; + + return 0; +} + +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev) +{ + struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue; + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + int ret; + + if (!ena_com_check_supported_feature_id( + ena_dev, ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG)) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG); + return -EPERM; + } + + ret = ena_com_ind_tbl_convert_to_device(ena_dev); + if (ret) { + pr_err("Failed to convert host indirection table to device table\n"); + return ret; + } + + memset(&cmd, 0x0, sizeof(cmd)); + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.aq_common_descriptor.flags = + ENA_ADMIN_AQ_COMMON_DESC_CTRL_DATA_INDIRECT_MASK; + cmd.feat_common.feature_id = ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG; + cmd.u.ind_table.size = rss->tbl_log_size; + cmd.u.ind_table.inline_index = 0xFFFFFFFF; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.control_buffer.address, + rss->rss_ind_tbl_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.control_buffer.length = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set indirect table. error: %d\n", ret); + + return ret; +} + +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl) +{ + struct ena_rss *rss = &ena_dev->rss; + struct ena_admin_get_feat_resp get_resp; + u32 tbl_size; + int i, rc; + + tbl_size = (1ULL << rss->tbl_log_size) * + sizeof(struct ena_admin_rss_ind_table_entry); + + rc = ena_com_get_feature_ex(ena_dev, &get_resp, + ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG, + rss->rss_ind_tbl_dma_addr, + tbl_size); + if (unlikely(rc)) + return rc; + + if (!ind_tbl) + return 0; + + rc = ena_com_ind_tbl_convert_from_device(ena_dev); + if (unlikely(rc)) + return rc; + + for (i = 0; i < (1 << rss->tbl_log_size); i++) + ind_tbl[i] = rss->host_rss_ind_tbl[i]; + + return 0; +} + +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size) +{ + int rc; + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); + + rc = ena_com_indirect_table_allocate(ena_dev, indr_tbl_log_size); + if (unlikely(rc)) + goto err_indr_tbl; + + rc = ena_com_hash_key_allocate(ena_dev); + if (unlikely(rc)) + goto err_hash_key; + + rc = ena_com_hash_ctrl_init(ena_dev); + if (unlikely(rc)) + goto err_hash_ctrl; + + return 0; + +err_hash_ctrl: + ena_com_hash_key_destroy(ena_dev); +err_hash_key: + ena_com_indirect_table_destroy(ena_dev); +err_indr_tbl: + + return rc; +} + +void ena_com_rss_destroy(struct ena_com_dev *ena_dev) +{ + ena_com_indirect_table_destroy(ena_dev); + ena_com_hash_key_destroy(ena_dev); + ena_com_hash_ctrl_destroy(ena_dev); + + memset(&ena_dev->rss, 0x0, sizeof(ena_dev->rss)); +} + +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->host_info = + dma_zalloc_coherent(ena_dev->dmadev, SZ_4K, + &host_attr->host_info_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->host_info)) + return -ENOMEM; + + return 0; +} + +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + host_attr->debug_area_virt_addr = + dma_zalloc_coherent(ena_dev->dmadev, debug_area_size, + &host_attr->debug_area_dma_addr, GFP_KERNEL); + if (unlikely(!host_attr->debug_area_virt_addr)) { + host_attr->debug_area_size = 0; + return -ENOMEM; + } + + host_attr->debug_area_size = debug_area_size; + + return 0; +} + +void ena_com_delete_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->host_info) { + dma_free_coherent(ena_dev->dmadev, SZ_4K, host_attr->host_info, + host_attr->host_info_dma_addr); + host_attr->host_info = NULL; + } +} + +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + + if (host_attr->debug_area_virt_addr) { + dma_free_coherent(ena_dev->dmadev, host_attr->debug_area_size, + host_attr->debug_area_virt_addr, + host_attr->debug_area_dma_addr); + host_attr->debug_area_virt_addr = NULL; + } +} + +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev) +{ + struct ena_host_attribute *host_attr = &ena_dev->host_attr; + struct ena_com_admin_queue *admin_queue; + struct ena_admin_set_feat_cmd cmd; + struct ena_admin_set_feat_resp resp; + + int ret; + + if (!ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_HOST_ATTR_CONFIG)) { + pr_warn("Set host attribute isn't supported\n"); + return -EPERM; + } + + memset(&cmd, 0x0, sizeof(cmd)); + admin_queue = &ena_dev->admin_queue; + + cmd.aq_common_descriptor.opcode = ENA_ADMIN_SET_FEATURE; + cmd.feat_common.feature_id = ENA_ADMIN_HOST_ATTR_CONFIG; + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.debug_ba, + host_attr->debug_area_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + ret = ena_com_mem_addr_set(ena_dev, + &cmd.u.host_attr.os_info_ba, + host_attr->host_info_dma_addr); + if (unlikely(ret)) { + pr_err("memory address set failed\n"); + return ret; + } + + cmd.u.host_attr.debug_area_size = host_attr->debug_area_size; + + ret = ena_com_execute_admin_command(admin_queue, + (struct ena_admin_aq_entry *)&cmd, + sizeof(cmd), + (struct ena_admin_acq_entry *)&resp, + sizeof(resp)); + + if (unlikely(ret)) + pr_err("Failed to set host attributes: %d\n", ret); + + return ret; +} + +/* Interrupt moderation */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev) +{ + return ena_com_check_supported_feature_id(ena_dev, + ENA_ADMIN_INTERRUPT_MODERATION); +} + +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + ena_dev->intr_moder_tx_interval = tx_coalesce_usecs / + ena_dev->intr_delay_resolution; + + return 0; +} + +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs) +{ + if (!ena_dev->intr_delay_resolution) { + pr_err("Illegal interrupt delay granularity value\n"); + return -EFAULT; + } + + /* We use LOWEST entry of moderation table for storing + * nonadaptive interrupt coalescing values + */ + ena_dev->intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + rx_coalesce_usecs / ena_dev->intr_delay_resolution; + + return 0; +} + +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + if (ena_dev->intr_moder_tbl) + devm_kfree(ena_dev->dmadev, ena_dev->intr_moder_tbl); + ena_dev->intr_moder_tbl = NULL; +} + +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev) +{ + struct ena_admin_get_feat_resp get_resp; + u16 delay_resolution; + int rc; + + rc = ena_com_get_feature(ena_dev, &get_resp, + ENA_ADMIN_INTERRUPT_MODERATION); + + if (rc) { + if (rc == -EPERM) { + pr_info("Feature %d isn't supported\n", + ENA_ADMIN_INTERRUPT_MODERATION); + rc = 0; + } else { + pr_err("Failed to get interrupt moderation admin cmd. rc: %d\n", + rc); + } + + /* no moderation supported, disable adaptive support */ + ena_com_disable_adaptive_moderation(ena_dev); + return rc; + } + + rc = ena_com_init_interrupt_moderation_table(ena_dev); + if (rc) + goto err; + + /* if moderation is supported by device we set adaptive moderation */ + delay_resolution = get_resp.u.intr_moderation.intr_delay_resolution; + ena_com_update_intr_delay_resolution(ena_dev, delay_resolution); + ena_com_enable_adaptive_moderation(ena_dev); + + return 0; +err: + ena_com_destroy_interrupt_moderation(ena_dev); + return rc; +} + +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (!intr_moder_tbl) + return; + + intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval = + ENA_INTR_LOWEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].pkts_per_interval = + ENA_INTR_LOWEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOWEST].bytes_per_interval = + ENA_INTR_LOWEST_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_LOW].intr_moder_interval = + ENA_INTR_LOW_USECS; + intr_moder_tbl[ENA_INTR_MODER_LOW].pkts_per_interval = + ENA_INTR_LOW_PKTS; + intr_moder_tbl[ENA_INTR_MODER_LOW].bytes_per_interval = + ENA_INTR_LOW_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_MID].intr_moder_interval = + ENA_INTR_MID_USECS; + intr_moder_tbl[ENA_INTR_MODER_MID].pkts_per_interval = + ENA_INTR_MID_PKTS; + intr_moder_tbl[ENA_INTR_MODER_MID].bytes_per_interval = + ENA_INTR_MID_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGH].intr_moder_interval = + ENA_INTR_HIGH_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].pkts_per_interval = + ENA_INTR_HIGH_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGH].bytes_per_interval = + ENA_INTR_HIGH_BYTES; + + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].intr_moder_interval = + ENA_INTR_HIGHEST_USECS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].pkts_per_interval = + ENA_INTR_HIGHEST_PKTS; + intr_moder_tbl[ENA_INTR_MODER_HIGHEST].bytes_per_interval = + ENA_INTR_HIGHEST_BYTES; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev) +{ + return ena_dev->intr_moder_tx_interval; +} + +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (intr_moder_tbl) + return intr_moder_tbl[ENA_INTR_MODER_LOWEST].intr_moder_interval; + + return 0; +} + +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + intr_moder_tbl[level].intr_moder_interval = entry->intr_moder_interval; + if (ena_dev->intr_delay_resolution) + intr_moder_tbl[level].intr_moder_interval /= + ena_dev->intr_delay_resolution; + intr_moder_tbl[level].pkts_per_interval = entry->pkts_per_interval; + + /* use hardcoded value until ethtool supports bytecount parameter */ + if (entry->bytes_per_interval != ENA_INTR_BYTE_COUNT_NOT_SUPPORTED) + intr_moder_tbl[level].bytes_per_interval = entry->bytes_per_interval; +} + +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry) +{ + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + + if (level >= ENA_INTR_MAX_NUM_OF_LEVELS) + return; + + entry->intr_moder_interval = intr_moder_tbl[level].intr_moder_interval; + if (ena_dev->intr_delay_resolution) + entry->intr_moder_interval *= ena_dev->intr_delay_resolution; + entry->pkts_per_interval = + intr_moder_tbl[level].pkts_per_interval; + entry->bytes_per_interval = intr_moder_tbl[level].bytes_per_interval; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h new file mode 100644 index 000000000000..509d7b8e15ab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_com.h @@ -0,0 +1,1038 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_COM +#define ENA_COM + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_common_defs.h" +#include "ena_admin_defs.h" +#include "ena_eth_io_defs.h" +#include "ena_regs_defs.h" + +#undef pr_fmt +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#define ENA_MAX_NUM_IO_QUEUES 128U +/* We need to queues for each IO (on for Tx and one for Rx) */ +#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES)) + +#define ENA_MAX_HANDLERS 256 + +#define ENA_MAX_PHYS_ADDR_SIZE_BITS 48 + +/* Unit in usec */ +#define ENA_REG_READ_TIMEOUT 200000 + +#define ADMIN_SQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aq_entry)) +#define ADMIN_CQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_acq_entry)) +#define ADMIN_AENQ_SIZE(depth) ((depth) * sizeof(struct ena_admin_aenq_entry)) + +/*****************************************************************************/ +/*****************************************************************************/ +/* ENA adaptive interrupt moderation settings */ + +#define ENA_INTR_LOWEST_USECS (0) +#define ENA_INTR_LOWEST_PKTS (3) +#define ENA_INTR_LOWEST_BYTES (2 * 1524) + +#define ENA_INTR_LOW_USECS (32) +#define ENA_INTR_LOW_PKTS (12) +#define ENA_INTR_LOW_BYTES (16 * 1024) + +#define ENA_INTR_MID_USECS (80) +#define ENA_INTR_MID_PKTS (48) +#define ENA_INTR_MID_BYTES (64 * 1024) + +#define ENA_INTR_HIGH_USECS (128) +#define ENA_INTR_HIGH_PKTS (96) +#define ENA_INTR_HIGH_BYTES (128 * 1024) + +#define ENA_INTR_HIGHEST_USECS (192) +#define ENA_INTR_HIGHEST_PKTS (128) +#define ENA_INTR_HIGHEST_BYTES (192 * 1024) + +#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 196 +#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 4 +#define ENA_INTR_DELAY_OLD_VALUE_WEIGHT 6 +#define ENA_INTR_DELAY_NEW_VALUE_WEIGHT 4 +#define ENA_INTR_MODER_LEVEL_STRIDE 2 +#define ENA_INTR_BYTE_COUNT_NOT_SUPPORTED 0xFFFFFF + +enum ena_intr_moder_level { + ENA_INTR_MODER_LOWEST = 0, + ENA_INTR_MODER_LOW, + ENA_INTR_MODER_MID, + ENA_INTR_MODER_HIGH, + ENA_INTR_MODER_HIGHEST, + ENA_INTR_MAX_NUM_OF_LEVELS, +}; + +struct ena_intr_moder_entry { + unsigned int intr_moder_interval; + unsigned int pkts_per_interval; + unsigned int bytes_per_interval; +}; + +enum queue_direction { + ENA_COM_IO_QUEUE_DIRECTION_TX, + ENA_COM_IO_QUEUE_DIRECTION_RX +}; + +struct ena_com_buf { + dma_addr_t paddr; /**< Buffer physical address */ + u16 len; /**< Buffer length in bytes */ +}; + +struct ena_com_rx_buf_info { + u16 len; + u16 req_id; +}; + +struct ena_com_io_desc_addr { + u8 __iomem *pbuf_dev_addr; /* LLQ address */ + u8 *virt_addr; + dma_addr_t phys_addr; +}; + +struct ena_com_tx_meta { + u16 mss; + u16 l3_hdr_len; + u16 l3_hdr_offset; + u16 l4_hdr_len; /* In words */ +}; + +struct ena_com_io_cq { + struct ena_com_io_desc_addr cdesc_addr; + + /* Interrupt unmask register */ + u32 __iomem *unmask_reg; + + /* The completion queue head doorbell register */ + u32 __iomem *cq_head_db_reg; + + /* numa configuration register (for TPH) */ + u32 __iomem *numa_node_cfg_reg; + + /* The value to write to the above register to unmask + * the interrupt of this queue + */ + u32 msix_vector; + + enum queue_direction direction; + + /* holds the number of cdesc of the current packet */ + u16 cur_rx_pkt_cdesc_count; + /* save the firt cdesc idx of the current packet */ + u16 cur_rx_pkt_cdesc_start_idx; + + u16 q_depth; + /* Caller qid */ + u16 qid; + + /* Device queue index */ + u16 idx; + u16 head; + u16 last_head_update; + u8 phase; + u8 cdesc_entry_size_in_bytes; + +} ____cacheline_aligned; + +struct ena_com_io_sq { + struct ena_com_io_desc_addr desc_addr; + + u32 __iomem *db_addr; + u8 __iomem *header_addr; + + enum queue_direction direction; + enum ena_admin_placement_policy_type mem_queue_type; + + u32 msix_vector; + struct ena_com_tx_meta cached_tx_meta; + + u16 q_depth; + u16 qid; + + u16 idx; + u16 tail; + u16 next_to_comp; + u32 tx_max_header_size; + u8 phase; + u8 desc_entry_size; + u8 dma_addr_bits; +} ____cacheline_aligned; + +struct ena_com_admin_cq { + struct ena_admin_acq_entry *entries; + dma_addr_t dma_addr; + + u16 head; + u8 phase; +}; + +struct ena_com_admin_sq { + struct ena_admin_aq_entry *entries; + dma_addr_t dma_addr; + + u32 __iomem *db_addr; + + u16 head; + u16 tail; + u8 phase; + +}; + +struct ena_com_stats_admin { + u32 aborted_cmd; + u32 submitted_cmd; + u32 completed_cmd; + u32 out_of_space; + u32 no_completion; +}; + +struct ena_com_admin_queue { + void *q_dmadev; + spinlock_t q_lock; /* spinlock for the admin queue */ + struct ena_comp_ctx *comp_ctx; + u16 q_depth; + struct ena_com_admin_cq cq; + struct ena_com_admin_sq sq; + + /* Indicate if the admin queue should poll for completion */ + bool polling; + + u16 curr_cmd_id; + + /* Indicate that the ena was initialized and can + * process new admin commands + */ + bool running_state; + + /* Count the number of outstanding admin commands */ + atomic_t outstanding_cmds; + + struct ena_com_stats_admin stats; +}; + +struct ena_aenq_handlers; + +struct ena_com_aenq { + u16 head; + u8 phase; + struct ena_admin_aenq_entry *entries; + dma_addr_t dma_addr; + u16 q_depth; + struct ena_aenq_handlers *aenq_handlers; +}; + +struct ena_com_mmio_read { + struct ena_admin_ena_mmio_req_read_less_resp *read_resp; + dma_addr_t read_resp_dma_addr; + u16 seq_num; + bool readless_supported; + /* spin lock to ensure a single outstanding read */ + spinlock_t lock; +}; + +struct ena_rss { + /* Indirect table */ + u16 *host_rss_ind_tbl; + struct ena_admin_rss_ind_table_entry *rss_ind_tbl; + dma_addr_t rss_ind_tbl_dma_addr; + u16 tbl_log_size; + + /* Hash key */ + enum ena_admin_hash_functions hash_func; + struct ena_admin_feature_rss_flow_hash_control *hash_key; + dma_addr_t hash_key_dma_addr; + u32 hash_init_val; + + /* Flow Control */ + struct ena_admin_feature_rss_hash_control *hash_ctrl; + dma_addr_t hash_ctrl_dma_addr; + +}; + +struct ena_host_attribute { + /* Debug area */ + u8 *debug_area_virt_addr; + dma_addr_t debug_area_dma_addr; + u32 debug_area_size; + + /* Host information */ + struct ena_admin_host_info *host_info; + dma_addr_t host_info_dma_addr; +}; + +/* Each ena_dev is a PCI function. */ +struct ena_com_dev { + struct ena_com_admin_queue admin_queue; + struct ena_com_aenq aenq; + struct ena_com_io_cq io_cq_queues[ENA_TOTAL_NUM_QUEUES]; + struct ena_com_io_sq io_sq_queues[ENA_TOTAL_NUM_QUEUES]; + u8 __iomem *reg_bar; + void __iomem *mem_bar; + void *dmadev; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + u32 tx_max_header_size; + u16 stats_func; /* Selected function for extended statistic dump */ + u16 stats_queue; /* Selected queue for extended statistic dump */ + + struct ena_com_mmio_read mmio_read; + + struct ena_rss rss; + u32 supported_features; + u32 dma_addr_bits; + + struct ena_host_attribute host_attr; + bool adaptive_coalescing; + u16 intr_delay_resolution; + u32 intr_moder_tx_interval; + struct ena_intr_moder_entry *intr_moder_tbl; +}; + +struct ena_com_dev_get_features_ctx { + struct ena_admin_queue_feature_desc max_queues; + struct ena_admin_device_attr_feature_desc dev_attr; + struct ena_admin_feature_aenq_desc aenq; + struct ena_admin_feature_offload_desc offload; +}; + +struct ena_com_create_io_ctx { + enum ena_admin_placement_policy_type mem_queue_type; + enum queue_direction direction; + int numa_node; + u32 msix_vector; + u16 queue_size; + u16 qid; +}; + +typedef void (*ena_aenq_handler)(void *data, + struct ena_admin_aenq_entry *aenq_e); + +/* Holds aenq handlers. Indexed by AENQ event group */ +struct ena_aenq_handlers { + ena_aenq_handler handlers[ENA_MAX_HANDLERS]; + ena_aenq_handler unimplemented_handler; +}; + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_com_mmio_reg_read_request_init - Init the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * + * Initialize the register read mechanism. + * + * @note: This method must be the first stage in the initialization sequence. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev); + +/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + * @readless_supported: readless mode (enable/disable) + */ +void ena_com_set_mmio_read_mode(struct ena_com_dev *ena_dev, + bool readless_supported); + +/* ena_com_mmio_reg_read_request_write_dev_addr - Write the mmio reg read return + * value physical address. + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_write_dev_addr(struct ena_com_dev *ena_dev); + +/* ena_com_mmio_reg_read_request_destroy - Destroy the mmio reg read mechanism + * @ena_dev: ENA communication layer struct + */ +void ena_com_mmio_reg_read_request_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_admin_init - Init the admin and the async queues + * @ena_dev: ENA communication layer struct + * @aenq_handlers: Those handlers to be called upon event. + * @init_spinlock: Indicate if this method should init the admin spinlock or + * the spinlock was init before (for example, in a case of FLR). + * + * Initialize the admin submission and completion queues. + * Initialize the asynchronous events notification queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_admin_init(struct ena_com_dev *ena_dev, + struct ena_aenq_handlers *aenq_handlers, + bool init_spinlock); + +/* ena_com_admin_destroy - Destroy the admin and the async events queues. + * @ena_dev: ENA communication layer struct + * + * @note: Before calling this method, the caller must validate that the device + * won't send any additional admin completions/aenq. + * To achieve that, a FLR is recommended. + */ +void ena_com_admin_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_dev_reset - Perform device FLR to the device. + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_dev_reset(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_queue - Create io queue. + * @ena_dev: ENA communication layer struct + * @ctx - create context structure + * + * Create the submission and the completion queues. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_queue(struct ena_com_dev *ena_dev, + struct ena_com_create_io_ctx *ctx); + +/* ena_com_destroy_io_queue - Destroy IO queue with the queue id - qid. + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + */ +void ena_com_destroy_io_queue(struct ena_com_dev *ena_dev, u16 qid); + +/* ena_com_get_io_handlers - Return the io queue handlers + * @ena_dev: ENA communication layer struct + * @qid - the caller virtual queue id. + * @io_sq - IO submission queue handler + * @io_cq - IO completion queue handler. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_get_io_handlers(struct ena_com_dev *ena_dev, u16 qid, + struct ena_com_io_sq **io_sq, + struct ena_com_io_cq **io_cq); + +/* ena_com_admin_aenq_enable - ENAble asynchronous event notifications + * @ena_dev: ENA communication layer struct + * + * After this method, aenq event can be received via AENQ. + */ +void ena_com_admin_aenq_enable(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_running_state - Set the state of the admin queue + * @ena_dev: ENA communication layer struct + * + * Change the state of the admin queue (enable/disable) + */ +void ena_com_set_admin_running_state(struct ena_com_dev *ena_dev, bool state); + +/* ena_com_get_admin_running_state - Get the admin queue state + * @ena_dev: ENA communication layer struct + * + * Retrieve the state of the admin queue (enable/disable) + * + * @return - current polling mode (enable/disable) + */ +bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev); + +/* ena_com_set_admin_polling_mode - Set the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * @polling: ENAble/Disable polling mode + * + * Set the admin completion mode. + */ +void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling); + +/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode + * @ena_dev: ENA communication layer struct + * + * Get the admin completion mode. + * If polling mode is on, ena_com_execute_admin_command will perform a + * polling on the admin completion queue for the commands completion, + * otherwise it will wait on wait event. + * + * @return state + */ +bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev); + +/* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the admin completion queue and wake up all the pending + * threads that wait on the commands wait event. + * + * @note: Should be called after MSI-X interrupt. + */ +void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev); + +/* ena_com_aenq_intr_handler - AENQ interrupt handler + * @ena_dev: ENA communication layer struct + * + * This method go over the async event notification queue and call the proper + * aenq handler. + */ +void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data); + +/* ena_com_abort_admin_commands - Abort all the outstanding admin commands. + * @ena_dev: ENA communication layer struct + * + * This method aborts all the outstanding admin commands. + * The caller should then call ena_com_wait_for_abort_completion to make sure + * all the commands were completed. + */ +void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev); + +/* ena_com_wait_for_abort_completion - Wait for admin commands abort. + * @ena_dev: ENA communication layer struct + * + * This method wait until all the outstanding admin commands will be completed. + */ +void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev); + +/* ena_com_validate_version - Validate the device parameters + * @ena_dev: ENA communication layer struct + * + * This method validate the device parameters are the same as the saved + * parameters in ena_dev. + * This method is useful after device reset, to validate the device mac address + * and the device offloads are the same as before the reset. + * + * @return - 0 on success negative value otherwise. + */ +int ena_com_validate_version(struct ena_com_dev *ena_dev); + +/* ena_com_get_link_params - Retrieve physical link parameters. + * @ena_dev: ENA communication layer struct + * @resp: Link parameters + * + * Retrieve the physical link parameters, + * like speed, auto-negotiation and full duplex support. + * + * @return - 0 on Success negative value otherwise. + */ +int ena_com_get_link_params(struct ena_com_dev *ena_dev, + struct ena_admin_get_feat_resp *resp); + +/* ena_com_get_dma_width - Retrieve physical dma address width the device + * supports. + * @ena_dev: ENA communication layer struct + * + * Retrieve the maximum physical address bits the device can handle. + * + * @return: > 0 on Success and negative value otherwise. + */ +int ena_com_get_dma_width(struct ena_com_dev *ena_dev); + +/* ena_com_set_aenq_config - Set aenq groups configurations + * @ena_dev: ENA communication layer struct + * @groups flag: bit fields flags of enum ena_admin_aenq_group. + * + * Configure which aenq event group the driver would like to receive. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_aenq_config(struct ena_com_dev *ena_dev, u32 groups_flag); + +/* ena_com_get_dev_attr_feat - Get device features + * @ena_dev: ENA communication layer struct + * @get_feat_ctx: returned context that contain the get features. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_attr_feat(struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx); + +/* ena_com_get_dev_basic_stats - Get device basic statistics + * @ena_dev: ENA communication layer struct + * @stats: stats return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_dev_basic_stats(struct ena_com_dev *ena_dev, + struct ena_admin_basic_stats *stats); + +/* ena_com_set_dev_mtu - Configure the device mtu. + * @ena_dev: ENA communication layer struct + * @mtu: mtu value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_dev_mtu(struct ena_com_dev *ena_dev, int mtu); + +/* ena_com_get_offload_settings - Retrieve the device offloads capabilities + * @ena_dev: ENA communication layer struct + * @offlad: offload return value + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_offload_settings(struct ena_com_dev *ena_dev, + struct ena_admin_feature_offload_desc *offload); + +/* ena_com_rss_init - Init RSS + * @ena_dev: ENA communication layer struct + * @log_size: indirection log size + * + * Allocate RSS/RFS resources. + * The caller then can configure rss using ena_com_set_hash_function, + * ena_com_set_hash_ctrl and ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 log_size); + +/* ena_com_rss_destroy - Destroy rss + * @ena_dev: ENA communication layer struct + * + * Free all the RSS/RFS resources. + */ +void ena_com_rss_destroy(struct ena_com_dev *ena_dev); + +/* ena_com_fill_hash_function - Fill RSS hash function + * @ena_dev: ENA communication layer struct + * @func: The hash function (Toeplitz or crc) + * @key: Hash key (for toeplitz hash) + * @key_len: key length (max length 10 DW) + * @init_val: initial value for the hash function + * + * Fill the ena_dev resources with the desire hash function, hash key, key_len + * and key initial value (if needed by the hash function). + * To flush the key into the device the caller should call + * ena_com_set_hash_function. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions func, + const u8 *key, u16 key_len, u32 init_val); + +/* ena_com_set_hash_function - Flush the hash function and it dependencies to + * the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash function and it dependencies (key, key length and + * initial value) if needed. + * + * @note: Prior to this method the caller should call ena_com_fill_hash_function + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_function(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_function - Retrieve the hash function and the hash key + * from the device. + * @ena_dev: ENA communication layer struct + * @func: hash function + * @key: hash key + * + * Retrieve the hash function and the hash key from the device. + * + * @note: If the caller called ena_com_fill_hash_function but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_function(struct ena_com_dev *ena_dev, + enum ena_admin_hash_functions *func, + u8 *key); + +/* ena_com_fill_hash_ctrl - Fill RSS hash control + * @ena_dev: ENA communication layer struct. + * @proto: The protocol to configure. + * @hash_fields: bit mask of ena_admin_flow_hash_fields + * + * Fill the ena_dev resources with the desire hash control (the ethernet + * fields that take part of the hash) for a specific protocol. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_fill_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 hash_fields); + +/* ena_com_set_hash_ctrl - Flush the hash control resources to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the hash control (the ethernet fields that take part of the hash) + * + * @note: Prior to this method the caller should call ena_com_fill_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_get_hash_ctrl - Retrieve the hash control from the device. + * @ena_dev: ENA communication layer struct + * @proto: The protocol to retrieve. + * @fields: bit mask of ena_admin_flow_hash_fields. + * + * Retrieve the hash control from the device. + * + * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_get_hash_ctrl(struct ena_com_dev *ena_dev, + enum ena_admin_flow_hash_proto proto, + u16 *fields); + +/* ena_com_set_default_hash_ctrl - Set the hash control to a default + * configuration. + * @ena_dev: ENA communication layer struct + * + * Fill the ena_dev resources with the default hash control configuration. + * To flush the hash control to the device, the caller should call + * ena_com_set_hash_ctrl. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_default_hash_ctrl(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_fill_entry - Fill a single entry in the RSS + * indirection table + * @ena_dev: ENA communication layer struct. + * @entry_idx - indirection table entry. + * @entry_value - redirection value + * + * Fill a single entry of the RSS indirection table in the ena_dev resources. + * To flush the indirection table to the device, the called should call + * ena_com_indirect_table_set. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_fill_entry(struct ena_com_dev *ena_dev, + u16 entry_idx, u16 entry_value); + +/* ena_com_indirect_table_set - Flush the indirection table to the device. + * @ena_dev: ENA communication layer struct + * + * Flush the indirection hash control to the device. + * Prior to this method the caller should call ena_com_indirect_table_fill_entry + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_set(struct ena_com_dev *ena_dev); + +/* ena_com_indirect_table_get - Retrieve the indirection table from the device. + * @ena_dev: ENA communication layer struct + * @ind_tbl: indirection table + * + * Retrieve the RSS indirection table from the device. + * + * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash + * it to the device, the new configuration will be lost. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_indirect_table_get(struct ena_com_dev *ena_dev, u32 *ind_tbl); + +/* ena_com_allocate_host_info - Allocate host info resources. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_allocate_debug_area - Allocate debug area. + * @ena_dev: ENA communication layer struct + * @debug_area_size - debug area size. + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev, + u32 debug_area_size); + +/* ena_com_delete_debug_area - Free the debug area resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate debug area. + */ +void ena_com_delete_debug_area(struct ena_com_dev *ena_dev); + +/* ena_com_delete_host_info - Free the host info resources. + * @ena_dev: ENA communication layer struct + * + * Free the allocate host info. + */ +void ena_com_delete_host_info(struct ena_com_dev *ena_dev); + +/* ena_com_set_host_attributes - Update the device with the host + * attributes (debug area and host info) base address. + * @ena_dev: ENA communication layer struct + * + * @return: 0 on Success and negative value otherwise. + */ +int ena_com_set_host_attributes(struct ena_com_dev *ena_dev); + +/* ena_com_create_io_cq - Create io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Create IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_create_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_destroy_io_cq - Destroy io completion queue. + * @ena_dev: ENA communication layer struct + * @io_cq - io completion queue handler + + * Destroy IO completion queue. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev, + struct ena_com_io_cq *io_cq); + +/* ena_com_execute_admin_command - Execute admin command + * @admin_queue: admin queue. + * @cmd: the admin command to execute. + * @cmd_size: the command size. + * @cmd_completion: command completion return value. + * @cmd_comp_size: command completion size. + + * Submit an admin command and then wait until the device will return a + * completion. + * The completion will be copyed into cmd_comp. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_execute_admin_command(struct ena_com_admin_queue *admin_queue, + struct ena_admin_aq_entry *cmd, + size_t cmd_size, + struct ena_admin_acq_entry *cmd_comp, + size_t cmd_comp_size); + +/* ena_com_init_interrupt_moderation - Init interrupt moderation + * @ena_dev: ENA communication layer struct + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_init_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_destroy_interrupt_moderation - Destroy interrupt moderation resources + * @ena_dev: ENA communication layer struct + */ +void ena_com_destroy_interrupt_moderation(struct ena_com_dev *ena_dev); + +/* ena_com_interrupt_moderation_supported - Return if interrupt moderation + * capability is supported by the device. + * + * @return - supported or not. + */ +bool ena_com_interrupt_moderation_supported(struct ena_com_dev *ena_dev); + +/* ena_com_config_default_interrupt_moderation_table - Restore the interrupt + * moderation table back to the default parameters. + * @ena_dev: ENA communication layer struct + */ +void ena_com_config_default_interrupt_moderation_table(struct ena_com_dev *ena_dev); + +/* ena_com_update_nonadaptive_moderation_interval_tx - Update the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * @tx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev, + u32 tx_coalesce_usecs); + +/* ena_com_update_nonadaptive_moderation_interval_rx - Update the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * @rx_coalesce_usecs: Interval in usec. + * + * @return - 0 on success, negative value on failure. + */ +int ena_com_update_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev, + u32 rx_coalesce_usecs); + +/* ena_com_get_nonadaptive_moderation_interval_tx - Retrieve the + * non-adaptive interval in Tx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_tx(struct ena_com_dev *ena_dev); + +/* ena_com_get_nonadaptive_moderation_interval_rx - Retrieve the + * non-adaptive interval in Rx direction. + * @ena_dev: ENA communication layer struct + * + * @return - interval in usec + */ +unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *ena_dev); + +/* ena_com_init_intr_moderation_entry - Update a single entry in the interrupt + * moderation table. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry value + * + * Update a single entry in the interrupt moderation table. + */ +void ena_com_init_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +/* ena_com_get_intr_moderation_entry - Init ena_intr_moder_entry. + * @ena_dev: ENA communication layer struct + * @level: Interrupt moderation table level + * @entry: Entry to fill. + * + * Initialize the entry according to the adaptive interrupt moderation table. + */ +void ena_com_get_intr_moderation_entry(struct ena_com_dev *ena_dev, + enum ena_intr_moder_level level, + struct ena_intr_moder_entry *entry); + +static inline bool ena_com_get_adaptive_moderation_enabled(struct ena_com_dev *ena_dev) +{ + return ena_dev->adaptive_coalescing; +} + +static inline void ena_com_enable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = true; +} + +static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_dev) +{ + ena_dev->adaptive_coalescing = false; +} + +/* ena_com_calculate_interrupt_delay - Calculate new interrupt delay + * @ena_dev: ENA communication layer struct + * @pkts: Number of packets since the last update + * @bytes: Number of bytes received since the last update. + * @smoothed_interval: Returned interval + * @moder_tbl_idx: Current table level as input update new level as return + * value. + */ +static inline void ena_com_calculate_interrupt_delay(struct ena_com_dev *ena_dev, + unsigned int pkts, + unsigned int bytes, + unsigned int *smoothed_interval, + unsigned int *moder_tbl_idx) +{ + enum ena_intr_moder_level curr_moder_idx, new_moder_idx; + struct ena_intr_moder_entry *curr_moder_entry; + struct ena_intr_moder_entry *pred_moder_entry; + struct ena_intr_moder_entry *new_moder_entry; + struct ena_intr_moder_entry *intr_moder_tbl = ena_dev->intr_moder_tbl; + unsigned int interval; + + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + if (!pkts || !bytes) + /* Tx interrupt, or spurious interrupt, + * in both cases we just use same delay values + */ + return; + + curr_moder_idx = (enum ena_intr_moder_level)(*moder_tbl_idx); + if (unlikely(curr_moder_idx >= ENA_INTR_MAX_NUM_OF_LEVELS)) { + pr_err("Wrong moderation index %u\n", curr_moder_idx); + return; + } + + curr_moder_entry = &intr_moder_tbl[curr_moder_idx]; + new_moder_idx = curr_moder_idx; + + if (curr_moder_idx == ENA_INTR_MODER_LOWEST) { + if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } else { + pred_moder_entry = &intr_moder_tbl[curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE]; + + if ((pkts <= pred_moder_entry->pkts_per_interval) || + (bytes <= pred_moder_entry->bytes_per_interval)) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx - ENA_INTR_MODER_LEVEL_STRIDE); + else if ((pkts > curr_moder_entry->pkts_per_interval) || + (bytes > curr_moder_entry->bytes_per_interval)) { + if (curr_moder_idx != ENA_INTR_MODER_HIGHEST) + new_moder_idx = + (enum ena_intr_moder_level)(curr_moder_idx + ENA_INTR_MODER_LEVEL_STRIDE); + } + } + new_moder_entry = &intr_moder_tbl[new_moder_idx]; + + interval = new_moder_entry->intr_moder_interval; + *smoothed_interval = ( + (interval * ENA_INTR_DELAY_NEW_VALUE_WEIGHT + + ENA_INTR_DELAY_OLD_VALUE_WEIGHT * (*smoothed_interval)) + 5) / + 10; + + *moder_tbl_idx = new_moder_idx; +} + +/* ena_com_update_intr_reg - Prepare interrupt register + * @intr_reg: interrupt register to update. + * @rx_delay_interval: Rx interval in usecs + * @tx_delay_interval: Tx interval in usecs + * @unmask: unask enable/disable + * + * Prepare interrupt update register with the supplied parameters. + */ +static inline void ena_com_update_intr_reg(struct ena_eth_io_intr_reg *intr_reg, + u32 rx_delay_interval, + u32 tx_delay_interval, + bool unmask) +{ + intr_reg->intr_control = 0; + intr_reg->intr_control |= rx_delay_interval & + ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK; + + intr_reg->intr_control |= + (tx_delay_interval << ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT) + & ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK; + + if (unmask) + intr_reg->intr_control |= ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK; +} + +#endif /* !(ENA_COM) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h new file mode 100644 index 000000000000..bb8d73676eab --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h @@ -0,0 +1,48 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_COMMON_H_ +#define _ENA_COMMON_H_ + +#define ENA_COMMON_SPEC_VERSION_MAJOR 0 /* */ +#define ENA_COMMON_SPEC_VERSION_MINOR 10 /* */ + +/* ENA operates with 48-bit memory addresses. ena_mem_addr_t */ +struct ena_common_mem_addr { + u32 mem_addr_low; + + u16 mem_addr_high; + + /* MBZ */ + u16 reserved16; +}; + +#endif /*_ENA_COMMON_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c new file mode 100644 index 000000000000..539c536464a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c @@ -0,0 +1,501 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ena_eth_com.h" + +static inline struct ena_eth_io_rx_cdesc_base *ena_com_get_next_rx_cdesc( + struct ena_com_io_cq *io_cq) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 expected_phase, head_masked; + u16 desc_phase; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_rx_cdesc_base *)(io_cq->cdesc_addr.virt_addr + + (head_masked * io_cq->cdesc_entry_size_in_bytes)); + + desc_phase = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT; + + if (desc_phase != expected_phase) + return NULL; + + return cdesc; +} + +static inline void ena_com_cq_inc_head(struct ena_com_io_cq *io_cq) +{ + io_cq->head++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_cq->head & (io_cq->q_depth - 1)) == 0)) + io_cq->phase ^= 1; +} + +static inline void *get_sq_desc(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked; + u32 offset; + + tail_masked = io_sq->tail & (io_sq->q_depth - 1); + + offset = tail_masked * io_sq->desc_entry_size; + + return (void *)((uintptr_t)io_sq->desc_addr.virt_addr + offset); +} + +static inline void ena_com_copy_curr_sq_desc_to_dev(struct ena_com_io_sq *io_sq) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u32 offset = tail_masked * io_sq->desc_entry_size; + + /* In case this queue isn't a LLQ */ + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return; + + memcpy_toio(io_sq->desc_addr.pbuf_dev_addr + offset, + io_sq->desc_addr.virt_addr + offset, + io_sq->desc_entry_size); +} + +static inline void ena_com_sq_update_tail(struct ena_com_io_sq *io_sq) +{ + io_sq->tail++; + + /* Switch phase bit in case of wrap around */ + if (unlikely((io_sq->tail & (io_sq->q_depth - 1)) == 0)) + io_sq->phase ^= 1; +} + +static inline int ena_com_write_header(struct ena_com_io_sq *io_sq, + u8 *head_src, u16 header_len) +{ + u16 tail_masked = io_sq->tail & (io_sq->q_depth - 1); + u8 __iomem *dev_head_addr = + io_sq->header_addr + (tail_masked * io_sq->tx_max_header_size); + + if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST) + return 0; + + if (unlikely(!io_sq->header_addr)) { + pr_err("Push buffer header ptr is NULL\n"); + return -EINVAL; + } + + memcpy_toio(dev_head_addr, head_src, header_len); + + return 0; +} + +static inline struct ena_eth_io_rx_cdesc_base * + ena_com_rx_cdesc_idx_to_ptr(struct ena_com_io_cq *io_cq, u16 idx) +{ + idx &= (io_cq->q_depth - 1); + return (struct ena_eth_io_rx_cdesc_base *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + idx * io_cq->cdesc_entry_size_in_bytes); +} + +static inline u16 ena_com_cdesc_rx_pkt_get(struct ena_com_io_cq *io_cq, + u16 *first_cdesc_idx) +{ + struct ena_eth_io_rx_cdesc_base *cdesc; + u16 count = 0, head_masked; + u32 last = 0; + + do { + cdesc = ena_com_get_next_rx_cdesc(io_cq); + if (!cdesc) + break; + + ena_com_cq_inc_head(io_cq); + count++; + last = (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT; + } while (!last); + + if (last) { + *first_cdesc_idx = io_cq->cur_rx_pkt_cdesc_start_idx; + count += io_cq->cur_rx_pkt_cdesc_count; + + head_masked = io_cq->head & (io_cq->q_depth - 1); + + io_cq->cur_rx_pkt_cdesc_count = 0; + io_cq->cur_rx_pkt_cdesc_start_idx = head_masked; + + pr_debug("ena q_id: %d packets were completed. first desc idx %u descs# %d\n", + io_cq->qid, *first_cdesc_idx, count); + } else { + io_cq->cur_rx_pkt_cdesc_count += count; + count = 0; + } + + return count; +} + +static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + int rc; + + if (ena_tx_ctx->meta_valid) { + rc = memcmp(&io_sq->cached_tx_meta, + &ena_tx_ctx->ena_meta, + sizeof(struct ena_com_tx_meta)); + + if (unlikely(rc != 0)) + return true; + } + + return false; +} + +static inline void ena_com_create_and_store_tx_meta_desc(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx) +{ + struct ena_eth_io_tx_meta_desc *meta_desc = NULL; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + + meta_desc = get_sq_desc(io_sq); + memset(meta_desc, 0x0, sizeof(struct ena_eth_io_tx_meta_desc)); + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_DESC_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK; + + /* bits 0-9 of the mss */ + meta_desc->word2 |= (ena_meta->mss << + ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK; + /* bits 10-13 of the mss */ + meta_desc->len_ctrl |= ((ena_meta->mss >> 10) << + ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT) & + ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK; + + /* Extended meta desc */ + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK; + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + meta_desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_META_DESC_PHASE_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_FIRST_MASK; + meta_desc->word2 |= ena_meta->l3_hdr_len & + ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK; + meta_desc->word2 |= (ena_meta->l3_hdr_offset << + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK; + + meta_desc->word2 |= (ena_meta->l4_hdr_len << + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT) & + ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK; + + meta_desc->len_ctrl |= ENA_ETH_IO_TX_META_DESC_META_STORE_MASK; + + /* Cached the meta desc */ + memcpy(&io_sq->cached_tx_meta, ena_meta, + sizeof(struct ena_com_tx_meta)); + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); +} + +static inline void ena_com_rx_set_flags(struct ena_com_rx_ctx *ena_rx_ctx, + struct ena_eth_io_rx_cdesc_base *cdesc) +{ + ena_rx_ctx->l3_proto = cdesc->status & + ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK; + ena_rx_ctx->l4_proto = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT; + ena_rx_ctx->l3_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT; + ena_rx_ctx->l4_csum_err = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT; + ena_rx_ctx->hash = cdesc->hash; + ena_rx_ctx->frag = + (cdesc->status & ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK) >> + ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT; + + pr_debug("ena_rx_ctx->l3_proto %d ena_rx_ctx->l4_proto %d\nena_rx_ctx->l3_csum_err %d ena_rx_ctx->l4_csum_err %d\nhash frag %d frag: %d cdesc_status: %x\n", + ena_rx_ctx->l3_proto, ena_rx_ctx->l4_proto, + ena_rx_ctx->l3_csum_err, ena_rx_ctx->l4_csum_err, + ena_rx_ctx->hash, ena_rx_ctx->frag, cdesc->status); +} + +/*****************************************************************************/ +/***************************** API **********************************/ +/*****************************************************************************/ + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc) +{ + struct ena_eth_io_tx_desc *desc = NULL; + struct ena_com_buf *ena_bufs = ena_tx_ctx->ena_bufs; + void *push_header = ena_tx_ctx->push_header; + u16 header_len = ena_tx_ctx->header_len; + u16 num_bufs = ena_tx_ctx->num_bufs; + int total_desc, i, rc; + bool have_meta; + u64 addr_hi; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_TX, "wrong Q type"); + + /* num_bufs +1 for potential meta desc */ + if (ena_com_sq_empty_space(io_sq) < (num_bufs + 1)) { + pr_err("Not enough space in the tx queue\n"); + return -ENOMEM; + } + + if (unlikely(header_len > io_sq->tx_max_header_size)) { + pr_err("header size is too large %d max header: %d\n", + header_len, io_sq->tx_max_header_size); + return -EINVAL; + } + + /* start with pushing the header (if needed) */ + rc = ena_com_write_header(io_sq, push_header, header_len); + if (unlikely(rc)) + return rc; + + have_meta = ena_tx_ctx->meta_valid && ena_com_meta_desc_changed(io_sq, + ena_tx_ctx); + if (have_meta) + ena_com_create_and_store_tx_meta_desc(io_sq, ena_tx_ctx); + + /* If the caller doesn't want send packets */ + if (unlikely(!num_bufs && !header_len)) { + *nb_hw_desc = have_meta ? 0 : 1; + return 0; + } + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + /* Set first desc when we don't have meta descriptor */ + if (!have_meta) + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_FIRST_MASK; + + desc->buff_addr_hi_hdr_sz |= (header_len << + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT) & + ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK; + desc->len_ctrl |= (io_sq->phase << ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_COMP_REQ_MASK; + + /* Bits 0-9 */ + desc->meta_ctrl |= (ena_tx_ctx->req_id << + ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK; + + desc->meta_ctrl |= (ena_tx_ctx->df << + ENA_ETH_IO_TX_DESC_DF_SHIFT) & + ENA_ETH_IO_TX_DESC_DF_MASK; + + /* Bits 10-15 */ + desc->len_ctrl |= ((ena_tx_ctx->req_id >> 10) << + ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT) & + ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK; + + if (ena_tx_ctx->meta_valid) { + desc->meta_ctrl |= (ena_tx_ctx->tso_enable << + ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_TSO_EN_MASK; + desc->meta_ctrl |= ena_tx_ctx->l3_proto & + ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_proto << + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l3_csum_enable << + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_enable << + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK; + desc->meta_ctrl |= (ena_tx_ctx->l4_csum_partial << + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT) & + ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK; + } + + for (i = 0; i < num_bufs; i++) { + /* The first desc share the same desc as the header */ + if (likely(i != 0)) { + ena_com_copy_curr_sq_desc_to_dev(io_sq); + ena_com_sq_update_tail(io_sq); + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_tx_desc)); + + desc->len_ctrl |= (io_sq->phase << + ENA_ETH_IO_TX_DESC_PHASE_SHIFT) & + ENA_ETH_IO_TX_DESC_PHASE_MASK; + } + + desc->len_ctrl |= ena_bufs->len & + ENA_ETH_IO_TX_DESC_LENGTH_MASK; + + addr_hi = ((ena_bufs->paddr & + GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + desc->buff_addr_lo = (u32)ena_bufs->paddr; + desc->buff_addr_hi_hdr_sz |= addr_hi & + ENA_ETH_IO_TX_DESC_ADDR_HI_MASK; + ena_bufs++; + } + + /* set the last desc indicator */ + desc->len_ctrl |= ENA_ETH_IO_TX_DESC_LAST_MASK; + + ena_com_copy_curr_sq_desc_to_dev(io_sq); + + ena_com_sq_update_tail(io_sq); + + total_desc = max_t(u16, num_bufs, 1); + total_desc += have_meta ? 1 : 0; + + *nb_hw_desc = total_desc; + return 0; +} + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx) +{ + struct ena_com_rx_buf_info *ena_buf = &ena_rx_ctx->ena_bufs[0]; + struct ena_eth_io_rx_cdesc_base *cdesc = NULL; + u16 cdesc_idx = 0; + u16 nb_hw_desc; + u16 i; + + WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + nb_hw_desc = ena_com_cdesc_rx_pkt_get(io_cq, &cdesc_idx); + if (nb_hw_desc == 0) { + ena_rx_ctx->descs = nb_hw_desc; + return 0; + } + + pr_debug("fetch rx packet: queue %d completed desc: %d\n", io_cq->qid, + nb_hw_desc); + + if (unlikely(nb_hw_desc > ena_rx_ctx->max_bufs)) { + pr_err("Too many RX cdescs (%d) > MAX(%d)\n", nb_hw_desc, + ena_rx_ctx->max_bufs); + return -ENOSPC; + } + + for (i = 0; i < nb_hw_desc; i++) { + cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i); + + ena_buf->len = cdesc->length; + ena_buf->req_id = cdesc->req_id; + ena_buf++; + } + + /* Update SQ head ptr */ + io_sq->next_to_comp += nb_hw_desc; + + pr_debug("[%s][QID#%d] Updating SQ head to: %d\n", __func__, io_sq->qid, + io_sq->next_to_comp); + + /* Get rx flags from the last pkt */ + ena_com_rx_set_flags(ena_rx_ctx, cdesc); + + ena_rx_ctx->descs = nb_hw_desc; + return 0; +} + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id) +{ + struct ena_eth_io_rx_desc *desc; + + WARN(io_sq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type"); + + if (unlikely(ena_com_sq_empty_space(io_sq) == 0)) + return -ENOSPC; + + desc = get_sq_desc(io_sq); + memset(desc, 0x0, sizeof(struct ena_eth_io_rx_desc)); + + desc->length = ena_buf->len; + + desc->ctrl |= ENA_ETH_IO_RX_DESC_FIRST_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK; + desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK; + desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK; + + desc->req_id = req_id; + + desc->buff_addr_lo = (u32)ena_buf->paddr; + desc->buff_addr_hi = + ((ena_buf->paddr & GENMASK_ULL(io_sq->dma_addr_bits - 1, 32)) >> 32); + + ena_com_sq_update_tail(io_sq); + + return 0; +} + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id) +{ + u8 expected_phase, cdesc_phase; + struct ena_eth_io_tx_cdesc *cdesc; + u16 masked_head; + + masked_head = io_cq->head & (io_cq->q_depth - 1); + expected_phase = io_cq->phase; + + cdesc = (struct ena_eth_io_tx_cdesc *) + ((uintptr_t)io_cq->cdesc_addr.virt_addr + + (masked_head * io_cq->cdesc_entry_size_in_bytes)); + + /* When the current completion descriptor phase isn't the same as the + * expected, it mean that the device still didn't update + * this completion. + */ + cdesc_phase = cdesc->flags & ENA_ETH_IO_TX_CDESC_PHASE_MASK; + if (cdesc_phase != expected_phase) + return -EAGAIN; + + ena_com_cq_inc_head(io_cq); + + *req_id = cdesc->req_id; + + return 0; +} diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h new file mode 100644 index 000000000000..bb53c3a4f8e9 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h @@ -0,0 +1,160 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_ETH_COM_H_ +#define ENA_ETH_COM_H_ + +#include "ena_com.h" + +/* head update threshold in units of (queue size / ENA_COMP_HEAD_THRESH) */ +#define ENA_COMP_HEAD_THRESH 4 + +struct ena_com_tx_ctx { + struct ena_com_tx_meta ena_meta; + struct ena_com_buf *ena_bufs; + /* For LLQ, header buffer - pushed to the device mem space */ + void *push_header; + + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + u16 num_bufs; + u16 req_id; + /* For regular queue, indicate the size of the header + * For LLQ, indicate the size of the pushed buffer + */ + u16 header_len; + + u8 meta_valid; + u8 tso_enable; + u8 l3_csum_enable; + u8 l4_csum_enable; + u8 l4_csum_partial; + u8 df; /* Don't fragment */ +}; + +struct ena_com_rx_ctx { + struct ena_com_rx_buf_info *ena_bufs; + enum ena_eth_io_l3_proto_index l3_proto; + enum ena_eth_io_l4_proto_index l4_proto; + bool l3_csum_err; + bool l4_csum_err; + /* fragmented packet */ + bool frag; + u32 hash; + u16 descs; + int max_bufs; +}; + +int ena_com_prepare_tx(struct ena_com_io_sq *io_sq, + struct ena_com_tx_ctx *ena_tx_ctx, + int *nb_hw_desc); + +int ena_com_rx_pkt(struct ena_com_io_cq *io_cq, + struct ena_com_io_sq *io_sq, + struct ena_com_rx_ctx *ena_rx_ctx); + +int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq, + struct ena_com_buf *ena_buf, + u16 req_id); + +int ena_com_tx_comp_req_id_get(struct ena_com_io_cq *io_cq, u16 *req_id); + +static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq, + struct ena_eth_io_intr_reg *intr_reg) +{ + writel(intr_reg->intr_control, io_cq->unmask_reg); +} + +static inline int ena_com_sq_empty_space(struct ena_com_io_sq *io_sq) +{ + u16 tail, next_to_comp, cnt; + + next_to_comp = io_sq->next_to_comp; + tail = io_sq->tail; + cnt = tail - next_to_comp; + + return io_sq->q_depth - 1 - cnt; +} + +static inline int ena_com_write_sq_doorbell(struct ena_com_io_sq *io_sq) +{ + u16 tail; + + tail = io_sq->tail; + + pr_debug("write submission queue doorbell for queue: %d tail: %d\n", + io_sq->qid, tail); + + writel(tail, io_sq->db_addr); + + return 0; +} + +static inline int ena_com_update_dev_comp_head(struct ena_com_io_cq *io_cq) +{ + u16 unreported_comp, head; + bool need_update; + + head = io_cq->head; + unreported_comp = head - io_cq->last_head_update; + need_update = unreported_comp > (io_cq->q_depth / ENA_COMP_HEAD_THRESH); + + if (io_cq->cq_head_db_reg && need_update) { + pr_debug("Write completion queue doorbell for queue %d: head: %d\n", + io_cq->qid, head); + writel(head, io_cq->cq_head_db_reg); + io_cq->last_head_update = head; + } + + return 0; +} + +static inline void ena_com_update_numa_node(struct ena_com_io_cq *io_cq, + u8 numa_node) +{ + struct ena_eth_io_numa_node_cfg_reg numa_cfg; + + if (!io_cq->numa_node_cfg_reg) + return; + + numa_cfg.numa_cfg = (numa_node & ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK) + | ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK; + + writel(numa_cfg.numa_cfg, io_cq->numa_node_cfg_reg); +} + +static inline void ena_com_comp_ack(struct ena_com_io_sq *io_sq, u16 elem) +{ + io_sq->next_to_comp += elem; +} + +#endif /* ENA_ETH_COM_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h new file mode 100644 index 000000000000..f320c58793a5 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h @@ -0,0 +1,416 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_ETH_IO_H_ +#define _ENA_ETH_IO_H_ + +enum ena_eth_io_l3_proto_index { + ENA_ETH_IO_L3_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L3_PROTO_IPV4 = 8, + + ENA_ETH_IO_L3_PROTO_IPV6 = 11, + + ENA_ETH_IO_L3_PROTO_FCOE = 21, + + ENA_ETH_IO_L3_PROTO_ROCE = 22, +}; + +enum ena_eth_io_l4_proto_index { + ENA_ETH_IO_L4_PROTO_UNKNOWN = 0, + + ENA_ETH_IO_L4_PROTO_TCP = 12, + + ENA_ETH_IO_L4_PROTO_UDP = 13, + + ENA_ETH_IO_L4_PROTO_ROUTEABLE_ROCE = 23, +}; + +struct ena_eth_io_tx_desc { + /* 15:0 : length - Buffer length in bytes, must + * include any packet trailers that the ENA supposed + * to update like End-to-End CRC, Authentication GMAC + * etc. This length must not include the + * 'Push_Buffer' length. This length must not include + * the 4-byte added in the end for 802.3 Ethernet FCS + * 21:16 : req_id_hi - Request ID[15:10] + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBZ + * 24 : phase + * 25 : reserved1 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 3:0 : l3_proto_idx - L3 protocol. This field + * required when l3_csum_en,l3_csum or tso_en are set. + * 4 : DF - IPv4 DF, must be 0 if packet is IPv4 and + * DF flags of the IPv4 header is 0. Otherwise must + * be set to 1 + * 6:5 : reserved5 + * 7 : tso_en - Enable TSO, For TCP only. + * 12:8 : l4_proto_idx - L4 protocol. This field need + * to be set when l4_csum_en or tso_en are set. + * 13 : l3_csum_en - enable IPv4 header checksum. + * 14 : l4_csum_en - enable TCP/UDP checksum. + * 15 : ethernet_fcs_dis - when set, the controller + * will not append the 802.3 Ethernet Frame Check + * Sequence to the packet + * 16 : reserved16 + * 17 : l4_csum_partial - L4 partial checksum. when + * set to 0, the ENA calculates the L4 checksum, + * where the Destination Address required for the + * TCP/UDP pseudo-header is taken from the actual + * packet L3 header. when set to 1, the ENA doesn't + * calculate the sum of the pseudo-header, instead, + * the checksum field of the L4 is used instead. When + * TSO enabled, the checksum of the pseudo-header + * must not include the tcp length field. L4 partial + * checksum should be used for IPv6 packet that + * contains Routing Headers. + * 20:18 : reserved18 - MBZ + * 21 : reserved21 - MBZ + * 31:22 : req_id_lo - Request ID[9:0] + */ + u32 meta_ctrl; + + u32 buff_addr_lo; + + /* address high and header size + * 15:0 : addr_hi - Buffer Pointer[47:32] + * 23:16 : reserved16_w2 + * 31:24 : header_length - Header length. For Low + * Latency Queues, this fields indicates the number + * of bytes written to the headers' memory. For + * normal queues, if packet is TCP or UDP, and longer + * than max_header_size, then this field should be + * set to the sum of L4 header offset and L4 header + * size(without options), otherwise, this field + * should be set to 0. For both modes, this field + * must not exceed the max_header_size. + * max_header_size value is reported by the Max + * Queues Feature descriptor + */ + u32 buff_addr_hi_hdr_sz; +}; + +struct ena_eth_io_tx_meta_desc { + /* 9:0 : req_id_lo - Request ID[9:0] + * 11:10 : reserved10 - MBZ + * 12 : reserved12 - MBZ + * 13 : reserved13 - MBZ + * 14 : ext_valid - if set, offset fields in Word2 + * are valid Also MSS High in Word 0 and bits [31:24] + * in Word 3 + * 15 : reserved15 + * 19:16 : mss_hi + * 20 : eth_meta_type - 0: Tx Metadata Descriptor, 1: + * Extended Metadata Descriptor + * 21 : meta_store - Store extended metadata in queue + * cache + * 22 : reserved22 - MBZ + * 23 : meta_desc - MBO + * 24 : phase + * 25 : reserved25 - MBZ + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 28 : comp_req - Indicates whether completion + * should be posted, after packet is transmitted. + * Valid only for first descriptor + * 30:29 : reserved29 - MBZ + * 31 : reserved31 - MBZ + */ + u32 len_ctrl; + + /* 5:0 : req_id_hi + * 31:6 : reserved6 - MBZ + */ + u32 word1; + + /* 7:0 : l3_hdr_len + * 15:8 : l3_hdr_off + * 21:16 : l4_hdr_len_in_words - counts the L4 header + * length in words. there is an explicit assumption + * that L4 header appears right after L3 header and + * L4 offset is based on l3_hdr_off+l3_hdr_len + * 31:22 : mss_lo + */ + u32 word2; + + u32 reserved; +}; + +struct ena_eth_io_tx_cdesc { + /* Request ID[15:0] */ + u16 req_id; + + u8 status; + + /* flags + * 0 : phase + * 7:1 : reserved1 + */ + u8 flags; + + u16 sub_qid; + + u16 sq_head_idx; +}; + +struct ena_eth_io_rx_desc { + /* In bytes. 0 means 64KB */ + u16 length; + + /* MBZ */ + u8 reserved2; + + /* 0 : phase + * 1 : reserved1 - MBZ + * 2 : first - Indicates first descriptor in + * transaction + * 3 : last - Indicates last descriptor in transaction + * 4 : comp_req + * 5 : reserved5 - MBO + * 7:6 : reserved6 - MBZ + */ + u8 ctrl; + + u16 req_id; + + /* MBZ */ + u16 reserved6; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + /* MBZ */ + u16 reserved16_w3; +}; + +/* 4-word format Note: all ethernet parsing information are valid only when + * last=1 + */ +struct ena_eth_io_rx_cdesc_base { + /* 4:0 : l3_proto_idx + * 6:5 : src_vlan_cnt + * 7 : reserved7 - MBZ + * 12:8 : l4_proto_idx + * 13 : l3_csum_err - when set, either the L3 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l3_proto_idx indicates IPv4 packet + * 14 : l4_csum_err - when set, either the L4 + * checksum error detected, or, the controller didn't + * validate the checksum. This bit is valid only when + * l4_proto_idx indicates TCP/UDP packet, and, + * ipv4_frag is not set + * 15 : ipv4_frag - Indicates IPv4 fragmented packet + * 23:16 : reserved16 + * 24 : phase + * 25 : l3_csum2 - second checksum engine result + * 26 : first - Indicates first descriptor in + * transaction + * 27 : last - Indicates last descriptor in + * transaction + * 29:28 : reserved28 + * 30 : buffer - 0: Metadata descriptor. 1: Buffer + * Descriptor was used + * 31 : reserved31 + */ + u32 status; + + u16 length; + + u16 req_id; + + /* 32-bit hash result */ + u32 hash; + + u16 sub_qid; + + u16 reserved; +}; + +/* 8-word format */ +struct ena_eth_io_rx_cdesc_ext { + struct ena_eth_io_rx_cdesc_base base; + + u32 buff_addr_lo; + + u16 buff_addr_hi; + + u16 reserved16; + + u32 reserved_w6; + + u32 reserved_w7; +}; + +struct ena_eth_io_intr_reg { + /* 14:0 : rx_intr_delay + * 29:15 : tx_intr_delay + * 30 : intr_unmask + * 31 : reserved + */ + u32 intr_control; +}; + +struct ena_eth_io_numa_node_cfg_reg { + /* 7:0 : numa + * 30:8 : reserved + * 31 : enabled + */ + u32 numa_cfg; +}; + +/* tx_desc */ +#define ENA_ETH_IO_TX_DESC_LENGTH_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_SHIFT 16 +#define ENA_ETH_IO_TX_DESC_REQ_ID_HI_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_DESC_L3_PROTO_IDX_MASK GENMASK(3, 0) +#define ENA_ETH_IO_TX_DESC_DF_SHIFT 4 +#define ENA_ETH_IO_TX_DESC_DF_MASK BIT(4) +#define ENA_ETH_IO_TX_DESC_TSO_EN_SHIFT 7 +#define ENA_ETH_IO_TX_DESC_TSO_EN_MASK BIT(7) +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_TX_DESC_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_SHIFT 13 +#define ENA_ETH_IO_TX_DESC_L3_CSUM_EN_MASK BIT(13) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_SHIFT 14 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_EN_MASK BIT(14) +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_SHIFT 15 +#define ENA_ETH_IO_TX_DESC_ETHERNET_FCS_DIS_MASK BIT(15) +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_SHIFT 17 +#define ENA_ETH_IO_TX_DESC_L4_CSUM_PARTIAL_MASK BIT(17) +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_SHIFT 22 +#define ENA_ETH_IO_TX_DESC_REQ_ID_LO_MASK GENMASK(31, 22) +#define ENA_ETH_IO_TX_DESC_ADDR_HI_MASK GENMASK(15, 0) +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_SHIFT 24 +#define ENA_ETH_IO_TX_DESC_HEADER_LENGTH_MASK GENMASK(31, 24) + +/* tx_meta_desc */ +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_LO_MASK GENMASK(9, 0) +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_SHIFT 14 +#define ENA_ETH_IO_TX_META_DESC_EXT_VALID_MASK BIT(14) +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_MSS_HI_MASK GENMASK(19, 16) +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_SHIFT 20 +#define ENA_ETH_IO_TX_META_DESC_ETH_META_TYPE_MASK BIT(20) +#define ENA_ETH_IO_TX_META_DESC_META_STORE_SHIFT 21 +#define ENA_ETH_IO_TX_META_DESC_META_STORE_MASK BIT(21) +#define ENA_ETH_IO_TX_META_DESC_META_DESC_SHIFT 23 +#define ENA_ETH_IO_TX_META_DESC_META_DESC_MASK BIT(23) +#define ENA_ETH_IO_TX_META_DESC_PHASE_SHIFT 24 +#define ENA_ETH_IO_TX_META_DESC_PHASE_MASK BIT(24) +#define ENA_ETH_IO_TX_META_DESC_FIRST_SHIFT 26 +#define ENA_ETH_IO_TX_META_DESC_FIRST_MASK BIT(26) +#define ENA_ETH_IO_TX_META_DESC_LAST_SHIFT 27 +#define ENA_ETH_IO_TX_META_DESC_LAST_MASK BIT(27) +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_SHIFT 28 +#define ENA_ETH_IO_TX_META_DESC_COMP_REQ_MASK BIT(28) +#define ENA_ETH_IO_TX_META_DESC_REQ_ID_HI_MASK GENMASK(5, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_LEN_MASK GENMASK(7, 0) +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_SHIFT 8 +#define ENA_ETH_IO_TX_META_DESC_L3_HDR_OFF_MASK GENMASK(15, 8) +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_SHIFT 16 +#define ENA_ETH_IO_TX_META_DESC_L4_HDR_LEN_IN_WORDS_MASK GENMASK(21, 16) +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_SHIFT 22 +#define ENA_ETH_IO_TX_META_DESC_MSS_LO_MASK GENMASK(31, 22) + +/* tx_cdesc */ +#define ENA_ETH_IO_TX_CDESC_PHASE_MASK BIT(0) + +/* rx_desc */ +#define ENA_ETH_IO_RX_DESC_PHASE_MASK BIT(0) +#define ENA_ETH_IO_RX_DESC_FIRST_SHIFT 2 +#define ENA_ETH_IO_RX_DESC_FIRST_MASK BIT(2) +#define ENA_ETH_IO_RX_DESC_LAST_SHIFT 3 +#define ENA_ETH_IO_RX_DESC_LAST_MASK BIT(3) +#define ENA_ETH_IO_RX_DESC_COMP_REQ_SHIFT 4 +#define ENA_ETH_IO_RX_DESC_COMP_REQ_MASK BIT(4) + +/* rx_cdesc_base */ +#define ENA_ETH_IO_RX_CDESC_BASE_L3_PROTO_IDX_MASK GENMASK(4, 0) +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_SHIFT 5 +#define ENA_ETH_IO_RX_CDESC_BASE_SRC_VLAN_CNT_MASK GENMASK(6, 5) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_SHIFT 8 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_PROTO_IDX_MASK GENMASK(12, 8) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_SHIFT 13 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM_ERR_MASK BIT(13) +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_SHIFT 14 +#define ENA_ETH_IO_RX_CDESC_BASE_L4_CSUM_ERR_MASK BIT(14) +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_SHIFT 15 +#define ENA_ETH_IO_RX_CDESC_BASE_IPV4_FRAG_MASK BIT(15) +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_SHIFT 24 +#define ENA_ETH_IO_RX_CDESC_BASE_PHASE_MASK BIT(24) +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_SHIFT 25 +#define ENA_ETH_IO_RX_CDESC_BASE_L3_CSUM2_MASK BIT(25) +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_SHIFT 26 +#define ENA_ETH_IO_RX_CDESC_BASE_FIRST_MASK BIT(26) +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_SHIFT 27 +#define ENA_ETH_IO_RX_CDESC_BASE_LAST_MASK BIT(27) +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_SHIFT 30 +#define ENA_ETH_IO_RX_CDESC_BASE_BUFFER_MASK BIT(30) + +/* intr_reg */ +#define ENA_ETH_IO_INTR_REG_RX_INTR_DELAY_MASK GENMASK(14, 0) +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_SHIFT 15 +#define ENA_ETH_IO_INTR_REG_TX_INTR_DELAY_MASK GENMASK(29, 15) +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_SHIFT 30 +#define ENA_ETH_IO_INTR_REG_INTR_UNMASK_MASK BIT(30) + +/* numa_node_cfg_reg */ +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_NUMA_MASK GENMASK(7, 0) +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT 31 +#define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK BIT(31) + +#endif /*_ENA_ETH_IO_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c new file mode 100644 index 000000000000..67b2338f8fb3 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c @@ -0,0 +1,895 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include + +#include "ena_netdev.h" + +struct ena_stats { + char name[ETH_GSTRING_LEN]; + int stat_offset; +}; + +#define ENA_STAT_ENA_COM_ENTRY(stat) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_com_stats_admin, stat) \ +} + +#define ENA_STAT_ENTRY(stat, stat_type) { \ + .name = #stat, \ + .stat_offset = offsetof(struct ena_stats_##stat_type, stat) \ +} + +#define ENA_STAT_RX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, rx) + +#define ENA_STAT_TX_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, tx) + +#define ENA_STAT_GLOBAL_ENTRY(stat) \ + ENA_STAT_ENTRY(stat, dev) + +static const struct ena_stats ena_stats_global_strings[] = { + ENA_STAT_GLOBAL_ENTRY(tx_timeout), + ENA_STAT_GLOBAL_ENTRY(io_suspend), + ENA_STAT_GLOBAL_ENTRY(io_resume), + ENA_STAT_GLOBAL_ENTRY(wd_expired), + ENA_STAT_GLOBAL_ENTRY(interface_up), + ENA_STAT_GLOBAL_ENTRY(interface_down), + ENA_STAT_GLOBAL_ENTRY(admin_q_pause), +}; + +static const struct ena_stats ena_stats_tx_strings[] = { + ENA_STAT_TX_ENTRY(cnt), + ENA_STAT_TX_ENTRY(bytes), + ENA_STAT_TX_ENTRY(queue_stop), + ENA_STAT_TX_ENTRY(queue_wakeup), + ENA_STAT_TX_ENTRY(dma_mapping_err), + ENA_STAT_TX_ENTRY(linearize), + ENA_STAT_TX_ENTRY(linearize_failed), + ENA_STAT_TX_ENTRY(napi_comp), + ENA_STAT_TX_ENTRY(tx_poll), + ENA_STAT_TX_ENTRY(doorbells), + ENA_STAT_TX_ENTRY(prepare_ctx_err), + ENA_STAT_TX_ENTRY(missing_tx_comp), + ENA_STAT_TX_ENTRY(bad_req_id), +}; + +static const struct ena_stats ena_stats_rx_strings[] = { + ENA_STAT_RX_ENTRY(cnt), + ENA_STAT_RX_ENTRY(bytes), + ENA_STAT_RX_ENTRY(refil_partial), + ENA_STAT_RX_ENTRY(bad_csum), + ENA_STAT_RX_ENTRY(page_alloc_fail), + ENA_STAT_RX_ENTRY(skb_alloc_fail), + ENA_STAT_RX_ENTRY(dma_mapping_err), + ENA_STAT_RX_ENTRY(bad_desc_num), + ENA_STAT_RX_ENTRY(rx_copybreak_pkt), +}; + +static const struct ena_stats ena_stats_ena_com_strings[] = { + ENA_STAT_ENA_COM_ENTRY(aborted_cmd), + ENA_STAT_ENA_COM_ENTRY(submitted_cmd), + ENA_STAT_ENA_COM_ENTRY(completed_cmd), + ENA_STAT_ENA_COM_ENTRY(out_of_space), + ENA_STAT_ENA_COM_ENTRY(no_completion), +}; + +#define ENA_STATS_ARRAY_GLOBAL ARRAY_SIZE(ena_stats_global_strings) +#define ENA_STATS_ARRAY_TX ARRAY_SIZE(ena_stats_tx_strings) +#define ENA_STATS_ARRAY_RX ARRAY_SIZE(ena_stats_rx_strings) +#define ENA_STATS_ARRAY_ENA_COM ARRAY_SIZE(ena_stats_ena_com_strings) + +static void ena_safe_update_stat(u64 *src, u64 *dst, + struct u64_stats_sync *syncp) +{ + unsigned int start; + + do { + start = u64_stats_fetch_begin_irq(syncp); + *(dst) = *src; + } while (u64_stats_fetch_retry_irq(syncp, start)); +} + +static void ena_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + struct ena_ring *ring; + + u64 *ptr; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + ring = &adapter->tx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->tx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + + /* Rx stats */ + ring = &adapter->rx_ring[i]; + + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + ptr = (u64 *)((uintptr_t)&ring->rx_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, (*data)++, &ring->syncp); + } + } +} + +static void ena_dev_admin_queue_stats(struct ena_adapter *adapter, u64 **data) +{ + const struct ena_stats *ena_stats; + u32 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + ptr = (u32 *)((uintptr_t)&adapter->ena_dev->admin_queue.stats + + (uintptr_t)ena_stats->stat_offset); + + *(*data)++ = *ptr; + } +} + +static void ena_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, + u64 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + u64 *ptr; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + ptr = (u64 *)((uintptr_t)&adapter->dev_stats + + (uintptr_t)ena_stats->stat_offset); + + ena_safe_update_stat(ptr, data++, &adapter->syncp); + } + + ena_queue_stats(adapter, &data); + ena_dev_admin_queue_stats(adapter, &data); +} + +int ena_get_sset_count(struct net_device *netdev, int sset) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + if (sset != ETH_SS_STATS) + return -EOPNOTSUPP; + + return adapter->num_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX) + + ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM; +} + +static void ena_queue_strings(struct ena_adapter *adapter, u8 **data) +{ + const struct ena_stats *ena_stats; + int i, j; + + for (i = 0; i < adapter->num_queues; i++) { + /* Tx stats */ + for (j = 0; j < ENA_STATS_ARRAY_TX; j++) { + ena_stats = &ena_stats_tx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_tx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + /* Rx stats */ + for (j = 0; j < ENA_STATS_ARRAY_RX; j++) { + ena_stats = &ena_stats_rx_strings[j]; + + snprintf(*data, ETH_GSTRING_LEN, + "queue_%u_rx_%s", i, ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } + } +} + +static void ena_com_dev_strings(u8 **data) +{ + const struct ena_stats *ena_stats; + int i; + + for (i = 0; i < ENA_STATS_ARRAY_ENA_COM; i++) { + ena_stats = &ena_stats_ena_com_strings[i]; + + snprintf(*data, ETH_GSTRING_LEN, + "ena_admin_q_%s", ena_stats->name); + (*data) += ETH_GSTRING_LEN; + } +} + +static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + const struct ena_stats *ena_stats; + int i; + + if (sset != ETH_SS_STATS) + return; + + for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) { + ena_stats = &ena_stats_global_strings[i]; + + memcpy(data, ena_stats->name, ETH_GSTRING_LEN); + data += ETH_GSTRING_LEN; + } + + ena_queue_strings(adapter, &data); + ena_com_dev_strings(&data); +} + +static int ena_get_link_ksettings(struct net_device *netdev, + struct ethtool_link_ksettings *link_ksettings) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_admin_get_feature_link_desc *link; + struct ena_admin_get_feat_resp feat_resp; + int rc; + + rc = ena_com_get_link_params(ena_dev, &feat_resp); + if (rc) + return rc; + + link = &feat_resp.u.link; + link_ksettings->base.speed = link->speed; + + if (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) { + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + ethtool_link_ksettings_add_link_mode(link_ksettings, + supported, Autoneg); + } + + link_ksettings->base.autoneg = + (link->flags & ENA_ADMIN_GET_FEATURE_LINK_DESC_AUTONEG_MASK) ? + AUTONEG_ENABLE : AUTONEG_DISABLE; + + link_ksettings->base.duplex = DUPLEX_FULL; + + return 0; +} + +static int ena_get_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + coalesce->tx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) / + ena_dev->intr_delay_resolution; + if (!ena_com_get_adaptive_moderation_enabled(ena_dev)) { + coalesce->rx_coalesce_usecs = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev) + / ena_dev->intr_delay_resolution; + } else { + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_low = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_low = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + coalesce->rx_coalesce_usecs = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames = intr_moder_entry.pkts_per_interval; + + ena_com_get_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + coalesce->rx_coalesce_usecs_high = intr_moder_entry.intr_moder_interval; + coalesce->rx_max_coalesced_frames_high = intr_moder_entry.pkts_per_interval; + } + coalesce->use_adaptive_rx_coalesce = + ena_com_get_adaptive_moderation_enabled(ena_dev); + + return 0; +} + +static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter) +{ + unsigned int val; + int i; + + val = ena_com_get_nonadaptive_moderation_interval_tx(adapter->ena_dev); + + for (i = 0; i < adapter->num_queues; i++) + adapter->tx_ring[i].smoothed_interval = val; +} + +static int ena_set_coalesce(struct net_device *net_dev, + struct ethtool_coalesce *coalesce) +{ + struct ena_adapter *adapter = netdev_priv(net_dev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct ena_intr_moder_entry intr_moder_entry; + int rc; + + if (!ena_com_interrupt_moderation_supported(ena_dev)) { + /* the devie doesn't support interrupt moderation */ + return -EOPNOTSUPP; + } + + if (coalesce->rx_coalesce_usecs_irq || + coalesce->rx_max_coalesced_frames_irq || + coalesce->tx_coalesce_usecs_irq || + coalesce->tx_max_coalesced_frames || + coalesce->tx_max_coalesced_frames_irq || + coalesce->stats_block_coalesce_usecs || + coalesce->use_adaptive_tx_coalesce || + coalesce->pkt_rate_low || + coalesce->tx_coalesce_usecs_low || + coalesce->tx_max_coalesced_frames_low || + coalesce->pkt_rate_high || + coalesce->tx_coalesce_usecs_high || + coalesce->tx_max_coalesced_frames_high || + coalesce->rate_sample_interval) + return -EINVAL; + + rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev, + coalesce->tx_coalesce_usecs); + if (rc) + return rc; + + ena_update_tx_rings_intr_moderation(adapter); + + if (ena_com_get_adaptive_moderation_enabled(ena_dev)) { + if (!coalesce->use_adaptive_rx_coalesce) { + ena_com_disable_adaptive_moderation(ena_dev); + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } else { /* was in non-adaptive mode */ + if (coalesce->use_adaptive_rx_coalesce) { + ena_com_enable_adaptive_moderation(ena_dev); + } else { + rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev, + coalesce->rx_coalesce_usecs); + return rc; + } + } + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_low; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_low; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_LOWEST, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_MID, &intr_moder_entry); + + intr_moder_entry.intr_moder_interval = coalesce->rx_coalesce_usecs_high; + intr_moder_entry.pkts_per_interval = coalesce->rx_max_coalesced_frames_high; + intr_moder_entry.bytes_per_interval = ENA_INTR_BYTE_COUNT_NOT_SUPPORTED; + ena_com_init_intr_moderation_entry(adapter->ena_dev, ENA_INTR_MODER_HIGHEST, &intr_moder_entry); + + return 0; +} + +static u32 ena_get_msglevel(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + return adapter->msg_enable; +} + +static void ena_set_msglevel(struct net_device *netdev, u32 value) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + adapter->msg_enable = value; +} + +static void ena_get_drvinfo(struct net_device *dev, + struct ethtool_drvinfo *info) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + strlcpy(info->driver, DRV_MODULE_NAME, sizeof(info->driver)); + strlcpy(info->version, DRV_MODULE_VERSION, sizeof(info->version)); + strlcpy(info->bus_info, pci_name(adapter->pdev), + sizeof(info->bus_info)); +} + +static void ena_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_ring *tx_ring = &adapter->tx_ring[0]; + struct ena_ring *rx_ring = &adapter->rx_ring[0]; + + ring->rx_max_pending = rx_ring->ring_size; + ring->tx_max_pending = tx_ring->ring_size; + ring->rx_pending = rx_ring->ring_size; + ring->tx_pending = tx_ring->ring_size; +} + +static u32 ena_flow_hash_to_flow_type(u16 hash_fields) +{ + u32 data = 0; + + if (hash_fields & ENA_ADMIN_RSS_L2_DA) + data |= RXH_L2DA; + + if (hash_fields & ENA_ADMIN_RSS_L3_DA) + data |= RXH_IP_DST; + + if (hash_fields & ENA_ADMIN_RSS_L3_SA) + data |= RXH_IP_SRC; + + if (hash_fields & ENA_ADMIN_RSS_L4_DP) + data |= RXH_L4_B_2_3; + + if (hash_fields & ENA_ADMIN_RSS_L4_SP) + data |= RXH_L4_B_0_1; + + return data; +} + +static u16 ena_flow_data_to_flow_hash(u32 hash_fields) +{ + u16 data = 0; + + if (hash_fields & RXH_L2DA) + data |= ENA_ADMIN_RSS_L2_DA; + + if (hash_fields & RXH_IP_DST) + data |= ENA_ADMIN_RSS_L3_DA; + + if (hash_fields & RXH_IP_SRC) + data |= ENA_ADMIN_RSS_L3_SA; + + if (hash_fields & RXH_L4_B_2_3) + data |= ENA_ADMIN_RSS_L4_DP; + + if (hash_fields & RXH_L4_B_0_1) + data |= ENA_ADMIN_RSS_L4_SP; + + return data; +} + +static int ena_get_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + int rc; + + cmd->data = 0; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + rc = ena_com_get_hash_ctrl(ena_dev, proto, &hash_fields); + if (rc) { + /* If device don't have permission, return unsupported */ + if (rc == -EPERM) + rc = -EOPNOTSUPP; + return rc; + } + + cmd->data = ena_flow_hash_to_flow_type(hash_fields); + + return 0; +} + +static int ena_set_rss_hash(struct ena_com_dev *ena_dev, + struct ethtool_rxnfc *cmd) +{ + enum ena_admin_flow_hash_proto proto; + u16 hash_fields; + + switch (cmd->flow_type) { + case TCP_V4_FLOW: + proto = ENA_ADMIN_RSS_TCP4; + break; + case UDP_V4_FLOW: + proto = ENA_ADMIN_RSS_UDP4; + break; + case TCP_V6_FLOW: + proto = ENA_ADMIN_RSS_TCP6; + break; + case UDP_V6_FLOW: + proto = ENA_ADMIN_RSS_UDP6; + break; + case IPV4_FLOW: + proto = ENA_ADMIN_RSS_IP4; + break; + case IPV6_FLOW: + proto = ENA_ADMIN_RSS_IP6; + break; + case ETHER_FLOW: + proto = ENA_ADMIN_RSS_NOT_IP; + break; + case AH_V4_FLOW: + case ESP_V4_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + return -EOPNOTSUPP; + default: + return -EINVAL; + } + + hash_fields = ena_flow_data_to_flow_hash(cmd->data); + + return ena_com_fill_hash_ctrl(ena_dev, proto, hash_fields); +} + +static int ena_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_SRXFH: + rc = ena_set_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_SRXCLSRLDEL: + case ETHTOOL_SRXCLSRLINS: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static int ena_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info, + u32 *rules) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc = 0; + + switch (info->cmd) { + case ETHTOOL_GRXRINGS: + info->data = adapter->num_queues; + rc = 0; + break; + case ETHTOOL_GRXFH: + rc = ena_get_rss_hash(adapter->ena_dev, info); + break; + case ETHTOOL_GRXCLSRLCNT: + case ETHTOOL_GRXCLSRULE: + case ETHTOOL_GRXCLSRLALL: + default: + netif_err(adapter, drv, netdev, + "Command parameter %d is not supported\n", info->cmd); + rc = -EOPNOTSUPP; + } + + return (rc == -EPERM) ? -EOPNOTSUPP : rc; +} + +static u32 ena_get_rxfh_indir_size(struct net_device *netdev) +{ + return ENA_RX_RSS_TABLE_SIZE; +} + +static u32 ena_get_rxfh_key_size(struct net_device *netdev) +{ + return ENA_HASH_KEY_SIZE; +} + +static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key, + u8 *hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + enum ena_admin_hash_functions ena_func; + u8 func; + int rc; + + rc = ena_com_indirect_table_get(adapter->ena_dev, indir); + if (rc) + return rc; + + rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key); + if (rc) + return rc; + + switch (ena_func) { + case ENA_ADMIN_TOEPLITZ: + func = ETH_RSS_HASH_TOP; + break; + case ENA_ADMIN_CRC32: + func = ETH_RSS_HASH_XOR; + break; + default: + netif_err(adapter, drv, netdev, + "Command parameter is not supported\n"); + return -EOPNOTSUPP; + } + + if (hfunc) + *hfunc = func; + + return rc; +} + +static int ena_set_rxfh(struct net_device *netdev, const u32 *indir, + const u8 *key, const u8 hfunc) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_com_dev *ena_dev = adapter->ena_dev; + enum ena_admin_hash_functions func; + int rc, i; + + if (indir) { + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + rc = ena_com_indirect_table_fill_entry(ena_dev, + ENA_IO_RXQ_IDX(indir[i]), + i); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, + "Cannot fill indirect table (index is too large)\n"); + return rc; + } + } + + rc = ena_com_indirect_table_set(ena_dev); + if (rc) { + netif_err(adapter, drv, netdev, + "Cannot set indirect table\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + switch (hfunc) { + case ETH_RSS_HASH_TOP: + func = ENA_ADMIN_TOEPLITZ; + break; + case ETH_RSS_HASH_XOR: + func = ENA_ADMIN_CRC32; + break; + default: + netif_err(adapter, drv, netdev, "Unsupported hfunc %d\n", + hfunc); + return -EOPNOTSUPP; + } + + if (key) { + rc = ena_com_fill_hash_function(ena_dev, func, key, + ENA_HASH_KEY_SIZE, + 0xFFFFFFFF); + if (unlikely(rc)) { + netif_err(adapter, drv, netdev, "Cannot fill key\n"); + return rc == -EPERM ? -EOPNOTSUPP : rc; + } + } + + return 0; +} + +static void ena_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + channels->max_rx = ENA_MAX_NUM_IO_QUEUES; + channels->max_tx = ENA_MAX_NUM_IO_QUEUES; + channels->max_other = 0; + channels->max_combined = 0; + channels->rx_count = adapter->num_queues; + channels->tx_count = adapter->num_queues; + channels->other_count = 0; + channels->combined_count = 0; +} + +static int ena_get_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + *(u32 *)data = adapter->rx_copybreak; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static int ena_set_tunable(struct net_device *netdev, + const struct ethtool_tunable *tuna, + const void *data) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int ret = 0; + u32 len; + + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + len = *(u32 *)data; + if (len > adapter->netdev->mtu) { + ret = -EINVAL; + break; + } + adapter->rx_copybreak = len; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static const struct ethtool_ops ena_ethtool_ops = { + .get_link_ksettings = ena_get_link_ksettings, + .get_drvinfo = ena_get_drvinfo, + .get_msglevel = ena_get_msglevel, + .set_msglevel = ena_set_msglevel, + .get_link = ethtool_op_get_link, + .get_coalesce = ena_get_coalesce, + .set_coalesce = ena_set_coalesce, + .get_ringparam = ena_get_ringparam, + .get_sset_count = ena_get_sset_count, + .get_strings = ena_get_strings, + .get_ethtool_stats = ena_get_ethtool_stats, + .get_rxnfc = ena_get_rxnfc, + .set_rxnfc = ena_set_rxnfc, + .get_rxfh_indir_size = ena_get_rxfh_indir_size, + .get_rxfh_key_size = ena_get_rxfh_key_size, + .get_rxfh = ena_get_rxfh, + .set_rxfh = ena_set_rxfh, + .get_channels = ena_get_channels, + .get_tunable = ena_get_tunable, + .set_tunable = ena_set_tunable, +}; + +void ena_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ena_ethtool_ops; +} + +static void ena_dump_stats_ex(struct ena_adapter *adapter, u8 *buf) +{ + struct net_device *netdev = adapter->netdev; + u8 *strings_buf; + u64 *data_buf; + int strings_num; + int i, rc; + + strings_num = ena_get_sset_count(netdev, ETH_SS_STATS); + if (strings_num <= 0) { + netif_err(adapter, drv, netdev, "Can't get stats num\n"); + return; + } + + strings_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * ETH_GSTRING_LEN, + GFP_ATOMIC); + if (!strings_buf) { + netif_err(adapter, drv, netdev, + "failed to alloc strings_buf\n"); + return; + } + + data_buf = devm_kzalloc(&adapter->pdev->dev, + strings_num * sizeof(u64), + GFP_ATOMIC); + if (!data_buf) { + netif_err(adapter, drv, netdev, + "failed to allocate data buf\n"); + devm_kfree(&adapter->pdev->dev, strings_buf); + return; + } + + ena_get_strings(netdev, ETH_SS_STATS, strings_buf); + ena_get_ethtool_stats(netdev, NULL, data_buf); + + /* If there is a buffer, dump stats, otherwise print them to dmesg */ + if (buf) + for (i = 0; i < strings_num; i++) { + rc = snprintf(buf, ETH_GSTRING_LEN + sizeof(u64), + "%s %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + buf += rc; + } + else + for (i = 0; i < strings_num; i++) + netif_err(adapter, drv, netdev, "%s: %llu\n", + strings_buf + i * ETH_GSTRING_LEN, + data_buf[i]); + + devm_kfree(&adapter->pdev->dev, strings_buf); + devm_kfree(&adapter->pdev->dev, data_buf); +} + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf) +{ + if (!buf) + return; + + ena_dump_stats_ex(adapter, buf); +} + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter) +{ + ena_dump_stats_ex(adapter, NULL); +} diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c new file mode 100644 index 000000000000..94790df2d559 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c @@ -0,0 +1,3280 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#ifdef CONFIG_RFS_ACCEL +#include +#endif /* CONFIG_RFS_ACCEL */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "ena_netdev.h" +#include "ena_pci_id_tbl.h" + +static char version[] = DEVICE_NAME " v" DRV_MODULE_VERSION "\n"; + +MODULE_AUTHOR("Amazon.com, Inc. or its affiliates"); +MODULE_DESCRIPTION(DEVICE_NAME); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_MODULE_VERSION); + +/* Time in jiffies before concluding the transmitter is hung. */ +#define TX_TIMEOUT (5 * HZ) + +#define ENA_NAPI_BUDGET 64 + +#define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | \ + NETIF_MSG_TX_DONE | NETIF_MSG_TX_ERR | NETIF_MSG_RX_ERR) +static int debug = -1; +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, "Debug level (0=none,...,16=all)"); + +static struct ena_aenq_handlers aenq_handlers; + +static struct workqueue_struct *ena_wq; + +MODULE_DEVICE_TABLE(pci, ena_pci_tbl); + +static int ena_rss_init_default(struct ena_adapter *adapter); + +static void ena_tx_timeout(struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.tx_timeout++; + u64_stats_update_end(&adapter->syncp); + + netif_err(adapter, tx_err, dev, "Transmit time out\n"); + + /* Change the state of the device to trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); +} + +static void update_rx_ring_mtu(struct ena_adapter *adapter, int mtu) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + adapter->rx_ring[i].mtu = mtu; +} + +static int ena_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ena_adapter *adapter = netdev_priv(dev); + int ret; + + if ((new_mtu > adapter->max_mtu) || (new_mtu < ENA_MIN_MTU)) { + netif_err(adapter, drv, dev, + "Invalid MTU setting. new_mtu: %d\n", new_mtu); + + return -EINVAL; + } + + ret = ena_com_set_dev_mtu(adapter->ena_dev, new_mtu); + if (!ret) { + netif_dbg(adapter, drv, dev, "set MTU to %d\n", new_mtu); + update_rx_ring_mtu(adapter, new_mtu); + dev->mtu = new_mtu; + } else { + netif_err(adapter, drv, dev, "Failed to set MTU to %d\n", + new_mtu); + } + + return ret; +} + +static int ena_init_rx_cpu_rmap(struct ena_adapter *adapter) +{ +#ifdef CONFIG_RFS_ACCEL + u32 i; + int rc; + + adapter->netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(adapter->num_queues); + if (!adapter->netdev->rx_cpu_rmap) + return -ENOMEM; + for (i = 0; i < adapter->num_queues; i++) { + int irq_idx = ENA_IO_IRQ_IDX(i); + + rc = irq_cpu_rmap_add(adapter->netdev->rx_cpu_rmap, + adapter->msix_entries[irq_idx].vector); + if (rc) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + return rc; + } + } +#endif /* CONFIG_RFS_ACCEL */ + return 0; +} + +static void ena_init_io_rings_common(struct ena_adapter *adapter, + struct ena_ring *ring, u16 qid) +{ + ring->qid = qid; + ring->pdev = adapter->pdev; + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + ring->napi = &adapter->ena_napi[qid].napi; + ring->adapter = adapter; + ring->ena_dev = adapter->ena_dev; + ring->per_napi_packets = 0; + ring->per_napi_bytes = 0; + ring->cpu = 0; + u64_stats_init(&ring->syncp); +} + +static void ena_init_io_rings(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev; + struct ena_ring *txr, *rxr; + int i; + + ena_dev = adapter->ena_dev; + + for (i = 0; i < adapter->num_queues; i++) { + txr = &adapter->tx_ring[i]; + rxr = &adapter->rx_ring[i]; + + /* TX/RX common ring state */ + ena_init_io_rings_common(adapter, txr, i); + ena_init_io_rings_common(adapter, rxr, i); + + /* TX specific ring state */ + txr->ring_size = adapter->tx_ring_size; + txr->tx_max_header_size = ena_dev->tx_max_header_size; + txr->tx_mem_queue_type = ena_dev->tx_mem_queue_type; + txr->sgl_size = adapter->max_tx_sgl_size; + txr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_tx(ena_dev); + + /* RX specific ring state */ + rxr->ring_size = adapter->rx_ring_size; + rxr->rx_copybreak = adapter->rx_copybreak; + rxr->sgl_size = adapter->max_rx_sgl_size; + rxr->smoothed_interval = + ena_com_get_nonadaptive_moderation_interval_rx(ena_dev); + } +} + +/* ena_setup_tx_resources - allocate I/O Tx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Return 0 on success, negative on failure + */ +static int ena_setup_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, i, node; + + if (tx_ring->tx_buffer_info) { + netif_err(adapter, ifup, + adapter->netdev, "tx_buffer_info info is not NULL"); + return -EEXIST; + } + + size = sizeof(struct ena_tx_buffer) * tx_ring->ring_size; + node = cpu_to_node(ena_irq->cpu); + + tx_ring->tx_buffer_info = vzalloc_node(size, node); + if (!tx_ring->tx_buffer_info) { + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + return -ENOMEM; + } + + size = sizeof(u16) * tx_ring->ring_size; + tx_ring->free_tx_ids = vzalloc_node(size, node); + if (!tx_ring->free_tx_ids) { + tx_ring->free_tx_ids = vzalloc(size); + if (!tx_ring->free_tx_ids) { + vfree(tx_ring->tx_buffer_info); + return -ENOMEM; + } + } + + /* Req id ring for TX out of order completions */ + for (i = 0; i < tx_ring->ring_size; i++) + tx_ring->free_tx_ids[i] = i; + + /* Reset tx statistics */ + memset(&tx_ring->tx_stats, 0x0, sizeof(tx_ring->tx_stats)); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + tx_ring->cpu = ena_irq->cpu; + return 0; +} + +/* ena_free_tx_resources - Free I/O Tx Resources per Queue + * @adapter: network interface device structure + * @qid: queue index + * + * Free all transmit software resources + */ +static void ena_free_tx_resources(struct ena_adapter *adapter, int qid) +{ + struct ena_ring *tx_ring = &adapter->tx_ring[qid]; + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + vfree(tx_ring->free_tx_ids); + tx_ring->free_tx_ids = NULL; +} + +/* ena_setup_all_tx_resources - allocate I/O Tx queues resources for All queues + * @adapter: private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_tx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_tx_resources(adapter, i); + if (rc) + goto err_setup_tx; + } + + return 0; + +err_setup_tx: + + netif_err(adapter, ifup, adapter->netdev, + "Tx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_tx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_tx_resources - Free I/O Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + */ +static void ena_free_all_io_tx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_tx_resources(adapter, i); +} + +/* ena_setup_rx_resources - allocate I/O Rx resources (Descriptors) + * @adapter: network interface device structure + * @qid: queue index + * + * Returns 0 on success, negative on failure + */ +static int ena_setup_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + struct ena_irq *ena_irq = &adapter->irq_tbl[ENA_IO_IRQ_IDX(qid)]; + int size, node; + + if (rx_ring->rx_buffer_info) { + netif_err(adapter, ifup, adapter->netdev, + "rx_buffer_info is not NULL"); + return -EEXIST; + } + + /* alloc extra element so in rx path + * we can always prefetch rx_info + 1 + */ + size = sizeof(struct ena_rx_buffer) * (rx_ring->ring_size + 1); + node = cpu_to_node(ena_irq->cpu); + + rx_ring->rx_buffer_info = vzalloc_node(size, node); + if (!rx_ring->rx_buffer_info) { + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + return -ENOMEM; + } + + /* Reset rx statistics */ + memset(&rx_ring->rx_stats, 0x0, sizeof(rx_ring->rx_stats)); + + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + rx_ring->cpu = ena_irq->cpu; + + return 0; +} + +/* ena_free_rx_resources - Free I/O Rx Resources + * @adapter: network interface device structure + * @qid: queue index + * + * Free all receive software resources + */ +static void ena_free_rx_resources(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; +} + +/* ena_setup_all_rx_resources - allocate I/O Rx queues resources for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + */ +static int ena_setup_all_rx_resources(struct ena_adapter *adapter) +{ + int i, rc = 0; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_setup_rx_resources(adapter, i); + if (rc) + goto err_setup_rx; + } + + return 0; + +err_setup_rx: + + netif_err(adapter, ifup, adapter->netdev, + "Rx queue %d: allocation failed\n", i); + + /* rewind the index freeing the rings as we go */ + while (i--) + ena_free_rx_resources(adapter, i); + return rc; +} + +/* ena_free_all_io_rx_resources - Free I/O Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + */ +static void ena_free_all_io_rx_resources(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_resources(adapter, i); +} + +static inline int ena_alloc_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info, gfp_t gfp) +{ + struct ena_com_buf *ena_buf; + struct page *page; + dma_addr_t dma; + + /* if previous allocated page is not used */ + if (unlikely(rx_info->page)) + return 0; + + page = alloc_page(gfp); + if (unlikely(!page)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.page_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return -ENOMEM; + } + + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + if (unlikely(dma_mapping_error(rx_ring->dev, dma))) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.dma_mapping_err++; + u64_stats_update_end(&rx_ring->syncp); + + __free_page(page); + return -EIO; + } + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "alloc page %p, rx_info %p\n", page, rx_info); + + rx_info->page = page; + rx_info->page_offset = 0; + ena_buf = &rx_info->ena_buf; + ena_buf->paddr = dma; + ena_buf->len = PAGE_SIZE; + + return 0; +} + +static void ena_free_rx_page(struct ena_ring *rx_ring, + struct ena_rx_buffer *rx_info) +{ + struct page *page = rx_info->page; + struct ena_com_buf *ena_buf = &rx_info->ena_buf; + + if (unlikely(!page)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "Trying to free unallocated buffer\n"); + return; + } + + dma_unmap_page(rx_ring->dev, ena_buf->paddr, PAGE_SIZE, + DMA_FROM_DEVICE); + + __free_page(page); + rx_info->page = NULL; +} + +static int ena_refill_rx_bufs(struct ena_ring *rx_ring, u32 num) +{ + u16 next_to_use; + u32 i; + int rc; + + next_to_use = rx_ring->next_to_use; + + for (i = 0; i < num; i++) { + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[next_to_use]; + + rc = ena_alloc_rx_page(rx_ring, rx_info, + __GFP_COLD | GFP_ATOMIC | __GFP_COMP); + if (unlikely(rc < 0)) { + netif_warn(rx_ring->adapter, rx_err, rx_ring->netdev, + "failed to alloc buffer for rx queue %d\n", + rx_ring->qid); + break; + } + rc = ena_com_add_single_rx_desc(rx_ring->ena_com_io_sq, + &rx_info->ena_buf, + next_to_use); + if (unlikely(rc)) { + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "failed to add buffer for rx queue %d\n", + rx_ring->qid); + break; + } + next_to_use = ENA_RX_RING_IDX_NEXT(next_to_use, + rx_ring->ring_size); + } + + if (unlikely(i < num)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.refil_partial++; + u64_stats_update_end(&rx_ring->syncp); + netdev_warn(rx_ring->netdev, + "refilled rx qid %d with only %d buffers (from %d)\n", + rx_ring->qid, i, num); + } + + if (likely(i)) { + /* Add memory barrier to make sure the desc were written before + * issue a doorbell + */ + wmb(); + ena_com_write_sq_doorbell(rx_ring->ena_com_io_sq); + } + + rx_ring->next_to_use = next_to_use; + + return i; +} + +static void ena_free_rx_bufs(struct ena_adapter *adapter, + u32 qid) +{ + struct ena_ring *rx_ring = &adapter->rx_ring[qid]; + u32 i; + + for (i = 0; i < rx_ring->ring_size; i++) { + struct ena_rx_buffer *rx_info = &rx_ring->rx_buffer_info[i]; + + if (rx_info->page) + ena_free_rx_page(rx_ring, rx_info); + } +} + +/* ena_refill_all_rx_bufs - allocate all queues Rx buffers + * @adapter: board private structure + * + */ +static void ena_refill_all_rx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *rx_ring; + int i, rc, bufs_num; + + for (i = 0; i < adapter->num_queues; i++) { + rx_ring = &adapter->rx_ring[i]; + bufs_num = rx_ring->ring_size - 1; + rc = ena_refill_rx_bufs(rx_ring, bufs_num); + + if (unlikely(rc != bufs_num)) + netif_warn(rx_ring->adapter, rx_status, rx_ring->netdev, + "refilling Queue %d failed. allocated %d buffers from: %d\n", + i, rc, bufs_num); + } +} + +static void ena_free_all_rx_bufs(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + ena_free_rx_bufs(adapter, i); +} + +/* ena_free_tx_bufs - Free Tx Buffers per Queue + * @tx_ring: TX ring for which buffers be freed + */ +static void ena_free_tx_bufs(struct ena_ring *tx_ring) +{ + u32 i; + + for (i = 0; i < tx_ring->ring_size; i++) { + struct ena_tx_buffer *tx_info = &tx_ring->tx_buffer_info[i]; + struct ena_com_buf *ena_buf; + int nr_frags; + int j; + + if (!tx_info->skb) + continue; + + netdev_notice(tx_ring->netdev, + "free uncompleted tx skb qid %d idx 0x%x\n", + tx_ring->qid, i); + + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (j = 0; j < nr_frags; j++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + ena_buf->paddr, + ena_buf->len, + DMA_TO_DEVICE); + } + + dev_kfree_skb_any(tx_info->skb); + } + netdev_tx_reset_queue(netdev_get_tx_queue(tx_ring->netdev, + tx_ring->qid)); +} + +static void ena_free_all_tx_bufs(struct ena_adapter *adapter) +{ + struct ena_ring *tx_ring; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + ena_free_tx_bufs(tx_ring); + } +} + +static void ena_destroy_all_tx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_TXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_rx_queues(struct ena_adapter *adapter) +{ + u16 ena_qid; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + ena_qid = ENA_IO_RXQ_IDX(i); + ena_com_destroy_io_queue(adapter->ena_dev, ena_qid); + } +} + +static void ena_destroy_all_io_queues(struct ena_adapter *adapter) +{ + ena_destroy_all_tx_queues(adapter); + ena_destroy_all_rx_queues(adapter); +} + +static int validate_tx_req_id(struct ena_ring *tx_ring, u16 req_id) +{ + struct ena_tx_buffer *tx_info = NULL; + + if (likely(req_id < tx_ring->ring_size)) { + tx_info = &tx_ring->tx_buffer_info[req_id]; + if (likely(tx_info->skb)) + return 0; + } + + if (tx_info) + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_info doesn't have valid skb\n"); + else + netif_err(tx_ring->adapter, tx_done, tx_ring->netdev, + "Invalid req_id: %hu\n", req_id); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.bad_req_id++; + u64_stats_update_end(&tx_ring->syncp); + + /* Trigger device reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &tx_ring->adapter->flags); + return -EFAULT; +} + +static int ena_clean_tx_irq(struct ena_ring *tx_ring, u32 budget) +{ + struct netdev_queue *txq; + bool above_thresh; + u32 tx_bytes = 0; + u32 total_done = 0; + u16 next_to_clean; + u16 req_id; + int tx_pkts = 0; + int rc; + + next_to_clean = tx_ring->next_to_clean; + txq = netdev_get_tx_queue(tx_ring->netdev, tx_ring->qid); + + while (tx_pkts < budget) { + struct ena_tx_buffer *tx_info; + struct sk_buff *skb; + struct ena_com_buf *ena_buf; + int i, nr_frags; + + rc = ena_com_tx_comp_req_id_get(tx_ring->ena_com_io_cq, + &req_id); + if (rc) + break; + + rc = validate_tx_req_id(tx_ring, req_id); + if (rc) + break; + + tx_info = &tx_ring->tx_buffer_info[req_id]; + skb = tx_info->skb; + + /* prefetch skb_end_pointer() to speedup skb_shinfo(skb) */ + prefetch(&skb->end); + + tx_info->skb = NULL; + tx_info->last_jiffies = 0; + + if (likely(tx_info->num_of_bufs != 0)) { + ena_buf = tx_info->bufs; + + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + nr_frags = tx_info->num_of_bufs - 1; + for (i = 0; i < nr_frags; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), + DMA_TO_DEVICE); + } + } + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d skb %p completed\n", tx_ring->qid, + skb); + + tx_bytes += skb->len; + dev_kfree_skb(skb); + tx_pkts++; + total_done += tx_info->tx_descs; + + tx_ring->free_tx_ids[next_to_clean] = req_id; + next_to_clean = ENA_TX_RING_IDX_NEXT(next_to_clean, + tx_ring->ring_size); + } + + tx_ring->next_to_clean = next_to_clean; + ena_com_comp_ack(tx_ring->ena_com_io_sq, total_done); + ena_com_update_dev_comp_head(tx_ring->ena_com_io_cq); + + netdev_tx_completed_queue(txq, tx_pkts, tx_bytes); + + netif_dbg(tx_ring->adapter, tx_done, tx_ring->netdev, + "tx_poll: q %d done. total pkts: %d\n", + tx_ring->qid, tx_pkts); + + /* need to make the rings circular update visible to + * ena_start_xmit() before checking for netif_queue_stopped(). + */ + smp_mb(); + + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (unlikely(netif_tx_queue_stopped(txq) && above_thresh)) { + __netif_tx_lock(txq, smp_processor_id()); + above_thresh = ena_com_sq_empty_space(tx_ring->ena_com_io_sq) > + ENA_TX_WAKEUP_THRESH; + if (netif_tx_queue_stopped(txq) && above_thresh) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + __netif_tx_unlock(txq); + } + + tx_ring->per_napi_bytes += tx_bytes; + tx_ring->per_napi_packets += tx_pkts; + + return tx_pkts; +} + +static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring, + struct ena_com_rx_buf_info *ena_bufs, + u32 descs, + u16 *next_to_clean) +{ + struct sk_buff *skb; + struct ena_rx_buffer *rx_info = + &rx_ring->rx_buffer_info[*next_to_clean]; + u32 len; + u32 buf = 0; + void *va; + + len = ena_bufs[0].len; + if (unlikely(!rx_info->page)) { + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Page is NULL\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_info %p page %p\n", + rx_info, rx_info->page); + + /* save virt address of first buffer */ + va = page_address(rx_info->page) + rx_info->page_offset; + prefetch(va + NET_IP_ALIGN); + + if (len <= rx_ring->rx_copybreak) { + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + rx_ring->rx_copybreak); + if (unlikely(!skb)) { + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "Failed to allocate skb\n"); + return NULL; + } + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx allocated small packet. len %d. data_len %d\n", + skb->len, skb->data_len); + + /* sync this buffer for CPU use */ + dma_sync_single_for_cpu(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + skb_copy_to_linear_data(skb, va, len); + dma_sync_single_for_device(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + len, + DMA_FROM_DEVICE); + + skb_put(skb, len); + skb->protocol = eth_type_trans(skb, rx_ring->netdev); + *next_to_clean = ENA_RX_RING_IDX_ADD(*next_to_clean, descs, + rx_ring->ring_size); + return skb; + } + + skb = napi_get_frags(rx_ring->napi); + if (unlikely(!skb)) { + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "Failed allocating skb\n"); + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.skb_alloc_fail++; + u64_stats_update_end(&rx_ring->syncp); + return NULL; + } + + do { + dma_unmap_page(rx_ring->dev, + dma_unmap_addr(&rx_info->ena_buf, paddr), + PAGE_SIZE, DMA_FROM_DEVICE); + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page, + rx_info->page_offset, len, PAGE_SIZE); + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx skb updated. len %d. data_len %d\n", + skb->len, skb->data_len); + + rx_info->page = NULL; + *next_to_clean = + ENA_RX_RING_IDX_NEXT(*next_to_clean, + rx_ring->ring_size); + if (likely(--descs == 0)) + break; + rx_info = &rx_ring->rx_buffer_info[*next_to_clean]; + len = ena_bufs[++buf].len; + } while (1); + + return skb; +} + +/* ena_rx_checksum - indicate in skb if hw indicated a good cksum + * @adapter: structure containing adapter specific data + * @ena_rx_ctx: received packet context/metadata + * @skb: skb currently being received and modified + */ +static inline void ena_rx_checksum(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + /* Rx csum disabled */ + if (unlikely(!(rx_ring->netdev->features & NETIF_F_RXCSUM))) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* For fragmented packets the checksum isn't valid */ + if (ena_rx_ctx->frag) { + skb->ip_summed = CHECKSUM_NONE; + return; + } + + /* if IP and error */ + if (unlikely((ena_rx_ctx->l3_proto == ENA_ETH_IO_L3_PROTO_IPV4) && + (ena_rx_ctx->l3_csum_err))) { + /* ipv4 checksum error */ + skb->ip_summed = CHECKSUM_NONE; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX IPv4 header checksum error\n"); + return; + } + + /* if TCP/UDP */ + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) { + if (unlikely(ena_rx_ctx->l4_csum_err)) { + /* TCP/UDP checksum error */ + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_csum++; + u64_stats_update_end(&rx_ring->syncp); + netif_err(rx_ring->adapter, rx_err, rx_ring->netdev, + "RX L4 checksum error\n"); + skb->ip_summed = CHECKSUM_NONE; + return; + } + + skb->ip_summed = CHECKSUM_UNNECESSARY; + } +} + +static void ena_set_rx_hash(struct ena_ring *rx_ring, + struct ena_com_rx_ctx *ena_rx_ctx, + struct sk_buff *skb) +{ + enum pkt_hash_types hash_type; + + if (likely(rx_ring->netdev->features & NETIF_F_RXHASH)) { + if (likely((ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_TCP) || + (ena_rx_ctx->l4_proto == ENA_ETH_IO_L4_PROTO_UDP))) + + hash_type = PKT_HASH_TYPE_L4; + else + hash_type = PKT_HASH_TYPE_NONE; + + /* Override hash type if the packet is fragmented */ + if (ena_rx_ctx->frag) + hash_type = PKT_HASH_TYPE_NONE; + + skb_set_hash(skb, ena_rx_ctx->hash, hash_type); + } +} + +/* ena_clean_rx_irq - Cleanup RX irq + * @rx_ring: RX ring to clean + * @napi: napi handler + * @budget: how many packets driver is allowed to clean + * + * Returns the number of cleaned buffers. + */ +static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi, + u32 budget) +{ + u16 next_to_clean = rx_ring->next_to_clean; + u32 res_budget, work_done; + + struct ena_com_rx_ctx ena_rx_ctx; + struct ena_adapter *adapter; + struct sk_buff *skb; + int refill_required; + int refill_threshold; + int rc = 0; + int total_len = 0; + int rx_copybreak_pkt = 0; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "%s qid %d\n", __func__, rx_ring->qid); + res_budget = budget; + + do { + ena_rx_ctx.ena_bufs = rx_ring->ena_bufs; + ena_rx_ctx.max_bufs = rx_ring->sgl_size; + ena_rx_ctx.descs = 0; + rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq, + rx_ring->ena_com_io_sq, + &ena_rx_ctx); + if (unlikely(rc)) + goto error; + + if (unlikely(ena_rx_ctx.descs == 0)) + break; + + netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev, + "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n", + rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto, + ena_rx_ctx.l4_proto, ena_rx_ctx.hash); + + /* allocate skb and fill it */ + skb = ena_rx_skb(rx_ring, rx_ring->ena_bufs, ena_rx_ctx.descs, + &next_to_clean); + + /* exit if we failed to retrieve a buffer */ + if (unlikely(!skb)) { + next_to_clean = ENA_RX_RING_IDX_ADD(next_to_clean, + ena_rx_ctx.descs, + rx_ring->ring_size); + break; + } + + ena_rx_checksum(rx_ring, &ena_rx_ctx, skb); + + ena_set_rx_hash(rx_ring, &ena_rx_ctx, skb); + + skb_record_rx_queue(skb, rx_ring->qid); + + if (rx_ring->ena_bufs[0].len <= rx_ring->rx_copybreak) { + total_len += rx_ring->ena_bufs[0].len; + rx_copybreak_pkt++; + napi_gro_receive(napi, skb); + } else { + total_len += skb->len; + napi_gro_frags(napi); + } + + res_budget--; + } while (likely(res_budget)); + + work_done = budget - res_budget; + rx_ring->per_napi_bytes += total_len; + rx_ring->per_napi_packets += work_done; + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bytes += total_len; + rx_ring->rx_stats.cnt += work_done; + rx_ring->rx_stats.rx_copybreak_pkt += rx_copybreak_pkt; + u64_stats_update_end(&rx_ring->syncp); + + rx_ring->next_to_clean = next_to_clean; + + refill_required = ena_com_sq_empty_space(rx_ring->ena_com_io_sq); + refill_threshold = rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER; + + /* Optimization, try to batch new rx buffers */ + if (refill_required > refill_threshold) { + ena_com_update_dev_comp_head(rx_ring->ena_com_io_cq); + ena_refill_rx_bufs(rx_ring, refill_required); + } + + return work_done; + +error: + adapter = netdev_priv(rx_ring->netdev); + + u64_stats_update_begin(&rx_ring->syncp); + rx_ring->rx_stats.bad_desc_num++; + u64_stats_update_end(&rx_ring->syncp); + + /* Too many desc from the device. Trigger reset */ + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + + return 0; +} + +inline void ena_adjust_intr_moderation(struct ena_ring *rx_ring, + struct ena_ring *tx_ring) +{ + /* We apply adaptive moderation on Rx path only. + * Tx uses static interrupt moderation. + */ + ena_com_calculate_interrupt_delay(rx_ring->ena_dev, + rx_ring->per_napi_packets, + rx_ring->per_napi_bytes, + &rx_ring->smoothed_interval, + &rx_ring->moder_tbl_idx); + + /* Reset per napi packets/bytes */ + tx_ring->per_napi_packets = 0; + tx_ring->per_napi_bytes = 0; + rx_ring->per_napi_packets = 0; + rx_ring->per_napi_bytes = 0; +} + +static inline void ena_update_ring_numa_node(struct ena_ring *tx_ring, + struct ena_ring *rx_ring) +{ + int cpu = get_cpu(); + int numa_node; + + /* Check only one ring since the 2 rings are running on the same cpu */ + if (likely(tx_ring->cpu == cpu)) + goto out; + + numa_node = cpu_to_node(cpu); + put_cpu(); + + if (numa_node != NUMA_NO_NODE) { + ena_com_update_numa_node(tx_ring->ena_com_io_cq, numa_node); + ena_com_update_numa_node(rx_ring->ena_com_io_cq, numa_node); + } + + tx_ring->cpu = cpu; + rx_ring->cpu = cpu; + + return; +out: + put_cpu(); +} + +static int ena_io_poll(struct napi_struct *napi, int budget) +{ + struct ena_napi *ena_napi = container_of(napi, struct ena_napi, napi); + struct ena_ring *tx_ring, *rx_ring; + struct ena_eth_io_intr_reg intr_reg; + + u32 tx_work_done; + u32 rx_work_done; + int tx_budget; + int napi_comp_call = 0; + int ret; + + tx_ring = ena_napi->tx_ring; + rx_ring = ena_napi->rx_ring; + + tx_budget = tx_ring->ring_size / ENA_TX_POLL_BUDGET_DIVIDER; + + if (!test_bit(ENA_FLAG_DEV_UP, &tx_ring->adapter->flags)) { + napi_complete_done(napi, 0); + return 0; + } + + tx_work_done = ena_clean_tx_irq(tx_ring, tx_budget); + rx_work_done = ena_clean_rx_irq(rx_ring, napi, budget); + + if ((budget > rx_work_done) && (tx_budget > tx_work_done)) { + napi_complete_done(napi, rx_work_done); + + napi_comp_call = 1; + /* Tx and Rx share the same interrupt vector */ + if (ena_com_get_adaptive_moderation_enabled(rx_ring->ena_dev)) + ena_adjust_intr_moderation(rx_ring, tx_ring); + + /* Update intr register: rx intr delay, tx intr delay and + * interrupt unmask + */ + ena_com_update_intr_reg(&intr_reg, + rx_ring->smoothed_interval, + tx_ring->smoothed_interval, + true); + + /* It is a shared MSI-X. Tx and Rx CQ have pointer to it. + * So we use one of them to reach the intr reg + */ + ena_com_unmask_intr(rx_ring->ena_com_io_cq, &intr_reg); + + ena_update_ring_numa_node(tx_ring, rx_ring); + + ret = rx_work_done; + } else { + ret = budget; + } + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.napi_comp += napi_comp_call; + tx_ring->tx_stats.tx_poll++; + u64_stats_update_end(&tx_ring->syncp); + + return ret; +} + +static irqreturn_t ena_intr_msix_mgmnt(int irq, void *data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + ena_com_admin_q_comp_intr_handler(adapter->ena_dev); + + /* Don't call the aenq handler before probe is done */ + if (likely(test_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags))) + ena_com_aenq_intr_handler(adapter->ena_dev, data); + + return IRQ_HANDLED; +} + +/* ena_intr_msix_io - MSI-X Interrupt Handler for Tx/Rx + * @irq: interrupt number + * @data: pointer to a network interface private napi device structure + */ +static irqreturn_t ena_intr_msix_io(int irq, void *data) +{ + struct ena_napi *ena_napi = data; + + napi_schedule(&ena_napi->napi); + + return IRQ_HANDLED; +} + +static int ena_enable_msix(struct ena_adapter *adapter, int num_queues) +{ + int i, msix_vecs, rc; + + if (test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, probe, adapter->netdev, + "Error, MSI-X is already enabled\n"); + return -EPERM; + } + + /* Reserved the max msix vectors we might need */ + msix_vecs = ENA_MAX_MSIX_VEC(num_queues); + + netif_dbg(adapter, probe, adapter->netdev, + "trying to enable MSI-X, vectors %d\n", msix_vecs); + + adapter->msix_entries = vzalloc(msix_vecs * sizeof(struct msix_entry)); + + if (!adapter->msix_entries) + return -ENOMEM; + + for (i = 0; i < msix_vecs; i++) + adapter->msix_entries[i].entry = i; + + rc = pci_enable_msix(adapter->pdev, adapter->msix_entries, msix_vecs); + if (rc != 0) { + netif_err(adapter, probe, adapter->netdev, + "Failed to enable MSI-X, vectors %d rc %d\n", + msix_vecs, rc); + return -ENOSPC; + } + + netif_dbg(adapter, probe, adapter->netdev, "enable MSI-X, vectors %d\n", + msix_vecs); + + if (msix_vecs >= 1) { + if (ena_init_rx_cpu_rmap(adapter)) + netif_warn(adapter, probe, adapter->netdev, + "Failed to map IRQs to CPUs\n"); + } + + adapter->msix_vecs = msix_vecs; + set_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags); + + return 0; +} + +static void ena_setup_mgmnt_intr(struct ena_adapter *adapter) +{ + u32 cpu; + + snprintf(adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].name, + ENA_IRQNAME_SIZE, "ena-mgmnt@pci:%s", + pci_name(adapter->pdev)); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].handler = + ena_intr_msix_mgmnt; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].data = adapter; + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].vector = + adapter->msix_entries[ENA_MGMNT_IRQ_IDX].vector; + cpu = cpumask_first(cpu_online_mask); + adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].cpu = cpu; + cpumask_set_cpu(cpu, + &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX].affinity_hint_mask); +} + +static void ena_setup_io_intr(struct ena_adapter *adapter) +{ + struct net_device *netdev; + int irq_idx, i, cpu; + + netdev = adapter->netdev; + + for (i = 0; i < adapter->num_queues; i++) { + irq_idx = ENA_IO_IRQ_IDX(i); + cpu = i % num_online_cpus(); + + snprintf(adapter->irq_tbl[irq_idx].name, ENA_IRQNAME_SIZE, + "%s-Tx-Rx-%d", netdev->name, i); + adapter->irq_tbl[irq_idx].handler = ena_intr_msix_io; + adapter->irq_tbl[irq_idx].data = &adapter->ena_napi[i]; + adapter->irq_tbl[irq_idx].vector = + adapter->msix_entries[irq_idx].vector; + adapter->irq_tbl[irq_idx].cpu = cpu; + + cpumask_set_cpu(cpu, + &adapter->irq_tbl[irq_idx].affinity_hint_mask); + } +} + +static int ena_request_mgmnt_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, probe, adapter->netdev, + "failed to request admin irq\n"); + return rc; + } + + netif_dbg(adapter, probe, adapter->netdev, + "set affinity hint of mgmnt irq.to 0x%lx (irq vector: %d)\n", + irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + + return rc; +} + +static int ena_request_io_irq(struct ena_adapter *adapter) +{ + unsigned long flags = 0; + struct ena_irq *irq; + int rc = 0, i, k; + + if (!test_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ: MSI-X is not enabled\n"); + return -EINVAL; + } + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + rc = request_irq(irq->vector, irq->handler, flags, irq->name, + irq->data); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to request I/O IRQ. index %d rc %d\n", + i, rc); + goto err; + } + + netif_dbg(adapter, ifup, adapter->netdev, + "set affinity hint of irq. index %d to 0x%lx (irq vector: %d)\n", + i, irq->affinity_hint_mask.bits[0], irq->vector); + + irq_set_affinity_hint(irq->vector, &irq->affinity_hint_mask); + } + + return rc; + +err: + for (k = ENA_IO_IRQ_FIRST_IDX; k < i; k++) { + irq = &adapter->irq_tbl[k]; + free_irq(irq->vector, irq->data); + } + + return rc; +} + +static void ena_free_mgmnt_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + + irq = &adapter->irq_tbl[ENA_MGMNT_IRQ_IDX]; + synchronize_irq(irq->vector); + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); +} + +static void ena_free_io_irq(struct ena_adapter *adapter) +{ + struct ena_irq *irq; + int i; + +#ifdef CONFIG_RFS_ACCEL + if (adapter->msix_vecs >= 1) { + free_irq_cpu_rmap(adapter->netdev->rx_cpu_rmap); + adapter->netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) { + irq = &adapter->irq_tbl[i]; + irq_set_affinity_hint(irq->vector, NULL); + free_irq(irq->vector, irq->data); + } +} + +static void ena_disable_msix(struct ena_adapter *adapter) +{ + if (test_and_clear_bit(ENA_FLAG_MSIX_ENABLED, &adapter->flags)) + pci_disable_msix(adapter->pdev); + + if (adapter->msix_entries) + vfree(adapter->msix_entries); + adapter->msix_entries = NULL; +} + +static void ena_disable_io_intr_sync(struct ena_adapter *adapter) +{ + int i; + + if (!netif_running(adapter->netdev)) + return; + + for (i = ENA_IO_IRQ_FIRST_IDX; i < adapter->msix_vecs; i++) + synchronize_irq(adapter->irq_tbl[i].vector); +} + +static void ena_del_napi(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + netif_napi_del(&adapter->ena_napi[i].napi); +} + +static void ena_init_napi(struct ena_adapter *adapter) +{ + struct ena_napi *napi; + int i; + + for (i = 0; i < adapter->num_queues; i++) { + napi = &adapter->ena_napi[i]; + + netif_napi_add(adapter->netdev, + &adapter->ena_napi[i].napi, + ena_io_poll, + ENA_NAPI_BUDGET); + napi->rx_ring = &adapter->rx_ring[i]; + napi->tx_ring = &adapter->tx_ring[i]; + napi->qid = i; + } +} + +static void ena_napi_disable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_disable(&adapter->ena_napi[i].napi); +} + +static void ena_napi_enable_all(struct ena_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_enable(&adapter->ena_napi[i].napi); +} + +static void ena_restore_ethtool_params(struct ena_adapter *adapter) +{ + adapter->tx_usecs = 0; + adapter->rx_usecs = 0; + adapter->tx_frames = 1; + adapter->rx_frames = 1; +} + +/* Configure the Rx forwarding */ +static int ena_rss_configure(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc; + + /* In case the RSS table wasn't initialized by probe */ + if (!ena_dev->rss.tbl_log_size) { + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to init RSS rc: %d\n", rc); + return rc; + } + } + + /* Set indirect table */ + rc = ena_com_indirect_table_set(ena_dev); + if (unlikely(rc && rc != -EPERM)) + return rc; + + /* Configure hash function (if supported) */ + rc = ena_com_set_hash_function(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + /* Configure hash inputs (if supported) */ + rc = ena_com_set_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) + return rc; + + return 0; +} + +static int ena_up_complete(struct ena_adapter *adapter) +{ + int rc, i; + + rc = ena_rss_configure(adapter); + if (rc) + return rc; + + ena_init_napi(adapter); + + ena_change_mtu(adapter->netdev, adapter->netdev->mtu); + + ena_refill_all_rx_bufs(adapter); + + /* enable transmits */ + netif_tx_start_all_queues(adapter->netdev); + + ena_restore_ethtool_params(adapter); + + ena_napi_enable_all(adapter); + + /* schedule napi in case we had pending packets + * from the last time we disable napi + */ + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); + + return 0; +} + +static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_com_dev *ena_dev; + struct ena_ring *tx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + tx_ring = &adapter->tx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_TXQ_IDX(qid); + + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_TX; + ctx.qid = ena_qid; + ctx.mem_queue_type = ena_dev->tx_mem_queue_type; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->tx_ring_size; + ctx.numa_node = cpu_to_node(tx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O TX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &tx_ring->ena_com_io_sq, + &tx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get TX queue handlers. TX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(tx_ring->ena_com_io_cq, ctx.numa_node); + return rc; +} + +static int ena_create_all_io_tx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_tx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_TXQ_IDX(i)); + + return rc; +} + +static int ena_create_io_rx_queue(struct ena_adapter *adapter, int qid) +{ + struct ena_com_dev *ena_dev; + struct ena_com_create_io_ctx ctx = { 0 }; + struct ena_ring *rx_ring; + u32 msix_vector; + u16 ena_qid; + int rc; + + ena_dev = adapter->ena_dev; + + rx_ring = &adapter->rx_ring[qid]; + msix_vector = ENA_IO_IRQ_IDX(qid); + ena_qid = ENA_IO_RXQ_IDX(qid); + + ctx.qid = ena_qid; + ctx.direction = ENA_COM_IO_QUEUE_DIRECTION_RX; + ctx.mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + ctx.msix_vector = msix_vector; + ctx.queue_size = adapter->rx_ring_size; + ctx.numa_node = cpu_to_node(rx_ring->cpu); + + rc = ena_com_create_io_queue(ena_dev, &ctx); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to create I/O RX queue num %d rc: %d\n", + qid, rc); + return rc; + } + + rc = ena_com_get_io_handlers(ena_dev, ena_qid, + &rx_ring->ena_com_io_sq, + &rx_ring->ena_com_io_cq); + if (rc) { + netif_err(adapter, ifup, adapter->netdev, + "Failed to get RX queue handlers. RX queue num %d rc: %d\n", + qid, rc); + ena_com_destroy_io_queue(ena_dev, ena_qid); + } + + ena_com_update_numa_node(rx_ring->ena_com_io_cq, ctx.numa_node); + + return rc; +} + +static int ena_create_all_io_rx_queues(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + int rc, i; + + for (i = 0; i < adapter->num_queues; i++) { + rc = ena_create_io_rx_queue(adapter, i); + if (rc) + goto create_err; + } + + return 0; + +create_err: + while (i--) + ena_com_destroy_io_queue(ena_dev, ENA_IO_RXQ_IDX(i)); + + return rc; +} + +static int ena_up(struct ena_adapter *adapter) +{ + int rc; + + netdev_dbg(adapter->netdev, "%s\n", __func__); + + ena_setup_io_intr(adapter); + + rc = ena_request_io_irq(adapter); + if (rc) + goto err_req_irq; + + /* allocate transmit descriptors */ + rc = ena_setup_all_tx_resources(adapter); + if (rc) + goto err_setup_tx; + + /* allocate receive descriptors */ + rc = ena_setup_all_rx_resources(adapter); + if (rc) + goto err_setup_rx; + + /* Create TX queues */ + rc = ena_create_all_io_tx_queues(adapter); + if (rc) + goto err_create_tx_queues; + + /* Create RX queues */ + rc = ena_create_all_io_rx_queues(adapter); + if (rc) + goto err_create_rx_queues; + + rc = ena_up_complete(adapter); + if (rc) + goto err_up; + + if (test_bit(ENA_FLAG_LINK_UP, &adapter->flags)) + netif_carrier_on(adapter->netdev); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_up++; + u64_stats_update_end(&adapter->syncp); + + set_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + return rc; + +err_up: + ena_destroy_all_rx_queues(adapter); +err_create_rx_queues: + ena_destroy_all_tx_queues(adapter); +err_create_tx_queues: + ena_free_all_io_rx_resources(adapter); +err_setup_rx: + ena_free_all_io_tx_resources(adapter); +err_setup_tx: + ena_free_io_irq(adapter); +err_req_irq: + + return rc; +} + +static void ena_down(struct ena_adapter *adapter) +{ + netif_info(adapter, ifdown, adapter->netdev, "%s\n", __func__); + + clear_bit(ENA_FLAG_DEV_UP, &adapter->flags); + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.interface_down++; + u64_stats_update_end(&adapter->syncp); + + /* After this point the napi handler won't enable the tx queue */ + ena_napi_disable_all(adapter); + netif_carrier_off(adapter->netdev); + netif_tx_disable(adapter->netdev); + + /* After destroy the queue there won't be any new interrupts */ + ena_destroy_all_io_queues(adapter); + + ena_disable_io_intr_sync(adapter); + ena_free_io_irq(adapter); + ena_del_napi(adapter); + + ena_free_all_tx_bufs(adapter); + ena_free_all_rx_bufs(adapter); + ena_free_all_io_tx_resources(adapter); + ena_free_all_io_rx_resources(adapter); +} + +/* ena_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + */ +static int ena_open(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int rc; + + /* Notify the stack of the actual queue counts. */ + rc = netif_set_real_num_tx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num tx queues\n"); + return rc; + } + + rc = netif_set_real_num_rx_queues(netdev, adapter->num_queues); + if (rc) { + netif_err(adapter, ifup, netdev, "Can't set num rx queues\n"); + return rc; + } + + rc = ena_up(adapter); + if (rc) + return rc; + + return rc; +} + +/* ena_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + */ +static int ena_close(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + + netif_dbg(adapter, ifdown, netdev, "%s\n", __func__); + + if (test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + ena_down(adapter); + + return 0; +} + +static void ena_tx_csum(struct ena_com_tx_ctx *ena_tx_ctx, struct sk_buff *skb) +{ + u32 mss = skb_shinfo(skb)->gso_size; + struct ena_com_tx_meta *ena_meta = &ena_tx_ctx->ena_meta; + u8 l4_protocol = 0; + + if ((skb->ip_summed == CHECKSUM_PARTIAL) || mss) { + ena_tx_ctx->l4_csum_enable = 1; + if (mss) { + ena_tx_ctx->tso_enable = 1; + ena_meta->l4_hdr_len = tcp_hdr(skb)->doff; + ena_tx_ctx->l4_csum_partial = 0; + } else { + ena_tx_ctx->tso_enable = 0; + ena_meta->l4_hdr_len = 0; + ena_tx_ctx->l4_csum_partial = 1; + } + + switch (ip_hdr(skb)->version) { + case IPVERSION: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV4; + if (ip_hdr(skb)->frag_off & htons(IP_DF)) + ena_tx_ctx->df = 1; + if (mss) + ena_tx_ctx->l3_csum_enable = 1; + l4_protocol = ip_hdr(skb)->protocol; + break; + case 6: + ena_tx_ctx->l3_proto = ENA_ETH_IO_L3_PROTO_IPV6; + l4_protocol = ipv6_hdr(skb)->nexthdr; + break; + default: + break; + } + + if (l4_protocol == IPPROTO_TCP) + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_TCP; + else + ena_tx_ctx->l4_proto = ENA_ETH_IO_L4_PROTO_UDP; + + ena_meta->mss = mss; + ena_meta->l3_hdr_len = skb_network_header_len(skb); + ena_meta->l3_hdr_offset = skb_network_offset(skb); + ena_tx_ctx->meta_valid = 1; + + } else { + ena_tx_ctx->meta_valid = 0; + } +} + +static int ena_check_and_linearize_skb(struct ena_ring *tx_ring, + struct sk_buff *skb) +{ + int num_frags, header_len, rc; + + num_frags = skb_shinfo(skb)->nr_frags; + header_len = skb_headlen(skb); + + if (num_frags < tx_ring->sgl_size) + return 0; + + if ((num_frags == tx_ring->sgl_size) && + (header_len < tx_ring->tx_max_header_size)) + return 0; + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize++; + u64_stats_update_end(&tx_ring->syncp); + + rc = skb_linearize(skb); + if (unlikely(rc)) { + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.linearize_failed++; + u64_stats_update_end(&tx_ring->syncp); + } + + return rc; +} + +/* Called with netif_tx_lock. */ +static netdev_tx_t ena_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct ena_adapter *adapter = netdev_priv(dev); + struct ena_tx_buffer *tx_info; + struct ena_com_tx_ctx ena_tx_ctx; + struct ena_ring *tx_ring; + struct netdev_queue *txq; + struct ena_com_buf *ena_buf; + void *push_hdr; + u32 len, last_frag; + u16 next_to_use; + u16 req_id; + u16 push_len; + u16 header_len; + dma_addr_t dma; + int qid, rc, nb_hw_desc; + int i = -1; + + netif_dbg(adapter, tx_queued, dev, "%s skb %p\n", __func__, skb); + /* Determine which tx ring we will be placed on */ + qid = skb_get_queue_mapping(skb); + tx_ring = &adapter->tx_ring[qid]; + txq = netdev_get_tx_queue(dev, qid); + + rc = ena_check_and_linearize_skb(tx_ring, skb); + if (unlikely(rc)) + goto error_drop_packet; + + skb_tx_timestamp(skb); + len = skb_headlen(skb); + + next_to_use = tx_ring->next_to_use; + req_id = tx_ring->free_tx_ids[next_to_use]; + tx_info = &tx_ring->tx_buffer_info[req_id]; + tx_info->num_of_bufs = 0; + + WARN(tx_info->skb, "SKB isn't NULL req_id %d\n", req_id); + ena_buf = tx_info->bufs; + tx_info->skb = skb; + + if (tx_ring->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + /* prepared the push buffer */ + push_len = min_t(u32, len, tx_ring->tx_max_header_size); + header_len = push_len; + push_hdr = skb->data; + } else { + push_len = 0; + header_len = min_t(u32, len, tx_ring->tx_max_header_size); + push_hdr = NULL; + } + + netif_dbg(adapter, tx_queued, dev, + "skb: %p header_buf->vaddr: %p push_len: %d\n", skb, + push_hdr, push_len); + + if (len > push_len) { + dma = dma_map_single(tx_ring->dev, skb->data + push_len, + len - push_len, DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len - push_len; + + ena_buf++; + tx_info->num_of_bufs++; + } + + last_frag = skb_shinfo(skb)->nr_frags; + + for (i = 0; i < last_frag; i++) { + const skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + + len = skb_frag_size(frag); + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, len, + DMA_TO_DEVICE); + if (dma_mapping_error(tx_ring->dev, dma)) + goto error_report_dma_error; + + ena_buf->paddr = dma; + ena_buf->len = len; + ena_buf++; + } + + tx_info->num_of_bufs += last_frag; + + memset(&ena_tx_ctx, 0x0, sizeof(struct ena_com_tx_ctx)); + ena_tx_ctx.ena_bufs = tx_info->bufs; + ena_tx_ctx.push_header = push_hdr; + ena_tx_ctx.num_bufs = tx_info->num_of_bufs; + ena_tx_ctx.req_id = req_id; + ena_tx_ctx.header_len = header_len; + + /* set flags and meta data */ + ena_tx_csum(&ena_tx_ctx, skb); + + /* prepare the packet's descriptors to dma engine */ + rc = ena_com_prepare_tx(tx_ring->ena_com_io_sq, &ena_tx_ctx, + &nb_hw_desc); + + if (unlikely(rc)) { + netif_err(adapter, tx_queued, dev, + "failed to prepare tx bufs\n"); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + tx_ring->tx_stats.prepare_ctx_err++; + u64_stats_update_end(&tx_ring->syncp); + netif_tx_stop_queue(txq); + goto error_unmap_dma; + } + + netdev_tx_sent_queue(txq, skb->len); + + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.cnt++; + tx_ring->tx_stats.bytes += skb->len; + u64_stats_update_end(&tx_ring->syncp); + + tx_info->tx_descs = nb_hw_desc; + tx_info->last_jiffies = jiffies; + + tx_ring->next_to_use = ENA_TX_RING_IDX_NEXT(next_to_use, + tx_ring->ring_size); + + /* This WMB is aimed to: + * 1 - perform smp barrier before reading next_to_completion + * 2 - make sure the desc were written before trigger DB + */ + wmb(); + + /* stop the queue when no more space available, the packet can have up + * to sgl_size + 2. one for the meta descriptor and one for header + * (if the header is larger than tx_max_header_size). + */ + if (unlikely(ena_com_sq_empty_space(tx_ring->ena_com_io_sq) < + (tx_ring->sgl_size + 2))) { + netif_dbg(adapter, tx_queued, dev, "%s stop queue %d\n", + __func__, qid); + + netif_tx_stop_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_stop++; + u64_stats_update_end(&tx_ring->syncp); + + /* There is a rare condition where this function decide to + * stop the queue but meanwhile clean_tx_irq updates + * next_to_completion and terminates. + * The queue will remain stopped forever. + * To solve this issue this function perform rmb, check + * the wakeup condition and wake up the queue if needed. + */ + smp_rmb(); + + if (ena_com_sq_empty_space(tx_ring->ena_com_io_sq) + > ENA_TX_WAKEUP_THRESH) { + netif_tx_wake_queue(txq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.queue_wakeup++; + u64_stats_update_end(&tx_ring->syncp); + } + } + + if (netif_xmit_stopped(txq) || !skb->xmit_more) { + /* trigger the dma engine */ + ena_com_write_sq_doorbell(tx_ring->ena_com_io_sq); + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.doorbells++; + u64_stats_update_end(&tx_ring->syncp); + } + + return NETDEV_TX_OK; + +error_report_dma_error: + u64_stats_update_begin(&tx_ring->syncp); + tx_ring->tx_stats.dma_mapping_err++; + u64_stats_update_end(&tx_ring->syncp); + netdev_warn(adapter->netdev, "failed to map skb\n"); + + tx_info->skb = NULL; + +error_unmap_dma: + if (i >= 0) { + /* save value of frag that failed */ + last_frag = i; + + /* start back at beginning and unmap skb */ + tx_info->skb = NULL; + ena_buf = tx_info->bufs; + dma_unmap_single(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + + /* unmap remaining mapped pages */ + for (i = 0; i < last_frag; i++) { + ena_buf++; + dma_unmap_page(tx_ring->dev, dma_unmap_addr(ena_buf, paddr), + dma_unmap_len(ena_buf, len), DMA_TO_DEVICE); + } + } + +error_drop_packet: + + dev_kfree_skb(skb); + return NETDEV_TX_OK; +} + +#ifdef CONFIG_NET_POLL_CONTROLLER +static void ena_netpoll(struct net_device *netdev) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_queues; i++) + napi_schedule(&adapter->ena_napi[i].napi); +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb, + void *accel_priv, select_queue_fallback_t fallback) +{ + u16 qid; + /* we suspect that this is good for in--kernel network services that + * want to loop incoming skb rx to tx in normal user generated traffic, + * most probably we will not get to this + */ + if (skb_rx_queue_recorded(skb)) + qid = skb_get_rx_queue(skb); + else + qid = fallback(dev, skb); + + return qid; +} + +static void ena_config_host_info(struct ena_com_dev *ena_dev) +{ + struct ena_admin_host_info *host_info; + int rc; + + /* Allocate only the host info */ + rc = ena_com_allocate_host_info(ena_dev); + if (rc) { + pr_err("Cannot allocate host info\n"); + return; + } + + host_info = ena_dev->host_attr.host_info; + + host_info->os_type = ENA_ADMIN_OS_LINUX; + host_info->kernel_ver = LINUX_VERSION_CODE; + strncpy(host_info->kernel_ver_str, utsname()->version, + sizeof(host_info->kernel_ver_str) - 1); + host_info->os_dist = 0; + strncpy(host_info->os_dist_str, utsname()->release, + sizeof(host_info->os_dist_str) - 1); + host_info->driver_version = + (DRV_MODULE_VER_MAJOR) | + (DRV_MODULE_VER_MINOR << ENA_ADMIN_HOST_INFO_MINOR_SHIFT) | + (DRV_MODULE_VER_SUBMINOR << ENA_ADMIN_HOST_INFO_SUB_MINOR_SHIFT); + + rc = ena_com_set_host_attributes(ena_dev); + if (rc) { + if (rc == -EPERM) + pr_warn("Cannot set host attributes\n"); + else + pr_err("Cannot set host attributes\n"); + + goto err; + } + + return; + +err: + ena_com_delete_host_info(ena_dev); +} + +static void ena_config_debug_area(struct ena_adapter *adapter) +{ + u32 debug_area_size; + int rc, ss_count; + + ss_count = ena_get_sset_count(adapter->netdev, ETH_SS_STATS); + if (ss_count <= 0) { + netif_err(adapter, drv, adapter->netdev, + "SS count is negative\n"); + return; + } + + /* allocate 32 bytes for each string and 64bit for the value */ + debug_area_size = ss_count * ETH_GSTRING_LEN + sizeof(u64) * ss_count; + + rc = ena_com_allocate_debug_area(adapter->ena_dev, debug_area_size); + if (rc) { + pr_err("Cannot allocate debug area\n"); + return; + } + + rc = ena_com_set_host_attributes(adapter->ena_dev); + if (rc) { + if (rc == -EPERM) + netif_warn(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + else + netif_err(adapter, drv, adapter->netdev, + "Cannot set host attributes\n"); + goto err; + } + + return; +err: + ena_com_delete_debug_area(adapter->ena_dev); +} + +static struct rtnl_link_stats64 *ena_get_stats64(struct net_device *netdev, + struct rtnl_link_stats64 *stats) +{ + struct ena_adapter *adapter = netdev_priv(netdev); + struct ena_admin_basic_stats ena_stats; + int rc; + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return NULL; + + rc = ena_com_get_dev_basic_stats(adapter->ena_dev, &ena_stats); + if (rc) + return NULL; + + stats->tx_bytes = ((u64)ena_stats.tx_bytes_high << 32) | + ena_stats.tx_bytes_low; + stats->rx_bytes = ((u64)ena_stats.rx_bytes_high << 32) | + ena_stats.rx_bytes_low; + + stats->rx_packets = ((u64)ena_stats.rx_pkts_high << 32) | + ena_stats.rx_pkts_low; + stats->tx_packets = ((u64)ena_stats.tx_pkts_high << 32) | + ena_stats.tx_pkts_low; + + stats->rx_dropped = ((u64)ena_stats.rx_drops_high << 32) | + ena_stats.rx_drops_low; + + stats->multicast = 0; + stats->collisions = 0; + + stats->rx_length_errors = 0; + stats->rx_crc_errors = 0; + stats->rx_frame_errors = 0; + stats->rx_fifo_errors = 0; + stats->rx_missed_errors = 0; + stats->tx_window_errors = 0; + + stats->rx_errors = 0; + stats->tx_errors = 0; + + return stats; +} + +static const struct net_device_ops ena_netdev_ops = { + .ndo_open = ena_open, + .ndo_stop = ena_close, + .ndo_start_xmit = ena_start_xmit, + .ndo_select_queue = ena_select_queue, + .ndo_get_stats64 = ena_get_stats64, + .ndo_tx_timeout = ena_tx_timeout, + .ndo_change_mtu = ena_change_mtu, + .ndo_set_mac_address = NULL, + .ndo_validate_addr = eth_validate_addr, +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = ena_netpoll, +#endif /* CONFIG_NET_POLL_CONTROLLER */ +}; + +static void ena_device_io_suspend(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, suspend_io_task); + struct net_device *netdev = adapter->netdev; + + /* ena_napi_disable_all disables only the IO handling. + * We are still subject to AENQ keep alive watchdog. + */ + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_suspend++; + u64_stats_update_begin(&adapter->syncp); + ena_napi_disable_all(adapter); + netif_tx_lock(netdev); + netif_device_detach(netdev); + netif_tx_unlock(netdev); +} + +static void ena_device_io_resume(struct work_struct *work) +{ + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, resume_io_task); + struct net_device *netdev = adapter->netdev; + + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.io_resume++; + u64_stats_update_end(&adapter->syncp); + + netif_device_attach(netdev); + ena_napi_enable_all(adapter); +} + +static int ena_device_validate_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + struct net_device *netdev = adapter->netdev; + int rc; + + rc = ether_addr_equal(get_feat_ctx->dev_attr.mac_addr, + adapter->mac_addr); + if (!rc) { + netif_err(adapter, drv, netdev, + "Error, mac address are different\n"); + return -EINVAL; + } + + if ((get_feat_ctx->max_queues.max_cq_num < adapter->num_queues) || + (get_feat_ctx->max_queues.max_sq_num < adapter->num_queues)) { + netif_err(adapter, drv, netdev, + "Error, device doesn't support enough queues\n"); + return -EINVAL; + } + + if (get_feat_ctx->dev_attr.max_mtu < netdev->mtu) { + netif_err(adapter, drv, netdev, + "Error, device max mtu is smaller than netdev MTU\n"); + return -EINVAL; + } + + return 0; +} + +static int ena_device_init(struct ena_com_dev *ena_dev, struct pci_dev *pdev, + struct ena_com_dev_get_features_ctx *get_feat_ctx, + bool *wd_state) +{ + struct device *dev = &pdev->dev; + bool readless_supported; + u32 aenq_groups; + int dma_width; + int rc; + + rc = ena_com_mmio_reg_read_request_init(ena_dev); + if (rc) { + dev_err(dev, "failed to init mmio read less\n"); + return rc; + } + + /* The PCIe configuration space revision id indicate if mmio reg + * read is disabled + */ + readless_supported = !(pdev->revision & ENA_MMIO_DISABLE_REG_READ); + ena_com_set_mmio_read_mode(ena_dev, readless_supported); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(dev, "Can not reset device\n"); + goto err_mmio_read_less; + } + + rc = ena_com_validate_version(ena_dev); + if (rc) { + dev_err(dev, "device version is too low\n"); + goto err_mmio_read_less; + } + + dma_width = ena_com_get_dma_width(ena_dev); + if (dma_width < 0) { + dev_err(dev, "Invalid dma width value %d", dma_width); + goto err_mmio_read_less; + } + + rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "pci_set_dma_mask failed 0x%x\n", rc); + goto err_mmio_read_less; + } + + rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(dma_width)); + if (rc) { + dev_err(dev, "err_pci_set_consistent_dma_mask failed 0x%x\n", + rc); + goto err_mmio_read_less; + } + + /* ENA admin level init */ + rc = ena_com_admin_init(ena_dev, &aenq_handlers, true); + if (rc) { + dev_err(dev, + "Can not initialize ena admin queue with device\n"); + goto err_mmio_read_less; + } + + /* To enable the msix interrupts the driver needs to know the number + * of queues. So the driver uses polling mode to retrieve this + * information + */ + ena_com_set_admin_polling_mode(ena_dev, true); + + /* Get Device Attributes*/ + rc = ena_com_get_dev_attr_feat(ena_dev, get_feat_ctx); + if (rc) { + dev_err(dev, "Cannot get attribute for ena device rc=%d\n", rc); + goto err_admin_init; + } + + /* Try to turn all the available aenq groups */ + aenq_groups = BIT(ENA_ADMIN_LINK_CHANGE) | + BIT(ENA_ADMIN_FATAL_ERROR) | + BIT(ENA_ADMIN_WARNING) | + BIT(ENA_ADMIN_NOTIFICATION) | + BIT(ENA_ADMIN_KEEP_ALIVE); + + aenq_groups &= get_feat_ctx->aenq.supported_groups; + + rc = ena_com_set_aenq_config(ena_dev, aenq_groups); + if (rc) { + dev_err(dev, "Cannot configure aenq groups rc= %d\n", rc); + goto err_admin_init; + } + + *wd_state = !!(aenq_groups & BIT(ENA_ADMIN_KEEP_ALIVE)); + + ena_config_host_info(ena_dev); + + return 0; + +err_admin_init: + ena_com_admin_destroy(ena_dev); +err_mmio_read_less: + ena_com_mmio_reg_read_request_destroy(ena_dev); + + return rc; +} + +static int ena_enable_msix_and_set_admin_interrupts(struct ena_adapter *adapter, + int io_vectors) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc; + + rc = ena_enable_msix(adapter, io_vectors); + if (rc) { + dev_err(dev, "Can not reserve msix vectors\n"); + return rc; + } + + ena_setup_mgmnt_intr(adapter); + + rc = ena_request_mgmnt_irq(adapter); + if (rc) { + dev_err(dev, "Can not setup management interrupts\n"); + goto err_disable_msix; + } + + ena_com_set_admin_polling_mode(ena_dev, false); + + ena_com_admin_aenq_enable(ena_dev); + + return 0; + +err_disable_msix: + ena_disable_msix(adapter); + + return rc; +} + +static void ena_fw_reset_device(struct work_struct *work) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + struct ena_adapter *adapter = + container_of(work, struct ena_adapter, reset_task); + struct net_device *netdev = adapter->netdev; + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct pci_dev *pdev = adapter->pdev; + bool dev_up, wd_state; + int rc; + + del_timer_sync(&adapter->timer_service); + + rtnl_lock(); + + dev_up = test_bit(ENA_FLAG_DEV_UP, &adapter->flags); + ena_com_set_admin_running_state(ena_dev, false); + + /* After calling ena_close the tx queues and the napi + * are disabled so no one can interfere or touch the + * data structures + */ + ena_close(netdev); + + rc = ena_com_dev_reset(ena_dev); + if (rc) { + dev_err(&pdev->dev, "Device reset failed\n"); + goto err; + } + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + /* Finish with the destroy part. Start the init part */ + + rc = ena_device_init(ena_dev, adapter->pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "Can not initialize device\n"); + goto err; + } + adapter->wd_state = wd_state; + + rc = ena_device_validate_params(adapter, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Validation of device parameters failed\n"); + goto err_device_destroy; + } + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, + adapter->num_queues); + if (rc) { + dev_err(&pdev->dev, "Enable MSI-X failed\n"); + goto err_device_destroy; + } + /* If the interface was up before the reset bring it up */ + if (dev_up) { + rc = ena_up(adapter); + if (rc) { + dev_err(&pdev->dev, "Failed to create I/O queues\n"); + goto err_disable_msix; + } + } + + mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ)); + + rtnl_unlock(); + + dev_err(&pdev->dev, "Device reset completed successfully\n"); + + return; +err_disable_msix: + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_device_destroy: + ena_com_admin_destroy(ena_dev); +err: + rtnl_unlock(); + + dev_err(&pdev->dev, + "Reset attempt failed. Can not reset the device\n"); +} + +static void check_for_missing_tx_completions(struct ena_adapter *adapter) +{ + struct ena_tx_buffer *tx_buf; + unsigned long last_jiffies; + struct ena_ring *tx_ring; + int i, j, budget; + u32 missed_tx; + + /* Make sure the driver doesn't turn the device in other process */ + smp_rmb(); + + if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags)) + return; + + budget = ENA_MONITORED_TX_QUEUES; + + for (i = adapter->last_monitored_tx_qid; i < adapter->num_queues; i++) { + tx_ring = &adapter->tx_ring[i]; + + for (j = 0; j < tx_ring->ring_size; j++) { + tx_buf = &tx_ring->tx_buffer_info[j]; + last_jiffies = tx_buf->last_jiffies; + if (unlikely(last_jiffies && time_is_before_jiffies(last_jiffies + TX_TIMEOUT))) { + netif_notice(adapter, tx_err, adapter->netdev, + "Found a Tx that wasn't completed on time, qid %d, index %d.\n", + tx_ring->qid, j); + + u64_stats_update_begin(&tx_ring->syncp); + missed_tx = tx_ring->tx_stats.missing_tx_comp++; + u64_stats_update_end(&tx_ring->syncp); + + /* Clear last jiffies so the lost buffer won't + * be counted twice. + */ + tx_buf->last_jiffies = 0; + + if (unlikely(missed_tx > MAX_NUM_OF_TIMEOUTED_PACKETS)) { + netif_err(adapter, tx_err, adapter->netdev, + "The number of lost tx completion is above the threshold (%d > %d). Reset the device\n", + missed_tx, MAX_NUM_OF_TIMEOUTED_PACKETS); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } + } + } + + budget--; + if (!budget) + break; + } + + adapter->last_monitored_tx_qid = i % adapter->num_queues; +} + +/* Check for keep alive expiration */ +static void check_for_missing_keep_alive(struct ena_adapter *adapter) +{ + unsigned long keep_alive_expired; + + if (!adapter->wd_state) + return; + + keep_alive_expired = round_jiffies(adapter->last_keep_alive_jiffies + + ENA_DEVICE_KALIVE_TIMEOUT); + if (unlikely(time_is_before_jiffies(keep_alive_expired))) { + netif_err(adapter, drv, adapter->netdev, + "Keep alive watchdog timeout.\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.wd_expired++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void check_for_admin_com_state(struct ena_adapter *adapter) +{ + if (unlikely(!ena_com_get_admin_running_state(adapter->ena_dev))) { + netif_err(adapter, drv, adapter->netdev, + "ENA admin queue is not in running state!\n"); + u64_stats_update_begin(&adapter->syncp); + adapter->dev_stats.admin_q_pause++; + u64_stats_update_end(&adapter->syncp); + set_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags); + } +} + +static void ena_update_host_info(struct ena_admin_host_info *host_info, + struct net_device *netdev) +{ + host_info->supported_network_features[0] = + netdev->features & GENMASK_ULL(31, 0); + host_info->supported_network_features[1] = + (netdev->features & GENMASK_ULL(63, 32)) >> 32; +} + +static void ena_timer_service(unsigned long data) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + u8 *debug_area = adapter->ena_dev->host_attr.debug_area_virt_addr; + struct ena_admin_host_info *host_info = + adapter->ena_dev->host_attr.host_info; + + check_for_missing_keep_alive(adapter); + + check_for_admin_com_state(adapter); + + check_for_missing_tx_completions(adapter); + + if (debug_area) + ena_dump_stats_to_buf(adapter, debug_area); + + if (host_info) + ena_update_host_info(host_info, adapter->netdev); + + if (unlikely(test_and_clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags))) { + netif_err(adapter, drv, adapter->netdev, + "Trigger reset is on\n"); + ena_dump_stats_to_dmesg(adapter); + queue_work(ena_wq, &adapter->reset_task); + return; + } + + /* Reset the timer */ + mod_timer(&adapter->timer_service, jiffies + HZ); +} + +static int ena_calc_io_queue_num(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + int io_sq_num, io_queue_num; + + /* In case of LLQ use the llq number in the get feature cmd */ + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + io_sq_num = get_feat_ctx->max_queues.max_llq_num; + + if (io_sq_num == 0) { + dev_err(&pdev->dev, + "Trying to use LLQ but llq_num is 0. Fall back into regular queues\n"); + + ena_dev->tx_mem_queue_type = + ENA_ADMIN_PLACEMENT_POLICY_HOST; + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + } else { + io_sq_num = get_feat_ctx->max_queues.max_sq_num; + } + + io_queue_num = min_t(int, num_possible_cpus(), ENA_MAX_NUM_IO_QUEUES); + io_queue_num = min_t(int, io_queue_num, io_sq_num); + io_queue_num = min_t(int, io_queue_num, + get_feat_ctx->max_queues.max_cq_num); + /* 1 IRQ for for mgmnt and 1 IRQs for each IO direction */ + io_queue_num = min_t(int, io_queue_num, pci_msix_vec_count(pdev) - 1); + if (unlikely(!io_queue_num)) { + dev_err(&pdev->dev, "The device doesn't have io queues\n"); + return -EFAULT; + } + + return io_queue_num; +} + +static int ena_set_push_mode(struct pci_dev *pdev, struct ena_com_dev *ena_dev, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + bool has_mem_bar; + + has_mem_bar = pci_select_bars(pdev, IORESOURCE_MEM) & BIT(ENA_MEM_BAR); + + /* Enable push mode if device supports LLQ */ + if (has_mem_bar && (get_feat_ctx->max_queues.max_llq_num > 0)) + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_DEV; + else + ena_dev->tx_mem_queue_type = ENA_ADMIN_PLACEMENT_POLICY_HOST; + + return 0; +} + +static void ena_set_dev_offloads(struct ena_com_dev_get_features_ctx *feat, + struct net_device *netdev) +{ + netdev_features_t dev_features = 0; + + /* Set offload features */ + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV4_CSUM_PART_MASK) + dev_features |= NETIF_F_IP_CSUM; + + if (feat->offload.tx & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_TX_L4_IPV6_CSUM_PART_MASK) + dev_features |= NETIF_F_IPV6_CSUM; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV4_MASK) + dev_features |= NETIF_F_TSO; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_IPV6_MASK) + dev_features |= NETIF_F_TSO6; + + if (feat->offload.tx & ENA_ADMIN_FEATURE_OFFLOAD_DESC_TSO_ECN_MASK) + dev_features |= NETIF_F_TSO_ECN; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV4_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + if (feat->offload.rx_supported & + ENA_ADMIN_FEATURE_OFFLOAD_DESC_RX_L4_IPV6_CSUM_MASK) + dev_features |= NETIF_F_RXCSUM; + + netdev->features = + dev_features | + NETIF_F_SG | + NETIF_F_NTUPLE | + NETIF_F_RXHASH | + NETIF_F_HIGHDMA; + + netdev->hw_features |= netdev->features; + netdev->vlan_features |= netdev->features; +} + +static void ena_set_conf_feat_params(struct ena_adapter *adapter, + struct ena_com_dev_get_features_ctx *feat) +{ + struct net_device *netdev = adapter->netdev; + + /* Copy mac address */ + if (!is_valid_ether_addr(feat->dev_attr.mac_addr)) { + eth_hw_addr_random(netdev); + ether_addr_copy(adapter->mac_addr, netdev->dev_addr); + } else { + ether_addr_copy(adapter->mac_addr, feat->dev_attr.mac_addr); + ether_addr_copy(netdev->dev_addr, adapter->mac_addr); + } + + /* Set offload features */ + ena_set_dev_offloads(feat, netdev); + + adapter->max_mtu = feat->dev_attr.max_mtu; +} + +static int ena_rss_init_default(struct ena_adapter *adapter) +{ + struct ena_com_dev *ena_dev = adapter->ena_dev; + struct device *dev = &adapter->pdev->dev; + int rc, i; + u32 val; + + rc = ena_com_rss_init(ena_dev, ENA_RX_RSS_TABLE_LOG_SIZE); + if (unlikely(rc)) { + dev_err(dev, "Cannot init indirect table\n"); + goto err_rss_init; + } + + for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) { + val = ethtool_rxfh_indir_default(i, adapter->num_queues); + rc = ena_com_indirect_table_fill_entry(ena_dev, i, + ENA_IO_RXQ_IDX(val)); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill indirect table\n"); + goto err_fill_indir; + } + } + + rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL, + ENA_HASH_KEY_SIZE, 0xFFFFFFFF); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash function\n"); + goto err_fill_indir; + } + + rc = ena_com_set_default_hash_ctrl(ena_dev); + if (unlikely(rc && (rc != -EPERM))) { + dev_err(dev, "Cannot fill hash control\n"); + goto err_fill_indir; + } + + return 0; + +err_fill_indir: + ena_com_rss_destroy(ena_dev); +err_rss_init: + + return rc; +} + +static void ena_release_bars(struct ena_com_dev *ena_dev, struct pci_dev *pdev) +{ + int release_bars; + + release_bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + pci_release_selected_regions(pdev, release_bars); +} + +static int ena_calc_queue_size(struct pci_dev *pdev, + struct ena_com_dev *ena_dev, + u16 *max_tx_sgl_size, + u16 *max_rx_sgl_size, + struct ena_com_dev_get_features_ctx *get_feat_ctx) +{ + u32 queue_size = ENA_DEFAULT_RING_SIZE; + + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_cq_depth); + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_sq_depth); + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) + queue_size = min_t(u32, queue_size, + get_feat_ctx->max_queues.max_llq_depth); + + queue_size = rounddown_pow_of_two(queue_size); + + if (unlikely(!queue_size)) { + dev_err(&pdev->dev, "Invalid queue size\n"); + return -EFAULT; + } + + *max_tx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_tx_descs); + *max_rx_sgl_size = min_t(u16, ENA_PKT_MAX_BUFS, + get_feat_ctx->max_queues.max_packet_rx_descs); + + return queue_size; +} + +/* ena_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ena_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ena_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + */ +static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct ena_com_dev_get_features_ctx get_feat_ctx; + static int version_printed; + struct net_device *netdev; + struct ena_adapter *adapter; + struct ena_com_dev *ena_dev = NULL; + static int adapters_found; + int io_queue_num, bars, rc; + int queue_size; + u16 tx_sgl_size = 0; + u16 rx_sgl_size = 0; + bool wd_state; + + dev_dbg(&pdev->dev, "%s\n", __func__); + + if (version_printed++ == 0) + dev_info(&pdev->dev, "%s", version); + + rc = pci_enable_device_mem(pdev); + if (rc) { + dev_err(&pdev->dev, "pci_enable_device_mem() failed!\n"); + return rc; + } + + pci_set_master(pdev); + + ena_dev = vzalloc(sizeof(*ena_dev)); + if (!ena_dev) { + rc = -ENOMEM; + goto err_disable_device; + } + + bars = pci_select_bars(pdev, IORESOURCE_MEM) & ENA_BAR_MASK; + rc = pci_request_selected_regions(pdev, bars, DRV_MODULE_NAME); + if (rc) { + dev_err(&pdev->dev, "pci_request_selected_regions failed %d\n", + rc); + goto err_free_ena_dev; + } + + ena_dev->reg_bar = ioremap(pci_resource_start(pdev, ENA_REG_BAR), + pci_resource_len(pdev, ENA_REG_BAR)); + if (!ena_dev->reg_bar) { + dev_err(&pdev->dev, "failed to remap regs bar\n"); + rc = -EFAULT; + goto err_free_region; + } + + ena_dev->dmadev = &pdev->dev; + + rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state); + if (rc) { + dev_err(&pdev->dev, "ena device init failed\n"); + if (rc == -ETIME) + rc = -EPROBE_DEFER; + goto err_free_region; + } + + rc = ena_set_push_mode(pdev, ena_dev, &get_feat_ctx); + if (rc) { + dev_err(&pdev->dev, "Invalid module param(push_mode)\n"); + goto err_device_destroy; + } + + if (ena_dev->tx_mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_DEV) { + ena_dev->mem_bar = ioremap_wc(pci_resource_start(pdev, ENA_MEM_BAR), + pci_resource_len(pdev, ENA_MEM_BAR)); + if (!ena_dev->mem_bar) { + rc = -EFAULT; + goto err_device_destroy; + } + } + + /* initial Tx interrupt delay, Assumes 1 usec granularity. + * Updated during device initialization with the real granularity + */ + ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS; + io_queue_num = ena_calc_io_queue_num(pdev, ena_dev, &get_feat_ctx); + queue_size = ena_calc_queue_size(pdev, ena_dev, &tx_sgl_size, + &rx_sgl_size, &get_feat_ctx); + if ((queue_size <= 0) || (io_queue_num <= 0)) { + rc = -EFAULT; + goto err_device_destroy; + } + + dev_info(&pdev->dev, "creating %d io queues. queue size: %d\n", + io_queue_num, queue_size); + + /* dev zeroed in init_etherdev */ + netdev = alloc_etherdev_mq(sizeof(struct ena_adapter), io_queue_num); + if (!netdev) { + dev_err(&pdev->dev, "alloc_etherdev_mq failed\n"); + rc = -ENOMEM; + goto err_device_destroy; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + pci_set_drvdata(pdev, adapter); + + adapter->ena_dev = ena_dev; + adapter->netdev = netdev; + adapter->pdev = pdev; + + ena_set_conf_feat_params(adapter, &get_feat_ctx); + + adapter->msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + + adapter->tx_ring_size = queue_size; + adapter->rx_ring_size = queue_size; + + adapter->max_tx_sgl_size = tx_sgl_size; + adapter->max_rx_sgl_size = rx_sgl_size; + + adapter->num_queues = io_queue_num; + adapter->last_monitored_tx_qid = 0; + + adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK; + adapter->wd_state = wd_state; + + snprintf(adapter->name, ENA_NAME_MAX_LEN, "ena_%d", adapters_found); + + rc = ena_com_init_interrupt_moderation(adapter->ena_dev); + if (rc) { + dev_err(&pdev->dev, + "Failed to query interrupt moderation feature\n"); + goto err_netdev_destroy; + } + ena_init_io_rings(adapter); + + netdev->netdev_ops = &ena_netdev_ops; + netdev->watchdog_timeo = TX_TIMEOUT; + ena_set_ethtool_ops(netdev); + + netdev->priv_flags |= IFF_UNICAST_FLT; + + u64_stats_init(&adapter->syncp); + + rc = ena_enable_msix_and_set_admin_interrupts(adapter, io_queue_num); + if (rc) { + dev_err(&pdev->dev, + "Failed to enable and set the admin interrupts\n"); + goto err_worker_destroy; + } + rc = ena_rss_init_default(adapter); + if (rc && (rc != -EPERM)) { + dev_err(&pdev->dev, "Cannot init RSS rc: %d\n", rc); + goto err_free_msix; + } + + ena_config_debug_area(adapter); + + memcpy(adapter->netdev->perm_addr, adapter->mac_addr, netdev->addr_len); + + netif_carrier_off(netdev); + + rc = register_netdev(netdev); + if (rc) { + dev_err(&pdev->dev, "Cannot register net device\n"); + goto err_rss; + } + + INIT_WORK(&adapter->suspend_io_task, ena_device_io_suspend); + INIT_WORK(&adapter->resume_io_task, ena_device_io_resume); + INIT_WORK(&adapter->reset_task, ena_fw_reset_device); + + adapter->last_keep_alive_jiffies = jiffies; + + init_timer(&adapter->timer_service); + adapter->timer_service.expires = round_jiffies(jiffies + HZ); + adapter->timer_service.function = ena_timer_service; + adapter->timer_service.data = (unsigned long)adapter; + + add_timer(&adapter->timer_service); + + dev_info(&pdev->dev, "%s found at mem %lx, mac addr %pM Queues %d\n", + DEVICE_NAME, (long)pci_resource_start(pdev, 0), + netdev->dev_addr, io_queue_num); + + set_bit(ENA_FLAG_DEVICE_RUNNING, &adapter->flags); + + adapters_found++; + + return 0; + +err_rss: + ena_com_delete_debug_area(ena_dev); + ena_com_rss_destroy(ena_dev); +err_free_msix: + ena_com_dev_reset(ena_dev); + ena_free_mgmnt_irq(adapter); + ena_disable_msix(adapter); +err_worker_destroy: + ena_com_destroy_interrupt_moderation(ena_dev); + del_timer(&adapter->timer_service); + cancel_work_sync(&adapter->suspend_io_task); + cancel_work_sync(&adapter->resume_io_task); +err_netdev_destroy: + free_netdev(netdev); +err_device_destroy: + ena_com_delete_host_info(ena_dev); + ena_com_admin_destroy(ena_dev); +err_free_region: + ena_release_bars(ena_dev, pdev); +err_free_ena_dev: + pci_set_drvdata(pdev, NULL); + vfree(ena_dev); +err_disable_device: + pci_disable_device(pdev); + return rc; +} + +/*****************************************************************************/ +static int ena_sriov_configure(struct pci_dev *dev, int numvfs) +{ + int rc; + + if (numvfs > 0) { + rc = pci_enable_sriov(dev, numvfs); + if (rc != 0) { + dev_err(&dev->dev, + "pci_enable_sriov failed to enable: %d vfs with the error: %d\n", + numvfs, rc); + return rc; + } + + return numvfs; + } + + if (numvfs == 0) { + pci_disable_sriov(dev); + return 0; + } + + return -EINVAL; +} + +/*****************************************************************************/ +/*****************************************************************************/ + +/* ena_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ena_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. + */ +static void ena_remove(struct pci_dev *pdev) +{ + struct ena_adapter *adapter = pci_get_drvdata(pdev); + struct ena_com_dev *ena_dev; + struct net_device *netdev; + + if (!adapter) + /* This device didn't load properly and it's resources + * already released, nothing to do + */ + return; + + ena_dev = adapter->ena_dev; + netdev = adapter->netdev; + +#ifdef CONFIG_RFS_ACCEL + if ((adapter->msix_vecs >= 1) && (netdev->rx_cpu_rmap)) { + free_irq_cpu_rmap(netdev->rx_cpu_rmap); + netdev->rx_cpu_rmap = NULL; + } +#endif /* CONFIG_RFS_ACCEL */ + + unregister_netdev(netdev); + del_timer_sync(&adapter->timer_service); + + cancel_work_sync(&adapter->reset_task); + + cancel_work_sync(&adapter->suspend_io_task); + + cancel_work_sync(&adapter->resume_io_task); + + ena_com_dev_reset(ena_dev); + + ena_free_mgmnt_irq(adapter); + + ena_disable_msix(adapter); + + free_netdev(netdev); + + ena_com_mmio_reg_read_request_destroy(ena_dev); + + ena_com_abort_admin_commands(ena_dev); + + ena_com_wait_for_abort_completion(ena_dev); + + ena_com_admin_destroy(ena_dev); + + ena_com_rss_destroy(ena_dev); + + ena_com_delete_debug_area(ena_dev); + + ena_com_delete_host_info(ena_dev); + + ena_release_bars(ena_dev, pdev); + + pci_set_drvdata(pdev, NULL); + + pci_disable_device(pdev); + + ena_com_destroy_interrupt_moderation(ena_dev); + + vfree(ena_dev); +} + +static struct pci_driver ena_pci_driver = { + .name = DRV_MODULE_NAME, + .id_table = ena_pci_tbl, + .probe = ena_probe, + .remove = ena_remove, + .sriov_configure = ena_sriov_configure, +}; + +static int __init ena_init(void) +{ + pr_info("%s", version); + + ena_wq = create_singlethread_workqueue(DRV_MODULE_NAME); + if (!ena_wq) { + pr_err("Failed to create workqueue\n"); + return -ENOMEM; + } + + return pci_register_driver(&ena_pci_driver); +} + +static void __exit ena_cleanup(void) +{ + pci_unregister_driver(&ena_pci_driver); + + if (ena_wq) { + destroy_workqueue(ena_wq); + ena_wq = NULL; + } +} + +/****************************************************************************** + ******************************** AENQ Handlers ******************************* + *****************************************************************************/ +/* ena_update_on_link_change: + * Notify the network interface about the change in link status + */ +static void ena_update_on_link_change(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + struct ena_admin_aenq_link_change_desc *aenq_desc = + (struct ena_admin_aenq_link_change_desc *)aenq_e; + int status = aenq_desc->flags & + ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK; + + if (status) { + netdev_dbg(adapter->netdev, "%s\n", __func__); + set_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_on(adapter->netdev); + } else { + clear_bit(ENA_FLAG_LINK_UP, &adapter->flags); + netif_carrier_off(adapter->netdev); + } +} + +static void ena_keep_alive_wd(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + adapter->last_keep_alive_jiffies = jiffies; +} + +static void ena_notification(void *adapter_data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)adapter_data; + + WARN(aenq_e->aenq_common_desc.group != ENA_ADMIN_NOTIFICATION, + "Invalid group(%x) expected %x\n", + aenq_e->aenq_common_desc.group, + ENA_ADMIN_NOTIFICATION); + + switch (aenq_e->aenq_common_desc.syndrom) { + case ENA_ADMIN_SUSPEND: + /* Suspend just the IO queues. + * We deliberately don't suspend admin so the timer and + * the keep_alive events should remain. + */ + queue_work(ena_wq, &adapter->suspend_io_task); + break; + case ENA_ADMIN_RESUME: + queue_work(ena_wq, &adapter->resume_io_task); + break; + default: + netif_err(adapter, drv, adapter->netdev, + "Invalid aenq notification link state %d\n", + aenq_e->aenq_common_desc.syndrom); + } +} + +/* This handler will called for unknown event group or unimplemented handlers*/ +static void unimplemented_aenq_handler(void *data, + struct ena_admin_aenq_entry *aenq_e) +{ + struct ena_adapter *adapter = (struct ena_adapter *)data; + + netif_err(adapter, drv, adapter->netdev, + "Unknown event was received or event with unimplemented handler\n"); +} + +static struct ena_aenq_handlers aenq_handlers = { + .handlers = { + [ENA_ADMIN_LINK_CHANGE] = ena_update_on_link_change, + [ENA_ADMIN_NOTIFICATION] = ena_notification, + [ENA_ADMIN_KEEP_ALIVE] = ena_keep_alive_wd, + }, + .unimplemented_handler = unimplemented_aenq_handler +}; + +module_init(ena_init); +module_exit(ena_cleanup); diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h new file mode 100644 index 000000000000..69d7e9ed5bc8 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h @@ -0,0 +1,324 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_H +#define ENA_H + +#include +#include +#include +#include +#include +#include + +#include "ena_com.h" +#include "ena_eth_com.h" + +#define DRV_MODULE_VER_MAJOR 1 +#define DRV_MODULE_VER_MINOR 0 +#define DRV_MODULE_VER_SUBMINOR 2 + +#define DRV_MODULE_NAME "ena" +#ifndef DRV_MODULE_VERSION +#define DRV_MODULE_VERSION \ + __stringify(DRV_MODULE_VER_MAJOR) "." \ + __stringify(DRV_MODULE_VER_MINOR) "." \ + __stringify(DRV_MODULE_VER_SUBMINOR) +#endif + +#define DEVICE_NAME "Elastic Network Adapter (ENA)" + +/* 1 for AENQ + ADMIN */ +#define ENA_MAX_MSIX_VEC(io_queues) (1 + (io_queues)) + +#define ENA_REG_BAR 0 +#define ENA_MEM_BAR 2 +#define ENA_BAR_MASK (BIT(ENA_REG_BAR) | BIT(ENA_MEM_BAR)) + +#define ENA_DEFAULT_RING_SIZE (1024) + +#define ENA_TX_WAKEUP_THRESH (MAX_SKB_FRAGS + 2) +#define ENA_DEFAULT_RX_COPYBREAK (128 - NET_IP_ALIGN) + +/* limit the buffer size to 600 bytes to handle MTU changes from very + * small to very large, in which case the number of buffers per packet + * could exceed ENA_PKT_MAX_BUFS + */ +#define ENA_DEFAULT_MIN_RX_BUFF_ALLOC_SIZE 600 + +#define ENA_MIN_MTU 128 + +#define ENA_NAME_MAX_LEN 20 +#define ENA_IRQNAME_SIZE 40 + +#define ENA_PKT_MAX_BUFS 19 + +#define ENA_RX_RSS_TABLE_LOG_SIZE 7 +#define ENA_RX_RSS_TABLE_SIZE (1 << ENA_RX_RSS_TABLE_LOG_SIZE) + +#define ENA_HASH_KEY_SIZE 40 + +/* The number of tx packet completions that will be handled each NAPI poll + * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER. + */ +#define ENA_TX_POLL_BUDGET_DIVIDER 4 + +/* Refill Rx queue when number of available descriptors is below + * QUEUE_SIZE / ENA_RX_REFILL_THRESH_DIVIDER + */ +#define ENA_RX_REFILL_THRESH_DIVIDER 8 + +/* Number of queues to check for missing queues per timer service */ +#define ENA_MONITORED_TX_QUEUES 4 +/* Max timeout packets before device reset */ +#define MAX_NUM_OF_TIMEOUTED_PACKETS 32 + +#define ENA_TX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) + +#define ENA_RX_RING_IDX_NEXT(idx, ring_size) (((idx) + 1) & ((ring_size) - 1)) +#define ENA_RX_RING_IDX_ADD(idx, n, ring_size) \ + (((idx) + (n)) & ((ring_size) - 1)) + +#define ENA_IO_TXQ_IDX(q) (2 * (q)) +#define ENA_IO_RXQ_IDX(q) (2 * (q) + 1) + +#define ENA_MGMNT_IRQ_IDX 0 +#define ENA_IO_IRQ_FIRST_IDX 1 +#define ENA_IO_IRQ_IDX(q) (ENA_IO_IRQ_FIRST_IDX + (q)) + +/* ENA device should send keep alive msg every 1 sec. + * We wait for 3 sec just to be on the safe side. + */ +#define ENA_DEVICE_KALIVE_TIMEOUT (3 * HZ) + +#define ENA_MMIO_DISABLE_REG_READ BIT(0) + +struct ena_irq { + irq_handler_t handler; + void *data; + int cpu; + u32 vector; + cpumask_t affinity_hint_mask; + char name[ENA_IRQNAME_SIZE]; +}; + +struct ena_napi { + struct napi_struct napi ____cacheline_aligned; + struct ena_ring *tx_ring; + struct ena_ring *rx_ring; + u32 qid; +}; + +struct ena_tx_buffer { + struct sk_buff *skb; + /* num of ena desc for this specific skb + * (includes data desc and metadata desc) + */ + u32 tx_descs; + /* num of buffers used by this skb */ + u32 num_of_bufs; + /* Save the last jiffies to detect missing tx packets */ + unsigned long last_jiffies; + struct ena_com_buf bufs[ENA_PKT_MAX_BUFS]; +} ____cacheline_aligned; + +struct ena_rx_buffer { + struct sk_buff *skb; + struct page *page; + u32 page_offset; + struct ena_com_buf ena_buf; +} ____cacheline_aligned; + +struct ena_stats_tx { + u64 cnt; + u64 bytes; + u64 queue_stop; + u64 prepare_ctx_err; + u64 queue_wakeup; + u64 dma_mapping_err; + u64 linearize; + u64 linearize_failed; + u64 napi_comp; + u64 tx_poll; + u64 doorbells; + u64 missing_tx_comp; + u64 bad_req_id; +}; + +struct ena_stats_rx { + u64 cnt; + u64 bytes; + u64 refil_partial; + u64 bad_csum; + u64 page_alloc_fail; + u64 skb_alloc_fail; + u64 dma_mapping_err; + u64 bad_desc_num; + u64 rx_copybreak_pkt; +}; + +struct ena_ring { + /* Holds the empty requests for TX out of order completions */ + u16 *free_tx_ids; + union { + struct ena_tx_buffer *tx_buffer_info; + struct ena_rx_buffer *rx_buffer_info; + }; + + /* cache ptr to avoid using the adapter */ + struct device *dev; + struct pci_dev *pdev; + struct napi_struct *napi; + struct net_device *netdev; + struct ena_com_dev *ena_dev; + struct ena_adapter *adapter; + struct ena_com_io_cq *ena_com_io_cq; + struct ena_com_io_sq *ena_com_io_sq; + + u16 next_to_use; + u16 next_to_clean; + u16 rx_copybreak; + u16 qid; + u16 mtu; + u16 sgl_size; + + /* The maximum header length the device can handle */ + u8 tx_max_header_size; + + /* cpu for TPH */ + int cpu; + /* number of tx/rx_buffer_info's entries */ + int ring_size; + + enum ena_admin_placement_policy_type tx_mem_queue_type; + + struct ena_com_rx_buf_info ena_bufs[ENA_PKT_MAX_BUFS]; + u32 smoothed_interval; + u32 per_napi_packets; + u32 per_napi_bytes; + enum ena_intr_moder_level moder_tbl_idx; + struct u64_stats_sync syncp; + union { + struct ena_stats_tx tx_stats; + struct ena_stats_rx rx_stats; + }; +} ____cacheline_aligned; + +struct ena_stats_dev { + u64 tx_timeout; + u64 io_suspend; + u64 io_resume; + u64 wd_expired; + u64 interface_up; + u64 interface_down; + u64 admin_q_pause; +}; + +enum ena_flags_t { + ENA_FLAG_DEVICE_RUNNING, + ENA_FLAG_DEV_UP, + ENA_FLAG_LINK_UP, + ENA_FLAG_MSIX_ENABLED, + ENA_FLAG_TRIGGER_RESET +}; + +/* adapter specific private data structure */ +struct ena_adapter { + struct ena_com_dev *ena_dev; + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + /* rx packets that shorter that this len will be copied to the skb + * header + */ + u32 rx_copybreak; + u32 max_mtu; + + int num_queues; + + struct msix_entry *msix_entries; + int msix_vecs; + + u32 tx_usecs, rx_usecs; /* interrupt moderation */ + u32 tx_frames, rx_frames; /* interrupt moderation */ + + u32 tx_ring_size; + u32 rx_ring_size; + + u32 msg_enable; + + u16 max_tx_sgl_size; + u16 max_rx_sgl_size; + + u8 mac_addr[ETH_ALEN]; + + char name[ENA_NAME_MAX_LEN]; + + unsigned long flags; + /* TX */ + struct ena_ring tx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + /* RX */ + struct ena_ring rx_ring[ENA_MAX_NUM_IO_QUEUES] + ____cacheline_aligned_in_smp; + + struct ena_napi ena_napi[ENA_MAX_NUM_IO_QUEUES]; + + struct ena_irq irq_tbl[ENA_MAX_MSIX_VEC(ENA_MAX_NUM_IO_QUEUES)]; + + /* timer service */ + struct work_struct reset_task; + struct work_struct suspend_io_task; + struct work_struct resume_io_task; + struct timer_list timer_service; + + bool wd_state; + unsigned long last_keep_alive_jiffies; + + struct u64_stats_sync syncp; + struct ena_stats_dev dev_stats; + + /* last queue index that was checked for uncompleted tx packets */ + u32 last_monitored_tx_qid; +}; + +void ena_set_ethtool_ops(struct net_device *netdev); + +void ena_dump_stats_to_dmesg(struct ena_adapter *adapter); + +void ena_dump_stats_to_buf(struct ena_adapter *adapter, u8 *buf); + +int ena_get_sset_count(struct net_device *netdev, int sset); + +#endif /* !(ENA_H) */ diff --git a/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h new file mode 100644 index 000000000000..f80d2a47fa94 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_pci_id_tbl.h @@ -0,0 +1,67 @@ +/* + * Copyright 2015 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ENA_PCI_ID_TBL_H_ +#define ENA_PCI_ID_TBL_H_ + +#ifndef PCI_VENDOR_ID_AMAZON +#define PCI_VENDOR_ID_AMAZON 0x1d0f +#endif + +#ifndef PCI_DEV_ID_ENA_PF +#define PCI_DEV_ID_ENA_PF 0x0ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_PF +#define PCI_DEV_ID_ENA_LLQ_PF 0x1ec2 +#endif + +#ifndef PCI_DEV_ID_ENA_VF +#define PCI_DEV_ID_ENA_VF 0xec20 +#endif + +#ifndef PCI_DEV_ID_ENA_LLQ_VF +#define PCI_DEV_ID_ENA_LLQ_VF 0xec21 +#endif + +#define ENA_PCI_ID_TABLE_ENTRY(devid) \ + {PCI_DEVICE(PCI_VENDOR_ID_AMAZON, devid)}, + +static const struct pci_device_id ena_pci_tbl[] = { + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_PF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_VF) + ENA_PCI_ID_TABLE_ENTRY(PCI_DEV_ID_ENA_LLQ_VF) + { } +}; + +#endif /* ENA_PCI_ID_TBL_H_ */ diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h new file mode 100644 index 000000000000..26097a2b6030 --- /dev/null +++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h @@ -0,0 +1,133 @@ +/* + * Copyright 2015 - 2016 Amazon.com, Inc. or its affiliates. + * + * This software is available to you under a choice of one of two + * licenses. You may choose to be licensed under the terms of the GNU + * General Public License (GPL) Version 2, available from the file + * COPYING in the main directory of this source tree, or the + * BSD license below: + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above + * copyright notice, this list of conditions and the following + * disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef _ENA_REGS_H_ +#define _ENA_REGS_H_ + +/* ena_registers offsets */ +#define ENA_REGS_VERSION_OFF 0x0 +#define ENA_REGS_CONTROLLER_VERSION_OFF 0x4 +#define ENA_REGS_CAPS_OFF 0x8 +#define ENA_REGS_CAPS_EXT_OFF 0xc +#define ENA_REGS_AQ_BASE_LO_OFF 0x10 +#define ENA_REGS_AQ_BASE_HI_OFF 0x14 +#define ENA_REGS_AQ_CAPS_OFF 0x18 +#define ENA_REGS_ACQ_BASE_LO_OFF 0x20 +#define ENA_REGS_ACQ_BASE_HI_OFF 0x24 +#define ENA_REGS_ACQ_CAPS_OFF 0x28 +#define ENA_REGS_AQ_DB_OFF 0x2c +#define ENA_REGS_ACQ_TAIL_OFF 0x30 +#define ENA_REGS_AENQ_CAPS_OFF 0x34 +#define ENA_REGS_AENQ_BASE_LO_OFF 0x38 +#define ENA_REGS_AENQ_BASE_HI_OFF 0x3c +#define ENA_REGS_AENQ_HEAD_DB_OFF 0x40 +#define ENA_REGS_AENQ_TAIL_OFF 0x44 +#define ENA_REGS_INTR_MASK_OFF 0x4c +#define ENA_REGS_DEV_CTL_OFF 0x54 +#define ENA_REGS_DEV_STS_OFF 0x58 +#define ENA_REGS_MMIO_REG_READ_OFF 0x5c +#define ENA_REGS_MMIO_RESP_LO_OFF 0x60 +#define ENA_REGS_MMIO_RESP_HI_OFF 0x64 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_OFF 0x68 + +/* version register */ +#define ENA_REGS_VERSION_MINOR_VERSION_MASK 0xff +#define ENA_REGS_VERSION_MAJOR_VERSION_SHIFT 8 +#define ENA_REGS_VERSION_MAJOR_VERSION_MASK 0xff00 + +/* controller_version register */ +#define ENA_REGS_CONTROLLER_VERSION_SUBMINOR_VERSION_MASK 0xff +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_SHIFT 8 +#define ENA_REGS_CONTROLLER_VERSION_MINOR_VERSION_MASK 0xff00 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_SHIFT 16 +#define ENA_REGS_CONTROLLER_VERSION_MAJOR_VERSION_MASK 0xff0000 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_SHIFT 24 +#define ENA_REGS_CONTROLLER_VERSION_IMPL_ID_MASK 0xff000000 + +/* caps register */ +#define ENA_REGS_CAPS_CONTIGUOUS_QUEUE_REQUIRED_MASK 0x1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_SHIFT 1 +#define ENA_REGS_CAPS_RESET_TIMEOUT_MASK 0x3e +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_SHIFT 8 +#define ENA_REGS_CAPS_DMA_ADDR_WIDTH_MASK 0xff00 + +/* aq_caps register */ +#define ENA_REGS_AQ_CAPS_AQ_DEPTH_MASK 0xffff +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AQ_CAPS_AQ_ENTRY_SIZE_MASK 0xffff0000 + +/* acq_caps register */ +#define ENA_REGS_ACQ_CAPS_ACQ_DEPTH_MASK 0xffff +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_ACQ_CAPS_ACQ_ENTRY_SIZE_MASK 0xffff0000 + +/* aenq_caps register */ +#define ENA_REGS_AENQ_CAPS_AENQ_DEPTH_MASK 0xffff +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_SHIFT 16 +#define ENA_REGS_AENQ_CAPS_AENQ_ENTRY_SIZE_MASK 0xffff0000 + +/* dev_ctl register */ +#define ENA_REGS_DEV_CTL_DEV_RESET_MASK 0x1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_SHIFT 1 +#define ENA_REGS_DEV_CTL_AQ_RESTART_MASK 0x2 +#define ENA_REGS_DEV_CTL_QUIESCENT_SHIFT 2 +#define ENA_REGS_DEV_CTL_QUIESCENT_MASK 0x4 +#define ENA_REGS_DEV_CTL_IO_RESUME_SHIFT 3 +#define ENA_REGS_DEV_CTL_IO_RESUME_MASK 0x8 + +/* dev_sts register */ +#define ENA_REGS_DEV_STS_READY_MASK 0x1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_SHIFT 1 +#define ENA_REGS_DEV_STS_AQ_RESTART_IN_PROGRESS_MASK 0x2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_SHIFT 2 +#define ENA_REGS_DEV_STS_AQ_RESTART_FINISHED_MASK 0x4 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_SHIFT 3 +#define ENA_REGS_DEV_STS_RESET_IN_PROGRESS_MASK 0x8 +#define ENA_REGS_DEV_STS_RESET_FINISHED_SHIFT 4 +#define ENA_REGS_DEV_STS_RESET_FINISHED_MASK 0x10 +#define ENA_REGS_DEV_STS_FATAL_ERROR_SHIFT 5 +#define ENA_REGS_DEV_STS_FATAL_ERROR_MASK 0x20 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_SHIFT 6 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_IN_PROGRESS_MASK 0x40 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_SHIFT 7 +#define ENA_REGS_DEV_STS_QUIESCENT_STATE_ACHIEVED_MASK 0x80 + +/* mmio_reg_read register */ +#define ENA_REGS_MMIO_REG_READ_REQ_ID_MASK 0xffff +#define ENA_REGS_MMIO_REG_READ_REG_OFF_SHIFT 16 +#define ENA_REGS_MMIO_REG_READ_REG_OFF_MASK 0xffff0000 + +/* rss_ind_entry_update register */ +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_INDEX_MASK 0xffff +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT 16 +#define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK 0xffff0000 + +#endif /*_ENA_REGS_H_ */ -- 2.34.1