#include <linux/ethtool.h>
#include <linux/workqueue.h>
#include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>
+#include <asm/div64.h>
/* local include */
#include "s2io.h"
#include "s2io-regs.h"
-#define DRV_VERSION "Version 2.0.9.3"
+#define DRV_VERSION "2.0.11.2"
/* S2io Driver name & version. */
static char s2io_driver_name[] = "Neterion";
static char s2io_driver_version[] = DRV_VERSION;
-int rxd_size[4] = {32,48,48,64};
-int rxd_count[4] = {127,85,85,63};
+static int rxd_size[4] = {32,48,48,64};
+static int rxd_count[4] = {127,85,85,63};
static inline int RXD_IS_UP2DT(RxD_t *rxdp)
{
{"\n DRIVER STATISTICS"},
{"single_bit_ecc_errs"},
{"double_bit_ecc_errs"},
+ ("lro_aggregated_pkts"),
+ ("lro_flush_both_count"),
+ ("lro_out_of_sequence_pkts"),
+ ("lro_flush_due_to_max_pkts"),
+ ("lro_avg_aggr_pkts"),
};
#define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
static unsigned int rxsync_frequency = 3;
/* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
static unsigned int intr_type = 0;
+/* Large receive offload feature */
+static unsigned int lro = 0;
+/* Max pkts to be aggregated by LRO at one time. If not specified,
+ * aggregation happens until we hit max IP pkt size(64K)
+ */
+static unsigned int lro_max_pkts = 0xFFFF;
/*
* S2IO device table.
config->tx_cfg[i].fifo_len - 1;
mac_control->fifos[i].fifo_no = i;
mac_control->fifos[i].nic = nic;
- mac_control->fifos[i].max_txds = MAX_SKB_FRAGS + 1;
+ mac_control->fifos[i].max_txds = MAX_SKB_FRAGS + 2;
for (j = 0; j < page_num; j++) {
int k = 0;
}
}
+ nic->ufo_in_band_v = kmalloc((sizeof(u64) * size), GFP_KERNEL);
+ if (!nic->ufo_in_band_v)
+ return -ENOMEM;
+
/* Allocation and initialization of RXDs in Rings */
size = 0;
for (i = 0; i < config->rx_ring_num; i++) {
mac_control->stats_mem,
mac_control->stats_mem_phy);
}
+ if (nic->ufo_in_band_v)
+ kfree(nic->ufo_in_band_v);
}
/**
writel((u32) (val64 >> 32), (add + 4));
val64 = readq(&bar0->mac_cfg);
+ /* Enable FCS stripping by adapter */
+ add = &bar0->mac_cfg;
+ val64 = readq(&bar0->mac_cfg);
+ val64 |= MAC_CFG_RMAC_STRIP_FCS;
+ if (nic->device_type == XFRAME_II_DEVICE)
+ writeq(val64, &bar0->mac_cfg);
+ else {
+ writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+ writel((u32) (val64), add);
+ writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+ writel((u32) (val64 >> 32), (add + 4));
+ }
+
/*
* Set the time value to be inserted in the pause frame
* generated by xena.
return SUCCESS;
}
+/**
+ * s2io_txdl_getskb - Get the skb from txdl, unmap and return skb
+ */
+static struct sk_buff *s2io_txdl_getskb(fifo_info_t *fifo_data, TxD_t *txdlp, int get_off)
+{
+ nic_t *nic = fifo_data->nic;
+ struct sk_buff *skb;
+ TxD_t *txds;
+ u16 j, frg_cnt;
+
+ txds = txdlp;
+ if (txds->Host_Control == (u64)(long)nic->ufo_in_band_v) {
+ pci_unmap_single(nic->pdev, (dma_addr_t)
+ txds->Buffer_Pointer, sizeof(u64),
+ PCI_DMA_TODEVICE);
+ txds++;
+ }
+
+ skb = (struct sk_buff *) ((unsigned long)
+ txds->Host_Control);
+ if (!skb) {
+ memset(txdlp, 0, (sizeof(TxD_t) * fifo_data->max_txds));
+ return NULL;
+ }
+ pci_unmap_single(nic->pdev, (dma_addr_t)
+ txds->Buffer_Pointer,
+ skb->len - skb->data_len,
+ PCI_DMA_TODEVICE);
+ frg_cnt = skb_shinfo(skb)->nr_frags;
+ if (frg_cnt) {
+ txds++;
+ for (j = 0; j < frg_cnt; j++, txds++) {
+ skb_frag_t *frag = &skb_shinfo(skb)->frags[j];
+ if (!txds->Buffer_Pointer)
+ break;
+ pci_unmap_page(nic->pdev, (dma_addr_t)
+ txds->Buffer_Pointer,
+ frag->size, PCI_DMA_TODEVICE);
+ }
+ }
+ txdlp->Host_Control = 0;
+ return(skb);
+}
/**
* free_tx_buffers - Free all queued Tx buffers
int i, j;
mac_info_t *mac_control;
struct config_param *config;
- int cnt = 0, frg_cnt;
+ int cnt = 0;
mac_control = &nic->mac_control;
config = &nic->config;
for (j = 0; j < config->tx_cfg[i].fifo_len - 1; j++) {
txdp = (TxD_t *) mac_control->fifos[i].list_info[j].
list_virt_addr;
- skb =
- (struct sk_buff *) ((unsigned long) txdp->
- Host_Control);
- if (skb == NULL) {
- memset(txdp, 0, sizeof(TxD_t) *
- config->max_txds);
- continue;
+ skb = s2io_txdl_getskb(&mac_control->fifos[i], txdp, j);
+ if (skb) {
+ dev_kfree_skb(skb);
+ cnt++;
}
- frg_cnt = skb_shinfo(skb)->nr_frags;
- pci_unmap_single(nic->pdev, (dma_addr_t)
- txdp->Buffer_Pointer,
- skb->len - skb->data_len,
- PCI_DMA_TODEVICE);
- if (frg_cnt) {
- TxD_t *temp;
- temp = txdp;
- txdp++;
- for (j = 0; j < frg_cnt; j++, txdp++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[j];
- pci_unmap_page(nic->pdev,
- (dma_addr_t)
- txdp->
- Buffer_Pointer,
- frag->size,
- PCI_DMA_TODEVICE);
- }
- txdp = temp;
- }
- dev_kfree_skb(skb);
- memset(txdp, 0, sizeof(TxD_t) * config->max_txds);
- cnt++;
}
DBG_PRINT(INTR_DBG,
"%s:forcibly freeing %d skbs on FIFO%d\n",
}
}
-int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
+static int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
{
struct net_device *dev = nic->dev;
struct sk_buff *frag_list;
#ifndef CONFIG_S2IO_NAPI
int pkt_cnt = 0;
#endif
+ int i;
+
spin_lock(&nic->rx_lock);
if (atomic_read(&nic->card_state) == CARD_DOWN) {
DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
break;
#endif
}
+ if (nic->lro) {
+ /* Clear all LRO sessions before exiting */
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *lro = &nic->lro0_n[i];
+ if (lro->in_use) {
+ update_L3L4_header(nic, lro);
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ }
+ }
+ }
+
spin_unlock(&nic->rx_lock);
}
tx_curr_get_info_t get_info, put_info;
struct sk_buff *skb;
TxD_t *txdlp;
- u16 j, frg_cnt;
get_info = fifo_data->tx_curr_get_info;
put_info = fifo_data->tx_curr_put_info;
}
}
- skb = (struct sk_buff *) ((unsigned long)
- txdlp->Host_Control);
+ skb = s2io_txdl_getskb(fifo_data, txdlp, get_info.offset);
if (skb == NULL) {
DBG_PRINT(ERR_DBG, "%s: Null skb ",
__FUNCTION__);
return;
}
- frg_cnt = skb_shinfo(skb)->nr_frags;
- nic->tx_pkt_count++;
-
- pci_unmap_single(nic->pdev, (dma_addr_t)
- txdlp->Buffer_Pointer,
- skb->len - skb->data_len,
- PCI_DMA_TODEVICE);
- if (frg_cnt) {
- TxD_t *temp;
- temp = txdlp;
- txdlp++;
- for (j = 0; j < frg_cnt; j++, txdlp++) {
- skb_frag_t *frag =
- &skb_shinfo(skb)->frags[j];
- if (!txdlp->Buffer_Pointer)
- break;
- pci_unmap_page(nic->pdev,
- (dma_addr_t)
- txdlp->
- Buffer_Pointer,
- frag->size,
- PCI_DMA_TODEVICE);
- }
- txdlp = temp;
- }
- memset(txdlp, 0,
- (sizeof(TxD_t) * fifo_data->max_txds));
-
/* Updating the statistics block */
nic->stats.tx_bytes += skb->len;
dev_kfree_skb_irq(skb);
* void.
*/
-void s2io_reset(nic_t * sp)
+static void s2io_reset(nic_t * sp)
{
XENA_dev_config_t __iomem *bar0 = sp->bar0;
u64 val64;
* SUCCESS on success and FAILURE on failure.
*/
-int s2io_set_swapper(nic_t * sp)
+static int s2io_set_swapper(nic_t * sp)
{
struct net_device *dev = sp->dev;
XENA_dev_config_t __iomem *bar0 = sp->bar0;
return ret;
}
-void restore_xmsi_data(nic_t *nic)
+static void restore_xmsi_data(nic_t *nic)
{
XENA_dev_config_t __iomem *bar0 = nic->bar0;
u64 val64;
return 0;
}
-int s2io_enable_msi_x(nic_t *nic)
+static int s2io_enable_msi_x(nic_t *nic)
{
XENA_dev_config_t __iomem *bar0 = nic->bar0;
u64 tx_mat, rx_mat;
s2io_msix_fifo_handle, 0, sp->desc1,
sp->s2io_entries[i].arg);
DBG_PRINT(ERR_DBG, "%s @ 0x%llx\n", sp->desc1,
- sp->msix_info[i].addr);
+ (unsigned long long)sp->msix_info[i].addr);
} else {
sprintf(sp->desc2, "%s:MSI-X-%d-RX",
dev->name, i);
s2io_msix_ring_handle, 0, sp->desc2,
sp->s2io_entries[i].arg);
DBG_PRINT(ERR_DBG, "%s @ 0x%llx\n", sp->desc2,
- sp->msix_info[i].addr);
+ (unsigned long long)sp->msix_info[i].addr);
}
if (err) {
DBG_PRINT(ERR_DBG, "%s: MSI-X-%d registration \
return 0;
}
+ txdp->Control_1 = 0;
+ txdp->Control_2 = 0;
#ifdef NETIF_F_TSO
mss = skb_shinfo(skb)->tso_size;
if (mss) {
txdp->Control_1 |= TXD_TCP_LSO_MSS(mss);
}
#endif
-
- frg_cnt = skb_shinfo(skb)->nr_frags;
- frg_len = skb->len - skb->data_len;
-
- txdp->Buffer_Pointer = pci_map_single
- (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
- txdp->Host_Control = (unsigned long) skb;
if (skb->ip_summed == CHECKSUM_HW) {
txdp->Control_2 |=
(TXD_TX_CKO_IPV4_EN | TXD_TX_CKO_TCP_EN |
TXD_TX_CKO_UDP_EN);
}
-
+ txdp->Control_1 |= TXD_GATHER_CODE_FIRST;
+ txdp->Control_1 |= TXD_LIST_OWN_XENA;
txdp->Control_2 |= config->tx_intr_type;
if (sp->vlgrp && vlan_tx_tag_present(skb)) {
txdp->Control_2 |= TXD_VLAN_TAG(vlan_tag);
}
- txdp->Control_1 |= (TXD_BUFFER0_SIZE(frg_len) |
- TXD_GATHER_CODE_FIRST);
- txdp->Control_1 |= TXD_LIST_OWN_XENA;
+ frg_len = skb->len - skb->data_len;
+ if (skb_shinfo(skb)->ufo_size) {
+ int ufo_size;
+
+ ufo_size = skb_shinfo(skb)->ufo_size;
+ ufo_size &= ~7;
+ txdp->Control_1 |= TXD_UFO_EN;
+ txdp->Control_1 |= TXD_UFO_MSS(ufo_size);
+ txdp->Control_1 |= TXD_BUFFER0_SIZE(8);
+#ifdef __BIG_ENDIAN
+ sp->ufo_in_band_v[put_off] =
+ (u64)skb_shinfo(skb)->ip6_frag_id;
+#else
+ sp->ufo_in_band_v[put_off] =
+ (u64)skb_shinfo(skb)->ip6_frag_id << 32;
+#endif
+ txdp->Host_Control = (unsigned long)sp->ufo_in_band_v;
+ txdp->Buffer_Pointer = pci_map_single(sp->pdev,
+ sp->ufo_in_band_v,
+ sizeof(u64), PCI_DMA_TODEVICE);
+ txdp++;
+ txdp->Control_1 = 0;
+ txdp->Control_2 = 0;
+ }
+ txdp->Buffer_Pointer = pci_map_single
+ (sp->pdev, skb->data, frg_len, PCI_DMA_TODEVICE);
+ txdp->Host_Control = (unsigned long) skb;
+ txdp->Control_1 |= TXD_BUFFER0_SIZE(frg_len);
+
+ if (skb_shinfo(skb)->ufo_size)
+ txdp->Control_1 |= TXD_UFO_EN;
+
+ frg_cnt = skb_shinfo(skb)->nr_frags;
/* For fragmented SKB. */
for (i = 0; i < frg_cnt; i++) {
skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
txdp->Buffer_Pointer = (u64) pci_map_page
(sp->pdev, frag->page, frag->page_offset,
frag->size, PCI_DMA_TODEVICE);
- txdp->Control_1 |= TXD_BUFFER0_SIZE(frag->size);
+ txdp->Control_1 = TXD_BUFFER0_SIZE(frag->size);
+ if (skb_shinfo(skb)->ufo_size)
+ txdp->Control_1 |= TXD_UFO_EN;
}
txdp->Control_1 |= TXD_GATHER_CODE_LAST;
+ if (skb_shinfo(skb)->ufo_size)
+ frg_cnt++; /* as Txd0 was used for inband header */
+
tx_fifo = mac_control->tx_FIFO_start[queue];
val64 = mac_control->fifos[queue].list_info[put_off].list_phy_addr;
writeq(val64, &tx_fifo->TxDL_Pointer);
if (mss)
val64 |= TX_FIFO_SPECIAL_FUNC;
#endif
+ if (skb_shinfo(skb)->ufo_size)
+ val64 |= TX_FIFO_SPECIAL_FUNC;
writeq(val64, &tx_fifo->List_Control);
mmiowb();
* else schedule a tasklet to reallocate the buffers.
*/
for (i = 0; i < config->rx_ring_num; i++) {
- int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
- int level = rx_buffer_level(sp, rxb_size, i);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "%s:Out of memory",
- dev->name);
- DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ if (!sp->lro) {
+ int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+ int level = rx_buffer_level(sp, rxb_size, i);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
+ dev->name);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ clear_bit(0, (&sp->tasklet_status));
+ atomic_dec(&sp->isr_cnt);
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- atomic_dec(&sp->isr_cnt);
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
+ }
+ else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+ break;
}
}
{
ring_info_t *ring = (ring_info_t *)dev_id;
nic_t *sp = ring->nic;
+ struct net_device *dev = (struct net_device *) dev_id;
int rxb_size, level, rng_n;
atomic_inc(&sp->isr_cnt);
rx_intr_handler(ring);
rng_n = ring->ring_no;
- rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
- level = rx_buffer_level(sp, rxb_size, rng_n);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- int ret;
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "Out of memory in %s",
- __FUNCTION__);
+ if (!sp->lro) {
+ rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
+ level = rx_buffer_level(sp, rxb_size, rng_n);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ int ret;
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "Out of memory in %s",
+ __FUNCTION__);
+ clear_bit(0, (&sp->tasklet_status));
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
}
+ else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+ }
+
atomic_dec(&sp->isr_cnt);
return IRQ_HANDLED;
*/
#ifndef CONFIG_S2IO_NAPI
for (i = 0; i < config->rx_ring_num; i++) {
- int ret;
- int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
- int level = rx_buffer_level(sp, rxb_size, i);
-
- if ((level == PANIC) && (!TASKLET_IN_USE)) {
- DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
- DBG_PRINT(INTR_DBG, "PANIC levels\n");
- if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
- DBG_PRINT(ERR_DBG, "%s:Out of memory",
- dev->name);
- DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ if (!sp->lro) {
+ int ret;
+ int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+ int level = rx_buffer_level(sp, rxb_size, i);
+
+ if ((level == PANIC) && (!TASKLET_IN_USE)) {
+ DBG_PRINT(INTR_DBG, "%s: Rx BD hit ",
+ dev->name);
+ DBG_PRINT(INTR_DBG, "PANIC levels\n");
+ if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in ISR!!\n");
+ clear_bit(0, (&sp->tasklet_status));
+ atomic_dec(&sp->isr_cnt);
+ return IRQ_HANDLED;
+ }
clear_bit(0, (&sp->tasklet_status));
- atomic_dec(&sp->isr_cnt);
- return IRQ_HANDLED;
+ } else if (level == LOW) {
+ tasklet_schedule(&sp->task);
}
- clear_bit(0, (&sp->tasklet_status));
- } else if (level == LOW) {
- tasklet_schedule(&sp->task);
+ }
+ else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+ DBG_PRINT(ERR_DBG, "%s:Out of memory",
+ dev->name);
+ DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
+ break;
}
}
#endif
* as defined in errno.h file on failure.
*/
-int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
+static int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
{
nic_t *sp = dev->priv;
XENA_dev_config_t __iomem *bar0 = sp->bar0;
fail = 1;
if (ret_data != 0x012345) {
- DBG_PRINT(ERR_DBG, "%s: eeprom test error at offset 0x4F0. Data written %llx Data read %llx\n", dev->name, (u64)0x12345, ret_data);
+ DBG_PRINT(ERR_DBG, "%s: eeprom test error at offset 0x4F0. "
+ "Data written %llx Data read %llx\n",
+ dev->name, (unsigned long long)0x12345,
+ (unsigned long long)ret_data);
fail = 1;
}
fail = 1;
if (ret_data != 0x012345) {
- DBG_PRINT(ERR_DBG, "%s: eeprom test error at offset 0x7F0. Data written %llx Data read %llx\n", dev->name, (u64)0x12345, ret_data);
+ DBG_PRINT(ERR_DBG, "%s: eeprom test error at offset 0x7F0. "
+ "Data written %llx Data read %llx\n",
+ dev->name, (unsigned long long)0x12345,
+ (unsigned long long)ret_data);
fail = 1;
}
int i = 0;
nic_t *sp = dev->priv;
StatInfo_t *stat_info = sp->mac_control.stats_info;
+ u64 tmp;
s2io_updt_stats(sp);
tmp_stats[i++] =
tmp_stats[i++] = 0;
tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
+ tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
+ tmp_stats[i++] = stat_info->sw_stat.sending_both;
+ tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
+ tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
+ tmp = 0;
+ if (stat_info->sw_stat.num_aggregations) {
+ tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
+ do_div(tmp, stat_info->sw_stat.num_aggregations);
+ }
+ tmp_stats[i++] = tmp;
}
static int s2io_ethtool_get_regs_len(struct net_device *dev)
.get_tso = ethtool_op_get_tso,
.set_tso = ethtool_op_set_tso,
#endif
+ .get_ufo = ethtool_op_get_ufo,
+ .set_ufo = ethtool_op_set_ufo,
.self_test_count = s2io_ethtool_self_test_count,
.self_test = s2io_ethtool_test,
.get_strings = s2io_ethtool_get_strings,
/* Setting its receive mode */
s2io_set_multicast(dev);
+ if (sp->lro) {
+ /* Initialize max aggregatable pkts based on MTU */
+ sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
+ /* Check if we can use(if specified) user provided value */
+ if (lro_max_pkts < sp->lro_max_aggr_per_sess)
+ sp->lro_max_aggr_per_sess = lro_max_pkts;
+ }
+
/* Enable tasklet for the device */
tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
((unsigned long) rxdp->Host_Control);
int ring_no = ring_data->ring_no;
u16 l3_csum, l4_csum;
+ lro_t *lro;
skb->dev = dev;
if (rxdp->Control_1 & RXD_T_CODE) {
skb_put(skb, buf2_len);
}
- if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+ if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
+ (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
(sp->rx_csum)) {
l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
* a flag in the RxD.
*/
skb->ip_summed = CHECKSUM_UNNECESSARY;
+ if (sp->lro) {
+ u32 tcp_len;
+ u8 *tcp;
+ int ret = 0;
+
+ ret = s2io_club_tcp_session(skb->data, &tcp,
+ &tcp_len, &lro, rxdp, sp);
+ switch (ret) {
+ case 3: /* Begin anew */
+ lro->parent = skb;
+ goto aggregate;
+ case 1: /* Aggregate */
+ {
+ lro_append_pkt(sp, lro,
+ skb, tcp_len);
+ goto aggregate;
+ }
+ case 4: /* Flush session */
+ {
+ lro_append_pkt(sp, lro,
+ skb, tcp_len);
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ sp->mac_control.stats_info->
+ sw_stat.flush_max_pkts++;
+ goto aggregate;
+ }
+ case 2: /* Flush both */
+ lro->parent->data_len =
+ lro->frags_len;
+ sp->mac_control.stats_info->
+ sw_stat.sending_both++;
+ queue_rx_frame(lro->parent);
+ clear_lro_session(lro);
+ goto send_up;
+ case 0: /* sessions exceeded */
+ case 5: /*
+ * First pkt in session not
+ * L3/L4 aggregatable
+ */
+ break;
+ default:
+ DBG_PRINT(ERR_DBG,
+ "%s: Samadhana!!\n",
+ __FUNCTION__);
+ BUG();
+ }
+ }
} else {
/*
* Packet with erroneous checksum, let the
skb->ip_summed = CHECKSUM_NONE;
}
- skb->protocol = eth_type_trans(skb, dev);
+ if (!sp->lro) {
+ skb->protocol = eth_type_trans(skb, dev);
#ifdef CONFIG_S2IO_NAPI
- if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
- /* Queueing the vlan frame to the upper layer */
- vlan_hwaccel_receive_skb(skb, sp->vlgrp,
- RXD_GET_VLAN_TAG(rxdp->Control_2));
- } else {
- netif_receive_skb(skb);
- }
+ if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+ /* Queueing the vlan frame to the upper layer */
+ vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+ RXD_GET_VLAN_TAG(rxdp->Control_2));
+ } else {
+ netif_receive_skb(skb);
+ }
#else
- if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
- /* Queueing the vlan frame to the upper layer */
- vlan_hwaccel_rx(skb, sp->vlgrp,
- RXD_GET_VLAN_TAG(rxdp->Control_2));
- } else {
- netif_rx(skb);
- }
+ if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+ /* Queueing the vlan frame to the upper layer */
+ vlan_hwaccel_rx(skb, sp->vlgrp,
+ RXD_GET_VLAN_TAG(rxdp->Control_2));
+ } else {
+ netif_rx(skb);
+ }
#endif
+ } else {
+send_up:
+ queue_rx_frame(skb);
+ }
dev->last_rx = jiffies;
+aggregate:
atomic_dec(&sp->rx_bufs_left[ring_no]);
return SUCCESS;
}
* void.
*/
-void s2io_link(nic_t * sp, int link)
+static void s2io_link(nic_t * sp, int link)
{
struct net_device *dev = (struct net_device *) sp->dev;
* returns the revision ID of the device.
*/
-int get_xena_rev_id(struct pci_dev *pdev)
+static int get_xena_rev_id(struct pci_dev *pdev)
{
u8 id = 0;
int ret;
#endif
module_param(rxsync_frequency, int, 0);
module_param(intr_type, int, 0);
+module_param(lro, int, 0);
+module_param(lro_max_pkts, int, 0);
/**
* s2io_init_nic - Initialization of the adapter .
else
sp->device_type = XFRAME_I_DEVICE;
+ sp->lro = lro;
/* Initialize some PCI/PCI-X fields of the NIC. */
s2io_init_pci(sp);
break;
}
}
- config->max_txds = MAX_SKB_FRAGS + 1;
+ /* + 2 because one Txd for skb->data and one Txd for UFO */
+ config->max_txds = MAX_SKB_FRAGS + 2;
/* Rx side parameters. */
if (rx_ring_sz[0] == 0)
#ifdef NETIF_F_TSO
dev->features |= NETIF_F_TSO;
#endif
+ if (sp->device_type & XFRAME_II_DEVICE) {
+ dev->features |= NETIF_F_UFO;
+ dev->features |= NETIF_F_HW_CSUM;
+ }
dev->tx_timeout = &s2io_tx_watchdog;
dev->watchdog_timeo = WATCH_DOG_TIMEOUT;
DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
"enabled\n",dev->name);
+ if (sp->lro)
+ DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
+ dev->name);
+
/* Initialize device name */
strcpy(sp->name, dev->name);
if (sp->device_type & XFRAME_II_DEVICE)
* Description: This function is the cleanup routine for the driver. It unregist * ers the driver.
*/
-void s2io_closer(void)
+static void s2io_closer(void)
{
pci_unregister_driver(&s2io_driver);
DBG_PRINT(INIT_DBG, "cleanup done\n");
module_init(s2io_starter);
module_exit(s2io_closer);
+
+static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
+ struct tcphdr **tcp, RxD_t *rxdp)
+{
+ int ip_off;
+ u8 l2_type = (u8)((rxdp->Control_1 >> 37) & 0x7), ip_len;
+
+ if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
+ DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
+ __FUNCTION__);
+ return -1;
+ }
+
+ /* TODO:
+ * By default the VLAN field in the MAC is stripped by the card, if this
+ * feature is turned off in rx_pa_cfg register, then the ip_off field
+ * has to be shifted by a further 2 bytes
+ */
+ switch (l2_type) {
+ case 0: /* DIX type */
+ case 4: /* DIX type with VLAN */
+ ip_off = HEADER_ETHERNET_II_802_3_SIZE;
+ break;
+ /* LLC, SNAP etc are considered non-mergeable */
+ default:
+ return -1;
+ }
+
+ *ip = (struct iphdr *)((u8 *)buffer + ip_off);
+ ip_len = (u8)((*ip)->ihl);
+ ip_len <<= 2;
+ *tcp = (struct tcphdr *)((unsigned long)*ip + ip_len);
+
+ return 0;
+}
+
+static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
+ struct tcphdr *tcp)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ if ((lro->iph->saddr != ip->saddr) || (lro->iph->daddr != ip->daddr) ||
+ (lro->tcph->source != tcp->source) || (lro->tcph->dest != tcp->dest))
+ return -1;
+ return 0;
+}
+
+static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
+{
+ return(ntohs(ip->tot_len) - (ip->ihl << 2) - (tcp->doff << 2));
+}
+
+static void initiate_new_session(lro_t *lro, u8 *l2h,
+ struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ lro->l2h = l2h;
+ lro->iph = ip;
+ lro->tcph = tcp;
+ lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
+ lro->tcp_ack = ntohl(tcp->ack_seq);
+ lro->sg_num = 1;
+ lro->total_len = ntohs(ip->tot_len);
+ lro->frags_len = 0;
+ /*
+ * check if we saw TCP timestamp. Other consistency checks have
+ * already been done.
+ */
+ if (tcp->doff == 8) {
+ u32 *ptr;
+ ptr = (u32 *)(tcp+1);
+ lro->saw_ts = 1;
+ lro->cur_tsval = *(ptr+1);
+ lro->cur_tsecr = *(ptr+2);
+ }
+ lro->in_use = 1;
+}
+
+static void update_L3L4_header(nic_t *sp, lro_t *lro)
+{
+ struct iphdr *ip = lro->iph;
+ struct tcphdr *tcp = lro->tcph;
+ u16 nchk;
+ StatInfo_t *statinfo = sp->mac_control.stats_info;
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+ /* Update L3 header */
+ ip->tot_len = htons(lro->total_len);
+ ip->check = 0;
+ nchk = ip_fast_csum((u8 *)lro->iph, ip->ihl);
+ ip->check = nchk;
+
+ /* Update L4 header */
+ tcp->ack_seq = lro->tcp_ack;
+ tcp->window = lro->window;
+
+ /* Update tsecr field if this session has timestamps enabled */
+ if (lro->saw_ts) {
+ u32 *ptr = (u32 *)(tcp + 1);
+ *(ptr+2) = lro->cur_tsecr;
+ }
+
+ /* Update counters required for calculation of
+ * average no. of packets aggregated.
+ */
+ statinfo->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
+ statinfo->sw_stat.num_aggregations++;
+}
+
+static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
+ struct tcphdr *tcp, u32 l4_pyld)
+{
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+ lro->total_len += l4_pyld;
+ lro->frags_len += l4_pyld;
+ lro->tcp_next_seq += l4_pyld;
+ lro->sg_num++;
+
+ /* Update ack seq no. and window ad(from this pkt) in LRO object */
+ lro->tcp_ack = tcp->ack_seq;
+ lro->window = tcp->window;
+
+ if (lro->saw_ts) {
+ u32 *ptr;
+ /* Update tsecr and tsval from this packet */
+ ptr = (u32 *) (tcp + 1);
+ lro->cur_tsval = *(ptr + 1);
+ lro->cur_tsecr = *(ptr + 2);
+ }
+}
+
+static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
+ struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+ u8 *ptr;
+
+ DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+ if (!tcp_pyld_len) {
+ /* Runt frame or a pure ack */
+ return -1;
+ }
+
+ if (ip->ihl != 5) /* IP has options */
+ return -1;
+
+ if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
+ !tcp->ack) {
+ /*
+ * Currently recognize only the ack control word and
+ * any other control field being set would result in
+ * flushing the LRO session
+ */
+ return -1;
+ }
+
+ /*
+ * Allow only one TCP timestamp option. Don't aggregate if
+ * any other options are detected.
+ */
+ if (tcp->doff != 5 && tcp->doff != 8)
+ return -1;
+
+ if (tcp->doff == 8) {
+ ptr = (u8 *)(tcp + 1);
+ while (*ptr == TCPOPT_NOP)
+ ptr++;
+ if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
+ return -1;
+
+ /* Ensure timestamp value increases monotonically */
+ if (l_lro)
+ if (l_lro->cur_tsval > *((u32 *)(ptr+2)))
+ return -1;
+
+ /* timestamp echo reply should be non-zero */
+ if (*((u32 *)(ptr+6)) == 0)
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
+ RxD_t *rxdp, nic_t *sp)
+{
+ struct iphdr *ip;
+ struct tcphdr *tcph;
+ int ret = 0, i;
+
+ if (!(ret = check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp,
+ rxdp))) {
+ DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
+ ip->saddr, ip->daddr);
+ } else {
+ return ret;
+ }
+
+ tcph = (struct tcphdr *)*tcp;
+ *tcp_len = get_l4_pyld_length(ip, tcph);
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *l_lro = &sp->lro0_n[i];
+ if (l_lro->in_use) {
+ if (check_for_socket_match(l_lro, ip, tcph))
+ continue;
+ /* Sock pair matched */
+ *lro = l_lro;
+
+ if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
+ DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
+ "0x%x, actual 0x%x\n", __FUNCTION__,
+ (*lro)->tcp_next_seq,
+ ntohl(tcph->seq));
+
+ sp->mac_control.stats_info->
+ sw_stat.outof_sequence_pkts++;
+ ret = 2;
+ break;
+ }
+
+ if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
+ ret = 1; /* Aggregate */
+ else
+ ret = 2; /* Flush both */
+ break;
+ }
+ }
+
+ if (ret == 0) {
+ /* Before searching for available LRO objects,
+ * check if the pkt is L3/L4 aggregatable. If not
+ * don't create new LRO session. Just send this
+ * packet up.
+ */
+ if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
+ return 5;
+ }
+
+ for (i=0; i<MAX_LRO_SESSIONS; i++) {
+ lro_t *l_lro = &sp->lro0_n[i];
+ if (!(l_lro->in_use)) {
+ *lro = l_lro;
+ ret = 3; /* Begin anew */
+ break;
+ }
+ }
+ }
+
+ if (ret == 0) { /* sessions exceeded */
+ DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
+ __FUNCTION__);
+ *lro = NULL;
+ return ret;
+ }
+
+ switch (ret) {
+ case 3:
+ initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+ break;
+ case 2:
+ update_L3L4_header(sp, *lro);
+ break;
+ case 1:
+ aggregate_new_rx(*lro, ip, tcph, *tcp_len);
+ if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
+ update_L3L4_header(sp, *lro);
+ ret = 4; /* Flush the LRO */
+ }
+ break;
+ default:
+ DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
+ __FUNCTION__);
+ break;
+ }
+
+ return ret;
+}
+
+static void clear_lro_session(lro_t *lro)
+{
+ static u16 lro_struct_size = sizeof(lro_t);
+
+ memset(lro, 0, lro_struct_size);
+}
+
+static void queue_rx_frame(struct sk_buff *skb)
+{
+ struct net_device *dev = skb->dev;
+
+ skb->protocol = eth_type_trans(skb, dev);
+#ifdef CONFIG_S2IO_NAPI
+ netif_receive_skb(skb);
+#else
+ netif_rx(skb);
+#endif
+}
+
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
+ u32 tcp_len)
+{
+ struct sk_buff *tmp, *first = lro->parent;
+
+ first->len += tcp_len;
+ first->data_len = lro->frags_len;
+ skb_pull(skb, (skb->len - tcp_len));
+ if ((tmp = skb_shinfo(first)->frag_list)) {
+ while (tmp->next)
+ tmp = tmp->next;
+ tmp->next = skb;
+ }
+ else
+ skb_shinfo(first)->frag_list = skb;
+ sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
+ return;
+}