Merge branch 'lro'
[sfrench/cifs-2.6.git] / drivers / net / s2io.c
index 49b597cbc19a076e1ed2e737ff68b7b653922044..0db218c2dbeb7a879e2b67013d19903419a4d72a 100644 (file)
 #include <linux/ethtool.h>
 #include <linux/workqueue.h>
 #include <linux/if_vlan.h>
+#include <linux/ip.h>
+#include <linux/tcp.h>
+#include <net/tcp.h>
 
 #include <asm/system.h>
 #include <asm/uaccess.h>
 #include <asm/io.h>
+#include <asm/div64.h>
 
 /* local include */
 #include "s2io.h"
 #include "s2io-regs.h"
 
-#define DRV_VERSION "Version 2.0.9.4"
+#define DRV_VERSION "2.0.11.2"
 
 /* S2io Driver name & version. */
 static char s2io_driver_name[] = "Neterion";
 static char s2io_driver_version[] = DRV_VERSION;
 
-int rxd_size[4] = {32,48,48,64};
-int rxd_count[4] = {127,85,85,63};
+static int rxd_size[4] = {32,48,48,64};
+static int rxd_count[4] = {127,85,85,63};
 
 static inline int RXD_IS_UP2DT(RxD_t *rxdp)
 {
@@ -168,6 +172,11 @@ static char ethtool_stats_keys[][ETH_GSTRING_LEN] = {
        {"\n DRIVER STATISTICS"},
        {"single_bit_ecc_errs"},
        {"double_bit_ecc_errs"},
+       ("lro_aggregated_pkts"),
+       ("lro_flush_both_count"),
+       ("lro_out_of_sequence_pkts"),
+       ("lro_flush_due_to_max_pkts"),
+       ("lro_avg_aggr_pkts"),
 };
 
 #define S2IO_STAT_LEN sizeof(ethtool_stats_keys)/ ETH_GSTRING_LEN
@@ -317,6 +326,12 @@ static unsigned int indicate_max_pkts;
 static unsigned int rxsync_frequency = 3;
 /* Interrupt type. Values can be 0(INTA), 1(MSI), 2(MSI_X) */
 static unsigned int intr_type = 0;
+/* Large receive offload feature */
+static unsigned int lro = 0;
+/* Max pkts to be aggregated by LRO at one time. If not specified,
+ * aggregation happens until we hit max IP pkt size(64K)
+ */
+static unsigned int lro_max_pkts = 0xFFFF;
 
 /*
  * S2IO device table.
@@ -1476,6 +1491,19 @@ static int init_nic(struct s2io_nic *nic)
        writel((u32) (val64 >> 32), (add + 4));
        val64 = readq(&bar0->mac_cfg);
 
+       /* Enable FCS stripping by adapter */
+       add = &bar0->mac_cfg;
+       val64 = readq(&bar0->mac_cfg);
+       val64 |= MAC_CFG_RMAC_STRIP_FCS;
+       if (nic->device_type == XFRAME_II_DEVICE)
+               writeq(val64, &bar0->mac_cfg);
+       else {
+               writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+               writel((u32) (val64), add);
+               writeq(RMAC_CFG_KEY(0x4C0D), &bar0->rmac_cfg_key);
+               writel((u32) (val64 >> 32), (add + 4));
+       }
+
        /*
         * Set the time value to be inserted in the pause frame
         * generated by xena.
@@ -2127,7 +2155,7 @@ static void stop_nic(struct s2io_nic *nic)
        }
 }
 
-int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
+static int fill_rxd_3buf(nic_t *nic, RxD_t *rxdp, struct sk_buff *skb)
 {
        struct net_device *dev = nic->dev;
        struct sk_buff *frag_list;
@@ -2569,6 +2597,8 @@ static void rx_intr_handler(ring_info_t *ring_data)
 #ifndef CONFIG_S2IO_NAPI
        int pkt_cnt = 0;
 #endif
+       int i;
+
        spin_lock(&nic->rx_lock);
        if (atomic_read(&nic->card_state) == CARD_DOWN) {
                DBG_PRINT(INTR_DBG, "%s: %s going down for reset\n",
@@ -2661,6 +2691,18 @@ static void rx_intr_handler(ring_info_t *ring_data)
                        break;
 #endif
        }
+       if (nic->lro) {
+               /* Clear all LRO sessions before exiting */
+               for (i=0; i<MAX_LRO_SESSIONS; i++) {
+                       lro_t *lro = &nic->lro0_n[i];
+                       if (lro->in_use) {
+                               update_L3L4_header(nic, lro);
+                               queue_rx_frame(lro->parent);
+                               clear_lro_session(lro);
+                       }
+               }
+       }
+
        spin_unlock(&nic->rx_lock);
 }
 
@@ -2852,7 +2894,7 @@ static int wait_for_cmd_complete(nic_t * sp)
  *  void.
  */
 
-void s2io_reset(nic_t * sp)
+static void s2io_reset(nic_t * sp)
 {
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
        u64 val64;
@@ -2940,7 +2982,7 @@ void s2io_reset(nic_t * sp)
  *  SUCCESS on success and FAILURE on failure.
  */
 
-int s2io_set_swapper(nic_t * sp)
+static int s2io_set_swapper(nic_t * sp)
 {
        struct net_device *dev = sp->dev;
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
@@ -3089,7 +3131,7 @@ static int wait_for_msix_trans(nic_t *nic, int i)
        return ret;
 }
 
-void restore_xmsi_data(nic_t *nic)
+static void restore_xmsi_data(nic_t *nic)
 {
        XENA_dev_config_t __iomem *bar0 = nic->bar0;
        u64 val64;
@@ -3180,7 +3222,7 @@ int s2io_enable_msi(nic_t *nic)
        return 0;
 }
 
-int s2io_enable_msi_x(nic_t *nic)
+static int s2io_enable_msi_x(nic_t *nic)
 {
        XENA_dev_config_t __iomem *bar0 = nic->bar0;
        u64 tx_mat, rx_mat;
@@ -3668,23 +3710,32 @@ s2io_msi_handle(int irq, void *dev_id, struct pt_regs *regs)
         * else schedule a tasklet to reallocate the buffers.
         */
        for (i = 0; i < config->rx_ring_num; i++) {
-               int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-               int level = rx_buffer_level(sp, rxb_size, i);
-
-               if ((level == PANIC) && (!TASKLET_IN_USE)) {
-                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
-                       if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, " in ISR!!\n");
+               if (!sp->lro) {
+                       int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+                       int level = rx_buffer_level(sp, rxb_size, i);
+
+                       if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+                                                       dev->name);
+                               DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                               if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+                                       DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                 dev->name);
+                                       DBG_PRINT(ERR_DBG, " in ISR!!\n");
+                                       clear_bit(0, (&sp->tasklet_status));
+                                       atomic_dec(&sp->isr_cnt);
+                                       return IRQ_HANDLED;
+                               }
                                clear_bit(0, (&sp->tasklet_status));
-                               atomic_dec(&sp->isr_cnt);
-                               return IRQ_HANDLED;
+                       } else if (level == LOW) {
+                               tasklet_schedule(&sp->task);
                        }
-                       clear_bit(0, (&sp->tasklet_status));
-               } else if (level == LOW) {
-                       tasklet_schedule(&sp->task);
+               }
+               else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                       dev->name);
+                               DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+                               break;
                }
        }
 
@@ -3697,29 +3748,37 @@ s2io_msix_ring_handle(int irq, void *dev_id, struct pt_regs *regs)
 {
        ring_info_t *ring = (ring_info_t *)dev_id;
        nic_t *sp = ring->nic;
+       struct net_device *dev = (struct net_device *) dev_id;
        int rxb_size, level, rng_n;
 
        atomic_inc(&sp->isr_cnt);
        rx_intr_handler(ring);
 
        rng_n = ring->ring_no;
-       rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
-       level = rx_buffer_level(sp, rxb_size, rng_n);
-
-       if ((level == PANIC) && (!TASKLET_IN_USE)) {
-               int ret;
-               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
-               DBG_PRINT(INTR_DBG, "PANIC levels\n");
-               if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
-                       DBG_PRINT(ERR_DBG, "Out of memory in %s",
-                                 __FUNCTION__);
+       if (!sp->lro) {
+               rxb_size = atomic_read(&sp->rx_bufs_left[rng_n]);
+               level = rx_buffer_level(sp, rxb_size, rng_n);
+
+               if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                       int ret;
+                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", __FUNCTION__);
+                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                       if ((ret = fill_rx_buffers(sp, rng_n)) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "Out of memory in %s",
+                                         __FUNCTION__);
+                               clear_bit(0, (&sp->tasklet_status));
+                               return IRQ_HANDLED;
+                       }
                        clear_bit(0, (&sp->tasklet_status));
-                       return IRQ_HANDLED;
+               } else if (level == LOW) {
+                       tasklet_schedule(&sp->task);
                }
-               clear_bit(0, (&sp->tasklet_status));
-       } else if (level == LOW) {
-               tasklet_schedule(&sp->task);
        }
+       else if (fill_rx_buffers(sp, rng_n) == -ENOMEM) {
+                       DBG_PRINT(ERR_DBG, "%s:Out of memory", dev->name);
+                       DBG_PRINT(ERR_DBG, " in Rx Intr!!\n");
+       }
+
        atomic_dec(&sp->isr_cnt);
 
        return IRQ_HANDLED;
@@ -3875,24 +3934,33 @@ static irqreturn_t s2io_isr(int irq, void *dev_id, struct pt_regs *regs)
         */
 #ifndef CONFIG_S2IO_NAPI
        for (i = 0; i < config->rx_ring_num; i++) {
-               int ret;
-               int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
-               int level = rx_buffer_level(sp, rxb_size, i);
-
-               if ((level == PANIC) && (!TASKLET_IN_USE)) {
-                       DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", dev->name);
-                       DBG_PRINT(INTR_DBG, "PANIC levels\n");
-                       if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
-                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
-                                         dev->name);
-                               DBG_PRINT(ERR_DBG, " in ISR!!\n");
+               if (!sp->lro) {
+                       int ret;
+                       int rxb_size = atomic_read(&sp->rx_bufs_left[i]);
+                       int level = rx_buffer_level(sp, rxb_size, i);
+
+                       if ((level == PANIC) && (!TASKLET_IN_USE)) {
+                               DBG_PRINT(INTR_DBG, "%s: Rx BD hit ", 
+                                                       dev->name);
+                               DBG_PRINT(INTR_DBG, "PANIC levels\n");
+                               if ((ret = fill_rx_buffers(sp, i)) == -ENOMEM) {
+                                       DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                 dev->name);
+                                       DBG_PRINT(ERR_DBG, " in ISR!!\n");
+                                       clear_bit(0, (&sp->tasklet_status));
+                                       atomic_dec(&sp->isr_cnt);
+                                       return IRQ_HANDLED;
+                               }
                                clear_bit(0, (&sp->tasklet_status));
-                               atomic_dec(&sp->isr_cnt);
-                               return IRQ_HANDLED;
+                       } else if (level == LOW) {
+                               tasklet_schedule(&sp->task);
                        }
-                       clear_bit(0, (&sp->tasklet_status));
-               } else if (level == LOW) {
-                       tasklet_schedule(&sp->task);
+               }
+               else if (fill_rx_buffers(sp, i) == -ENOMEM) {
+                               DBG_PRINT(ERR_DBG, "%s:Out of memory",
+                                                       dev->name);
+                               DBG_PRINT(ERR_DBG, " in Rx intr!!\n");
+                               break;
                }
        }
 #endif
@@ -4128,7 +4196,7 @@ static void s2io_set_multicast(struct net_device *dev)
  *  as defined in errno.h file on failure.
  */
 
-int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
+static int s2io_set_mac_addr(struct net_device *dev, u8 * addr)
 {
        nic_t *sp = dev->priv;
        XENA_dev_config_t __iomem *bar0 = sp->bar0;
@@ -5043,6 +5111,7 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
        int i = 0;
        nic_t *sp = dev->priv;
        StatInfo_t *stat_info = sp->mac_control.stats_info;
+       u64 tmp;
 
        s2io_updt_stats(sp);
        tmp_stats[i++] =
@@ -5134,6 +5203,16 @@ static void s2io_get_ethtool_stats(struct net_device *dev,
        tmp_stats[i++] = 0;
        tmp_stats[i++] = stat_info->sw_stat.single_ecc_errs;
        tmp_stats[i++] = stat_info->sw_stat.double_ecc_errs;
+       tmp_stats[i++] = stat_info->sw_stat.clubbed_frms_cnt;
+       tmp_stats[i++] = stat_info->sw_stat.sending_both;
+       tmp_stats[i++] = stat_info->sw_stat.outof_sequence_pkts;
+       tmp_stats[i++] = stat_info->sw_stat.flush_max_pkts;
+       tmp = 0;
+       if (stat_info->sw_stat.num_aggregations) {
+               tmp = stat_info->sw_stat.sum_avg_pkts_aggregated;
+               do_div(tmp, stat_info->sw_stat.num_aggregations);
+       }
+       tmp_stats[i++] = tmp;
 }
 
 static int s2io_ethtool_get_regs_len(struct net_device *dev)
@@ -5515,6 +5594,14 @@ static int s2io_card_up(nic_t * sp)
        /* Setting its receive mode */
        s2io_set_multicast(dev);
 
+       if (sp->lro) {
+               /* Initialize max aggregatable pkts based on MTU */
+               sp->lro_max_aggr_per_sess = ((1<<16) - 1) / dev->mtu;
+               /* Check if we can use(if specified) user provided value */
+               if (lro_max_pkts < sp->lro_max_aggr_per_sess)
+                       sp->lro_max_aggr_per_sess = lro_max_pkts;
+       }
+
        /* Enable tasklet for the device */
        tasklet_init(&sp->task, s2io_tasklet, (unsigned long) dev);
 
@@ -5607,6 +5694,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
                ((unsigned long) rxdp->Host_Control);
        int ring_no = ring_data->ring_no;
        u16 l3_csum, l4_csum;
+       lro_t *lro;
 
        skb->dev = dev;
        if (rxdp->Control_1 & RXD_T_CODE) {
@@ -5655,7 +5743,8 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
                        skb_put(skb, buf2_len);
        }
 
-       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) &&
+       if ((rxdp->Control_1 & TCP_OR_UDP_FRAME) && ((!sp->lro) ||
+           (sp->lro && (!(rxdp->Control_1 & RXD_FRAME_IP_FRAG)))) &&
            (sp->rx_csum)) {
                l3_csum = RXD_GET_L3_CKSUM(rxdp->Control_1);
                l4_csum = RXD_GET_L4_CKSUM(rxdp->Control_1);
@@ -5666,6 +5755,54 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
                         * a flag in the RxD.
                         */
                        skb->ip_summed = CHECKSUM_UNNECESSARY;
+                       if (sp->lro) {
+                               u32 tcp_len;
+                               u8 *tcp;
+                               int ret = 0;
+
+                               ret = s2io_club_tcp_session(skb->data, &tcp,
+                                               &tcp_len, &lro, rxdp, sp);
+                               switch (ret) {
+                                       case 3: /* Begin anew */
+                                               lro->parent = skb;
+                                               goto aggregate;
+                                       case 1: /* Aggregate */
+                                       {
+                                               lro_append_pkt(sp, lro,
+                                                       skb, tcp_len);
+                                               goto aggregate;
+                                       }
+                                       case 4: /* Flush session */
+                                       {
+                                               lro_append_pkt(sp, lro,
+                                                       skb, tcp_len);
+                                               queue_rx_frame(lro->parent);
+                                               clear_lro_session(lro);
+                                               sp->mac_control.stats_info->
+                                                   sw_stat.flush_max_pkts++;
+                                               goto aggregate;
+                                       }
+                                       case 2: /* Flush both */
+                                               lro->parent->data_len =
+                                                       lro->frags_len;
+                                               sp->mac_control.stats_info->
+                                                    sw_stat.sending_both++;
+                                               queue_rx_frame(lro->parent);
+                                               clear_lro_session(lro);
+                                               goto send_up;
+                                       case 0: /* sessions exceeded */
+                                       case 5: /*
+                                                * First pkt in session not
+                                                * L3/L4 aggregatable
+                                                */
+                                               break;
+                                       default:
+                                               DBG_PRINT(ERR_DBG,
+                                                       "%s: Samadhana!!\n",
+                                                        __FUNCTION__);
+                                               BUG();
+                               }
+                       }
                } else {
                        /*
                         * Packet with erroneous checksum, let the
@@ -5677,25 +5814,31 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
                skb->ip_summed = CHECKSUM_NONE;
        }
 
-       skb->protocol = eth_type_trans(skb, dev);
+       if (!sp->lro) {
+               skb->protocol = eth_type_trans(skb, dev);
 #ifdef CONFIG_S2IO_NAPI
-       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-               /* Queueing the vlan frame to the upper layer */
-               vlan_hwaccel_receive_skb(skb, sp->vlgrp,
-                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-       } else {
-               netif_receive_skb(skb);
-       }
+               if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+                       /* Queueing the vlan frame to the upper layer */
+                       vlan_hwaccel_receive_skb(skb, sp->vlgrp,
+                               RXD_GET_VLAN_TAG(rxdp->Control_2));
+               } else {
+                       netif_receive_skb(skb);
+               }
 #else
-       if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
-               /* Queueing the vlan frame to the upper layer */
-               vlan_hwaccel_rx(skb, sp->vlgrp,
-                       RXD_GET_VLAN_TAG(rxdp->Control_2));
-       } else {
-               netif_rx(skb);
-       }
+               if (sp->vlgrp && RXD_GET_VLAN_TAG(rxdp->Control_2)) {
+                       /* Queueing the vlan frame to the upper layer */
+                       vlan_hwaccel_rx(skb, sp->vlgrp,
+                               RXD_GET_VLAN_TAG(rxdp->Control_2));
+               } else {
+                       netif_rx(skb);
+               }
 #endif
+       } else {
+send_up:
+               queue_rx_frame(skb);
+       }               
        dev->last_rx = jiffies;
+aggregate:
        atomic_dec(&sp->rx_bufs_left[ring_no]);
        return SUCCESS;
 }
@@ -5713,7 +5856,7 @@ static int rx_osm_handler(ring_info_t *ring_data, RxD_t * rxdp)
  *  void.
  */
 
-void s2io_link(nic_t * sp, int link)
+static void s2io_link(nic_t * sp, int link)
 {
        struct net_device *dev = (struct net_device *) sp->dev;
 
@@ -5738,7 +5881,7 @@ void s2io_link(nic_t * sp, int link)
  *  returns the revision ID of the device.
  */
 
-int get_xena_rev_id(struct pci_dev *pdev)
+static int get_xena_rev_id(struct pci_dev *pdev)
 {
        u8 id = 0;
        int ret;
@@ -5807,6 +5950,8 @@ module_param(indicate_max_pkts, int, 0);
 #endif
 module_param(rxsync_frequency, int, 0);
 module_param(intr_type, int, 0);
+module_param(lro, int, 0);
+module_param(lro_max_pkts, int, 0);
 
 /**
  *  s2io_init_nic - Initialization of the adapter .
@@ -5938,6 +6083,7 @@ Defaulting to INTA\n");
        else
                sp->device_type = XFRAME_I_DEVICE;
 
+       sp->lro = lro;
                
        /* Initialize some PCI/PCI-X fields of the NIC. */
        s2io_init_pci(sp);
@@ -6241,6 +6387,10 @@ Defaulting to INTA\n");
                DBG_PRINT(ERR_DBG, "%s: 3-Buffer mode support has been "
                          "enabled\n",dev->name);
 
+       if (sp->lro)
+               DBG_PRINT(ERR_DBG, "%s: Large receive offload enabled\n",
+                       dev->name);
+
        /* Initialize device name */
        strcpy(sp->name, dev->name);
        if (sp->device_type & XFRAME_II_DEVICE)
@@ -6343,7 +6493,7 @@ int __init s2io_starter(void)
  * Description: This function is the cleanup routine for the driver. It unregist * ers the driver.
  */
 
-void s2io_closer(void)
+static void s2io_closer(void)
 {
        pci_unregister_driver(&s2io_driver);
        DBG_PRINT(INIT_DBG, "cleanup done\n");
@@ -6351,3 +6501,318 @@ void s2io_closer(void)
 
 module_init(s2io_starter);
 module_exit(s2io_closer);
+
+/*
+ * check_L2_lro_capable - locate the IP and TCP headers of a received frame.
+ * @buffer: start of the received frame (L2 header)
+ * @ip:     set to the IP header on success
+ * @tcp:    set to the TCP header on success
+ * @rxdp:   Rx descriptor; its Control_1 word describes the frame
+ *
+ * Returns 0 if the descriptor marks the frame as TCP and its L2 type is
+ * DIX (with or without VLAN), -1 otherwise. *ip and *tcp are written only
+ * when 0 is returned.
+ */
+static int check_L2_lro_capable(u8 *buffer, struct iphdr **ip,
+               struct tcphdr **tcp, RxD_t *rxdp)
+{
+       struct iphdr *iph;
+       u8 frame_type, ip_hdr_bytes;
+
+       if (!(rxdp->Control_1 & RXD_FRAME_PROTO_TCP)) {
+               DBG_PRINT(INIT_DBG,"%s: Non-TCP frames not supported for LRO\n",
+                         __FUNCTION__);
+               return -1;
+       }
+
+       /* L2 encapsulation type: the three bits starting at bit 37 */
+       frame_type = (u8)((rxdp->Control_1 >> 37) & 0x7);
+
+       /*
+        * Only DIX (0) and DIX with VLAN (4) are mergeable; LLC, SNAP etc
+        * are not.
+        *
+        * TODO:
+        * By default the VLAN field in the MAC is stripped by the card, if
+        * this feature is turned off in rx_pa_cfg register, then the IP
+        * offset has to be shifted by a further 2 bytes
+        */
+       if (frame_type != 0 && frame_type != 4)
+               return -1;
+
+       iph = (struct iphdr *)(buffer + HEADER_ETHERNET_II_802_3_SIZE);
+       /* ihl counts 32-bit words; convert to bytes */
+       ip_hdr_bytes = (u8)(iph->ihl << 2);
+
+       *ip = iph;
+       *tcp = (struct tcphdr *)((u8 *)iph + ip_hdr_bytes);
+
+       return 0;
+}
+
+/*
+ * check_for_socket_match - does this packet belong to the given session?
+ * @lro: LRO session whose stored headers are compared against
+ * @ip:  IP header of the incoming packet
+ * @tcp: TCP header of the incoming packet
+ *
+ * Returns 0 when source/destination address and source/destination port
+ * all equal those recorded in the session, -1 otherwise.
+ */
+static int check_for_socket_match(lro_t *lro, struct iphdr *ip,
+                                 struct tcphdr *tcp)
+{
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+       if (lro->iph->saddr == ip->saddr &&
+           lro->iph->daddr == ip->daddr &&
+           lro->tcph->source == tcp->source &&
+           lro->tcph->dest == tcp->dest)
+               return 0;
+
+       return -1;
+}
+
+/*
+ * get_l4_pyld_length - TCP payload size of a packet, in bytes.
+ *
+ * Computed as the IP total length minus the IP and TCP header lengths
+ * (ihl and doff are expressed in 32-bit words).
+ */
+static inline int get_l4_pyld_length(struct iphdr *ip, struct tcphdr *tcp)
+{
+       int ip_hdr_bytes = ip->ihl << 2;
+       int tcp_hdr_bytes = tcp->doff << 2;
+
+       return ntohs(ip->tot_len) - ip_hdr_bytes - tcp_hdr_bytes;
+}
+
+/*
+ * initiate_new_session - seed an LRO session from its first segment.
+ * @lro:          free session object to fill in
+ * @l2h:          start of the frame (L2 header)
+ * @ip:           IP header of the first segment
+ * @tcp:          TCP header of the first segment
+ * @tcp_pyld_len: TCP payload length of the first segment, in bytes
+ *
+ * The session keeps pointers into the first packet's headers; they are
+ * rewritten by update_L3L4_header() when the session is flushed.
+ */
+static void initiate_new_session(lro_t *lro, u8 *l2h,
+                    struct iphdr *ip, struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+       lro->l2h = l2h;
+       lro->iph = ip;
+       lro->tcph = tcp;
+       /* Next expected sequence number, kept in host order for comparison */
+       lro->tcp_next_seq = tcp_pyld_len + ntohl(tcp->seq);
+       /*
+        * ack_seq and window are kept exactly as found in the header:
+        * aggregate_new_rx() stores them unconverted and
+        * update_L3L4_header() copies them back verbatim. Converting here
+        * would byte-swap the ACK, and leaving the window unset would
+        * advertise a zero window, whenever a session is flushed after
+        * only one packet.
+        */
+       lro->tcp_ack = tcp->ack_seq;
+       lro->window = tcp->window;
+       lro->sg_num = 1;
+       lro->total_len = ntohs(ip->tot_len);
+       lro->frags_len = 0;
+       /*
+        * check if we saw TCP timestamp. Other consistency checks have
+        * already been done.
+        */
+       if (tcp->doff == 8) {
+               u32 *ptr;
+               /* 32-bit words following the fixed TCP header */
+               ptr = (u32 *)(tcp+1);
+               lro->saw_ts = 1;
+               lro->cur_tsval = *(ptr+1);
+               lro->cur_tsecr = *(ptr+2);
+       }
+       lro->in_use = 1;
+}
+
+/*
+ * update_L3L4_header - write accumulated session state into the headers.
+ * @sp:  adapter private data (for the software statistics)
+ * @lro: session about to be flushed
+ *
+ * Rewrites the IP total length and checksum and the TCP ack, window and
+ * (when the session uses timestamps) the echo-reply field of the headers
+ * the session points at, then accounts the flush in the averaging
+ * counters.
+ */
+static void update_L3L4_header(nic_t *sp, lro_t *lro)
+{
+       StatInfo_t *stats = sp->mac_control.stats_info;
+       struct iphdr *iph = lro->iph;
+       struct tcphdr *tcph = lro->tcph;
+
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+       /* L3: new total length; checksum recomputed over a zeroed field */
+       iph->tot_len = htons(lro->total_len);
+       iph->check = 0;
+       iph->check = ip_fast_csum((u8 *)iph, iph->ihl);
+
+       /* L4: latest ack and window recorded for the session */
+       tcph->ack_seq = lro->tcp_ack;
+       tcph->window = lro->window;
+
+       /* Timestamp sessions: refresh tsecr, the third word past the header */
+       if (lro->saw_ts) {
+               u32 *ts_words = (u32 *)(tcph + 1);
+
+               ts_words[2] = lro->cur_tsecr;
+       }
+
+       /* Feed the "average packets per aggregation" statistic */
+       stats->sw_stat.sum_avg_pkts_aggregated += lro->sg_num;
+       stats->sw_stat.num_aggregations++;
+}
+
+/*
+ * aggregate_new_rx - account one more in-order segment in a session.
+ * @lro:     session the segment belongs to
+ * @ip:      IP header of the segment (not used here)
+ * @tcp:     TCP header of the segment
+ * @l4_pyld: TCP payload length of the segment, in bytes
+ *
+ * Only the bookkeeping is updated; the skb itself is chained separately.
+ */
+static void aggregate_new_rx(lro_t *lro, struct iphdr *ip,
+               struct tcphdr *tcp, u32 l4_pyld)
+{
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+       /* One more segment, l4_pyld more bytes */
+       lro->sg_num++;
+       lro->tcp_next_seq += l4_pyld;
+       lro->frags_len += l4_pyld;
+       lro->total_len += l4_pyld;
+
+       /* Remember the ack number and window carried by this packet */
+       lro->window = tcp->window;
+       lro->tcp_ack = tcp->ack_seq;
+
+       if (lro->saw_ts) {
+               /* Take tsval and tsecr from this packet as well */
+               u32 *ts_words = (u32 *)(tcp + 1);
+
+               lro->cur_tsecr = ts_words[2];
+               lro->cur_tsval = ts_words[1];
+       }
+}
+
+/*
+ * verify_l3_l4_lro_capable - decide whether a segment may be aggregated.
+ * @l_lro:        session the segment would join, or NULL when the segment
+ *                would start a new session
+ * @ip:           IP header of the segment
+ * @tcp:          TCP header of the segment
+ * @tcp_pyld_len: TCP payload length of the segment, in bytes
+ *
+ * Returns 0 when the segment carries data, has no IP options, has only
+ * the ACK flag set and has either no TCP options or exactly a timestamp
+ * option (NOP padded); returns -1 otherwise.
+ */
+static int verify_l3_l4_lro_capable(lro_t *l_lro, struct iphdr *ip,
+                                   struct tcphdr *tcp, u32 tcp_pyld_len)
+{
+       u8 *ptr, *opt_end;
+
+       DBG_PRINT(INFO_DBG,"%s: Been here...\n", __FUNCTION__);
+
+       if (!tcp_pyld_len) {
+               /* Runt frame or a pure ack */
+               return -1;
+       }
+
+       if (ip->ihl != 5) /* IP has options */
+               return -1;
+
+       if (tcp->urg || tcp->psh || tcp->rst || tcp->syn || tcp->fin ||
+                                                               !tcp->ack) {
+               /*
+                * Currently recognize only the ack control word and
+                * any other control field being set would result in
+                * flushing the LRO session
+                */
+               return -1;
+       }
+
+       /*
+        * Allow only one TCP timestamp option. Don't aggregate if
+        * any other options are detected.
+        */
+       if (tcp->doff != 5 && tcp->doff != 8)
+               return -1;
+
+       if (tcp->doff == 8) {
+               ptr = (u8 *)(tcp + 1);
+               /* End of the option area: (doff - 5) 32-bit words */
+               opt_end = ptr + ((tcp->doff - 5) << 2);
+
+               /* Skip NOP padding, but never run past the option area */
+               while (ptr < opt_end && *ptr == TCPOPT_NOP)
+                       ptr++;
+               /* The whole timestamp option must lie inside the header */
+               if (ptr + TCPOLEN_TIMESTAMP > opt_end)
+                       return -1;
+               if (*ptr != TCPOPT_TIMESTAMP || *(ptr+1) != TCPOLEN_TIMESTAMP)
+                       return -1;
+
+               /*
+                * Ensure timestamp value increases monotonically. The
+                * session stores cur_tsval exactly as read from the
+                * header, so both sides are converted to host order
+                * before they are compared as numbers.
+                */
+               if (l_lro)
+                       if (ntohl(l_lro->cur_tsval) >
+                           ntohl(*((u32 *)(ptr+2))))
+                               return -1;
+
+               /* timestamp echo reply should be non-zero */
+               if (*((u32 *)(ptr+6)) == 0)
+                       return -1;
+       }
+
+       return 0;
+}
+
+/*
+ * s2io_club_tcp_session - classify a received frame for LRO.
+ * @buffer:  start of the received frame (L2 header)
+ * @tcp:     set to the frame's TCP header when the frame is LRO capable
+ * @tcp_len: set to the frame's TCP payload length
+ * @lro:     set to the session the frame was matched with or assigned to
+ * @rxdp:    Rx descriptor of the frame
+ * @sp:      adapter private data
+ *
+ * Return value tells the caller what to do with the frame:
+ *   0 - no free session; send the frame up on its own (*lro is NULL)
+ *   1 - frame was accounted in *lro; append it to the session
+ *   2 - frame cannot join its session; flush the session, then send the
+ *       frame up on its own
+ *   3 - a new session was started with this frame
+ *   4 - frame was accounted in *lro and the session reached its maximum
+ *       size; append the frame and flush the session
+ *   5 - frame is not aggregatable and matches no session; send it up on
+ *       its own
+ */
+static int
+s2io_club_tcp_session(u8 *buffer, u8 **tcp, u32 *tcp_len, lro_t **lro,
+                     RxD_t *rxdp, nic_t *sp)
+{
+       struct iphdr *ip;
+       struct tcphdr *tcph;
+       int ret = 0, i;
+
+       if (check_L2_lro_capable(buffer, &ip, (struct tcphdr **)tcp, rxdp)) {
+               /*
+                * Not TCP, or an L2 encapsulation that is not merged.
+                * Report it as "not aggregatable" (5) rather than -1:
+                * the switch in rx_osm_handler() has no case for -1 and
+                * would hit BUG(), e.g. for every UDP frame.
+                */
+               return 5;
+       }
+       DBG_PRINT(INFO_DBG,"IP Saddr: %x Daddr: %x\n",
+                 ip->saddr, ip->daddr);
+
+       tcph = (struct tcphdr *)*tcp;
+       *tcp_len = get_l4_pyld_length(ip, tcph);
+
+       /* Look for an active session with the same address/port pairs */
+       for (i=0; i<MAX_LRO_SESSIONS; i++) {
+               lro_t *l_lro = &sp->lro0_n[i];
+               if (l_lro->in_use) {
+                       if (check_for_socket_match(l_lro, ip, tcph))
+                               continue;
+                       /* Sock pair matched */
+                       *lro = l_lro;
+
+                       if ((*lro)->tcp_next_seq != ntohl(tcph->seq)) {
+                               DBG_PRINT(INFO_DBG, "%s:Out of order. expected "
+                                         "0x%x, actual 0x%x\n", __FUNCTION__,
+                                         (*lro)->tcp_next_seq,
+                                         ntohl(tcph->seq));
+
+                               sp->mac_control.stats_info->
+                                  sw_stat.outof_sequence_pkts++;
+                               ret = 2;
+                               break;
+                       }
+
+                       if (!verify_l3_l4_lro_capable(l_lro, ip, tcph,*tcp_len))
+                               ret = 1; /* Aggregate */
+                       else
+                               ret = 2; /* Flush both */
+                       break;
+               }
+       }
+
+       if (ret == 0) {
+               /* Before searching for available LRO objects,
+                * check if the pkt is L3/L4 aggregatable. If not
+                * don't create new LRO session. Just send this
+                * packet up.
+                */
+               if (verify_l3_l4_lro_capable(NULL, ip, tcph, *tcp_len)) {
+                       return 5;
+               }
+
+               /* Claim the first unused session object */
+               for (i=0; i<MAX_LRO_SESSIONS; i++) {
+                       lro_t *l_lro = &sp->lro0_n[i];
+                       if (!(l_lro->in_use)) {
+                               *lro = l_lro;
+                               ret = 3; /* Begin anew */
+                               break;
+                       }
+               }
+       }
+
+       if (ret == 0) { /* sessions exceeded */
+               DBG_PRINT(INFO_DBG,"%s:All LRO sessions already in use\n",
+                         __FUNCTION__);
+               *lro = NULL;
+               return ret;
+       }
+
+       switch (ret) {
+               case 3:
+                       initiate_new_session(*lro, buffer, ip, tcph, *tcp_len);
+                       break;
+               case 2:
+                       /* Session is about to be flushed: finalize headers */
+                       update_L3L4_header(sp, *lro);
+                       break;
+               case 1:
+                       aggregate_new_rx(*lro, ip, tcph, *tcp_len);
+                       if ((*lro)->sg_num == sp->lro_max_aggr_per_sess) {
+                               update_L3L4_header(sp, *lro);
+                               ret = 4; /* Flush the LRO */
+                       }
+                       break;
+               default:
+                       DBG_PRINT(ERR_DBG,"%s:Dont know, can't say!!\n",
+                               __FUNCTION__);
+                       break;
+       }
+
+       return ret;
+}
+
+/*
+ * clear_lro_session - return a session object to the unused state.
+ * @lro: session to reset
+ *
+ * Zeroes the whole object, which also clears its in_use flag.
+ */
+static void clear_lro_session(lro_t *lro)
+{
+       /* Size taken from the object itself: no cached, narrowed copy */
+       memset(lro, 0, sizeof(*lro));
+}
+
+/*
+ * queue_rx_frame - hand a received skb to the network stack.
+ * @skb: frame to deliver; skb->dev must already be set
+ *
+ * Sets skb->protocol from the frame's L2 header and passes the skb up via
+ * netif_receive_skb() in NAPI builds, netif_rx() otherwise.
+ */
+static void queue_rx_frame(struct sk_buff *skb)
+{
+       skb->protocol = eth_type_trans(skb, skb->dev);
+
+#ifndef CONFIG_S2IO_NAPI
+       netif_rx(skb);
+#else
+       netif_receive_skb(skb);
+#endif
+}
+
+/*
+ * lro_append_pkt - chain a segment onto the session's parent skb.
+ * @sp:      adapter private data (for the software statistics)
+ * @lro:     session the segment belongs to
+ * @skb:     the segment's skb
+ * @tcp_len: TCP payload length of the segment, in bytes
+ *
+ * The segment's headers are pulled off so that only its payload remains,
+ * and the skb is linked at the tail of the parent's frag_list.
+ * lro->frags_len is copied as-is into the parent's data_len; in the
+ * visible call path aggregate_new_rx() has already added this segment's
+ * payload to it.
+ */
+static void lro_append_pkt(nic_t *sp, lro_t *lro, struct sk_buff *skb,
+                          u32 tcp_len)
+{
+       struct sk_buff *tmp, *first = lro->parent;
+
+       first->len += tcp_len;
+       first->data_len = lro->frags_len;
+       /*
+        * The parent now owns the memory of the chained skb as well, so
+        * its truesize has to grow with it for memory accounting.
+        */
+       first->truesize += skb->truesize;
+
+       /* Strip everything in front of the TCP payload */
+       skb_pull(skb, (skb->len - tcp_len));
+
+       /* Link at the tail of the parent's fragment list */
+       if ((tmp = skb_shinfo(first)->frag_list)) {
+               while (tmp->next)
+                       tmp = tmp->next;
+               tmp->next = skb;
+       }
+       else
+               skb_shinfo(first)->frag_list = skb;
+       sp->mac_control.stats_info->sw_stat.clubbed_frms_cnt++;
+}