Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma
author Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2017 19:27:49 +0000 (11:27 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 23 Feb 2017 19:27:49 +0000 (11:27 -0800)
Pull Mellanox rdma updates from Doug Ledford:
 "Mellanox specific updates for 4.11 merge window

  Because the Mellanox code required being based on a net-next tree, I
  kept it separate from the remainder of the RDMA stack submission that
  is based on 4.10-rc3.

  This branch contains:

   - Various mlx4 and mlx5 fixes and minor changes

   - Support for adding a tag match rule to flow specs

   - Support for cvlan offload operation for raw ethernet QPs

   - A change to the core IB code to recognize raw eth capabilities and
     enumerate them (touches non-Mellanox code)

   - Implicit On-Demand Paging memory registration support"
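For orientation on the flow-tag item above, here is a minimal, hedged sketch of
how a kernel consumer might append the new action-tag spec to the ib_flow_attr
buffer handed to ib_create_flow(). The struct ib_flow_spec_action_tag type,
IB_FLOW_SPEC_ACTION_TAG value, and the BIT(24) limit come from the diffs below;
the helper name and the surrounding flow_attr/num_of_specs setup are assumed
and omitted here.

	#include <rdma/ib_verbs.h>

	/*
	 * Hypothetical helper: write a flow-tag action spec at the current
	 * position in the spec buffer and return the position of the next
	 * spec. Field layout mirrors the other flow specs (type/size + payload).
	 */
	static void *add_flow_tag_spec(void *cur_spec, u32 tag_id)
	{
		struct ib_flow_spec_action_tag *tag = cur_spec;

		tag->type   = IB_FLOW_SPEC_ACTION_TAG;
		tag->size   = sizeof(*tag);
		tag->tag_id = tag_id;	/* mlx5 rejects values >= BIT(24) */

		return (u8 *)cur_spec + tag->size;
	}

Note that, per the mlx5 changes below, a non-default flow tag is also rejected
on leftover (ALL_DEFAULT/MC_DEFAULT) flow attribute types.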

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/dledford/rdma: (40 commits)
  IB/mlx5: Fix configuration of port capabilities
  IB/mlx4: Take source GID by index from HW GID table
  IB/mlx5: Fix blue flame buffer size calculation
  IB/mlx4: Remove unused variable from function declaration
  IB: Query ports via the core instead of direct into the driver
  IB: Add protocol for USNIC
  IB/mlx4: Support raw packet protocol
  IB/mlx5: Support raw packet protocol
  IB/core: Add raw packet protocol
  IB/mlx5: Add implicit MR support
  IB/mlx5: Expose MR cache for mlx5_ib
  IB/mlx5: Add null_mkey access
  IB/umem: Indicate that process is being terminated
  IB/umem: Update on demand page (ODP) support
  IB/core: Add implicit MR flag
  IB/mlx5: Support creation of a WQ with scatter FCS offload
  IB/mlx5: Enable QP creation with cvlan offload
  IB/mlx5: Enable WQ creation and modification with cvlan offload
  IB/mlx5: Expose vlan offloads capabilities
  IB/uverbs: Enable QP creation with cvlan offload
  ...

42 files changed:
drivers/infiniband/core/umem.c
drivers/infiniband/core/umem_odp.c
drivers/infiniband/core/umem_rbtree.c
drivers/infiniband/core/uverbs.h
drivers/infiniband/core/uverbs_cmd.c
drivers/infiniband/hw/cxgb3/iwch_provider.c
drivers/infiniband/hw/cxgb4/provider.c
drivers/infiniband/hw/hfi1/verbs.c
drivers/infiniband/hw/hns/hns_roce_main.c
drivers/infiniband/hw/i40iw/i40iw_verbs.c
drivers/infiniband/hw/mlx4/alias_GUID.c
drivers/infiniband/hw/mlx4/main.c
drivers/infiniband/hw/mlx4/qp.c
drivers/infiniband/hw/mlx4/sysfs.c
drivers/infiniband/hw/mlx5/Makefile
drivers/infiniband/hw/mlx5/cmd.c [new file with mode: 0644]
drivers/infiniband/hw/mlx5/cmd.h [new file with mode: 0644]
drivers/infiniband/hw/mlx5/mad.c
drivers/infiniband/hw/mlx5/main.c
drivers/infiniband/hw/mlx5/mlx5_ib.h
drivers/infiniband/hw/mlx5/mr.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/mlx5/srq.c
drivers/infiniband/hw/mthca/mthca_provider.c
drivers/infiniband/hw/nes/nes_verbs.c
drivers/infiniband/hw/ocrdma/ocrdma_main.c
drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/hw/qib/qib_verbs.c
drivers/infiniband/hw/usnic/usnic_ib_main.c
drivers/infiniband/hw/usnic/usnic_ib_verbs.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_main.c
drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
drivers/infiniband/sw/rdmavt/vt.c
drivers/infiniband/sw/rxe/rxe_verbs.c
drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
include/linux/mlx5/driver.h
include/linux/mlx5/mlx5_ifc.h
include/rdma/ib_umem_odp.h
include/rdma/ib_verbs.h
include/uapi/rdma/ib_user_verbs.h

index 4609b921f899c9d7481b86825f18fe076a6f732c..446b56a5260b73f1994355403411d3c971a06e25 100644 (file)
@@ -99,9 +99,6 @@ struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
        if (dmasync)
                dma_attrs |= DMA_ATTR_WRITE_BARRIER;
 
-       if (!size)
-               return ERR_PTR(-EINVAL);
-
        /*
         * If the combination of the addr and size requested for this memory
         * region causes an integer overflow, return error.
index 6b079a31dceda1e56a6a9ee5112539ade36dcd08..f2fc0431512defe20e393c679a59b54916c3df47 100644 (file)
@@ -239,6 +239,71 @@ static const struct mmu_notifier_ops ib_umem_notifiers = {
        .invalidate_range_end       = ib_umem_notifier_invalidate_range_end,
 };
 
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+                                 unsigned long addr,
+                                 size_t size)
+{
+       struct ib_umem *umem;
+       struct ib_umem_odp *odp_data;
+       int pages = size >> PAGE_SHIFT;
+       int ret;
+
+       umem = kzalloc(sizeof(*umem), GFP_KERNEL);
+       if (!umem)
+               return ERR_PTR(-ENOMEM);
+
+       umem->context   = context;
+       umem->length    = size;
+       umem->address   = addr;
+       umem->page_size = PAGE_SIZE;
+       umem->writable  = 1;
+
+       odp_data = kzalloc(sizeof(*odp_data), GFP_KERNEL);
+       if (!odp_data) {
+               ret = -ENOMEM;
+               goto out_umem;
+       }
+       odp_data->umem = umem;
+
+       mutex_init(&odp_data->umem_mutex);
+       init_completion(&odp_data->notifier_completion);
+
+       odp_data->page_list = vzalloc(pages * sizeof(*odp_data->page_list));
+       if (!odp_data->page_list) {
+               ret = -ENOMEM;
+               goto out_odp_data;
+       }
+
+       odp_data->dma_list = vzalloc(pages * sizeof(*odp_data->dma_list));
+       if (!odp_data->dma_list) {
+               ret = -ENOMEM;
+               goto out_page_list;
+       }
+
+       down_write(&context->umem_rwsem);
+       context->odp_mrs_count++;
+       rbt_ib_umem_insert(&odp_data->interval_tree, &context->umem_tree);
+       if (likely(!atomic_read(&context->notifier_count)))
+               odp_data->mn_counters_active = true;
+       else
+               list_add(&odp_data->no_private_counters,
+                        &context->no_private_counters);
+       up_write(&context->umem_rwsem);
+
+       umem->odp_data = odp_data;
+
+       return umem;
+
+out_page_list:
+       vfree(odp_data->page_list);
+out_odp_data:
+       kfree(odp_data);
+out_umem:
+       kfree(umem);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(ib_alloc_odp_umem);
+
 int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
 {
        int ret_val;
@@ -270,18 +335,20 @@ int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem)
 
        init_completion(&umem->odp_data->notifier_completion);
 
-       umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
+       if (ib_umem_num_pages(umem)) {
+               umem->odp_data->page_list = vzalloc(ib_umem_num_pages(umem) *
                                            sizeof(*umem->odp_data->page_list));
-       if (!umem->odp_data->page_list) {
-               ret_val = -ENOMEM;
-               goto out_odp_data;
-       }
+               if (!umem->odp_data->page_list) {
+                       ret_val = -ENOMEM;
+                       goto out_odp_data;
+               }
 
-       umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
+               umem->odp_data->dma_list = vzalloc(ib_umem_num_pages(umem) *
                                          sizeof(*umem->odp_data->dma_list));
-       if (!umem->odp_data->dma_list) {
-               ret_val = -ENOMEM;
-               goto out_page_list;
+               if (!umem->odp_data->dma_list) {
+                       ret_val = -ENOMEM;
+                       goto out_page_list;
+               }
        }
 
        /*
@@ -466,6 +533,7 @@ static int ib_umem_odp_map_dma_single_page(
                }
                umem->odp_data->dma_list[page_index] = dma_addr | access_mask;
                umem->odp_data->page_list[page_index] = page;
+               umem->npages++;
                stored_page = 1;
        } else if (umem->odp_data->page_list[page_index] == page) {
                umem->odp_data->dma_list[page_index] |= access_mask;
@@ -505,7 +573,8 @@ out:
  * for failure.
  * An -EAGAIN error code is returned when a concurrent mmu notifier prevents
  * the function from completing its task.
- *
+ * An -ENOENT error code indicates that userspace process is being terminated
+ * and mm was already destroyed.
  * @umem: the umem to map and pin
  * @user_virt: the address from which we need to map.
  * @bcnt: the minimal number of bytes to pin and map. The mapping might be
@@ -553,7 +622,7 @@ int ib_umem_odp_map_dma_pages(struct ib_umem *umem, u64 user_virt, u64 bcnt,
 
        owning_mm = get_task_mm(owning_process);
        if (owning_mm == NULL) {
-               ret = -EINVAL;
+               ret = -ENOENT;
                goto out_put_task;
        }
 
@@ -665,6 +734,7 @@ void ib_umem_odp_unmap_dma_pages(struct ib_umem *umem, u64 virt,
                                put_page(page);
                        umem->odp_data->page_list[idx] = NULL;
                        umem->odp_data->dma_list[idx] = 0;
+                       umem->npages--;
                }
        }
        mutex_unlock(&umem->odp_data->umem_mutex);
index 727d788448f52e842c2108c60dc09c53876fb98e..d176597b4d78d1efad8bdd6b66e79c142fa9ced7 100644 (file)
@@ -78,17 +78,32 @@ int rbt_ib_umem_for_each_in_range(struct rb_root *root,
                                  void *cookie)
 {
        int ret_val = 0;
-       struct umem_odp_node *node;
+       struct umem_odp_node *node, *next;
        struct ib_umem_odp *umem;
 
        if (unlikely(start == last))
                return ret_val;
 
-       for (node = rbt_ib_umem_iter_first(root, start, last - 1); node;
-                       node = rbt_ib_umem_iter_next(node, start, last - 1)) {
+       for (node = rbt_ib_umem_iter_first(root, start, last - 1);
+                       node; node = next) {
+               next = rbt_ib_umem_iter_next(node, start, last - 1);
                umem = container_of(node, struct ib_umem_odp, interval_tree);
                ret_val = cb(umem->umem, start, last, cookie) || ret_val;
        }
 
        return ret_val;
 }
+EXPORT_SYMBOL(rbt_ib_umem_for_each_in_range);
+
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+                                      u64 addr, u64 length)
+{
+       struct umem_odp_node *node;
+
+       node = rbt_ib_umem_iter_first(root, addr, addr + length - 1);
+       if (node)
+               return container_of(node, struct ib_umem_odp, interval_tree);
+       return NULL;
+
+}
+EXPORT_SYMBOL(rbt_ib_umem_lookup);
index 455034ac994e680c2825eb9d0a07a636fb5cb1f0..e1bedf0bac043d0335aaee5cdc5185d17574880a 100644 (file)
@@ -228,6 +228,7 @@ struct ib_uverbs_flow_spec {
                struct ib_uverbs_flow_spec_ipv4    ipv4;
                struct ib_uverbs_flow_spec_tcp_udp tcp_udp;
                struct ib_uverbs_flow_spec_ipv6    ipv6;
+               struct ib_uverbs_flow_spec_action_tag   flow_tag;
        };
 };
 
index 70078220348383eec62b9b6252a5607f9255e9fa..b4b395a054acd9b3e668a147df26b1d699c521d5 100644 (file)
@@ -1891,7 +1891,8 @@ static int create_qp(struct ib_uverbs_file *file,
                                IB_QP_CREATE_CROSS_CHANNEL |
                                IB_QP_CREATE_MANAGED_SEND |
                                IB_QP_CREATE_MANAGED_RECV |
-                               IB_QP_CREATE_SCATTER_FCS)) {
+                               IB_QP_CREATE_SCATTER_FCS |
+                               IB_QP_CREATE_CVLAN_STRIPPING)) {
                ret = -EINVAL;
                goto err_put;
        }
@@ -3143,6 +3144,25 @@ out_put:
        return ret ? ret : in_len;
 }
 
+static int kern_spec_to_ib_spec_action(struct ib_uverbs_flow_spec *kern_spec,
+                                      union ib_flow_spec *ib_spec)
+{
+       ib_spec->type = kern_spec->type;
+       switch (ib_spec->type) {
+       case IB_FLOW_SPEC_ACTION_TAG:
+               if (kern_spec->flow_tag.size !=
+                   sizeof(struct ib_uverbs_flow_spec_action_tag))
+                       return -EINVAL;
+
+               ib_spec->flow_tag.size = sizeof(struct ib_flow_spec_action_tag);
+               ib_spec->flow_tag.tag_id = kern_spec->flow_tag.tag_id;
+               break;
+       default:
+               return -EINVAL;
+       }
+       return 0;
+}
+
 static size_t kern_spec_filter_sz(struct ib_uverbs_flow_spec_hdr *spec)
 {
        /* Returns user space filter size, includes padding */
@@ -3167,8 +3187,8 @@ static ssize_t spec_filter_size(void *kern_spec_filter, u16 kern_filter_size,
        return kern_filter_size;
 }
 
-static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
-                               union ib_flow_spec *ib_spec)
+static int kern_spec_to_ib_spec_filter(struct ib_uverbs_flow_spec *kern_spec,
+                                      union ib_flow_spec *ib_spec)
 {
        ssize_t actual_filter_sz;
        ssize_t kern_filter_sz;
@@ -3263,6 +3283,18 @@ static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
        return 0;
 }
 
+static int kern_spec_to_ib_spec(struct ib_uverbs_flow_spec *kern_spec,
+                               union ib_flow_spec *ib_spec)
+{
+       if (kern_spec->reserved)
+               return -EINVAL;
+
+       if (kern_spec->type >= IB_FLOW_SPEC_ACTION_TAG)
+               return kern_spec_to_ib_spec_action(kern_spec, ib_spec);
+       else
+               return kern_spec_to_ib_spec_filter(kern_spec, ib_spec);
+}
+
 int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
                           struct ib_device *ib_dev,
                           struct ib_udata *ucore,
@@ -3325,6 +3357,9 @@ int ib_uverbs_ex_create_wq(struct ib_uverbs_file *file,
        wq_init_attr.wq_context = file;
        wq_init_attr.wq_type = cmd.wq_type;
        wq_init_attr.event_handler = ib_uverbs_wq_event_handler;
+       if (ucore->inlen >= (offsetof(typeof(cmd), create_flags) +
+                            sizeof(cmd.create_flags)))
+               wq_init_attr.create_flags = cmd.create_flags;
        obj->uevent.events_reported = 0;
        INIT_LIST_HEAD(&obj->uevent.event_list);
        wq = pd->device->create_wq(pd, &wq_init_attr, uhw);
@@ -3480,7 +3515,7 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
        if (!cmd.attr_mask)
                return -EINVAL;
 
-       if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE))
+       if (cmd.attr_mask > (IB_WQ_STATE | IB_WQ_CUR_STATE | IB_WQ_FLAGS))
                return -EINVAL;
 
        wq = idr_read_wq(cmd.wq_handle, file->ucontext);
@@ -3489,6 +3524,10 @@ int ib_uverbs_ex_modify_wq(struct ib_uverbs_file *file,
 
        wq_attr.curr_wq_state = cmd.curr_wq_state;
        wq_attr.wq_state = cmd.wq_state;
+       if (cmd.attr_mask & IB_WQ_FLAGS) {
+               wq_attr.flags = cmd.flags;
+               wq_attr.flags_mask = cmd.flags_mask;
+       }
        ret = wq->device->modify_wq(wq, &wq_attr, cmd.attr_mask, uhw);
        put_wq_read(wq);
        return ret;
@@ -4323,6 +4362,12 @@ int ib_uverbs_ex_query_device(struct ib_uverbs_file *file,
 
        resp.max_wq_type_rq = attr.max_wq_type_rq;
        resp.response_length += sizeof(resp.max_wq_type_rq);
+
+       if (ucore->outlen < resp.response_length + sizeof(resp.raw_packet_caps))
+               goto end;
+
+       resp.raw_packet_caps = attr.raw_packet_caps;
+       resp.response_length += sizeof(resp.raw_packet_caps);
 end:
        err = ib_copy_to_udata(ucore, &resp, resp.response_length);
        return err;
index 6262dc035f3cea4c9613d96f67ec13e76a18643e..48649f93258a41e8ecf8b4214d4d518a28a07318 100644 (file)
@@ -1133,7 +1133,7 @@ static int iwch_query_port(struct ib_device *ibdev,
        dev = to_iwch_dev(ibdev);
        netdev = dev->rdev.port_info.lldevs[port-1];
 
-       memset(props, 0, sizeof(struct ib_port_attr));
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -1329,13 +1329,14 @@ static int iwch_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = iwch_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
        return 0;
 }
index 3345e1c312f771cfaa8e31858624ca9892267467..bdf7de571d838d824dd5fc57d8e357a35ddfe84a 100644 (file)
@@ -370,8 +370,7 @@ static int c4iw_query_port(struct ib_device *ibdev, u8 port,
 
        dev = to_c4iw_dev(ibdev);
        netdev = dev->rdev.lldi.ports[port-1];
-
-       memset(props, 0, sizeof(struct ib_port_attr));
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -508,13 +507,14 @@ static int c4iw_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = c4iw_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
        return 0;
 }
index 5ba4c0dec3488429de0a6a0d83fc9f89e9a8b17d..33f00f0719c561acec89667bed0566fe8432bb71 100644 (file)
@@ -1302,6 +1302,7 @@ static int query_port(struct rvt_dev_info *rdi, u8 port_num,
        struct hfi1_pportdata *ppd = &dd->pport[port_num - 1];
        u16 lid = ppd->lid;
 
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->lid = lid ? lid : 0;
        props->lmc = ppd->lmc;
        /* OPA logical states match IB logical states */
index cf14679664ca84e50c0d15a6dd0c4df074631432..6843409fba298abf1d593d0990c49054333cf43d 100644 (file)
@@ -250,7 +250,7 @@ static int hns_roce_query_port(struct ib_device *ib_dev, u8 port_num,
        assert(port_num > 0);
        port = port_num - 1;
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        props->max_mtu = hr_dev->caps.max_mtu;
        props->gid_tbl_len = hr_dev->caps.gid_table_len[port];
@@ -401,14 +401,15 @@ static int hns_roce_port_immutable(struct ib_device *ib_dev, u8 port_num,
        struct ib_port_attr attr;
        int ret;
 
-       ret = hns_roce_query_port(ib_dev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+       ret = ib_query_port(ib_dev, port_num, &attr);
        if (ret)
                return ret;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
 
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
index 4c000d60d5c6f865ae17aa28654497e3dbbb913c..5f695bf232a8f0d7bf332f65441db6a195d9092e 100644 (file)
@@ -97,8 +97,7 @@ static int i40iw_query_port(struct ib_device *ibdev,
        struct i40iw_device *iwdev = to_iwdev(ibdev);
        struct net_device *netdev = iwdev->netdev;
 
-       memset(props, 0, sizeof(*props));
-
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
 
@@ -2497,14 +2496,15 @@ static int i40iw_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = i40iw_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
+       err = ib_query_port(ibdev, port_num, &attr);
 
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
        return 0;
 }
index 06020c54db203110a13dc849031ba21e23c7359d..ea24230ea0d49956ff8311e6c310dd46dbdfc7ec 100644 (file)
@@ -499,6 +499,7 @@ static int set_guid_rec(struct ib_device *ibdev,
        struct list_head *head =
                &dev->sriov.alias_guid.ports_guid[port - 1].cb_list;
 
+       memset(&attr, 0, sizeof(attr));
        err = __mlx4_ib_query_port(ibdev, port, &attr, 1);
        if (err) {
                pr_debug("mlx4_ib_query_port failed (err: %d), port: %d\n",
index 7031a8dd4d1404d439dafcb90af38b610774cc05..211cbbe9ccd1e7a97540df1fed2d838f03139e90 100644 (file)
@@ -678,7 +678,7 @@ static u8 state_to_phys_state(enum ib_port_state state)
 }
 
 static int eth_link_query_port(struct ib_device *ibdev, u8 port,
-                              struct ib_port_attr *props, int netw_view)
+                              struct ib_port_attr *props)
 {
 
        struct mlx4_ib_dev *mdev = to_mdev(ibdev);
@@ -741,11 +741,11 @@ int __mlx4_ib_query_port(struct ib_device *ibdev, u8 port,
 {
        int err;
 
-       memset(props, 0, sizeof *props);
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        err = mlx4_ib_port_link_layer(ibdev, port) == IB_LINK_LAYER_INFINIBAND ?
                ib_link_query_port(ibdev, port, props, netw_view) :
-                               eth_link_query_port(ibdev, port, props, netw_view);
+                               eth_link_query_port(ibdev, port, props);
 
        return err;
 }
@@ -1014,7 +1014,7 @@ static int mlx4_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
 
        mutex_lock(&mdev->cap_mask_mutex);
 
-       err = mlx4_ib_query_port(ibdev, port, &attr);
+       err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
 
@@ -2537,24 +2537,27 @@ static int mlx4_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct mlx4_ib_dev *mdev = to_mdev(ibdev);
        int err;
 
-       err = mlx4_ib_query_port(ibdev, port_num, &attr);
-       if (err)
-               return err;
-
-       immutable->pkey_tbl_len = attr.pkey_tbl_len;
-       immutable->gid_tbl_len = attr.gid_tbl_len;
-
        if (mlx4_ib_port_link_layer(ibdev, port_num) == IB_LINK_LAYER_INFINIBAND) {
                immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+               immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        } else {
                if (mdev->dev->caps.flags & MLX4_DEV_CAP_FLAG_IBOE)
                        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
                if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)
                        immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
                                RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+               immutable->core_cap_flags |= RDMA_CORE_PORT_RAW_PACKET;
+               if (immutable->core_cap_flags & (RDMA_CORE_PORT_IBA_ROCE |
+                   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP))
+                       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        }
 
-       immutable->max_mad_size = IB_MGMT_MAD_SIZE;
+       err = ib_query_port(ibdev, port_num, &attr);
+       if (err)
+               return err;
+
+       immutable->pkey_tbl_len = attr.pkey_tbl_len;
+       immutable->gid_tbl_len = attr.gid_tbl_len;
 
        return 0;
 }
index 7d76f769233cfb5a58a2ce78717e758e8492363e..c34eebc7db65a9bbf4deb0ce611e6ec61e88af7f 100644 (file)
@@ -2420,11 +2420,31 @@ static u8 sl_to_vl(struct mlx4_ib_dev *dev, u8 sl, int port_num)
        return vl;
 }
 
+static int fill_gid_by_hw_index(struct mlx4_ib_dev *ibdev, u8 port_num,
+                               int index, union ib_gid *gid,
+                               enum ib_gid_type *gid_type)
+{
+       struct mlx4_ib_iboe *iboe = &ibdev->iboe;
+       struct mlx4_port_gid_table *port_gid_table;
+       unsigned long flags;
+
+       port_gid_table = &iboe->gids[port_num - 1];
+       spin_lock_irqsave(&iboe->lock, flags);
+       memcpy(gid, &port_gid_table->gids[index].gid, sizeof(*gid));
+       *gid_type = port_gid_table->gids[index].gid_type;
+       spin_unlock_irqrestore(&iboe->lock, flags);
+       if (!memcmp(gid, &zgid, sizeof(*gid)))
+               return -ENOENT;
+
+       return 0;
+}
+
 #define MLX4_ROCEV2_QP1_SPORT 0xC000
 static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                            void *wqe, unsigned *mlx_seg_len)
 {
        struct ib_device *ib_dev = sqp->qp.ibqp.device;
+       struct mlx4_ib_dev *ibdev = to_mdev(ib_dev);
        struct mlx4_wqe_mlx_seg *mlx = wqe;
        struct mlx4_wqe_ctrl_seg *ctrl = wqe;
        struct mlx4_wqe_inline_seg *inl = wqe + sizeof *mlx;
@@ -2450,8 +2470,7 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
        is_eth = rdma_port_get_link_layer(sqp->qp.ibqp.device, sqp->qp.port) == IB_LINK_LAYER_ETHERNET;
        is_grh = mlx4_ib_ah_grh_present(ah);
        if (is_eth) {
-               struct ib_gid_attr gid_attr;
-
+               enum ib_gid_type gid_type;
                if (mlx4_is_mfunc(to_mdev(ib_dev)->dev)) {
                        /* When multi-function is enabled, the ib_core gid
                         * indexes don't necessarily match the hw ones, so
@@ -2462,18 +2481,11 @@ static int build_mlx_header(struct mlx4_ib_sqp *sqp, struct ib_ud_wr *wr,
                        if (err)
                                return err;
                } else  {
-                       err = ib_get_cached_gid(ib_dev,
-                                               be32_to_cpu(ah->av.ib.port_pd) >> 24,
-                                               ah->av.ib.gid_index, &sgid,
-                                               &gid_attr);
-                       if (!err) {
-                               if (gid_attr.ndev)
-                                       dev_put(gid_attr.ndev);
-                               if (!memcmp(&sgid, &zgid, sizeof(sgid)))
-                                       err = -ENOENT;
-                       }
+                       err = fill_gid_by_hw_index(ibdev, sqp->qp.port,
+                                           ah->av.ib.gid_index,
+                                           &sgid, &gid_type);
                        if (!err) {
-                               is_udp = gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
+                               is_udp = gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP;
                                if (is_udp) {
                                        if (ipv6_addr_v4mapped((struct in6_addr *)&sgid))
                                                ip_version = 4;
@@ -2951,21 +2963,17 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
 
                if (sqp->roce_v2_gsi) {
                        struct mlx4_ib_ah *ah = to_mah(ud_wr(wr)->ah);
-                       struct ib_gid_attr gid_attr;
+                       enum ib_gid_type gid_type;
                        union ib_gid gid;
 
-                       if (!ib_get_cached_gid(ibqp->device,
-                                              be32_to_cpu(ah->av.ib.port_pd) >> 24,
-                                              ah->av.ib.gid_index, &gid,
-                                              &gid_attr)) {
-                               if (gid_attr.ndev)
-                                       dev_put(gid_attr.ndev);
-                               qp = (gid_attr.gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
-                                       to_mqp(sqp->roce_v2_gsi) : qp;
-                       } else {
+                       if (!fill_gid_by_hw_index(mdev, sqp->qp.port,
+                                          ah->av.ib.gid_index,
+                                          &gid, &gid_type))
+                               qp = (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP) ?
+                                               to_mqp(sqp->roce_v2_gsi) : qp;
+                       else
                                pr_err("Failed to get gid at index %d. RoCEv2 will not work properly\n",
                                       ah->av.ib.gid_index);
-                       }
                }
        }
 
index 69fb5ba94d0f226c11a8614b29be4c33df329040..0ba5ba7540c87e32bbcf795a7b581f1b4b7a49aa 100644 (file)
@@ -226,6 +226,7 @@ static int add_port_entries(struct mlx4_ib_dev *device, int port_num)
        int ret = 0 ;
        struct ib_port_attr attr;
 
+       memset(&attr, 0, sizeof(attr));
        /* get the physical gid and pkey table sizes.*/
        ret = __mlx4_ib_query_port(&device->ib_dev, port_num, &attr, 1);
        if (ret)
index 7493a83acd28dcc73c27395d53203a19f88565ce..90ad2adc752f0508ff2b553b1f7cc4f5ec76f305 100644 (file)
@@ -1,4 +1,4 @@
 obj-$(CONFIG_MLX5_INFINIBAND)  += mlx5_ib.o
 
-mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o
+mlx5_ib-y :=   main.o cq.o doorbell.o qp.o mem.o srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
new file mode 100644 (file)
index 0000000..cdc2d30
--- /dev/null
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "cmd.h"
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey)
+{
+       u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {};
+       u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)]   = {};
+       int err;
+
+       MLX5_SET(query_special_contexts_in, in, opcode,
+                MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS);
+       err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+       if (!err)
+               *null_mkey = MLX5_GET(query_special_contexts_out, out,
+                                     null_mkey);
+       return err;
+}
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
new file mode 100644 (file)
index 0000000..7ca8a7b
--- /dev/null
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2017, Mellanox Technologies. All rights reserved.
+ *
+ * This software is available to you under a choice of one of two
+ * licenses.  You may choose to be licensed under the terms of the GNU
+ * General Public License (GPL) Version 2, available from the file
+ * COPYING in the main directory of this source tree, or the
+ * OpenIB.org BSD license below:
+ *
+ *     Redistribution and use in source and binary forms, with or
+ *     without modification, are permitted provided that the following
+ *     conditions are met:
+ *
+ *      - Redistributions of source code must retain the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer.
+ *
+ *      - Redistributions in binary form must reproduce the above
+ *        copyright notice, this list of conditions and the following
+ *        disclaimer in the documentation and/or other materials
+ *        provided with the distribution.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef MLX5_IB_CMD_H
+#define MLX5_IB_CMD_H
+
+#include <linux/kernel.h>
+#include <linux/mlx5/driver.h>
+
+int mlx5_cmd_null_mkey(struct mlx5_core_dev *dev, u32 *null_mkey);
+#endif /* MLX5_IB_CMD_H */
index 39e58489dcc2c22e15388a1e6dc4027a25cd0214..8dacb49eabd98c92339b9d83e8f45166b8f33f2a 100644 (file)
@@ -42,12 +42,24 @@ enum {
        MLX5_IB_VENDOR_CLASS2 = 0xa
 };
 
+static bool can_do_mad_ifc(struct mlx5_ib_dev *dev, u8 port_num,
+                          struct ib_mad *in_mad)
+{
+       if (in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_LID_ROUTED &&
+           in_mad->mad_hdr.mgmt_class != IB_MGMT_CLASS_SUBN_DIRECTED_ROUTE)
+               return true;
+       return dev->mdev->port_caps[port_num - 1].has_smi;
+}
+
 int mlx5_MAD_IFC(struct mlx5_ib_dev *dev, int ignore_mkey, int ignore_bkey,
                 u8 port, const struct ib_wc *in_wc, const struct ib_grh *in_grh,
                 const void *in_mad, void *response_mad)
 {
        u8 op_modifier = 0;
 
+       if (!can_do_mad_ifc(dev, port, (struct ib_mad *)in_mad))
+               return -EPERM;
+
        /* Key check traps can't be generated unless we have in_wc to
         * tell us where to send the trap.
         */
@@ -515,7 +527,7 @@ int mlx5_query_mad_ifc_port(struct ib_device *ibdev, u8 port,
        if (!in_mad || !out_mad)
                goto out;
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
index 9d8535385bb8bded0e6dc5161396034cdf524b9b..6a8498c052a5c3a164ab47a1525f88835634ab17 100644 (file)
@@ -65,10 +65,6 @@ MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_VERSION(DRIVER_VERSION);
 
-static int deprecated_prof_sel = 2;
-module_param_named(prof_sel, deprecated_prof_sel, int, 0444);
-MODULE_PARM_DESC(prof_sel, "profile selector. Deprecated here. Moved to module mlx5_core");
-
 static char mlx5_version[] =
        DRIVER_NAME ": Mellanox Connect-IB Infiniband driver v"
        DRIVER_VERSION " (" DRIVER_RELDATE ")\n";
@@ -175,7 +171,7 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
        enum ib_mtu ndev_ib_mtu;
        u16 qkey_viol_cntr;
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        props->port_cap_flags  |= IB_PORT_CM_SUP;
        props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
@@ -326,6 +322,27 @@ __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
        return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
 }
 
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+                          int index, enum ib_gid_type *gid_type)
+{
+       struct ib_gid_attr attr;
+       union ib_gid gid;
+       int ret;
+
+       ret = ib_get_cached_gid(&dev->ib_dev, port_num, index, &gid, &attr);
+       if (ret)
+               return ret;
+
+       if (!attr.ndev)
+               return -ENODEV;
+
+       dev_put(attr.ndev);
+
+       *gid_type = attr.gid_type;
+
+       return 0;
+}
+
 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
 {
        if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
@@ -565,8 +582,15 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
                props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
 
        if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads)) {
-               if (MLX5_CAP_ETH(mdev, csum_cap))
+               if (MLX5_CAP_ETH(mdev, csum_cap)) {
+                       /* Legacy bit to support old userspace libraries */
                        props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
+                       props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
+               }
+
+               if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
+                       props->raw_packet_caps |=
+                               IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
 
                if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
                        max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
@@ -605,8 +629,11 @@ static int mlx5_ib_query_device(struct ib_device *ibdev,
        }
 
        if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
-           MLX5_CAP_ETH(dev->mdev, scatter_fcs))
+           MLX5_CAP_ETH(dev->mdev, scatter_fcs)) {
+               /* Legacy bit to support old userspace libraries */
                props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
+               props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
+       }
 
        if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
                props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
@@ -831,7 +858,7 @@ static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
                goto out;
        }
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
        if (err)
@@ -969,6 +996,31 @@ static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
        return err;
 }
 
+static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
+                               u32 value)
+{
+       struct mlx5_hca_vport_context ctx = {};
+       int err;
+
+       err = mlx5_query_hca_vport_context(dev->mdev, 0,
+                                          port_num, 0, &ctx);
+       if (err)
+               return err;
+
+       if (~ctx.cap_mask1_perm & mask) {
+               mlx5_ib_warn(dev, "trying to change bitmask 0x%X but change supported 0x%X\n",
+                            mask, ctx.cap_mask1_perm);
+               return -EINVAL;
+       }
+
+       ctx.cap_mask1 = value;
+       ctx.cap_mask1_perm = mask;
+       err = mlx5_core_modify_hca_vport_context(dev->mdev, 0,
+                                                port_num, 0, &ctx);
+
+       return err;
+}
+
 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
                               struct ib_port_modify *props)
 {
@@ -976,10 +1028,20 @@ static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
        struct ib_port_attr attr;
        u32 tmp;
        int err;
+       u32 change_mask;
+       u32 value;
+       bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
+                     IB_LINK_LAYER_INFINIBAND);
+
+       if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
+               change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
+               value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
+               return set_port_caps_atomic(dev, port, change_mask, value);
+       }
 
        mutex_lock(&dev->cap_mask_mutex);
 
-       err = mlx5_ib_query_port(ibdev, port, &attr);
+       err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
 
@@ -1661,6 +1723,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
 #define LAST_IPV6_FIELD traffic_class
 #define LAST_TCP_UDP_FIELD src_port
 #define LAST_TUNNEL_FIELD tunnel_id
+#define LAST_FLOW_TAG_FIELD tag_id
 
 /* Field is the last supported field */
 #define FIELDS_NOT_SUPPORTED(filter, field)\
@@ -1671,7 +1734,7 @@ static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
                   sizeof(filter.field))
 
 static int parse_flow_attr(u32 *match_c, u32 *match_v,
-                          const union ib_flow_spec *ib_spec)
+                          const union ib_flow_spec *ib_spec, u32 *tag_id)
 {
        void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
                                           misc_parameters);
@@ -1695,7 +1758,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
        switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
        case IB_FLOW_SPEC_ETH:
                if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                             dmac_47_16),
@@ -1743,7 +1806,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                break;
        case IB_FLOW_SPEC_IPV4:
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         ethertype, 0xffff);
@@ -1775,7 +1838,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
                break;
        case IB_FLOW_SPEC_IPV6:
                if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                         ethertype, 0xffff);
@@ -1816,7 +1879,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
        case IB_FLOW_SPEC_TCP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                         LAST_TCP_UDP_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         0xff);
@@ -1836,7 +1899,7 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
        case IB_FLOW_SPEC_UDP:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
                                         LAST_TCP_UDP_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         0xff);
@@ -1856,13 +1919,22 @@ static int parse_flow_attr(u32 *match_c, u32 *match_v,
        case IB_FLOW_SPEC_VXLAN_TUNNEL:
                if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
                                         LAST_TUNNEL_FIELD))
-                       return -ENOTSUPP;
+                       return -EOPNOTSUPP;
 
                MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
                         ntohl(ib_spec->tunnel.mask.tunnel_id));
                MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
                         ntohl(ib_spec->tunnel.val.tunnel_id));
                break;
+       case IB_FLOW_SPEC_ACTION_TAG:
+               if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
+                                        LAST_FLOW_TAG_FIELD))
+                       return -EOPNOTSUPP;
+               if (ib_spec->flow_tag.tag_id >= BIT(24))
+                       return -EINVAL;
+
+               *tag_id = ib_spec->flow_tag.tag_id;
+               break;
        default:
                return -EINVAL;
        }
@@ -2046,6 +2118,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
        struct mlx5_flow_spec *spec;
        const void *ib_flow = (const void *)flow_attr + sizeof(*flow_attr);
        unsigned int spec_index;
+       u32 flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
        int err = 0;
 
        if (!is_valid_attr(flow_attr))
@@ -2062,7 +2135,7 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 
        for (spec_index = 0; spec_index < flow_attr->num_of_specs; spec_index++) {
                err = parse_flow_attr(spec->match_criteria,
-                                     spec->match_value, ib_flow);
+                                     spec->match_value, ib_flow, &flow_tag);
                if (err < 0)
                        goto free;
 
@@ -2072,7 +2145,16 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
        spec->match_criteria_enable = get_match_criteria_enable(spec->match_criteria);
        flow_act.action = dst ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
                MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-       flow_act.flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
+
+       if (flow_tag != MLX5_FS_DEFAULT_FLOW_TAG &&
+           (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
+            flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT)) {
+               mlx5_ib_warn(dev, "Flow tag %u and attribute type %x isn't allowed in leftovers\n",
+                            flow_tag, flow_attr->type);
+               err = -EINVAL;
+               goto free;
+       }
+       flow_act.flow_tag = flow_tag;
        handler->rule = mlx5_add_flow_rules(ft, spec,
                                            &flow_act,
                                            dst, 1);
@@ -2542,6 +2624,35 @@ static void mlx5_ib_event(struct mlx5_core_dev *dev, void *context,
                ibdev->ib_active = false;
 }
 
+static int set_has_smi_cap(struct mlx5_ib_dev *dev)
+{
+       struct mlx5_hca_vport_context vport_ctx;
+       int err;
+       int port;
+
+       for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+               dev->mdev->port_caps[port - 1].has_smi = false;
+               if (MLX5_CAP_GEN(dev->mdev, port_type) ==
+                   MLX5_CAP_PORT_TYPE_IB) {
+                       if (MLX5_CAP_GEN(dev->mdev, ib_virt)) {
+                               err = mlx5_query_hca_vport_context(dev->mdev, 0,
+                                                                  port, 0,
+                                                                  &vport_ctx);
+                               if (err) {
+                                       mlx5_ib_err(dev, "query_hca_vport_context for port=%d failed %d\n",
+                                                   port, err);
+                                       return err;
+                               }
+                               dev->mdev->port_caps[port - 1].has_smi =
+                                       vport_ctx.has_smi;
+                       } else {
+                               dev->mdev->port_caps[port - 1].has_smi = true;
+                       }
+               }
+       }
+       return 0;
+}
+
 static void get_ext_port_caps(struct mlx5_ib_dev *dev)
 {
        int port;
@@ -2566,6 +2677,10 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
        if (!dprops)
                goto out;
 
+       err = set_has_smi_cap(dev);
+       if (err)
+               goto out;
+
        err = mlx5_ib_query_device(&dev->ib_dev, dprops, &uhw);
        if (err) {
                mlx5_ib_warn(dev, "query_device failed %d\n", err);
@@ -2573,6 +2688,7 @@ static int get_port_caps(struct mlx5_ib_dev *dev)
        }
 
        for (port = 1; port <= MLX5_CAP_GEN(dev->mdev, num_ports); port++) {
+               memset(pprops, 0, sizeof(*pprops));
                err = mlx5_ib_query_port(&dev->ib_dev, port, pprops);
                if (err) {
                        mlx5_ib_warn(dev, "query_port %d failed %d\n",
@@ -2867,11 +2983,13 @@ static u32 get_core_cap_flags(struct ib_device *ibdev)
        if (ll == IB_LINK_LAYER_INFINIBAND)
                return RDMA_CORE_PORT_IBA_IB;
 
+       ret = RDMA_CORE_PORT_RAW_PACKET;
+
        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV4_CAP))
-               return 0;
+               return ret;
 
        if (!(l3_type_cap & MLX5_ROCE_L3_TYPE_IPV6_CAP))
-               return 0;
+               return ret;
 
        if (roce_version_cap & MLX5_ROCE_VERSION_1_CAP)
                ret |= RDMA_CORE_PORT_IBA_ROCE;
@@ -2890,7 +3008,9 @@ static int mlx5_port_immutable(struct ib_device *ibdev, u8 port_num,
        enum rdma_link_layer ll = mlx5_ib_port_link_layer(ibdev, port_num);
        int err;
 
-       err = mlx5_ib_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = get_core_cap_flags(ibdev);
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
@@ -3011,13 +3131,102 @@ static void mlx5_disable_eth(struct mlx5_ib_dev *dev)
                mlx5_nic_vport_disable_roce(dev->mdev);
 }
 
+struct mlx5_ib_q_counter {
+       const char *name;
+       size_t offset;
+};
+
+#define INIT_Q_COUNTER(_name)          \
+       { .name = #_name, .offset = MLX5_BYTE_OFF(query_q_counter_out, _name)}
+
+static const struct mlx5_ib_q_counter basic_q_cnts[] = {
+       INIT_Q_COUNTER(rx_write_requests),
+       INIT_Q_COUNTER(rx_read_requests),
+       INIT_Q_COUNTER(rx_atomic_requests),
+       INIT_Q_COUNTER(out_of_buffer),
+};
+
+static const struct mlx5_ib_q_counter out_of_seq_q_cnts[] = {
+       INIT_Q_COUNTER(out_of_sequence),
+};
+
+static const struct mlx5_ib_q_counter retrans_q_cnts[] = {
+       INIT_Q_COUNTER(duplicate_request),
+       INIT_Q_COUNTER(rnr_nak_retry_err),
+       INIT_Q_COUNTER(packet_seq_err),
+       INIT_Q_COUNTER(implied_nak_seq_err),
+       INIT_Q_COUNTER(local_ack_timeout_err),
+};
+
 static void mlx5_ib_dealloc_q_counters(struct mlx5_ib_dev *dev)
 {
        unsigned int i;
 
-       for (i = 0; i < dev->num_ports; i++)
+       for (i = 0; i < dev->num_ports; i++) {
                mlx5_core_dealloc_q_counter(dev->mdev,
-                                           dev->port[i].q_cnt_id);
+                                           dev->port[i].q_cnts.set_id);
+               kfree(dev->port[i].q_cnts.names);
+               kfree(dev->port[i].q_cnts.offsets);
+       }
+}
+
+static int __mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev,
+                                     const char ***names,
+                                     size_t **offsets,
+                                     u32 *num)
+{
+       u32 num_counters;
+
+       num_counters = ARRAY_SIZE(basic_q_cnts);
+
+       if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt))
+               num_counters += ARRAY_SIZE(out_of_seq_q_cnts);
+
+       if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters))
+               num_counters += ARRAY_SIZE(retrans_q_cnts);
+
+       *names = kcalloc(num_counters, sizeof(**names), GFP_KERNEL);
+       if (!*names)
+               return -ENOMEM;
+
+       *offsets = kcalloc(num_counters, sizeof(**offsets), GFP_KERNEL);
+       if (!*offsets)
+               goto err_names;
+
+       *num = num_counters;
+
+       return 0;
+
+err_names:
+       kfree(*names);
+       return -ENOMEM;
+}
+
+static void mlx5_ib_fill_q_counters(struct mlx5_ib_dev *dev,
+                                   const char **names,
+                                   size_t *offsets)
+{
+       int i;
+       int j = 0;
+
+       for (i = 0; i < ARRAY_SIZE(basic_q_cnts); i++, j++) {
+               names[j] = basic_q_cnts[i].name;
+               offsets[j] = basic_q_cnts[i].offset;
+       }
+
+       if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt)) {
+               for (i = 0; i < ARRAY_SIZE(out_of_seq_q_cnts); i++, j++) {
+                       names[j] = out_of_seq_q_cnts[i].name;
+                       offsets[j] = out_of_seq_q_cnts[i].offset;
+               }
+       }
+
+       if (MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+               for (i = 0; i < ARRAY_SIZE(retrans_q_cnts); i++, j++) {
+                       names[j] = retrans_q_cnts[i].name;
+                       offsets[j] = retrans_q_cnts[i].offset;
+               }
+       }
 }
 
 static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
@@ -3026,14 +3235,26 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
        int ret;
 
        for (i = 0; i < dev->num_ports; i++) {
+               struct mlx5_ib_port *port = &dev->port[i];
+
                ret = mlx5_core_alloc_q_counter(dev->mdev,
-                                               &dev->port[i].q_cnt_id);
+                                               &port->q_cnts.set_id);
                if (ret) {
                        mlx5_ib_warn(dev,
                                     "couldn't allocate queue counter for port %d, err %d\n",
                                     i + 1, ret);
                        goto dealloc_counters;
                }
+
+               ret = __mlx5_ib_alloc_q_counters(dev,
+                                                &port->q_cnts.names,
+                                                &port->q_cnts.offsets,
+                                                &port->q_cnts.num_counters);
+               if (ret)
+                       goto dealloc_counters;
+
+               mlx5_ib_fill_q_counters(dev, port->q_cnts.names,
+                                       port->q_cnts.offsets);
        }
 
        return 0;
@@ -3041,62 +3262,39 @@ static int mlx5_ib_alloc_q_counters(struct mlx5_ib_dev *dev)
 dealloc_counters:
        while (--i >= 0)
                mlx5_core_dealloc_q_counter(dev->mdev,
-                                           dev->port[i].q_cnt_id);
+                                           dev->port[i].q_cnts.set_id);
 
        return ret;
 }
 
-static const char * const names[] = {
-       "rx_write_requests",
-       "rx_read_requests",
-       "rx_atomic_requests",
-       "out_of_buffer",
-       "out_of_sequence",
-       "duplicate_request",
-       "rnr_nak_retry_err",
-       "packet_seq_err",
-       "implied_nak_seq_err",
-       "local_ack_timeout_err",
-};
-
-static const size_t stats_offsets[] = {
-       MLX5_BYTE_OFF(query_q_counter_out, rx_write_requests),
-       MLX5_BYTE_OFF(query_q_counter_out, rx_read_requests),
-       MLX5_BYTE_OFF(query_q_counter_out, rx_atomic_requests),
-       MLX5_BYTE_OFF(query_q_counter_out, out_of_buffer),
-       MLX5_BYTE_OFF(query_q_counter_out, out_of_sequence),
-       MLX5_BYTE_OFF(query_q_counter_out, duplicate_request),
-       MLX5_BYTE_OFF(query_q_counter_out, rnr_nak_retry_err),
-       MLX5_BYTE_OFF(query_q_counter_out, packet_seq_err),
-       MLX5_BYTE_OFF(query_q_counter_out, implied_nak_seq_err),
-       MLX5_BYTE_OFF(query_q_counter_out, local_ack_timeout_err),
-};
-
 static struct rdma_hw_stats *mlx5_ib_alloc_hw_stats(struct ib_device *ibdev,
                                                    u8 port_num)
 {
-       BUILD_BUG_ON(ARRAY_SIZE(names) != ARRAY_SIZE(stats_offsets));
+       struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_port *port = &dev->port[port_num - 1];
 
        /* We support only per port stats */
        if (port_num == 0)
                return NULL;
 
-       return rdma_alloc_hw_stats_struct(names, ARRAY_SIZE(names),
+       return rdma_alloc_hw_stats_struct(port->q_cnts.names,
+                                         port->q_cnts.num_counters,
                                          RDMA_HW_STATS_DEFAULT_LIFESPAN);
 }
 
 static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                                struct rdma_hw_stats *stats,
-                               u8 port, int index)
+                               u8 port_num, int index)
 {
        struct mlx5_ib_dev *dev = to_mdev(ibdev);
+       struct mlx5_ib_port *port = &dev->port[port_num - 1];
        int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
        void *out;
        __be32 val;
        int ret;
        int i;
 
-       if (!port || !stats)
+       if (!stats)
                return -ENOSYS;
 
        out = mlx5_vzalloc(outlen);
@@ -3104,18 +3302,19 @@ static int mlx5_ib_get_hw_stats(struct ib_device *ibdev,
                return -ENOMEM;
 
        ret = mlx5_core_query_q_counter(dev->mdev,
-                                       dev->port[port - 1].q_cnt_id, 0,
+                                       port->q_cnts.set_id, 0,
                                        out, outlen);
        if (ret)
                goto free;
 
-       for (i = 0; i < ARRAY_SIZE(names); i++) {
-               val = *(__be32 *)(out + stats_offsets[i]);
+       for (i = 0; i < port->q_cnts.num_counters; i++) {
+               val = *(__be32 *)(out + port->q_cnts.offsets[i]);
                stats->value[i] = (u64)be32_to_cpu(val);
        }
+
 free:
        kvfree(out);
-       return ARRAY_SIZE(names);
+       return port->q_cnts.num_counters;
 }
 
 static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
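
The per-port counters wired up above surface through the standard rdma_hw_stats machinery, which exports each name in port->q_cnts.names as a read-only sysfs file. A minimal sketch of reading one of them from userspace, assuming the usual /sys/class/infiniband/<device>/ports/<port>/hw_counters/<name> layout and a device named mlx5_0 (both are assumptions about the running system, not part of this patch):

/* Sketch: read one per-port Q counter exposed via the rdma_hw_stats sysfs
 * interface. The device name "mlx5_0" and the counter "out_of_buffer" are
 * examples; substitute whatever `ls /sys/class/infiniband` reports.
 */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	const char *path =
		"/sys/class/infiniband/mlx5_0/ports/1/hw_counters/out_of_buffer";
	unsigned long long val;
	FILE *f = fopen(path, "r");

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%llu", &val) != 1) {
		fclose(f);
		fprintf(stderr, "unexpected counter format\n");
		return EXIT_FAILURE;
	}
	fclose(f);
	printf("out_of_buffer = %llu\n", val);
	return EXIT_SUCCESS;
}
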
@@ -3267,8 +3466,7 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
                        (1ull << IB_USER_VERBS_CMD_DEALLOC_MW);
        }
 
-       if (MLX5_CAP_GEN(dev->mdev, out_of_seq_cnt) &&
-           MLX5_CAP_GEN(dev->mdev, retransmission_q_counters)) {
+       if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
                dev->ib_dev.get_hw_stats        = mlx5_ib_get_hw_stats;
                dev->ib_dev.alloc_hw_stats      = mlx5_ib_alloc_hw_stats;
        }
@@ -3322,9 +3520,11 @@ static void *mlx5_ib_add(struct mlx5_core_dev *mdev)
        if (err)
                goto err_rsrc;
 
-       err = mlx5_ib_alloc_q_counters(dev);
-       if (err)
-               goto err_odp;
+       if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt)) {
+               err = mlx5_ib_alloc_q_counters(dev);
+               if (err)
+                       goto err_odp;
+       }
 
        dev->mdev->priv.uar = mlx5_get_uars_page(dev->mdev);
        if (!dev->mdev->priv.uar)
@@ -3373,7 +3573,8 @@ err_uar_page:
        mlx5_put_uars_page(dev->mdev, dev->mdev->priv.uar);
 
 err_q_cnt:
-       mlx5_ib_dealloc_q_counters(dev);
+       if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+               mlx5_ib_dealloc_q_counters(dev);
 
 err_odp:
        mlx5_ib_odp_remove_one(dev);
@@ -3406,7 +3607,8 @@ static void mlx5_ib_remove(struct mlx5_core_dev *mdev, void *context)
        mlx5_free_bfreg(dev->mdev, &dev->fp_bfreg);
        mlx5_free_bfreg(dev->mdev, &dev->bfreg);
        mlx5_put_uars_page(dev->mdev, mdev->priv.uar);
-       mlx5_ib_dealloc_q_counters(dev);
+       if (MLX5_CAP_GEN(dev->mdev, max_qp_cnt))
+               mlx5_ib_dealloc_q_counters(dev);
        destroy_umrc_res(dev);
        mlx5_ib_odp_remove_one(dev);
        destroy_dev_resources(&dev->devr);
@@ -3430,8 +3632,7 @@ static int __init mlx5_ib_init(void)
 {
        int err;
 
-       if (deprecated_prof_sel != 2)
-               pr_warn("prof_sel is deprecated for mlx5_ib, set it for mlx5_core\n");
+       mlx5_ib_odp_init();
 
        err = mlx5_register_interface(&mlx5_ib_interface);
 
index e1a4b93dce6b5957ac0a0ab4941e178e290c0e89..3cd064b5f0bff1d86af9f448c5ecc9f4322280c5 100644 (file)
@@ -202,6 +202,7 @@ struct mlx5_ib_flow_db {
 #define MLX5_IB_UPD_XLT_ADDR         BIT(3)
 #define MLX5_IB_UPD_XLT_PD           BIT(4)
 #define MLX5_IB_UPD_XLT_ACCESS       BIT(5)
+#define MLX5_IB_UPD_XLT_INDIRECT      BIT(6)
 
 /* Private QP creation flags to be passed in ib_qp_init_attr.create_flags.
  *
@@ -220,6 +221,10 @@ struct wr_list {
        u16     next;
 };
 
+enum mlx5_ib_rq_flags {
+       MLX5_IB_RQ_CVLAN_STRIPPING      = 1 << 0,
+};
+
 struct mlx5_ib_wq {
        u64                    *wrid;
        u32                    *wr_data;
@@ -308,6 +313,7 @@ struct mlx5_ib_rq {
        struct mlx5_db          *doorbell;
        u32                     tirn;
        u8                      state;
+       u32                     flags;
 };
 
 struct mlx5_ib_sq {
@@ -392,6 +398,7 @@ enum mlx5_ib_qp_flags {
        MLX5_IB_QP_SQPN_QP1                     = 1 << 6,
        MLX5_IB_QP_CAP_SCATTER_FCS              = 1 << 7,
        MLX5_IB_QP_RSS                          = 1 << 8,
+       MLX5_IB_QP_CVLAN_STRIPPING              = 1 << 9,
 };
 
 struct mlx5_umr_wr {
@@ -497,6 +504,10 @@ struct mlx5_ib_mr {
        int                     live;
        void                    *descs_alloc;
        int                     access_flags; /* Needed for rereg MR */
+
+       struct mlx5_ib_mr      *parent;
+       atomic_t                num_leaf_free;
+       wait_queue_head_t       q_leaf_free;
 };
 
 struct mlx5_ib_mw {
@@ -535,6 +546,10 @@ struct mlx5_cache_ent {
        struct dentry          *dir;
        char                    name[4];
        u32                     order;
+       u32                     xlt;
+       u32                     access_mode;
+       u32                     page;
+
        u32                     size;
        u32                     cur;
        u32                     miss;
@@ -549,6 +564,7 @@ struct mlx5_cache_ent {
        struct work_struct      work;
        struct delayed_work     dwork;
        int                     pending;
+       struct completion       compl;
 };
 
 struct mlx5_mr_cache {
@@ -579,8 +595,15 @@ struct mlx5_ib_resources {
        struct mutex    mutex;
 };
 
+struct mlx5_ib_q_counters {
+       const char **names;
+       size_t *offsets;
+       u32 num_counters;
+       u16 set_id;
+};
+
 struct mlx5_ib_port {
-       u16 q_cnt_id;
+       struct mlx5_ib_q_counters q_cnts;
 };
 
 struct mlx5_roce {
@@ -619,6 +642,7 @@ struct mlx5_ib_dev {
         * being used by a page fault handler.
         */
        struct srcu_struct      mr_srcu;
+       u32                     null_mkey;
 #endif
        struct mlx5_ib_flow_db  flow_db;
        /* protect resources needed as part of reset flow */
@@ -771,6 +795,9 @@ struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
 int mlx5_ib_dealloc_mw(struct ib_mw *mw);
 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
                       int page_shift, int flags);
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+                                            int access_flags);
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *mr);
 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
                          u64 length, u64 virt_addr, int access_flags,
                          struct ib_pd *pd, struct ib_udata *udata);
@@ -824,7 +851,9 @@ void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num);
 int mlx5_ib_get_cqe_size(struct mlx5_ib_dev *dev, struct ib_cq *ibcq);
 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev);
 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev);
-int mlx5_mr_ib_cont_pages(struct ib_umem *umem, u64 addr, int *count, int *shift);
+
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry);
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
                            struct ib_mr_status *mr_status);
 struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
@@ -848,6 +877,9 @@ int __init mlx5_ib_odp_init(void);
 void mlx5_ib_odp_cleanup(void);
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
                              unsigned long end);
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent);
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+                          size_t nentries, struct mlx5_ib_mr *mr, int flags);
 #else /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 {
@@ -855,9 +887,13 @@ static inline void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
 }
 
 static inline int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev) { return 0; }
-static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)   {}
+static inline void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)       {}
 static inline int mlx5_ib_odp_init(void) { return 0; }
-static inline void mlx5_ib_odp_cleanup(void)                           {}
+static inline void mlx5_ib_odp_cleanup(void)                               {}
+static inline void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent) {}
+static inline void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+                                        size_t nentries, struct mlx5_ib_mr *mr,
+                                        int flags) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
 
@@ -872,6 +908,8 @@ int mlx5_ib_set_vf_guid(struct ib_device *device, int vf, u8 port,
 
 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev, u8 port_num,
                               int index);
+int mlx5_get_roce_gid_type(struct mlx5_ib_dev *dev, u8 port_num,
+                          int index, enum ib_gid_type *gid_type);
 
 /* GSI QP helper functions */
 struct ib_qp *mlx5_ib_gsi_create_qp(struct ib_pd *pd,
index 8cf2a67f9fb0bad7bdbd201e7f359a859c782b80..3c1f483d003f76d3330fcaa95091828dcb80898d 100644 (file)
@@ -49,6 +49,7 @@ enum {
 
 static int clean_mr(struct mlx5_ib_mr *mr);
 static int use_umr(struct mlx5_ib_dev *dev, int order);
+static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
 
 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
@@ -149,6 +150,9 @@ static void reg_mr_callback(int status, void *context)
        if (err)
                pr_err("Error inserting to mkey tree. 0x%x\n", -err);
        write_unlock_irqrestore(&table->lock, flags);
+
+       if (!completion_done(&ent->compl))
+               complete(&ent->compl);
 }
 
 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
@@ -157,7 +161,6 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
        struct mlx5_cache_ent *ent = &cache->ent[c];
        int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
        struct mlx5_ib_mr *mr;
-       int npages = 1 << ent->order;
        void *mkc;
        u32 *in;
        int err = 0;
@@ -185,11 +188,11 @@ static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
 
                MLX5_SET(mkc, mkc, free, 1);
                MLX5_SET(mkc, mkc, umr_en, 1);
-               MLX5_SET(mkc, mkc, access_mode, MLX5_MKC_ACCESS_MODE_MTT);
+               MLX5_SET(mkc, mkc, access_mode, ent->access_mode);
 
                MLX5_SET(mkc, mkc, qpn, 0xffffff);
-               MLX5_SET(mkc, mkc, translations_octword_size, (npages + 1) / 2);
-               MLX5_SET(mkc, mkc, log_page_size, 12);
+               MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
+               MLX5_SET(mkc, mkc, log_page_size, ent->page);
 
                spin_lock_irq(&ent->lock);
                ent->pending++;
@@ -447,6 +450,42 @@ static void cache_work_func(struct work_struct *work)
        __cache_work_func(ent);
 }
 
+struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
+{
+       struct mlx5_mr_cache *cache = &dev->cache;
+       struct mlx5_cache_ent *ent;
+       struct mlx5_ib_mr *mr;
+       int err;
+
+       if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
+               mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
+               return NULL;
+       }
+
+       ent = &cache->ent[entry];
+       while (1) {
+               spin_lock_irq(&ent->lock);
+               if (list_empty(&ent->head)) {
+                       spin_unlock_irq(&ent->lock);
+
+                       err = add_keys(dev, entry, 1);
+                       if (err && err != -EAGAIN)
+                               return ERR_PTR(err);
+
+                       wait_for_completion(&ent->compl);
+               } else {
+                       mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
+                                             list);
+                       list_del(&mr->list);
+                       ent->cur--;
+                       spin_unlock_irq(&ent->lock);
+                       if (ent->cur < ent->limit)
+                               queue_work(cache->wq, &ent->work);
+                       return mr;
+               }
+       }
+}
+
 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
@@ -456,12 +495,12 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
        int i;
 
        c = order2idx(dev, order);
-       if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
+       if (c < 0 || c > MAX_UMR_CACHE_ENTRY) {
                mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
                return NULL;
        }
 
-       for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
+       for (i = c; i < MAX_UMR_CACHE_ENTRY; i++) {
                ent = &cache->ent[i];
 
                mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
@@ -488,7 +527,7 @@ static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
        return mr;
 }
 
-static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
+void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
@@ -500,6 +539,10 @@ static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
                mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
                return;
        }
+
+       if (unreg_umr(dev, mr))
+               return;
+
        ent = &cache->ent[c];
        spin_lock_irq(&ent->lock);
        list_add_tail(&mr->list, &ent->head);
@@ -602,7 +645,6 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 {
        struct mlx5_mr_cache *cache = &dev->cache;
        struct mlx5_cache_ent *ent;
-       int limit;
        int err;
        int i;
 
@@ -615,26 +657,35 @@ int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
 
        setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
        for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
-               INIT_LIST_HEAD(&cache->ent[i].head);
-               spin_lock_init(&cache->ent[i].lock);
-
                ent = &cache->ent[i];
                INIT_LIST_HEAD(&ent->head);
                spin_lock_init(&ent->lock);
                ent->order = i + 2;
                ent->dev = dev;
+               ent->limit = 0;
 
-               if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
-                   mlx5_core_is_pf(dev->mdev) &&
-                   use_umr(dev, ent->order))
-                       limit = dev->mdev->profile->mr_cache[i].limit;
-               else
-                       limit = 0;
-
+               init_completion(&ent->compl);
                INIT_WORK(&ent->work, cache_work_func);
                INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
-               ent->limit = limit;
                queue_work(cache->wq, &ent->work);
+
+               if (i > MAX_UMR_CACHE_ENTRY) {
+                       mlx5_odp_init_mr_cache_entry(ent);
+                       continue;
+               }
+
+               if (!use_umr(dev, ent->order))
+                       continue;
+
+               ent->page = PAGE_SHIFT;
+               ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
+                          MLX5_IB_UMR_OCTOWORD;
+               ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+               if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
+                   mlx5_core_is_pf(dev->mdev))
+                       ent->limit = dev->mdev->profile->mr_cache[i].limit;
+               else
+                       ent->limit = 0;
        }
 
        err = mlx5_mr_cache_debugfs_init(dev);
@@ -758,7 +809,7 @@ static int get_octo_len(u64 addr, u64 len, int page_size)
 static int use_umr(struct mlx5_ib_dev *dev, int order)
 {
        if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
-               return order < MAX_MR_CACHE_ENTRIES + 2;
+               return order <= MAX_UMR_CACHE_ENTRY + 2;
        return order <= MLX5_MAX_UMR_SHIFT;
 }
 
@@ -871,7 +922,7 @@ static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
                                 MLX5_IB_UPD_XLT_ENABLE);
 
        if (err) {
-               free_cached_mr(dev, mr);
+               mlx5_mr_cache_free(dev, mr);
                return ERR_PTR(err);
        }
 
@@ -886,6 +937,10 @@ static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
 {
        struct mlx5_ib_dev *dev = mr->dev;
        struct ib_umem *umem = mr->umem;
+       if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
+               mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
+               return npages;
+       }
 
        npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
 
@@ -919,7 +974,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
        struct mlx5_umr_wr wr;
        struct ib_sge sg;
        int err = 0;
-       int desc_size = sizeof(struct mlx5_mtt);
+       int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
+                              ? sizeof(struct mlx5_klm)
+                              : sizeof(struct mlx5_mtt);
        const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
        const int page_mask = page_align - 1;
        size_t pages_mapped = 0;
@@ -1091,6 +1148,7 @@ static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
                goto err_2;
        }
        mr->mmkey.type = MLX5_MKEY_MR;
+       mr->desc_size = sizeof(struct mlx5_mtt);
        mr->umem = umem;
        mr->dev = dev;
        mr->live = 1;
@@ -1136,6 +1194,18 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
 
        mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
                    start, virt_addr, length, access_flags);
+
+#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
+       if (!start && length == U64_MAX) {
+               if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
+                   !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+                       return ERR_PTR(-EINVAL);
+
+               mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), access_flags);
+               return &mr->ibmr;
+       }
+#endif
+
        err = mr_umem_get(pd, start, length, access_flags, &umem, &npages,
                           &page_shift, &ncont, &order);
 
@@ -1398,12 +1468,7 @@ static int clean_mr(struct mlx5_ib_mr *mr)
                        return err;
                }
        } else {
-               err = unreg_umr(dev, mr);
-               if (err) {
-                       mlx5_ib_warn(dev, "failed unregister\n");
-                       return err;
-               }
-               free_cached_mr(dev, mr);
+               mlx5_mr_cache_free(dev, mr);
        }
 
        if (!umred)
@@ -1426,8 +1491,11 @@ int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
                /* Wait for all running page-fault handlers to finish. */
                synchronize_srcu(&dev->mr_srcu);
                /* Destroy all page mappings */
-               mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
-                                        ib_umem_end(umem));
+               if (umem->odp_data->page_list)
+                       mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
+                                                ib_umem_end(umem));
+               else
+                       mlx5_ib_free_implicit_mr(mr);
                /*
                 * We kill the umem before the MR for ODP,
                 * so that there will not be any invalidations in
index e5bc267aca73383667a58e9914f863d28efdea70..d7b12f0750e275afefe413b0b676c0ea65756356 100644 (file)
@@ -34,6 +34,7 @@
 #include <rdma/ib_umem_odp.h>
 
 #include "mlx5_ib.h"
+#include "cmd.h"
 
 #define MAX_PREFETCH_LEN (4*1024*1024U)
 
 /* Timeout in ms to wait for an active mmu notifier to complete when handling
  * a pagefault. */
 #define MMU_NOTIFIER_TIMEOUT 1000
 
+#define MLX5_IMR_MTT_BITS (30 - PAGE_SHIFT)
+#define MLX5_IMR_MTT_SHIFT (MLX5_IMR_MTT_BITS + PAGE_SHIFT)
+#define MLX5_IMR_MTT_ENTRIES BIT_ULL(MLX5_IMR_MTT_BITS)
+#define MLX5_IMR_MTT_SIZE BIT_ULL(MLX5_IMR_MTT_SHIFT)
+#define MLX5_IMR_MTT_MASK (~(MLX5_IMR_MTT_SIZE - 1))
+
+#define MLX5_KSM_PAGE_SHIFT MLX5_IMR_MTT_SHIFT
+
+static u64 mlx5_imr_ksm_entries;
+
+static int check_parent(struct ib_umem_odp *odp,
+                              struct mlx5_ib_mr *parent)
+{
+       struct mlx5_ib_mr *mr = odp->private;
+
+       return mr && mr->parent == parent;
+}
+
+static struct ib_umem_odp *odp_next(struct ib_umem_odp *odp)
+{
+       struct mlx5_ib_mr *mr = odp->private, *parent = mr->parent;
+       struct ib_ucontext *ctx = odp->umem->context;
+       struct rb_node *rb;
+
+       down_read(&ctx->umem_rwsem);
+       while (1) {
+               rb = rb_next(&odp->interval_tree.rb);
+               if (!rb)
+                       goto not_found;
+               odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+               if (check_parent(odp, parent))
+                       goto end;
+       }
+not_found:
+       odp = NULL;
+end:
+       up_read(&ctx->umem_rwsem);
+       return odp;
+}
+
+static struct ib_umem_odp *odp_lookup(struct ib_ucontext *ctx,
+                                     u64 start, u64 length,
+                                     struct mlx5_ib_mr *parent)
+{
+       struct ib_umem_odp *odp;
+       struct rb_node *rb;
+
+       down_read(&ctx->umem_rwsem);
+       odp = rbt_ib_umem_lookup(&ctx->umem_tree, start, length);
+       if (!odp)
+               goto end;
+
+       while (1) {
+               if (check_parent(odp, parent))
+                       goto end;
+               rb = rb_next(&odp->interval_tree.rb);
+               if (!rb)
+                       goto not_found;
+               odp = rb_entry(rb, struct ib_umem_odp, interval_tree.rb);
+               if (ib_umem_start(odp->umem) > start + length)
+                       goto not_found;
+       }
+not_found:
+       odp = NULL;
+end:
+       up_read(&ctx->umem_rwsem);
+       return odp;
+}
+
+void mlx5_odp_populate_klm(struct mlx5_klm *pklm, size_t offset,
+                          size_t nentries, struct mlx5_ib_mr *mr, int flags)
+{
+       struct ib_pd *pd = mr->ibmr.pd;
+       struct ib_ucontext *ctx = pd->uobject->context;
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct ib_umem_odp *odp;
+       unsigned long va;
+       int i;
+
+       if (flags & MLX5_IB_UPD_XLT_ZAP) {
+               for (i = 0; i < nentries; i++, pklm++) {
+                       pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+                       pklm->key = cpu_to_be32(dev->null_mkey);
+                       pklm->va = 0;
+               }
+               return;
+       }
+
+       odp = odp_lookup(ctx, offset * MLX5_IMR_MTT_SIZE,
+                            nentries * MLX5_IMR_MTT_SIZE, mr);
+
+       for (i = 0; i < nentries; i++, pklm++) {
+               pklm->bcount = cpu_to_be32(MLX5_IMR_MTT_SIZE);
+               va = (offset + i) * MLX5_IMR_MTT_SIZE;
+               if (odp && odp->umem->address == va) {
+                       struct mlx5_ib_mr *mtt = odp->private;
+
+                       pklm->key = cpu_to_be32(mtt->ibmr.lkey);
+                       odp = odp_next(odp);
+               } else {
+                       pklm->key = cpu_to_be32(dev->null_mkey);
+               }
+               mlx5_ib_dbg(dev, "[%d] va %lx key %x\n",
+                           i, va, be32_to_cpu(pklm->key));
+       }
+}
+
+static void mr_leaf_free_action(struct work_struct *work)
+{
+       struct ib_umem_odp *odp = container_of(work, struct ib_umem_odp, work);
+       int idx = ib_umem_start(odp->umem) >> MLX5_IMR_MTT_SHIFT;
+       struct mlx5_ib_mr *mr = odp->private, *imr = mr->parent;
+
+       mr->parent = NULL;
+       synchronize_srcu(&mr->dev->mr_srcu);
+
+       if (!READ_ONCE(odp->dying)) {
+               mr->parent = imr;
+               if (atomic_dec_and_test(&imr->num_leaf_free))
+                       wake_up(&imr->q_leaf_free);
+               return;
+       }
+
+       ib_umem_release(odp->umem);
+       if (imr->live)
+               mlx5_ib_update_xlt(imr, idx, 1, 0,
+                                  MLX5_IB_UPD_XLT_INDIRECT |
+                                  MLX5_IB_UPD_XLT_ATOMIC);
+       mlx5_mr_cache_free(mr->dev, mr);
+
+       if (atomic_dec_and_test(&imr->num_leaf_free))
+               wake_up(&imr->q_leaf_free);
+}
+
 void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
                              unsigned long end)
 {
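
The MLX5_IMR_* macros above pin down the geometry of the implicit MR tree: each leaf MTT MR covers MLX5_IMR_MTT_SIZE = 2^30 bytes, and the top-level KSM table, sized later in mlx5_ib_odp_init() from TASK_SIZE, carries one entry per possible leaf. A small worked-arithmetic sketch, assuming 4 KiB pages (PAGE_SHIFT = 12) and a 2^47-byte user address space; both values are assumptions for illustration, not taken from this patch:

/* Sketch of the implicit-MR geometry implied by the macros above,
 * assuming PAGE_SHIFT == 12 and a 2^47-byte user address space. */
#include <stdio.h>

int main(void)
{
	const unsigned int page_shift = 12;                 /* assumed 4 KiB pages */
	const unsigned int imr_mtt_bits = 30 - page_shift;  /* 18 */
	const unsigned long long leaf_size = 1ULL << (imr_mtt_bits + page_shift);
	const unsigned long long leaf_pages = 1ULL << imr_mtt_bits;
	const unsigned int task_size_order = 47 - page_shift; /* get_order(2^47) */
	const unsigned long long ksm_entries =
		1ULL << (task_size_order - imr_mtt_bits);    /* 2^17 */

	printf("leaf MR size  : %llu bytes (1 GiB)\n", leaf_size);
	printf("pages per leaf: %llu\n", leaf_pages);
	printf("KSM entries   : %llu (covers %llu GiB)\n",
	       ksm_entries, ksm_entries * (leaf_size >> 30));
	return 0;
}
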
@@ -111,6 +246,13 @@ void mlx5_ib_invalidate_range(struct ib_umem *umem, unsigned long start,
         */
 
        ib_umem_odp_unmap_dma_pages(umem, start, end);
+
+       if (unlikely(!umem->npages && mr->parent &&
+                    !umem->odp_data->dying)) {
+               WRITE_ONCE(umem->odp_data->dying, 1);
+               atomic_inc(&mr->parent->num_leaf_free);
+               schedule_work(&umem->odp_data->work);
+       }
 }
 
 void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
@@ -147,6 +289,11 @@ void mlx5_ib_internal_fill_odp_caps(struct mlx5_ib_dev *dev)
        if (MLX5_CAP_ODP(dev->mdev, rc_odp_caps.atomic))
                caps->per_transport_caps.rc_odp_caps |= IB_ODP_SUPPORT_ATOMIC;
 
+       if (MLX5_CAP_GEN(dev->mdev, fixed_buffer_size) &&
+           MLX5_CAP_GEN(dev->mdev, null_mkey) &&
+           MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
+               caps->general_caps |= IB_ODP_SUPPORT_IMPLICIT;
+
        return;
 }
 
@@ -184,6 +331,197 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
                            wq_num);
 }
 
+static struct mlx5_ib_mr *implicit_mr_alloc(struct ib_pd *pd,
+                                           struct ib_umem *umem,
+                                           bool ksm, int access_flags)
+{
+       struct mlx5_ib_dev *dev = to_mdev(pd->device);
+       struct mlx5_ib_mr *mr;
+       int err;
+
+       mr = mlx5_mr_cache_alloc(dev, ksm ? MLX5_IMR_KSM_CACHE_ENTRY :
+                                           MLX5_IMR_MTT_CACHE_ENTRY);
+
+       if (IS_ERR(mr))
+               return mr;
+
+       mr->ibmr.pd = pd;
+
+       mr->dev = dev;
+       mr->access_flags = access_flags;
+       mr->mmkey.iova = 0;
+       mr->umem = umem;
+
+       if (ksm) {
+               err = mlx5_ib_update_xlt(mr, 0,
+                                        mlx5_imr_ksm_entries,
+                                        MLX5_KSM_PAGE_SHIFT,
+                                        MLX5_IB_UPD_XLT_INDIRECT |
+                                        MLX5_IB_UPD_XLT_ZAP |
+                                        MLX5_IB_UPD_XLT_ENABLE);
+
+       } else {
+               err = mlx5_ib_update_xlt(mr, 0,
+                                        MLX5_IMR_MTT_ENTRIES,
+                                        PAGE_SHIFT,
+                                        MLX5_IB_UPD_XLT_ZAP |
+                                        MLX5_IB_UPD_XLT_ENABLE |
+                                        MLX5_IB_UPD_XLT_ATOMIC);
+       }
+
+       if (err)
+               goto fail;
+
+       mr->ibmr.lkey = mr->mmkey.key;
+       mr->ibmr.rkey = mr->mmkey.key;
+
+       mr->live = 1;
+
+       mlx5_ib_dbg(dev, "key %x dev %p mr %p\n",
+                   mr->mmkey.key, dev->mdev, mr);
+
+       return mr;
+
+fail:
+       mlx5_ib_err(dev, "Failed to register MKEY %d\n", err);
+       mlx5_mr_cache_free(dev, mr);
+
+       return ERR_PTR(err);
+}
+
+static struct ib_umem_odp *implicit_mr_get_data(struct mlx5_ib_mr *mr,
+                                               u64 io_virt, size_t bcnt)
+{
+       struct ib_ucontext *ctx = mr->ibmr.pd->uobject->context;
+       struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.pd->device);
+       struct ib_umem_odp *odp, *result = NULL;
+       u64 addr = io_virt & MLX5_IMR_MTT_MASK;
+       int nentries = 0, start_idx = 0, ret;
+       struct mlx5_ib_mr *mtt;
+       struct ib_umem *umem;
+
+       mutex_lock(&mr->umem->odp_data->umem_mutex);
+       odp = odp_lookup(ctx, addr, 1, mr);
+
+       mlx5_ib_dbg(dev, "io_virt:%llx bcnt:%zx addr:%llx odp:%p\n",
+                   io_virt, bcnt, addr, odp);
+
+next_mr:
+       if (likely(odp)) {
+               if (nentries)
+                       nentries++;
+       } else {
+               umem = ib_alloc_odp_umem(ctx, addr, MLX5_IMR_MTT_SIZE);
+               if (IS_ERR(umem)) {
+                       mutex_unlock(&mr->umem->odp_data->umem_mutex);
+                       return ERR_CAST(umem);
+               }
+
+               mtt = implicit_mr_alloc(mr->ibmr.pd, umem, 0, mr->access_flags);
+               if (IS_ERR(mtt)) {
+                       mutex_unlock(&mr->umem->odp_data->umem_mutex);
+                       ib_umem_release(umem);
+                       return ERR_CAST(mtt);
+               }
+
+               odp = umem->odp_data;
+               odp->private = mtt;
+               mtt->umem = umem;
+               mtt->mmkey.iova = addr;
+               mtt->parent = mr;
+               INIT_WORK(&odp->work, mr_leaf_free_action);
+
+               if (!nentries)
+                       start_idx = addr >> MLX5_IMR_MTT_SHIFT;
+               nentries++;
+       }
+
+       odp->dying = 0;
+
+       /* Return first odp if region not covered by single one */
+       if (likely(!result))
+               result = odp;
+
+       addr += MLX5_IMR_MTT_SIZE;
+       if (unlikely(addr < io_virt + bcnt)) {
+               odp = odp_next(odp);
+               if (odp && odp->umem->address != addr)
+                       odp = NULL;
+               goto next_mr;
+       }
+
+       if (unlikely(nentries)) {
+               ret = mlx5_ib_update_xlt(mr, start_idx, nentries, 0,
+                                        MLX5_IB_UPD_XLT_INDIRECT |
+                                        MLX5_IB_UPD_XLT_ATOMIC);
+               if (ret) {
+                       mlx5_ib_err(dev, "Failed to update PAS\n");
+                       result = ERR_PTR(ret);
+               }
+       }
+
+       mutex_unlock(&mr->umem->odp_data->umem_mutex);
+       return result;
+}
+
+struct mlx5_ib_mr *mlx5_ib_alloc_implicit_mr(struct mlx5_ib_pd *pd,
+                                            int access_flags)
+{
+       struct ib_ucontext *ctx = pd->ibpd.uobject->context;
+       struct mlx5_ib_mr *imr;
+       struct ib_umem *umem;
+
+       umem = ib_umem_get(ctx, 0, 0, IB_ACCESS_ON_DEMAND, 0);
+       if (IS_ERR(umem))
+               return ERR_CAST(umem);
+
+       imr = implicit_mr_alloc(&pd->ibpd, umem, 1, access_flags);
+       if (IS_ERR(imr)) {
+               ib_umem_release(umem);
+               return ERR_CAST(imr);
+       }
+
+       imr->umem = umem;
+       init_waitqueue_head(&imr->q_leaf_free);
+       atomic_set(&imr->num_leaf_free, 0);
+
+       return imr;
+}
+
+static int mr_leaf_free(struct ib_umem *umem, u64 start,
+                       u64 end, void *cookie)
+{
+       struct mlx5_ib_mr *mr = umem->odp_data->private, *imr = cookie;
+
+       if (mr->parent != imr)
+               return 0;
+
+       ib_umem_odp_unmap_dma_pages(umem,
+                                   ib_umem_start(umem),
+                                   ib_umem_end(umem));
+
+       if (umem->odp_data->dying)
+               return 0;
+
+       WRITE_ONCE(umem->odp_data->dying, 1);
+       atomic_inc(&imr->num_leaf_free);
+       schedule_work(&umem->odp_data->work);
+
+       return 0;
+}
+
+void mlx5_ib_free_implicit_mr(struct mlx5_ib_mr *imr)
+{
+       struct ib_ucontext *ctx = imr->ibmr.pd->uobject->context;
+
+       down_read(&ctx->umem_rwsem);
+       rbt_ib_umem_for_each_in_range(&ctx->umem_tree, 0, ULLONG_MAX,
+                                     mr_leaf_free, imr);
+       up_read(&ctx->umem_rwsem);
+
+       wait_event(imr->q_leaf_free, !atomic_read(&imr->num_leaf_free));
+}
+
 /*
  * Handle a single data segment in a page-fault WQE or RDMA region.
  *
@@ -195,47 +533,43 @@ static void mlx5_ib_page_fault_resume(struct mlx5_ib_dev *dev,
  * -EFAULT when there's an error mapping the requested pages. The caller will
  *  abort the page fault handling.
  */
-static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
+static int pagefault_single_data_segment(struct mlx5_ib_dev *dev,
                                         u32 key, u64 io_virt, size_t bcnt,
                                         u32 *bytes_committed,
                                         u32 *bytes_mapped)
 {
        int srcu_key;
-       unsigned int current_seq;
+       unsigned int current_seq = 0;
        u64 start_idx;
        int npages = 0, ret = 0;
        struct mlx5_ib_mr *mr;
        u64 access_mask = ODP_READ_ALLOWED_BIT;
+       struct ib_umem_odp *odp;
+       int implicit = 0;
+       size_t size;
 
-       srcu_key = srcu_read_lock(&mib_dev->mr_srcu);
-       mr = mlx5_ib_odp_find_mr_lkey(mib_dev, key);
+       srcu_key = srcu_read_lock(&dev->mr_srcu);
+       mr = mlx5_ib_odp_find_mr_lkey(dev, key);
        /*
         * If we didn't find the MR, it means the MR was closed while we were
         * handling the ODP event. In this case we return -EFAULT so that the
         * QP will be closed.
         */
        if (!mr || !mr->ibmr.pd) {
-               pr_err("Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
-                      key);
+               mlx5_ib_dbg(dev, "Failed to find relevant mr for lkey=0x%06x, probably the MR was destroyed\n",
+                           key);
                ret = -EFAULT;
                goto srcu_unlock;
        }
        if (!mr->umem->odp_data) {
-               pr_debug("skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
-                        key);
+               mlx5_ib_dbg(dev, "skipping non ODP MR (lkey=0x%06x) in page fault handler.\n",
+                           key);
                if (bytes_mapped)
                        *bytes_mapped +=
                                (bcnt - *bytes_committed);
                goto srcu_unlock;
        }
 
-       current_seq = ACCESS_ONCE(mr->umem->odp_data->notifiers_seq);
-       /*
-        * Ensure the sequence number is valid for some time before we call
-        * gup.
-        */
-       smp_rmb();
-
        /*
         * Avoid branches - this code will perform correctly
         * in all iterations (in iteration 2 and above,
@@ -244,63 +578,109 @@ static int pagefault_single_data_segment(struct mlx5_ib_dev *mib_dev,
        io_virt += *bytes_committed;
        bcnt -= *bytes_committed;
 
+       if (!mr->umem->odp_data->page_list) {
+               odp = implicit_mr_get_data(mr, io_virt, bcnt);
+
+               if (IS_ERR(odp)) {
+                       ret = PTR_ERR(odp);
+                       goto srcu_unlock;
+               }
+               mr = odp->private;
+               implicit = 1;
+
+       } else {
+               odp = mr->umem->odp_data;
+       }
+
+next_mr:
+       current_seq = READ_ONCE(odp->notifiers_seq);
+       /*
+        * Ensure the sequence number is valid for some time before we call
+        * gup.
+        */
+       smp_rmb();
+
+       size = min_t(size_t, bcnt, ib_umem_end(odp->umem) - io_virt);
        start_idx = (io_virt - (mr->mmkey.iova & PAGE_MASK)) >> PAGE_SHIFT;
 
        if (mr->umem->writable)
                access_mask |= ODP_WRITE_ALLOWED_BIT;
-       npages = ib_umem_odp_map_dma_pages(mr->umem, io_virt, bcnt,
-                                          access_mask, current_seq);
-       if (npages < 0) {
-               ret = npages;
+
+       ret = ib_umem_odp_map_dma_pages(mr->umem, io_virt, size,
+                                       access_mask, current_seq);
+
+       if (ret < 0)
                goto srcu_unlock;
-       }
 
-       if (npages > 0) {
-               mutex_lock(&mr->umem->odp_data->umem_mutex);
+       if (ret > 0) {
+               int np = ret;
+
+               mutex_lock(&odp->umem_mutex);
                if (!ib_umem_mmu_notifier_retry(mr->umem, current_seq)) {
                        /*
                         * No need to check whether the MTTs really belong to
                         * this MR, since ib_umem_odp_map_dma_pages already
                         * checks this.
                         */
-                       ret = mlx5_ib_update_xlt(mr, start_idx, npages,
+                       ret = mlx5_ib_update_xlt(mr, start_idx, np,
                                                 PAGE_SHIFT,
                                                 MLX5_IB_UPD_XLT_ATOMIC);
                } else {
                        ret = -EAGAIN;
                }
-               mutex_unlock(&mr->umem->odp_data->umem_mutex);
+               mutex_unlock(&odp->umem_mutex);
                if (ret < 0) {
                        if (ret != -EAGAIN)
-                               pr_err("Failed to update mkey page tables\n");
+                               mlx5_ib_err(dev, "Failed to update mkey page tables\n");
                        goto srcu_unlock;
                }
 
                if (bytes_mapped) {
-                       u32 new_mappings = npages * PAGE_SIZE -
+                       u32 new_mappings = np * PAGE_SIZE -
                                (io_virt - round_down(io_virt, PAGE_SIZE));
-                       *bytes_mapped += min_t(u32, new_mappings, bcnt);
+                       *bytes_mapped += min_t(u32, new_mappings, size);
                }
+
+               npages += np;
+       }
+
+       bcnt -= size;
+       if (unlikely(bcnt)) {
+               struct ib_umem_odp *next;
+
+               io_virt += size;
+               next = odp_next(odp);
+               if (unlikely(!next || next->umem->address != io_virt)) {
+                       mlx5_ib_dbg(dev, "next implicit leaf removed at 0x%llx. got %p\n",
+                                   io_virt, next);
+                       ret = -EAGAIN;
+                       goto srcu_unlock_no_wait;
+               }
+               odp = next;
+               mr = odp->private;
+               goto next_mr;
        }
 
 srcu_unlock:
        if (ret == -EAGAIN) {
-               if (!mr->umem->odp_data->dying) {
-                       struct ib_umem_odp *odp_data = mr->umem->odp_data;
+               if (implicit || !odp->dying) {
                        unsigned long timeout =
                                msecs_to_jiffies(MMU_NOTIFIER_TIMEOUT);
 
                        if (!wait_for_completion_timeout(
-                                       &odp_data->notifier_completion,
+                                       &odp->notifier_completion,
                                        timeout)) {
-                               pr_warn("timeout waiting for mmu notifier completion\n");
+                               mlx5_ib_warn(dev, "timeout waiting for mmu notifier. seq %d against %d\n",
+                                            current_seq, odp->notifiers_seq);
                        }
                } else {
                        /* The MR is being killed, kill the QP as well. */
                        ret = -EFAULT;
                }
        }
-       srcu_read_unlock(&mib_dev->mr_srcu, srcu_key);
+
+srcu_unlock_no_wait:
+       srcu_read_unlock(&dev->mr_srcu, srcu_key);
        *bytes_committed = 0;
        return ret ? ret : npages;
 }
@@ -618,8 +998,8 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
                goto resolve_page_fault;
        } else if (ret < 0 || total_wqe_bytes > bytes_mapped) {
                if (ret != -ENOENT)
-                       mlx5_ib_err(dev, "Error getting user pages for page fault. Error: %d\n",
-                                   ret);
+                       mlx5_ib_err(dev, "PAGE FAULT error: %d. QP 0x%x. type: 0x%x\n",
+                                   ret, pfault->wqe.wq_num, pfault->type);
                goto resolve_page_fault;
        }
 
@@ -627,7 +1007,7 @@ static void mlx5_ib_mr_wqe_pfault_handler(struct mlx5_ib_dev *dev,
 resolve_page_fault:
        mlx5_ib_page_fault_resume(dev, pfault, resume_with_error);
        mlx5_ib_dbg(dev, "PAGE FAULT completed. QP 0x%x resume_with_error=%d, type: 0x%x\n",
-                   pfault->token, resume_with_error,
+                   pfault->wqe.wq_num, resume_with_error,
                    pfault->type);
        free_page((unsigned long)buffer);
 }
@@ -700,10 +1080,9 @@ static void mlx5_ib_mr_rdma_pfault_handler(struct mlx5_ib_dev *dev,
                ret = pagefault_single_data_segment(dev, rkey, address,
                                                    prefetch_len,
                                                    &bytes_committed, NULL);
-               if (ret < 0) {
+               if (ret < 0 && ret != -EAGAIN) {
                        mlx5_ib_warn(dev, "Prefetch failed. ret: %d, QP 0x%x, address: 0x%.16llx, length = 0x%.16x\n",
-                                    ret, pfault->token, address,
-                                    prefetch_len);
+                                    ret, pfault->token, address, prefetch_len);
                }
        }
 }
@@ -728,19 +1107,61 @@ void mlx5_ib_pfault(struct mlx5_core_dev *mdev, void *context,
        }
 }
 
-int mlx5_ib_odp_init_one(struct mlx5_ib_dev *ibdev)
+void mlx5_odp_init_mr_cache_entry(struct mlx5_cache_ent *ent)
+{
+       if (!(ent->dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
+               return;
+
+       switch (ent->order - 2) {
+       case MLX5_IMR_MTT_CACHE_ENTRY:
+               ent->page = PAGE_SHIFT;
+               ent->xlt = MLX5_IMR_MTT_ENTRIES *
+                          sizeof(struct mlx5_mtt) /
+                          MLX5_IB_UMR_OCTOWORD;
+               ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
+               ent->limit = 0;
+               break;
+
+       case MLX5_IMR_KSM_CACHE_ENTRY:
+               ent->page = MLX5_KSM_PAGE_SHIFT;
+               ent->xlt = mlx5_imr_ksm_entries *
+                          sizeof(struct mlx5_klm) /
+                          MLX5_IB_UMR_OCTOWORD;
+               ent->access_mode = MLX5_MKC_ACCESS_MODE_KSM;
+               ent->limit = 0;
+               break;
+       }
+}
+
+int mlx5_ib_odp_init_one(struct mlx5_ib_dev *dev)
 {
        int ret;
 
-       ret = init_srcu_struct(&ibdev->mr_srcu);
+       ret = init_srcu_struct(&dev->mr_srcu);
        if (ret)
                return ret;
 
+       if (dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT) {
+               ret = mlx5_cmd_null_mkey(dev->mdev, &dev->null_mkey);
+               if (ret) {
+                       mlx5_ib_err(dev, "Error getting null_mkey %d\n", ret);
+                       return ret;
+               }
+       }
+
        return 0;
 }
 
-void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *ibdev)
+void mlx5_ib_odp_remove_one(struct mlx5_ib_dev *dev)
+{
+       cleanup_srcu_struct(&dev->mr_srcu);
+}
+
+int mlx5_ib_odp_init(void)
 {
-       cleanup_srcu_struct(&ibdev->mr_srcu);
+       mlx5_imr_ksm_entries = BIT_ULL(get_order(TASK_SIZE) -
+                                      MLX5_IMR_MTT_BITS);
+
+       return 0;
 }
 
index e31bf11ae64fccdda3bb85863ea4da0da902b9bb..ad8a2638e339b4bf0d7e866cfbf0fa08b8f13a1b 100644 (file)
@@ -905,7 +905,10 @@ static int create_kernel_qp(struct mlx5_ib_dev *dev,
        else
                qp->bf.bfreg = &dev->bfreg;
 
-       qp->bf.buf_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
+       /* We need to divide by two since each register is comprised of
+        * two buffers of identical size, namely odd and even
+        */
+       qp->bf.buf_size = (1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size)) / 2;
        uar_index = qp->bf.bfreg->index;
 
        err = calc_sq_size(dev, init_attr, qp);
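
As a worked example of the comment above (the value is illustrative, not taken from this patch): with log_bf_reg_size = 10 the blue flame register is 1 << 10 = 1024 bytes, so each of the odd/even halves, and therefore qp->bf.buf_size, comes out to 512 bytes.
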
@@ -1141,7 +1144,8 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
                return -ENOMEM;
 
        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
-       MLX5_SET(rqc, rqc, vsd, 1);
+       if (!(rq->flags & MLX5_IB_RQ_CVLAN_STRIPPING))
+               MLX5_SET(rqc, rqc, vsd, 1);
        MLX5_SET(rqc, rqc, mem_rq_type, MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
        MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RST);
        MLX5_SET(rqc, rqc, flush_in_error_en, 1);
@@ -1238,6 +1242,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        if (qp->rq.wqe_cnt) {
                rq->base.container_mibqp = qp;
 
+               if (qp->flags & MLX5_IB_QP_CVLAN_STRIPPING)
+                       rq->flags |= MLX5_IB_RQ_CVLAN_STRIPPING;
                err = create_raw_packet_qp_rq(dev, rq, in);
                if (err)
                        goto err_destroy_sq;
@@ -1559,6 +1565,14 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
        if (init_attr->sq_sig_type == IB_SIGNAL_ALL_WR)
                qp->sq_signal_bits = MLX5_WQE_CTRL_CQ_UPDATE;
 
+       if (init_attr->create_flags & IB_QP_CREATE_CVLAN_STRIPPING) {
+               if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+                     MLX5_CAP_ETH(dev->mdev, vlan_cap)) ||
+                   (init_attr->qp_type != IB_QPT_RAW_PACKET))
+                       return -EOPNOTSUPP;
+               qp->flags |= MLX5_IB_QP_CVLAN_STRIPPING;
+       }
+
        if (pd && pd->uobject) {
                if (ib_copy_from_udata(&ucmd, udata, sizeof(ucmd))) {
                        mlx5_ib_dbg(dev, "copy failed\n");
@@ -2198,6 +2212,7 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 {
        enum rdma_link_layer ll = rdma_port_get_link_layer(&dev->ib_dev, port);
        int err;
+       enum ib_gid_type gid_type;
 
        if (attr_mask & IB_QP_PKEY_INDEX)
                path->pkey_index = cpu_to_be16(alt ? attr->alt_pkey_index :
@@ -2216,10 +2231,16 @@ static int mlx5_set_path(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        if (ll == IB_LINK_LAYER_ETHERNET) {
                if (!(ah->ah_flags & IB_AH_GRH))
                        return -EINVAL;
+               err = mlx5_get_roce_gid_type(dev, port, ah->grh.sgid_index,
+                                            &gid_type);
+               if (err)
+                       return err;
                memcpy(path->rmac, ah->dmac, sizeof(ah->dmac));
                path->udp_sport = mlx5_get_roce_udp_sport(dev, port,
                                                          ah->grh.sgid_index);
                path->dci_cfi_prio_sl = (ah->sl & 0x7) << 4;
+               if (gid_type == IB_GID_TYPE_ROCE_UDP_ENCAP)
+                       path->ecn_dscp = (ah->grh.traffic_class >> 2) & 0x3f;
        } else {
                path->fl_free_ar = (path_flags & MLX5_PATH_FLAG_FL) ? 0x80 : 0;
                path->fl_free_ar |=
@@ -2422,7 +2443,7 @@ static int modify_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
        if (raw_qp_param->set_mask & MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID) {
                if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
                        MLX5_SET64(modify_rq_in, in, modify_bitmask,
-                                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID);
+                                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
                        MLX5_SET(rqc, rqc, counter_set_id, raw_qp_param->rq_q_ctr_id);
                } else
                        pr_info_once("%s: RAW PACKET QP counters are not supported on current FW\n",
@@ -2777,7 +2798,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
                               qp->port) - 1;
                mibport = &dev->port[port_num];
                context->qp_counter_set_usr_page |=
-                       cpu_to_be32((u32)(mibport->q_cnt_id) << 24);
+                       cpu_to_be32((u32)(mibport->q_cnts.set_id) << 24);
        }
 
        if (!ibqp->uobject && cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
@@ -2805,7 +2826,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 
                raw_qp_param.operation = op;
                if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT) {
-                       raw_qp_param.rq_q_ctr_id = mibport->q_cnt_id;
+                       raw_qp_param.rq_q_ctr_id = mibport->q_cnts.set_id;
                        raw_qp_param.set_mask |= MLX5_RAW_QP_MOD_SET_RQ_Q_CTR_ID;
                }
 
@@ -3637,8 +3658,9 @@ static int set_psv_wr(struct ib_sig_domain *domain,
                psv_seg->ref_tag = cpu_to_be32(domain->sig.dif.ref_tag);
                break;
        default:
-               pr_err("Bad signature type given.\n");
-               return 1;
+               pr_err("Bad signature type (%d) is given.\n",
+                      domain->sig_type);
+               return -EINVAL;
        }
 
        *seg += sizeof(*psv_seg);
@@ -3978,6 +4000,12 @@ int mlx5_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
                        break;
 
                case IB_QPT_SMI:
+                       if (unlikely(!mdev->port_caps[qp->port - 1].has_smi)) {
+                               mlx5_ib_warn(dev, "Send SMP MADs is not allowed\n");
+                               err = -EPERM;
+                               *bad_wr = wr;
+                               goto out;
+                       }
                case MLX5_IB_QPT_HW_GSI:
                        set_datagram_seg(seg, wr);
                        seg += sizeof(struct mlx5_wqe_datagram_seg);
@@ -4579,6 +4607,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
                      struct ib_wq_init_attr *init_attr)
 {
        struct mlx5_ib_dev *dev;
+       int has_net_offloads;
        __be64 *rq_pas0;
        void *in;
        void *rqc;
@@ -4610,9 +4639,28 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
        MLX5_SET(wq, wq, log_wq_pg_sz, rwq->log_page_size);
        MLX5_SET(wq, wq, wq_signature, rwq->wq_sig);
        MLX5_SET64(wq, wq, dbr_addr, rwq->db.dma);
+       has_net_offloads = MLX5_CAP_GEN(dev->mdev, eth_net_offloads);
+       if (init_attr->create_flags & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+               if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+                       mlx5_ib_dbg(dev, "VLAN offloads are not supported\n");
+                       err = -EOPNOTSUPP;
+                       goto out;
+               }
+       } else {
+               MLX5_SET(rqc, rqc, vsd, 1);
+       }
+       if (init_attr->create_flags & IB_WQ_FLAGS_SCATTER_FCS) {
+               if (!(has_net_offloads && MLX5_CAP_ETH(dev->mdev, scatter_fcs))) {
+                       mlx5_ib_dbg(dev, "Scatter FCS is not supported\n");
+                       err = -EOPNOTSUPP;
+                       goto out;
+               }
+               MLX5_SET(rqc, rqc, scatter_fcs, 1);
+       }
        rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
        mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
        err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+out:
        kvfree(in);
        return err;
 }
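
create_rq() above only leaves VLAN stripping enabled (vsd cleared) or programs scatter_fcs when the matching device capabilities are present; userspace asks for either behaviour through the WQ create flags. A minimal sketch, assuming an rdma-core new enough to define IBV_WQ_FLAGS_CVLAN_STRIPPING and IBV_WQ_INIT_ATTR_FLAGS (these names describe the userspace library, not this patch), with error handling reduced to early returns:

/* Sketch: ask for CVLAN stripping on a receive WQ. The flag is only honoured
 * when the device reports the matching capability; otherwise ibv_create_wq()
 * fails, mirroring the -EOPNOTSUPP checks in the kernel hunks above. */
#include <stdio.h>
#include <infiniband/verbs.h>

int main(void)
{
	struct ibv_device **list = ibv_get_device_list(NULL);
	struct ibv_context *ctx;
	struct ibv_pd *pd;
	struct ibv_cq *cq;
	struct ibv_wq *wq;

	if (!list || !list[0])
		return 1;
	ctx = ibv_open_device(list[0]);
	if (!ctx)
		return 1;
	pd = ibv_alloc_pd(ctx);
	cq = ibv_create_cq(ctx, 64, NULL, NULL, 0);
	if (!pd || !cq)
		return 1;

	struct ibv_wq_init_attr attr = {
		.wq_type      = IBV_WQT_RQ,
		.max_wr       = 64,
		.max_sge      = 1,
		.pd           = pd,
		.cq           = cq,
		.comp_mask    = IBV_WQ_INIT_ATTR_FLAGS,
		.create_flags = IBV_WQ_FLAGS_CVLAN_STRIPPING,
	};

	wq = ibv_create_wq(ctx, &attr);
	if (!wq) {
		perror("ibv_create_wq");
		return 1;
	}
	printf("WQ %u created with CVLAN stripping\n", wq->wq_num);

	ibv_destroy_wq(wq);
	ibv_destroy_cq(cq);
	ibv_dealloc_pd(pd);
	ibv_close_device(ctx);
	ibv_free_device_list(list);
	return 0;
}

The same flag can presumably be toggled after creation with ibv_modify_wq() and IBV_WQ_ATTR_FLAGS, which is what the MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD path in the modify-WQ hunk below implements on the kernel side; again, the exact userspace names are assumptions about rdma-core rather than part of this patch.
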
@@ -4896,10 +4944,37 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
        MLX5_SET(modify_rq_in, in, rq_state, curr_wq_state);
        MLX5_SET(rqc, rqc, state, wq_state);
 
+       if (wq_attr_mask & IB_WQ_FLAGS) {
+               if (wq_attr->flags_mask & IB_WQ_FLAGS_CVLAN_STRIPPING) {
+                       if (!(MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
+                             MLX5_CAP_ETH(dev->mdev, vlan_cap))) {
+                               mlx5_ib_dbg(dev, "VLAN offloads are not "
+                                           "supported\n");
+                               err = -EOPNOTSUPP;
+                               goto out;
+                       }
+                       MLX5_SET64(modify_rq_in, in, modify_bitmask,
+                                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD);
+                       MLX5_SET(rqc, rqc, vsd,
+                                (wq_attr->flags & IB_WQ_FLAGS_CVLAN_STRIPPING) ? 0 : 1);
+               }
+       }
+
+       if (curr_wq_state == IB_WQS_RESET && wq_state == IB_WQS_RDY) {
+               if (MLX5_CAP_GEN(dev->mdev, modify_rq_counter_set_id)) {
+                       MLX5_SET64(modify_rq_in, in, modify_bitmask,
+                                  MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID);
+                       MLX5_SET(rqc, rqc, counter_set_id, dev->port->q_cnts.set_id);
+               } else
+                       pr_info_once("%s: Receive WQ counters are not supported on current FW\n",
+                                    dev->ib_dev.name);
+       }
+
        err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
-       kvfree(in);
        if (!err)
                rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
 
+out:
+       kvfree(in);
        return err;
 }
index 6f4397ee1ed63267e7b2b2aba825b50258321d5a..7cb145f9a6dbc3cb1cf63b5141c4bf421c2e6923 100644 (file)
@@ -165,8 +165,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        int err;
        int i;
        struct mlx5_wqe_srq_next_seg *next;
-       int page_shift;
-       int npages;
 
        err = mlx5_db_alloc(dev->mdev, &srq->db);
        if (err) {
@@ -179,7 +177,6 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
                err = -ENOMEM;
                goto err_db;
        }
-       page_shift = srq->buf.page_shift;
 
        srq->head    = 0;
        srq->tail    = srq->msrq.max - 1;
@@ -191,10 +188,8 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
                        cpu_to_be16((i + 1) & (srq->msrq.max - 1));
        }
 
-       npages = DIV_ROUND_UP(srq->buf.npages, 1 << (page_shift - PAGE_SHIFT));
-       mlx5_ib_dbg(dev, "buf_size %d, page_shift %d, npages %d, calc npages %d\n",
-                   buf_size, page_shift, srq->buf.npages, npages);
-       in->pas = mlx5_vzalloc(sizeof(*in->pas) * npages);
+       mlx5_ib_dbg(dev, "srq->buf.page_shift = %d\n", srq->buf.page_shift);
+       in->pas = mlx5_vzalloc(sizeof(*in->pas) * srq->buf.npages);
        if (!in->pas) {
                err = -ENOMEM;
                goto err_buf;
@@ -208,7 +203,7 @@ static int create_srq_kernel(struct mlx5_ib_dev *dev, struct mlx5_ib_srq *srq,
        }
        srq->wq_sig = !!srq_signature;
 
-       in->log_page_size = page_shift - MLX5_ADAPTER_PAGE_SHIFT;
+       in->log_page_size = srq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT;
        if (MLX5_CAP_GEN(dev->mdev, cqe_version) == MLX5_CQE_VERSION_V1 &&
            in->type == IB_SRQT_XRC)
                in->user_index = MLX5_IB_DEFAULT_UIDX;
index d31708742ba5b1814d28135e4e41b273b6e651ba..ce163184e7422450044f271348b1fba67871d001 100644 (file)
@@ -146,7 +146,7 @@ static int mthca_query_port(struct ib_device *ibdev,
        if (!in_mad || !out_mad)
                goto out;
 
-       memset(props, 0, sizeof *props);
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        init_query_mad(in_mad);
        in_mad->attr_id  = IB_SMP_ATTR_PORT_INFO;
@@ -212,7 +212,7 @@ static int mthca_modify_port(struct ib_device *ibdev,
        if (mutex_lock_interruptible(&to_mdev(ibdev)->cap_mask_mutex))
                return -ERESTARTSYS;
 
-       err = mthca_query_port(ibdev, port, &attr);
+       err = ib_query_port(ibdev, port, &attr);
        if (err)
                goto out;
 
@@ -1166,13 +1166,14 @@ static int mthca_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = mthca_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_IB;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
index 5a31f3c6a4211d507cc4634c49df53021bba505b..d3eae2f3e9f504957305e4bda59f837327bc69f7 100644 (file)
@@ -475,7 +475,7 @@ static int nes_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr
        struct nes_vnic *nesvnic = to_nesvnic(ibdev);
        struct net_device *netdev = nesvnic->netdev;
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        props->max_mtu = IB_MTU_4096;
        props->active_mtu = ib_mtu_int_to_enum(netdev->mtu);
@@ -3660,13 +3660,14 @@ static int nes_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
+       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
+
        err = nes_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IWARP;
 
        return 0;
 }
index 896071502739a8b48042948badbfc3d6aeae5b24..3e43bdc81e7a5b49574c5b5460e0f7a38636e199 100644 (file)
@@ -93,15 +93,16 @@ static int ocrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
        int err;
 
        dev = get_ocrdma_dev(ibdev);
-       err = ocrdma_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+       if (ocrdma_is_udp_encap_supported(dev))
+               immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
-       if (ocrdma_is_udp_encap_supported(dev))
-               immutable->core_cap_flags |= RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
index e06ad72509636414414eb393076d425fe7e32c9e..bc9fb144e57b8e3a05863223156f25b7f17e6a34 100644 (file)
@@ -210,6 +210,7 @@ int ocrdma_query_port(struct ib_device *ibdev,
        struct ocrdma_dev *dev;
        struct net_device *netdev;
 
+       /* props being zeroed by the caller, avoid zeroing it here */
        dev = get_ocrdma_dev(ibdev);
        if (port > 1) {
                pr_err("%s(%d) invalid_port=0x%x\n", __func__,
index 0c51657af151c02e8cae67bdedca882232d05fff..6b3bb32803bd8661d9efebd14dcefff0b601f6f3 100644 (file)
@@ -238,8 +238,8 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
        }
 
        rdma_port = dev->ops->rdma_query_port(dev->rdma_ctx);
-       memset(attr, 0, sizeof(*attr));
 
+       /* *attr being zeroed by the caller, avoid zeroing it here */
        if (rdma_port->port_state == QED_RDMA_PORT_UP) {
                attr->state = IB_PORT_ACTIVE;
                attr->phys_state = 5;
@@ -3494,14 +3494,15 @@ int qedr_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = qedr_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
+                                   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE |
-                                   RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
index b0b78e1cec9282dcf95bcccdd88e13fb1d4dfb69..6b56f1c01a0789f9335691f57535947df4afa2a9 100644 (file)
@@ -1220,6 +1220,7 @@ static int qib_query_port(struct rvt_dev_info *rdi, u8 port_num,
        enum ib_mtu mtu;
        u16 lid = ppd->lid;
 
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->lid = lid ? lid : be16_to_cpu(IB_LID_PERMISSIVE);
        props->lmc = ppd->lmc;
        props->state = dd->f_iblink_state(ppd->lastibcstat);
index 0a89a955550b29ce327751ce5c2274638b1eccb9..4f5a45db08e1889e7f4b072b7a92dfa5a5bacb6a 100644 (file)
@@ -321,7 +321,9 @@ static int usnic_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = usnic_ib_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_USNIC;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
index 69df8e353123c893aa1f34277bc86dd961bffdea..3284730d3c0923d33f3c2ea916c0f38f71c515ca 100644 (file)
@@ -330,7 +330,7 @@ int usnic_ib_query_port(struct ib_device *ibdev, u8 port,
 
        mutex_lock(&us_ibdev->usdev_lock);
        __ethtool_get_link_ksettings(us_ibdev->netdev, &cmd);
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        props->lid = 0;
        props->lmc = 1;
index 60cdb77195650c25f1b3776897ef239ca2e730a7..e03d2f6c1f90ed4f7f9782027d73ba9c44ab4f3e 100644 (file)
@@ -132,13 +132,14 @@ static int pvrdma_port_immutable(struct ib_device *ibdev, u8 port_num,
        struct ib_port_attr attr;
        int err;
 
-       err = pvrdma_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
        return 0;
 }
index c2aa52638dcb81ea4539b61c43d61f55edefb2b3..fec17c49103b9a89c992e23a93e335ffefea573a 100644 (file)
@@ -135,7 +135,7 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
                return err;
        }
 
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
 
        props->state = pvrdma_port_state_to_ib(resp->attrs.state);
        props->max_mtu = pvrdma_mtu_to_ib(resp->attrs.max_mtu);
@@ -275,7 +275,7 @@ int pvrdma_modify_port(struct ib_device *ibdev, u8 port, int mask,
        }
 
        mutex_lock(&vdev->port_mutex);
-       ret = pvrdma_query_port(ibdev, port, &attr);
+       ret = ib_query_port(ibdev, port, &attr);
        if (ret)
                goto out;
 
index d430c2f7cec4cea4fc24f465dedd0b30e27f29ce..1165639a914bf52518eee2b3f48b7a061bd9608c 100644 (file)
@@ -165,7 +165,7 @@ static int rvt_query_port(struct ib_device *ibdev, u8 port_num,
                return -EINVAL;
 
        rvp = rdi->ports[port_index];
-       memset(props, 0, sizeof(*props));
+       /* props being zeroed by the caller, avoid zeroing it here */
        props->sm_lid = rvp->sm_lid;
        props->sm_sl = rvp->sm_sl;
        props->port_cap_flags = rvp->port_cap_flags;
@@ -326,13 +326,14 @@ static int rvt_get_port_immutable(struct ib_device *ibdev, u8 port_num,
        if (port_index < 0)
                return -EINVAL;
 
-       err = rvt_query_port(ibdev, port_num, &attr);
+       immutable->core_cap_flags = rdi->dparms.core_cap_flags;
+
+       err = ib_query_port(ibdev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = rdi->dparms.core_cap_flags;
        immutable->max_mad_size = rdi->dparms.max_mad_size;
 
        return 0;
index e4de37fb9aabc0d634f0c95a2045a248e00a8f04..d2e2eff7a515dd31ac5bd12cde06a80c5f806fcb 100644 (file)
@@ -86,6 +86,7 @@ static int rxe_query_port(struct ib_device *dev,
 
        port = &rxe->port;
 
+       /* *attr being zeroed by the caller, avoid zeroing it here */
        *attr = port->attr;
 
        mutex_lock(&rxe->usdev_lock);
@@ -261,13 +262,14 @@ static int rxe_port_immutable(struct ib_device *dev, u8 port_num,
        int err;
        struct ib_port_attr attr;
 
-       err = rxe_query_port(dev, port_num, &attr);
+       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
+
+       err = ib_query_port(dev, port_num, &attr);
        if (err)
                return err;
 
        immutable->pkey_tbl_len = attr.pkey_tbl_len;
        immutable->gid_tbl_len = attr.gid_tbl_len;
-       immutable->core_cap_flags = RDMA_CORE_PORT_IBA_ROCE_UDP_ENCAP;
        immutable->max_mad_size = IB_MGMT_MAD_SIZE;
 
        return 0;
index ce3d92106386b31d5aa44dae7a347c9d0830c49e..2478516a61e2ea547f5ae8af0c3aae7228e64db9 100644 (file)
@@ -1232,10 +1232,18 @@ static struct mlx5_flow_handle *add_rule_fg(struct mlx5_flow_group *fg,
        fs_for_each_fte(fte, fg) {
                nested_lock_ref_node(&fte->node, FS_MUTEX_CHILD);
                if (compare_match_value(&fg->mask, match_value, &fte->val) &&
-                   (flow_act->action & fte->action) &&
-                   flow_act->flow_tag == fte->flow_tag) {
+                   (flow_act->action & fte->action)) {
                        int old_action = fte->action;
 
+                       if (fte->flow_tag != flow_act->flow_tag) {
+                               mlx5_core_warn(get_dev(&fte->node),
+                                              "FTE flow tag %u already exists with different flow tag %u\n",
+                                              fte->flow_tag,
+                                              flow_act->flow_tag);
+                               handle = ERR_PTR(-EEXIST);
+                               goto unlock_fte;
+                       }
+
                        fte->action |= flow_act->action;
                        handle = add_rule_fte(fte, fg, dest, dest_num,
                                              old_action != flow_act->action);
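
With the check above, a rule whose match value hits an existing FTE but carries a different flow tag is rejected instead of silently reusing the old tag. A sketch of how a caller would observe this; the mlx5_add_flow_rules() entry point and its exact signature are assumed from the mlx5 flow steering API of this net-next base and are not part of this diff:

#include <linux/err.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/mlx5_ifc.h>

static int foo_add_tagged_rule(struct mlx5_flow_table *ft,
                               struct mlx5_flow_spec *spec,
                               struct mlx5_flow_destination *dst)
{
        struct mlx5_flow_act flow_act = {
                .action   = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
                .flow_tag = 0x123,      /* illustrative tag value */
        };
        struct mlx5_flow_handle *handle;

        handle = mlx5_add_flow_rules(ft, spec, &flow_act, dst, 1);
        if (IS_ERR(handle))
                return PTR_ERR(handle); /* -EEXIST on a flow tag conflict */
        return 0;
}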
index 1bc4641734da943e80af43af5c82e4eda0a62319..2fcff6b4503f6a4824bea50c189b072ef6c486cb 100644 (file)
@@ -295,6 +295,7 @@ struct mlx5_port_caps {
        int     gid_table_len;
        int     pkey_table_len;
        u8      ext_port_cap;
+       bool    has_smi;
 };
 
 struct mlx5_cmd_mailbox {
@@ -1061,7 +1062,10 @@ enum {
 };
 
 enum {
-       MAX_MR_CACHE_ENTRIES    = 21,
+       MAX_UMR_CACHE_ENTRY = 20,
+       MLX5_IMR_MTT_CACHE_ENTRY,
+       MLX5_IMR_KSM_CACHE_ENTRY,
+       MAX_MR_CACHE_ENTRIES
 };
 
 enum {
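
Only the first enumerator has an explicit value, so the two new implicit-MR cache buckets take the following values and MAX_MR_CACHE_ENTRIES grows from 21 to 23. A standalone C11 illustration of the implied numbering (not kernel code):

#include <assert.h>

enum {
        MAX_UMR_CACHE_ENTRY = 20,
        MLX5_IMR_MTT_CACHE_ENTRY,       /* implicitly 21 */
        MLX5_IMR_KSM_CACHE_ENTRY,       /* implicitly 22 */
        MAX_MR_CACHE_ENTRIES            /* implicitly 23 */
};

static_assert(MLX5_IMR_MTT_CACHE_ENTRY == 21, "first implicit value");
static_assert(MLX5_IMR_KSM_CACHE_ENTRY == 22, "second implicit value");
static_assert(MAX_MR_CACHE_ENTRIES == 23, "cache grows from 21 to 23 entries");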
index afcd4736d8df7b57450e6d94c1b67bea7f55e561..838242697541a28fdda4d90bf7b604e25f3bfba2 100644 (file)
@@ -5013,7 +5013,7 @@ struct mlx5_ifc_modify_rq_out_bits {
 
 enum {
        MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1,
-       MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_MODIFY_RQ_COUNTER_SET_ID = 1ULL << 3,
+       MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3,
 };
 
 struct mlx5_ifc_modify_rq_in_bits {
index 3da0b167041b477e14a3e731e9de76e85148b46c..542cd8b3414c14f5fc65aa85006836fffe7ed42a 100644 (file)
@@ -79,11 +79,15 @@ struct ib_umem_odp {
 
        struct completion       notifier_completion;
        int                     dying;
+       struct work_struct      work;
 };
 
 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
 
 int ib_umem_odp_get(struct ib_ucontext *context, struct ib_umem *umem);
+struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+                                 unsigned long addr,
+                                 size_t size);
 
 void ib_umem_odp_release(struct ib_umem *umem);
 
@@ -117,10 +121,12 @@ typedef int (*umem_call_back)(struct ib_umem *item, u64 start, u64 end,
 int rbt_ib_umem_for_each_in_range(struct rb_root *root, u64 start, u64 end,
                                  umem_call_back cb, void *cookie);
 
-struct umem_odp_node *rbt_ib_umem_iter_first(struct rb_root *root,
-                                            u64 start, u64 last);
-struct umem_odp_node *rbt_ib_umem_iter_next(struct umem_odp_node *node,
-                                           u64 start, u64 last);
+/*
+ * Find first region intersecting with address range.
+ * Return NULL if not found
+ */
+struct ib_umem_odp *rbt_ib_umem_lookup(struct rb_root *root,
+                                      u64 addr, u64 length);
 
 static inline int ib_umem_mmu_notifier_retry(struct ib_umem *item,
                                             unsigned long mmu_seq)
@@ -153,6 +159,13 @@ static inline int ib_umem_odp_get(struct ib_ucontext *context,
        return -EINVAL;
 }
 
+static inline struct ib_umem *ib_alloc_odp_umem(struct ib_ucontext *context,
+                                               unsigned long addr,
+                                               size_t size)
+{
+       return ERR_PTR(-EINVAL);
+}
+
 static inline void ib_umem_odp_release(struct ib_umem *umem) {}
 
 #endif /* CONFIG_INFINIBAND_ON_DEMAND_PAGING */
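
ib_alloc_odp_umem() lets a caller create an ODP umem that is not the result of an ib_umem_get() on a user mapping, and the stub above keeps !CONFIG_INFINIBAND_ON_DEMAND_PAGING builds working by returning ERR_PTR(-EINVAL). A minimal caller sketch; the foo_ helper is hypothetical and the eventual teardown path is driver-specific:

#include <linux/err.h>
#include <rdma/ib_umem_odp.h>

static int foo_alloc_child_umem(struct ib_ucontext *context,
                                unsigned long addr, size_t size)
{
        struct ib_umem *umem;

        umem = ib_alloc_odp_umem(context, addr, size);
        if (IS_ERR(umem))
                return PTR_ERR(umem);   /* -EINVAL when ODP is compiled out */

        /* ... hand the umem to the implicit-MR machinery here ... */
        return 0;
}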
index 8c61532cf5218e214f75f5dd50be74f30dcac5d7..89f5bd4e1d5201c847ff77823b6b4159a741ef2f 100644 (file)
@@ -207,6 +207,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_MEM_WINDOW_TYPE_2A            = (1 << 23),
        IB_DEVICE_MEM_WINDOW_TYPE_2B            = (1 << 24),
        IB_DEVICE_RC_IP_CSUM                    = (1 << 25),
+       /* Deprecated. Please use IB_RAW_PACKET_CAP_IP_CSUM. */
        IB_DEVICE_RAW_IP_CSUM                   = (1 << 26),
        /*
         * Devices should set IB_DEVICE_CROSS_CHANNEL if they
@@ -220,6 +221,7 @@ enum ib_device_cap_flags {
        IB_DEVICE_ON_DEMAND_PAGING              = (1ULL << 31),
        IB_DEVICE_SG_GAPS_REG                   = (1ULL << 32),
        IB_DEVICE_VIRTUAL_FUNCTION              = (1ULL << 33),
+       /* Deprecated. Please use IB_RAW_PACKET_CAP_SCATTER_FCS. */
        IB_DEVICE_RAW_SCATTER_FCS               = (1ULL << 34),
 };
 
@@ -241,7 +243,8 @@ enum ib_atomic_cap {
 };
 
 enum ib_odp_general_cap_bits {
-       IB_ODP_SUPPORT = 1 << 0,
+       IB_ODP_SUPPORT          = 1 << 0,
+       IB_ODP_SUPPORT_IMPLICIT = 1 << 1,
 };
 
 enum ib_odp_transport_cap_bits {
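
IB_ODP_SUPPORT_IMPLICIT is how a device advertises the new implicit on-demand paging registration alongside the existing IB_ODP_SUPPORT bit. A minimal capability check (the helper name is illustrative):

#include <rdma/ib_verbs.h>

static bool foo_supports_implicit_odp(const struct ib_device_attr *attr)
{
        return !!(attr->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT);
}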
@@ -330,6 +333,7 @@ struct ib_device_attr {
        uint64_t                hca_core_clock; /* in KHZ */
        struct ib_rss_caps      rss_caps;
        u32                     max_wq_type_rq;
+       u32                     raw_packet_caps; /* Use ib_raw_packet_caps enum */
 };
 
 enum ib_mtu {
@@ -499,6 +503,8 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_CAP_PROT_ROCE         0x00200000
 #define RDMA_CORE_CAP_PROT_IWARP        0x00400000
 #define RDMA_CORE_CAP_PROT_ROCE_UDP_ENCAP 0x00800000
+#define RDMA_CORE_CAP_PROT_RAW_PACKET   0x01000000
+#define RDMA_CORE_CAP_PROT_USNIC        0x02000000
 
 #define RDMA_CORE_PORT_IBA_IB          (RDMA_CORE_CAP_PROT_IB  \
                                        | RDMA_CORE_CAP_IB_MAD \
@@ -522,6 +528,10 @@ static inline struct rdma_hw_stats *rdma_alloc_hw_stats_struct(
 #define RDMA_CORE_PORT_INTEL_OPA       (RDMA_CORE_PORT_IBA_IB  \
                                        | RDMA_CORE_CAP_OPA_MAD)
 
+#define RDMA_CORE_PORT_RAW_PACKET      (RDMA_CORE_CAP_PROT_RAW_PACKET)
+
+#define RDMA_CORE_PORT_USNIC           (RDMA_CORE_CAP_PROT_USNIC)
+
 struct ib_port_attr {
        u64                     subnet_prefix;
        enum ib_port_state      state;
@@ -1019,6 +1029,7 @@ enum ib_qp_create_flags {
        IB_QP_CREATE_SIGNATURE_EN               = 1 << 6,
        IB_QP_CREATE_USE_GFP_NOIO               = 1 << 7,
        IB_QP_CREATE_SCATTER_FCS                = 1 << 8,
+       IB_QP_CREATE_CVLAN_STRIPPING            = 1 << 9,
        /* reserve bits 26-31 for low level drivers' internal use */
        IB_QP_CREATE_RESERVED_START             = 1 << 26,
        IB_QP_CREATE_RESERVED_END               = 1 << 31,
@@ -1470,6 +1481,18 @@ struct ib_srq {
        } ext;
 };
 
+enum ib_raw_packet_caps {
+       /* Stripping the cvlan from an incoming packet and reporting it in
+        * the matching work completion is supported.
+        */
+       IB_RAW_PACKET_CAP_CVLAN_STRIPPING       = (1 << 0),
+       /* Scattering the FCS field of an incoming packet to host memory is
+        * supported. */
+       IB_RAW_PACKET_CAP_SCATTER_FCS           = (1 << 1),
+       /* Checksum offloads are supported (for both send and receive). */
+       IB_RAW_PACKET_CAP_IP_CSUM               = (1 << 2),
+};
+
 enum ib_wq_type {
        IB_WQT_RQ
 };
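
raw_packet_caps, added to struct ib_device_attr above, lets a consumer discover these offloads before requesting them at QP creation time. A minimal sketch mapping the capability bits to the matching QP create flags (the helper is illustrative):

#include <rdma/ib_verbs.h>

static u32 foo_raw_qp_create_flags(const struct ib_device_attr *attr)
{
        u32 flags = 0;

        if (attr->raw_packet_caps & IB_RAW_PACKET_CAP_CVLAN_STRIPPING)
                flags |= IB_QP_CREATE_CVLAN_STRIPPING;
        if (attr->raw_packet_caps & IB_RAW_PACKET_CAP_SCATTER_FCS)
                flags |= IB_QP_CREATE_SCATTER_FCS;

        return flags;
}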
@@ -1493,6 +1516,11 @@ struct ib_wq {
        atomic_t                usecnt;
 };
 
+enum ib_wq_flags {
+       IB_WQ_FLAGS_CVLAN_STRIPPING     = 1 << 0,
+       IB_WQ_FLAGS_SCATTER_FCS         = 1 << 1,
+};
+
 struct ib_wq_init_attr {
        void                   *wq_context;
        enum ib_wq_type wq_type;
@@ -1500,16 +1528,20 @@ struct ib_wq_init_attr {
        u32             max_sge;
        struct  ib_cq          *cq;
        void                (*event_handler)(struct ib_event *, void *);
+       u32             create_flags; /* Use enum ib_wq_flags */
 };
 
 enum ib_wq_attr_mask {
-       IB_WQ_STATE     = 1 << 0,
-       IB_WQ_CUR_STATE = 1 << 1,
+       IB_WQ_STATE             = 1 << 0,
+       IB_WQ_CUR_STATE         = 1 << 1,
+       IB_WQ_FLAGS             = 1 << 2,
 };
 
 struct ib_wq_attr {
        enum    ib_wq_state     wq_state;
        enum    ib_wq_state     curr_wq_state;
+       u32                     flags; /* Use enum ib_wq_flags */
+       u32                     flags_mask; /* Use enum ib_wq_flags */
 };
 
 struct ib_rwq_ind_table {
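
The IB_WQ_FLAGS attribute mask plus the flags/flags_mask pair allows cvlan stripping to be toggled on an existing WQ rather than only requested through create_flags. A sketch of enabling it at runtime, assuming the existing ib_modify_wq() verb (the helper is illustrative):

#include <rdma/ib_verbs.h>

static int foo_enable_wq_cvlan_stripping(struct ib_wq *wq)
{
        struct ib_wq_attr attr = {
                .flags      = IB_WQ_FLAGS_CVLAN_STRIPPING,
                .flags_mask = IB_WQ_FLAGS_CVLAN_STRIPPING,
        };

        return ib_modify_wq(wq, &attr, IB_WQ_FLAGS);
}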
@@ -1618,6 +1650,8 @@ enum ib_flow_spec_type {
        IB_FLOW_SPEC_UDP                = 0x41,
        IB_FLOW_SPEC_VXLAN_TUNNEL       = 0x50,
        IB_FLOW_SPEC_INNER              = 0x100,
+       /* Actions */
+       IB_FLOW_SPEC_ACTION_TAG         = 0x1000,
 };
 #define IB_FLOW_SPEC_LAYER_MASK        0xF0
 #define IB_FLOW_SPEC_SUPPORT_LAYERS 8
@@ -1740,6 +1774,12 @@ struct ib_flow_spec_tunnel {
        struct ib_flow_tunnel_filter  mask;
 };
 
+struct ib_flow_spec_action_tag {
+       enum ib_flow_spec_type        type;
+       u16                           size;
+       u32                           tag_id;
+};
+
 union ib_flow_spec {
        struct {
                u32                     type;
@@ -1751,6 +1791,7 @@ union ib_flow_spec {
        struct ib_flow_spec_tcp_udp     tcp_udp;
        struct ib_flow_spec_ipv6        ipv6;
        struct ib_flow_spec_tunnel      tunnel;
+       struct ib_flow_spec_action_tag  flow_tag;
 };
 
 struct ib_flow_attr {
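
The new flow_tag member of union ib_flow_spec makes it straightforward to append a tag action to a flow specification. A minimal filler (the helper name is illustrative):

#include <rdma/ib_verbs.h>

static void foo_fill_flow_tag_spec(union ib_flow_spec *spec, u32 tag)
{
        spec->flow_tag.type   = IB_FLOW_SPEC_ACTION_TAG;
        spec->flow_tag.size   = sizeof(struct ib_flow_spec_action_tag);
        spec->flow_tag.tag_id = tag;
}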
@@ -2333,6 +2374,16 @@ static inline bool rdma_ib_or_roce(const struct ib_device *device, u8 port_num)
                rdma_protocol_roce(device, port_num);
 }
 
+static inline bool rdma_protocol_raw_packet(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_RAW_PACKET;
+}
+
+static inline bool rdma_protocol_usnic(const struct ib_device *device, u8 port_num)
+{
+       return device->port_immutable[port_num].core_cap_flags & RDMA_CORE_CAP_PROT_USNIC;
+}
+
 /**
  * rdma_cap_ib_mad - Check if the port of a device supports Infiniband
  * Management Datagrams.
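
With RDMA_CORE_PORT_RAW_PACKET and RDMA_CORE_PORT_USNIC defined, the rdma_protocol_*() family can now classify these ports as well. A small sketch that walks a device's ports using the existing rdma_start_port()/rdma_end_port() helpers (the function is illustrative):

#include <linux/printk.h>
#include <rdma/ib_verbs.h>

static void foo_log_port_protocols(struct ib_device *dev)
{
        u8 port;

        for (port = rdma_start_port(dev); port <= rdma_end_port(dev); port++)
                pr_info("%s port %u: raw_packet=%d usnic=%d\n",
                        dev->name, port,
                        rdma_protocol_raw_packet(dev, port),
                        rdma_protocol_usnic(dev, port));
}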
index f4f87cff6dc6c5fc9f1a91e32c9e8d3a601f10c8..997f904c76923057a376e0f4f0772ac82dcacef7 100644 (file)
@@ -246,7 +246,7 @@ struct ib_uverbs_ex_query_device_resp {
        __u64 device_cap_flags_ex;
        struct ib_uverbs_rss_caps rss_caps;
        __u32  max_wq_type_rq;
-       __u32 reserved;
+       __u32 raw_packet_caps;
 };
 
 struct ib_uverbs_query_port {
@@ -934,6 +934,19 @@ struct ib_uverbs_flow_spec_ipv6 {
        struct ib_uverbs_flow_ipv6_filter mask;
 };
 
+struct ib_uverbs_flow_spec_action_tag {
+       union {
+               struct ib_uverbs_flow_spec_hdr hdr;
+               struct {
+                       __u32 type;
+                       __u16 size;
+                       __u16 reserved;
+               };
+       };
+       __u32                         tag_id;
+       __u32                         reserved1;
+};
+
 struct ib_uverbs_flow_tunnel_filter {
        __be32 tunnel_id;
 };
@@ -1053,6 +1066,8 @@ struct ib_uverbs_ex_create_wq  {
        __u32 cq_handle;
        __u32 max_wr;
        __u32 max_sge;
+       __u32 create_flags; /* Use enum ib_wq_flags */
+       __u32 reserved;
 };
 
 struct ib_uverbs_ex_create_wq_resp {
@@ -1081,6 +1096,8 @@ struct ib_uverbs_ex_modify_wq  {
        __u32 wq_handle;
        __u32 wq_state;
        __u32 curr_wq_state;
+       __u32 flags; /* Use enum ib_wq_flags */
+       __u32 flags_mask; /* Use enum ib_wq_flags */
 };
 
 /* Prevent memory allocation rather than max expected size */
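
The new uverbs spec is laid out as an 8-byte header (type, size, reserved; the hdr member of the union covers the same leading fields) followed by tag_id and reserved1, 16 bytes in total. A standalone C11 layout check mirroring the fields above (not the kernel header itself):

#include <assert.h>
#include <stdint.h>

struct action_tag_layout {
        uint32_t type;
        uint16_t size;
        uint16_t reserved;
        uint32_t tag_id;
        uint32_t reserved1;
};

static_assert(sizeof(struct action_tag_layout) == 16,
              "the action-tag spec occupies 16 bytes");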