drivers/infiniband/hw/mlx5/main.c
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32
33 #include <linux/debugfs.h>
34 #include <linux/highmem.h>
35 #include <linux/module.h>
36 #include <linux/init.h>
37 #include <linux/errno.h>
38 #include <linux/pci.h>
39 #include <linux/dma-mapping.h>
40 #include <linux/slab.h>
41 #include <linux/bitmap.h>
42 #if defined(CONFIG_X86)
43 #include <asm/pat.h>
44 #endif
45 #include <linux/sched.h>
46 #include <linux/sched/mm.h>
47 #include <linux/sched/task.h>
48 #include <linux/delay.h>
49 #include <rdma/ib_user_verbs.h>
50 #include <rdma/ib_addr.h>
51 #include <rdma/ib_cache.h>
52 #include <linux/mlx5/port.h>
53 #include <linux/mlx5/vport.h>
54 #include <linux/mlx5/fs.h>
55 #include <linux/mlx5/eswitch.h>
56 #include <linux/list.h>
57 #include <rdma/ib_smi.h>
58 #include <rdma/ib_umem.h>
59 #include <linux/in.h>
60 #include <linux/etherdevice.h>
61 #include "mlx5_ib.h"
62 #include "ib_rep.h"
63 #include "cmd.h"
64 #include "srq.h"
65 #include <linux/mlx5/fs_helpers.h>
66 #include <linux/mlx5/accel.h>
67 #include <rdma/uverbs_std_types.h>
68 #include <rdma/mlx5_user_ioctl_verbs.h>
69 #include <rdma/mlx5_user_ioctl_cmds.h>
70 #include <rdma/ib_umem_odp.h>
71
72 #define UVERBS_MODULE_NAME mlx5_ib
73 #include <rdma/uverbs_named_ioctl.h>
74
75 #define DRIVER_NAME "mlx5_ib"
76 #define DRIVER_VERSION "5.0-0"
77
78 MODULE_AUTHOR("Eli Cohen <eli@mellanox.com>");
79 MODULE_DESCRIPTION("Mellanox Connect-IB HCA IB driver");
80 MODULE_LICENSE("Dual BSD/GPL");
81
82 static char mlx5_version[] =
83         DRIVER_NAME ": Mellanox Connect-IB InfiniBand driver v"
84         DRIVER_VERSION "\n";
85
86 struct mlx5_ib_event_work {
87         struct work_struct      work;
88         union {
89                 struct mlx5_ib_dev            *dev;
90                 struct mlx5_ib_multiport_info *mpi;
91         };
92         bool                    is_slave;
93         unsigned int            event;
94         void                    *param;
95 };
96
97 enum {
98         MLX5_ATOMIC_SIZE_QP_8BYTES = 1 << 3,
99 };
100
101 static struct workqueue_struct *mlx5_ib_event_wq;
102 static LIST_HEAD(mlx5_ib_unaffiliated_port_list);
103 static LIST_HEAD(mlx5_ib_dev_list);
104 /*
105  * This mutex should be held when accessing either of the above lists
106  */
107 static DEFINE_MUTEX(mlx5_ib_multiport_mutex);
108
109 /* We can't use an array for xlt_emergency_page because dma_map_single
110  * doesn't work on kernel module memory
111  */
112 static unsigned long xlt_emergency_page;
113 static struct mutex xlt_emergency_page_mutex;
114
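/*
 * Return the ib_dev currently affiliated with this multiport info. The
 * multiport mutex is taken so that a concurrent (un)affiliation cannot
 * change mpi->ibdev while it is being read.
 */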
115 struct mlx5_ib_dev *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mpi)
116 {
117         struct mlx5_ib_dev *dev;
118
119         mutex_lock(&mlx5_ib_multiport_mutex);
120         dev = mpi->ibdev;
121         mutex_unlock(&mlx5_ib_multiport_mutex);
122         return dev;
123 }
124
125 static enum rdma_link_layer
126 mlx5_port_type_cap_to_rdma_ll(int port_type_cap)
127 {
128         switch (port_type_cap) {
129         case MLX5_CAP_PORT_TYPE_IB:
130                 return IB_LINK_LAYER_INFINIBAND;
131         case MLX5_CAP_PORT_TYPE_ETH:
132                 return IB_LINK_LAYER_ETHERNET;
133         default:
134                 return IB_LINK_LAYER_UNSPECIFIED;
135         }
136 }
137
138 static enum rdma_link_layer
139 mlx5_ib_port_link_layer(struct ib_device *device, u8 port_num)
140 {
141         struct mlx5_ib_dev *dev = to_mdev(device);
142         int port_type_cap = MLX5_CAP_GEN(dev->mdev, port_type);
143
144         return mlx5_port_type_cap_to_rdma_ll(port_type_cap);
145 }
146
147 static int get_port_state(struct ib_device *ibdev,
148                           u8 port_num,
149                           enum ib_port_state *state)
150 {
151         struct ib_port_attr attr;
152         int ret;
153
154         memset(&attr, 0, sizeof(attr));
155         ret = ibdev->ops.query_port(ibdev, port_num, &attr);
156         if (!ret)
157                 *state = attr.state;
158         return ret;
159 }
160
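/*
 * For switchdev (representor) devices: find the port whose representor
 * netdev matches @ndev and return its mlx5_roce, reporting the 1-based
 * port number through @port_num.
 */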
161 static struct mlx5_roce *mlx5_get_rep_roce(struct mlx5_ib_dev *dev,
162                                            struct net_device *ndev,
163                                            u8 *port_num)
164 {
165         struct mlx5_eswitch *esw = dev->mdev->priv.eswitch;
166         struct net_device *rep_ndev;
167         struct mlx5_ib_port *port;
168         int i;
169
170         for (i = 0; i < dev->num_ports; i++) {
171                 port  = &dev->port[i];
172                 if (!port->rep)
173                         continue;
174
175                 read_lock(&port->roce.netdev_lock);
176                 rep_ndev = mlx5_ib_get_rep_netdev(esw,
177                                                   port->rep->vport);
178                 if (rep_ndev == ndev) {
179                         read_unlock(&port->roce.netdev_lock);
180                         *port_num = i + 1;
181                         return &port->roce;
182                 }
183                 read_unlock(&port->roce.netdev_lock);
184         }
185
186         return NULL;
187 }
188
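/*
 * Netdev notifier callback: keep roce->netdev in sync on register/unregister
 * and, on link/state changes, dispatch IB_EVENT_PORT_ACTIVE or
 * IB_EVENT_PORT_ERR when the IB port state actually changes.
 */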
189 static int mlx5_netdev_event(struct notifier_block *this,
190                              unsigned long event, void *ptr)
191 {
192         struct mlx5_roce *roce = container_of(this, struct mlx5_roce, nb);
193         struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
194         u8 port_num = roce->native_port_num;
195         struct mlx5_core_dev *mdev;
196         struct mlx5_ib_dev *ibdev;
197
198         ibdev = roce->dev;
199         mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
200         if (!mdev)
201                 return NOTIFY_DONE;
202
203         switch (event) {
204         case NETDEV_REGISTER:
205                 /* Should already be registered during the load */
206                 if (ibdev->is_rep)
207                         break;
208                 write_lock(&roce->netdev_lock);
209                 if (ndev->dev.parent == mdev->device)
210                         roce->netdev = ndev;
211                 write_unlock(&roce->netdev_lock);
212                 break;
213
214         case NETDEV_UNREGISTER:
215                 /* In case of reps, ib device goes away before the netdevs */
216                 write_lock(&roce->netdev_lock);
217                 if (roce->netdev == ndev)
218                         roce->netdev = NULL;
219                 write_unlock(&roce->netdev_lock);
220                 break;
221
222         case NETDEV_CHANGE:
223         case NETDEV_UP:
224         case NETDEV_DOWN: {
225                 struct net_device *lag_ndev = mlx5_lag_get_roce_netdev(mdev);
226                 struct net_device *upper = NULL;
227
228                 if (lag_ndev) {
229                         upper = netdev_master_upper_dev_get(lag_ndev);
230                         dev_put(lag_ndev);
231                 }
232
233                 if (ibdev->is_rep)
234                         roce = mlx5_get_rep_roce(ibdev, ndev, &port_num);
235                 if (!roce)
236                         return NOTIFY_DONE;
237                 if ((upper == ndev || (!upper && ndev == roce->netdev))
238                     && ibdev->ib_active) {
239                         struct ib_event ibev = { };
240                         enum ib_port_state port_state;
241
242                         if (get_port_state(&ibdev->ib_dev, port_num,
243                                            &port_state))
244                                 goto done;
245
246                         if (roce->last_port_state == port_state)
247                                 goto done;
248
249                         roce->last_port_state = port_state;
250                         ibev.device = &ibdev->ib_dev;
251                         if (port_state == IB_PORT_DOWN)
252                                 ibev.event = IB_EVENT_PORT_ERR;
253                         else if (port_state == IB_PORT_ACTIVE)
254                                 ibev.event = IB_EVENT_PORT_ACTIVE;
255                         else
256                                 goto done;
257
258                         ibev.element.port_num = port_num;
259                         ib_dispatch_event(&ibev);
260                 }
261                 break;
262         }
263
264         default:
265                 break;
266         }
267 done:
268         mlx5_ib_put_native_port_mdev(ibdev, port_num);
269         return NOTIFY_DONE;
270 }
271
272 static struct net_device *mlx5_ib_get_netdev(struct ib_device *device,
273                                              u8 port_num)
274 {
275         struct mlx5_ib_dev *ibdev = to_mdev(device);
276         struct net_device *ndev;
277         struct mlx5_core_dev *mdev;
278
279         mdev = mlx5_ib_get_native_port_mdev(ibdev, port_num, NULL);
280         if (!mdev)
281                 return NULL;
282
283         ndev = mlx5_lag_get_roce_netdev(mdev);
284         if (ndev)
285                 goto out;
286
287         /* Ensure ndev does not disappear before we invoke dev_hold()
288          */
289         read_lock(&ibdev->port[port_num - 1].roce.netdev_lock);
290         ndev = ibdev->port[port_num - 1].roce.netdev;
291         if (ndev)
292                 dev_hold(ndev);
293         read_unlock(&ibdev->port[port_num - 1].roce.netdev_lock);
294
295 out:
296         mlx5_ib_put_native_port_mdev(ibdev, port_num);
297         return ndev;
298 }
299
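/*
 * Resolve the mlx5_core_dev that owns @ib_port_num. On a multiport RoCE
 * setup this may be an affiliated slave, in which case a reference is
 * taken that must be released with mlx5_ib_put_native_port_mdev().
 * Returns NULL if the port is not currently affiliated.
 */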
300 struct mlx5_core_dev *mlx5_ib_get_native_port_mdev(struct mlx5_ib_dev *ibdev,
301                                                    u8 ib_port_num,
302                                                    u8 *native_port_num)
303 {
304         enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
305                                                           ib_port_num);
306         struct mlx5_core_dev *mdev = NULL;
307         struct mlx5_ib_multiport_info *mpi;
308         struct mlx5_ib_port *port;
309
310         if (!mlx5_core_mp_enabled(ibdev->mdev) ||
311             ll != IB_LINK_LAYER_ETHERNET) {
312                 if (native_port_num)
313                         *native_port_num = ib_port_num;
314                 return ibdev->mdev;
315         }
316
317         if (native_port_num)
318                 *native_port_num = 1;
319
320         port = &ibdev->port[ib_port_num - 1];
321         if (!port)
322                 return NULL;
323
324         spin_lock(&port->mp.mpi_lock);
325         mpi = ibdev->port[ib_port_num - 1].mp.mpi;
326         if (mpi && !mpi->unaffiliate) {
327                 mdev = mpi->mdev;
328                 /* If it's the master, there is no need to refcount; it will
329                  * exist as long as the ib_dev exists.
330                  */
331                 if (!mpi->is_master)
332                         mpi->mdev_refcnt++;
333         }
334         spin_unlock(&port->mp.mpi_lock);
335
336         return mdev;
337 }
338
339 void mlx5_ib_put_native_port_mdev(struct mlx5_ib_dev *ibdev, u8 port_num)
340 {
341         enum rdma_link_layer ll = mlx5_ib_port_link_layer(&ibdev->ib_dev,
342                                                           port_num);
343         struct mlx5_ib_multiport_info *mpi;
344         struct mlx5_ib_port *port;
345
346         if (!mlx5_core_mp_enabled(ibdev->mdev) || ll != IB_LINK_LAYER_ETHERNET)
347                 return;
348
349         port = &ibdev->port[port_num - 1];
350
351         spin_lock(&port->mp.mpi_lock);
352         mpi = ibdev->port[port_num - 1].mp.mpi;
353         if (mpi->is_master)
354                 goto out;
355
356         mpi->mdev_refcnt--;
357         if (mpi->unaffiliate)
358                 complete(&mpi->unref_comp);
359 out:
360         spin_unlock(&port->mp.mpi_lock);
361 }
362
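/*
 * Translate the legacy PTYS eth_proto_oper bitmask into the closest
 * InfiniBand speed/width pair reported through the verbs interface.
 */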
363 static int translate_eth_legacy_proto_oper(u32 eth_proto_oper, u8 *active_speed,
364                                            u8 *active_width)
365 {
366         switch (eth_proto_oper) {
367         case MLX5E_PROT_MASK(MLX5E_1000BASE_CX_SGMII):
368         case MLX5E_PROT_MASK(MLX5E_1000BASE_KX):
369         case MLX5E_PROT_MASK(MLX5E_100BASE_TX):
370         case MLX5E_PROT_MASK(MLX5E_1000BASE_T):
371                 *active_width = IB_WIDTH_1X;
372                 *active_speed = IB_SPEED_SDR;
373                 break;
374         case MLX5E_PROT_MASK(MLX5E_10GBASE_T):
375         case MLX5E_PROT_MASK(MLX5E_10GBASE_CX4):
376         case MLX5E_PROT_MASK(MLX5E_10GBASE_KX4):
377         case MLX5E_PROT_MASK(MLX5E_10GBASE_KR):
378         case MLX5E_PROT_MASK(MLX5E_10GBASE_CR):
379         case MLX5E_PROT_MASK(MLX5E_10GBASE_SR):
380         case MLX5E_PROT_MASK(MLX5E_10GBASE_ER):
381                 *active_width = IB_WIDTH_1X;
382                 *active_speed = IB_SPEED_QDR;
383                 break;
384         case MLX5E_PROT_MASK(MLX5E_25GBASE_CR):
385         case MLX5E_PROT_MASK(MLX5E_25GBASE_KR):
386         case MLX5E_PROT_MASK(MLX5E_25GBASE_SR):
387                 *active_width = IB_WIDTH_1X;
388                 *active_speed = IB_SPEED_EDR;
389                 break;
390         case MLX5E_PROT_MASK(MLX5E_40GBASE_CR4):
391         case MLX5E_PROT_MASK(MLX5E_40GBASE_KR4):
392         case MLX5E_PROT_MASK(MLX5E_40GBASE_SR4):
393         case MLX5E_PROT_MASK(MLX5E_40GBASE_LR4):
394                 *active_width = IB_WIDTH_4X;
395                 *active_speed = IB_SPEED_QDR;
396                 break;
397         case MLX5E_PROT_MASK(MLX5E_50GBASE_CR2):
398         case MLX5E_PROT_MASK(MLX5E_50GBASE_KR2):
399         case MLX5E_PROT_MASK(MLX5E_50GBASE_SR2):
400                 *active_width = IB_WIDTH_1X;
401                 *active_speed = IB_SPEED_HDR;
402                 break;
403         case MLX5E_PROT_MASK(MLX5E_56GBASE_R4):
404                 *active_width = IB_WIDTH_4X;
405                 *active_speed = IB_SPEED_FDR;
406                 break;
407         case MLX5E_PROT_MASK(MLX5E_100GBASE_CR4):
408         case MLX5E_PROT_MASK(MLX5E_100GBASE_SR4):
409         case MLX5E_PROT_MASK(MLX5E_100GBASE_KR4):
410         case MLX5E_PROT_MASK(MLX5E_100GBASE_LR4):
411                 *active_width = IB_WIDTH_4X;
412                 *active_speed = IB_SPEED_EDR;
413                 break;
414         default:
415                 return -EINVAL;
416         }
417
418         return 0;
419 }
420
421 static int translate_eth_ext_proto_oper(u32 eth_proto_oper, u8 *active_speed,
422                                         u8 *active_width)
423 {
424         switch (eth_proto_oper) {
425         case MLX5E_PROT_MASK(MLX5E_SGMII_100M):
426         case MLX5E_PROT_MASK(MLX5E_1000BASE_X_SGMII):
427                 *active_width = IB_WIDTH_1X;
428                 *active_speed = IB_SPEED_SDR;
429                 break;
430         case MLX5E_PROT_MASK(MLX5E_5GBASE_R):
431                 *active_width = IB_WIDTH_1X;
432                 *active_speed = IB_SPEED_DDR;
433                 break;
434         case MLX5E_PROT_MASK(MLX5E_10GBASE_XFI_XAUI_1):
435                 *active_width = IB_WIDTH_1X;
436                 *active_speed = IB_SPEED_QDR;
437                 break;
438         case MLX5E_PROT_MASK(MLX5E_40GBASE_XLAUI_4_XLPPI_4):
439                 *active_width = IB_WIDTH_4X;
440                 *active_speed = IB_SPEED_QDR;
441                 break;
442         case MLX5E_PROT_MASK(MLX5E_25GAUI_1_25GBASE_CR_KR):
443                 *active_width = IB_WIDTH_1X;
444                 *active_speed = IB_SPEED_EDR;
445                 break;
446         case MLX5E_PROT_MASK(MLX5E_50GAUI_2_LAUI_2_50GBASE_CR2_KR2):
447                 *active_width = IB_WIDTH_2X;
448                 *active_speed = IB_SPEED_EDR;
449                 break;
450         case MLX5E_PROT_MASK(MLX5E_50GAUI_1_LAUI_1_50GBASE_CR_KR):
451                 *active_width = IB_WIDTH_1X;
452                 *active_speed = IB_SPEED_HDR;
453                 break;
454         case MLX5E_PROT_MASK(MLX5E_CAUI_4_100GBASE_CR4_KR4):
455                 *active_width = IB_WIDTH_4X;
456                 *active_speed = IB_SPEED_EDR;
457                 break;
458         case MLX5E_PROT_MASK(MLX5E_100GAUI_2_100GBASE_CR2_KR2):
459                 *active_width = IB_WIDTH_2X;
460                 *active_speed = IB_SPEED_HDR;
461                 break;
462         case MLX5E_PROT_MASK(MLX5E_200GAUI_4_200GBASE_CR4_KR4):
463                 *active_width = IB_WIDTH_4X;
464                 *active_speed = IB_SPEED_HDR;
465                 break;
466         default:
467                 return -EINVAL;
468         }
469
470         return 0;
471 }
472
473 static int translate_eth_proto_oper(u32 eth_proto_oper, u8 *active_speed,
474                                     u8 *active_width, bool ext)
475 {
476         return ext ?
477                 translate_eth_ext_proto_oper(eth_proto_oper, active_speed,
478                                              active_width) :
479                 translate_eth_legacy_proto_oper(eth_proto_oper, active_speed,
480                                                 active_width);
481 }
482
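/*
 * Fill ib_port_attr for an Ethernet (RoCE) port: speed and width are
 * derived from PTYS, while state and active MTU come from the associated
 * netdev (or its LAG master when LAG is active).
 */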
483 static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
484                                 struct ib_port_attr *props)
485 {
486         struct mlx5_ib_dev *dev = to_mdev(device);
487         u32 out[MLX5_ST_SZ_DW(ptys_reg)] = {0};
488         struct mlx5_core_dev *mdev;
489         struct net_device *ndev, *upper;
490         enum ib_mtu ndev_ib_mtu;
491         bool put_mdev = true;
492         u16 qkey_viol_cntr;
493         u32 eth_prot_oper;
494         u8 mdev_port_num;
495         bool ext;
496         int err;
497
498         mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
499         if (!mdev) {
500                 /* This means the port isn't affiliated yet. Get the
501                  * info for the master port instead.
502                  */
503                 put_mdev = false;
504                 mdev = dev->mdev;
505                 mdev_port_num = 1;
506                 port_num = 1;
507         }
508
509         /* Possible bad flows are checked before filling out props, so in
510          * case of an error it will still be zeroed out.
511          * Use the native port in the case of representors.
512          */
513         if (dev->is_rep)
514                 err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
515                                            1);
516         else
517                 err = mlx5_query_port_ptys(mdev, out, sizeof(out), MLX5_PTYS_EN,
518                                            mdev_port_num);
519         if (err)
520                 goto out;
521         ext = MLX5_CAP_PCAM_FEATURE(dev->mdev, ptys_extended_ethernet);
522         eth_prot_oper = MLX5_GET_ETH_PROTO(ptys_reg, out, ext, eth_proto_oper);
523
524         props->active_width     = IB_WIDTH_4X;
525         props->active_speed     = IB_SPEED_QDR;
526
527         translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
528                                  &props->active_width, ext);
529
530         props->port_cap_flags |= IB_PORT_CM_SUP;
531         props->ip_gids = true;
532
533         props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
534                                                 roce_address_table_size);
535         props->max_mtu          = IB_MTU_4096;
536         props->max_msg_sz       = 1 << MLX5_CAP_GEN(dev->mdev, log_max_msg);
537         props->pkey_tbl_len     = 1;
538         props->state            = IB_PORT_DOWN;
539         props->phys_state       = IB_PORT_PHYS_STATE_DISABLED;
540
541         mlx5_query_nic_vport_qkey_viol_cntr(mdev, &qkey_viol_cntr);
542         props->qkey_viol_cntr = qkey_viol_cntr;
543
544         /* If this is a stub query for an unaffiliated port stop here */
545         if (!put_mdev)
546                 goto out;
547
548         ndev = mlx5_ib_get_netdev(device, port_num);
549         if (!ndev)
550                 goto out;
551
552         if (dev->lag_active) {
553                 rcu_read_lock();
554                 upper = netdev_master_upper_dev_get_rcu(ndev);
555                 if (upper) {
556                         dev_put(ndev);
557                         ndev = upper;
558                         dev_hold(ndev);
559                 }
560                 rcu_read_unlock();
561         }
562
563         if (netif_running(ndev) && netif_carrier_ok(ndev)) {
564                 props->state      = IB_PORT_ACTIVE;
565                 props->phys_state = IB_PORT_PHYS_STATE_LINK_UP;
566         }
567
568         ndev_ib_mtu = iboe_get_mtu(ndev->mtu);
569
570         dev_put(ndev);
571
572         props->active_mtu       = min(props->max_mtu, ndev_ib_mtu);
573 out:
574         if (put_mdev)
575                 mlx5_ib_put_native_port_mdev(dev, port_num);
576         return err;
577 }
578
579 static int set_roce_addr(struct mlx5_ib_dev *dev, u8 port_num,
580                          unsigned int index, const union ib_gid *gid,
581                          const struct ib_gid_attr *attr)
582 {
583         enum ib_gid_type gid_type = IB_GID_TYPE_IB;
584         u16 vlan_id = 0xffff;
585         u8 roce_version = 0;
586         u8 roce_l3_type = 0;
587         u8 mac[ETH_ALEN];
588         int ret;
589
590         if (gid) {
591                 gid_type = attr->gid_type;
592                 ret = rdma_read_gid_l2_fields(attr, &vlan_id, &mac[0]);
593                 if (ret)
594                         return ret;
595         }
596
597         switch (gid_type) {
598         case IB_GID_TYPE_IB:
599                 roce_version = MLX5_ROCE_VERSION_1;
600                 break;
601         case IB_GID_TYPE_ROCE_UDP_ENCAP:
602                 roce_version = MLX5_ROCE_VERSION_2;
603                 if (ipv6_addr_v4mapped((void *)gid))
604                         roce_l3_type = MLX5_ROCE_L3_TYPE_IPV4;
605                 else
606                         roce_l3_type = MLX5_ROCE_L3_TYPE_IPV6;
607                 break;
608
609         default:
610                 mlx5_ib_warn(dev, "Unexpected GID type %u\n", gid_type);
611         }
612
613         return mlx5_core_roce_gid_set(dev->mdev, index, roce_version,
614                                       roce_l3_type, gid->raw, mac,
615                                       vlan_id < VLAN_CFI_MASK, vlan_id,
616                                       port_num);
617 }
618
619 static int mlx5_ib_add_gid(const struct ib_gid_attr *attr,
620                            __always_unused void **context)
621 {
622         return set_roce_addr(to_mdev(attr->device), attr->port_num,
623                              attr->index, &attr->gid, attr);
624 }
625
626 static int mlx5_ib_del_gid(const struct ib_gid_attr *attr,
627                            __always_unused void **context)
628 {
629         return set_roce_addr(to_mdev(attr->device), attr->port_num,
630                              attr->index, NULL, NULL);
631 }
632
633 __be16 mlx5_get_roce_udp_sport(struct mlx5_ib_dev *dev,
634                                const struct ib_gid_attr *attr)
635 {
636         if (attr->gid_type != IB_GID_TYPE_ROCE_UDP_ENCAP)
637                 return 0;
638
639         return cpu_to_be16(MLX5_CAP_ROCE(dev->mdev, r_roce_min_src_udp_port));
640 }
641
642 static int mlx5_use_mad_ifc(struct mlx5_ib_dev *dev)
643 {
644         if (MLX5_CAP_GEN(dev->mdev, port_type) == MLX5_CAP_PORT_TYPE_IB)
645                 return !MLX5_CAP_GEN(dev->mdev, ib_virt);
646         return 0;
647 }
648
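/*
 * Vport attributes can be queried through MADs (IB link layer without
 * ib_virt support), the HCA vport context, or the NIC vport context
 * (Ethernet link layer).
 */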
649 enum {
650         MLX5_VPORT_ACCESS_METHOD_MAD,
651         MLX5_VPORT_ACCESS_METHOD_HCA,
652         MLX5_VPORT_ACCESS_METHOD_NIC,
653 };
654
655 static int mlx5_get_vport_access_method(struct ib_device *ibdev)
656 {
657         if (mlx5_use_mad_ifc(to_mdev(ibdev)))
658                 return MLX5_VPORT_ACCESS_METHOD_MAD;
659
660         if (mlx5_ib_port_link_layer(ibdev, 1) ==
661             IB_LINK_LAYER_ETHERNET)
662                 return MLX5_VPORT_ACCESS_METHOD_NIC;
663
664         return MLX5_VPORT_ACCESS_METHOD_HCA;
665 }
666
667 static void get_atomic_caps(struct mlx5_ib_dev *dev,
668                             u8 atomic_size_qp,
669                             struct ib_device_attr *props)
670 {
671         u8 tmp;
672         u8 atomic_operations = MLX5_CAP_ATOMIC(dev->mdev, atomic_operations);
673         u8 atomic_req_8B_endianness_mode =
674                 MLX5_CAP_ATOMIC(dev->mdev, atomic_req_8B_endianness_mode);
675
676         /* Check if HW supports 8-byte standard atomic operations and is
677          * capable of responding in host endianness
678          */
679         tmp = MLX5_ATOMIC_OPS_CMP_SWAP | MLX5_ATOMIC_OPS_FETCH_ADD;
680         if (((atomic_operations & tmp) == tmp) &&
681             (atomic_size_qp & MLX5_ATOMIC_SIZE_QP_8BYTES) &&
682             (atomic_req_8B_endianness_mode)) {
683                 props->atomic_cap = IB_ATOMIC_HCA;
684         } else {
685                 props->atomic_cap = IB_ATOMIC_NONE;
686         }
687 }
688
689 static void get_atomic_caps_qp(struct mlx5_ib_dev *dev,
690                                struct ib_device_attr *props)
691 {
692         u8 atomic_size_qp = MLX5_CAP_ATOMIC(dev->mdev, atomic_size_qp);
693
694         get_atomic_caps(dev, atomic_size_qp, props);
695 }
696
697 static int mlx5_query_system_image_guid(struct ib_device *ibdev,
698                                         __be64 *sys_image_guid)
699 {
700         struct mlx5_ib_dev *dev = to_mdev(ibdev);
701         struct mlx5_core_dev *mdev = dev->mdev;
702         u64 tmp;
703         int err;
704
705         switch (mlx5_get_vport_access_method(ibdev)) {
706         case MLX5_VPORT_ACCESS_METHOD_MAD:
707                 return mlx5_query_mad_ifc_system_image_guid(ibdev,
708                                                             sys_image_guid);
709
710         case MLX5_VPORT_ACCESS_METHOD_HCA:
711                 err = mlx5_query_hca_vport_system_image_guid(mdev, &tmp);
712                 break;
713
714         case MLX5_VPORT_ACCESS_METHOD_NIC:
715                 err = mlx5_query_nic_vport_system_image_guid(mdev, &tmp);
716                 break;
717
718         default:
719                 return -EINVAL;
720         }
721
722         if (!err)
723                 *sys_image_guid = cpu_to_be64(tmp);
724
725         return err;
726
727 }
728
729 static int mlx5_query_max_pkeys(struct ib_device *ibdev,
730                                 u16 *max_pkeys)
731 {
732         struct mlx5_ib_dev *dev = to_mdev(ibdev);
733         struct mlx5_core_dev *mdev = dev->mdev;
734
735         switch (mlx5_get_vport_access_method(ibdev)) {
736         case MLX5_VPORT_ACCESS_METHOD_MAD:
737                 return mlx5_query_mad_ifc_max_pkeys(ibdev, max_pkeys);
738
739         case MLX5_VPORT_ACCESS_METHOD_HCA:
740         case MLX5_VPORT_ACCESS_METHOD_NIC:
741                 *max_pkeys = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev,
742                                                 pkey_table_size));
743                 return 0;
744
745         default:
746                 return -EINVAL;
747         }
748 }
749
750 static int mlx5_query_vendor_id(struct ib_device *ibdev,
751                                 u32 *vendor_id)
752 {
753         struct mlx5_ib_dev *dev = to_mdev(ibdev);
754
755         switch (mlx5_get_vport_access_method(ibdev)) {
756         case MLX5_VPORT_ACCESS_METHOD_MAD:
757                 return mlx5_query_mad_ifc_vendor_id(ibdev, vendor_id);
758
759         case MLX5_VPORT_ACCESS_METHOD_HCA:
760         case MLX5_VPORT_ACCESS_METHOD_NIC:
761                 return mlx5_core_query_vendor_id(dev->mdev, vendor_id);
762
763         default:
764                 return -EINVAL;
765         }
766 }
767
768 static int mlx5_query_node_guid(struct mlx5_ib_dev *dev,
769                                 __be64 *node_guid)
770 {
771         u64 tmp;
772         int err;
773
774         switch (mlx5_get_vport_access_method(&dev->ib_dev)) {
775         case MLX5_VPORT_ACCESS_METHOD_MAD:
776                 return mlx5_query_mad_ifc_node_guid(dev, node_guid);
777
778         case MLX5_VPORT_ACCESS_METHOD_HCA:
779                 err = mlx5_query_hca_vport_node_guid(dev->mdev, &tmp);
780                 break;
781
782         case MLX5_VPORT_ACCESS_METHOD_NIC:
783                 err = mlx5_query_nic_vport_node_guid(dev->mdev, &tmp);
784                 break;
785
786         default:
787                 return -EINVAL;
788         }
789
790         if (!err)
791                 *node_guid = cpu_to_be64(tmp);
792
793         return err;
794 }
795
796 struct mlx5_reg_node_desc {
797         u8      desc[IB_DEVICE_NODE_DESC_MAX];
798 };
799
800 static int mlx5_query_node_desc(struct mlx5_ib_dev *dev, char *node_desc)
801 {
802         struct mlx5_reg_node_desc in;
803
804         if (mlx5_use_mad_ifc(dev))
805                 return mlx5_query_mad_ifc_node_desc(dev, node_desc);
806
807         memset(&in, 0, sizeof(in));
808
809         return mlx5_core_access_reg(dev->mdev, &in, sizeof(in), node_desc,
810                                     sizeof(struct mlx5_reg_node_desc),
811                                     MLX5_REG_NODE_DESC, 0, 0);
812 }
813
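/*
 * Report core device attributes and, when userspace supplied a response
 * buffer (uhw), extended mlx5-specific capabilities such as TSO, RSS,
 * CQE compression, packet pacing, SW parsing, striding RQ and tunnel
 * offloads. Each extended field is only filled in if the caller's buffer
 * is large enough to hold it.
 */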
814 static int mlx5_ib_query_device(struct ib_device *ibdev,
815                                 struct ib_device_attr *props,
816                                 struct ib_udata *uhw)
817 {
818         struct mlx5_ib_dev *dev = to_mdev(ibdev);
819         struct mlx5_core_dev *mdev = dev->mdev;
820         int err = -ENOMEM;
821         int max_sq_desc;
822         int max_rq_sg;
823         int max_sq_sg;
824         u64 min_page_size = 1ull << MLX5_CAP_GEN(mdev, log_pg_sz);
825         bool raw_support = !mlx5_core_mp_enabled(mdev);
826         struct mlx5_ib_query_device_resp resp = {};
827         size_t resp_len;
828         u64 max_tso;
829
830         resp_len = sizeof(resp.comp_mask) + sizeof(resp.response_length);
831         if (uhw->outlen && uhw->outlen < resp_len)
832                 return -EINVAL;
833
834         resp.response_length = resp_len;
835
836         if (uhw->inlen && !ib_is_udata_cleared(uhw, 0, uhw->inlen))
837                 return -EINVAL;
838
839         memset(props, 0, sizeof(*props));
840         err = mlx5_query_system_image_guid(ibdev,
841                                            &props->sys_image_guid);
842         if (err)
843                 return err;
844
845         err = mlx5_query_max_pkeys(ibdev, &props->max_pkeys);
846         if (err)
847                 return err;
848
849         err = mlx5_query_vendor_id(ibdev, &props->vendor_id);
850         if (err)
851                 return err;
852
853         props->fw_ver = ((u64)fw_rev_maj(dev->mdev) << 32) |
854                 (fw_rev_min(dev->mdev) << 16) |
855                 fw_rev_sub(dev->mdev);
856         props->device_cap_flags    = IB_DEVICE_CHANGE_PHY_PORT |
857                 IB_DEVICE_PORT_ACTIVE_EVENT             |
858                 IB_DEVICE_SYS_IMAGE_GUID                |
859                 IB_DEVICE_RC_RNR_NAK_GEN;
860
861         if (MLX5_CAP_GEN(mdev, pkv))
862                 props->device_cap_flags |= IB_DEVICE_BAD_PKEY_CNTR;
863         if (MLX5_CAP_GEN(mdev, qkv))
864                 props->device_cap_flags |= IB_DEVICE_BAD_QKEY_CNTR;
865         if (MLX5_CAP_GEN(mdev, apm))
866                 props->device_cap_flags |= IB_DEVICE_AUTO_PATH_MIG;
867         if (MLX5_CAP_GEN(mdev, xrc))
868                 props->device_cap_flags |= IB_DEVICE_XRC;
869         if (MLX5_CAP_GEN(mdev, imaicl)) {
870                 props->device_cap_flags |= IB_DEVICE_MEM_WINDOW |
871                                            IB_DEVICE_MEM_WINDOW_TYPE_2B;
872                 props->max_mw = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
873                 /* We support 'Gappy' memory registration too */
874                 props->device_cap_flags |= IB_DEVICE_SG_GAPS_REG;
875         }
876         props->device_cap_flags |= IB_DEVICE_MEM_MGT_EXTENSIONS;
877         if (MLX5_CAP_GEN(mdev, sho)) {
878                 props->device_cap_flags |= IB_DEVICE_INTEGRITY_HANDOVER;
879                 /* At this stage no support for signature handover */
880                 props->sig_prot_cap = IB_PROT_T10DIF_TYPE_1 |
881                                       IB_PROT_T10DIF_TYPE_2 |
882                                       IB_PROT_T10DIF_TYPE_3;
883                 props->sig_guard_cap = IB_GUARD_T10DIF_CRC |
884                                        IB_GUARD_T10DIF_CSUM;
885         }
886         if (MLX5_CAP_GEN(mdev, block_lb_mc))
887                 props->device_cap_flags |= IB_DEVICE_BLOCK_MULTICAST_LOOPBACK;
888
889         if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) && raw_support) {
890                 if (MLX5_CAP_ETH(mdev, csum_cap)) {
891                         /* Legacy bit to support old userspace libraries */
892                         props->device_cap_flags |= IB_DEVICE_RAW_IP_CSUM;
893                         props->raw_packet_caps |= IB_RAW_PACKET_CAP_IP_CSUM;
894                 }
895
896                 if (MLX5_CAP_ETH(dev->mdev, vlan_cap))
897                         props->raw_packet_caps |=
898                                 IB_RAW_PACKET_CAP_CVLAN_STRIPPING;
899
900                 if (field_avail(typeof(resp), tso_caps, uhw->outlen)) {
901                         max_tso = MLX5_CAP_ETH(mdev, max_lso_cap);
902                         if (max_tso) {
903                                 resp.tso_caps.max_tso = 1 << max_tso;
904                                 resp.tso_caps.supported_qpts |=
905                                         1 << IB_QPT_RAW_PACKET;
906                                 resp.response_length += sizeof(resp.tso_caps);
907                         }
908                 }
909
910                 if (field_avail(typeof(resp), rss_caps, uhw->outlen)) {
911                         resp.rss_caps.rx_hash_function =
912                                                 MLX5_RX_HASH_FUNC_TOEPLITZ;
913                         resp.rss_caps.rx_hash_fields_mask =
914                                                 MLX5_RX_HASH_SRC_IPV4 |
915                                                 MLX5_RX_HASH_DST_IPV4 |
916                                                 MLX5_RX_HASH_SRC_IPV6 |
917                                                 MLX5_RX_HASH_DST_IPV6 |
918                                                 MLX5_RX_HASH_SRC_PORT_TCP |
919                                                 MLX5_RX_HASH_DST_PORT_TCP |
920                                                 MLX5_RX_HASH_SRC_PORT_UDP |
921                                                 MLX5_RX_HASH_DST_PORT_UDP |
922                                                 MLX5_RX_HASH_INNER;
923                         if (mlx5_accel_ipsec_device_caps(dev->mdev) &
924                             MLX5_ACCEL_IPSEC_CAP_DEVICE)
925                                 resp.rss_caps.rx_hash_fields_mask |=
926                                         MLX5_RX_HASH_IPSEC_SPI;
927                         resp.response_length += sizeof(resp.rss_caps);
928                 }
929         } else {
930                 if (field_avail(typeof(resp), tso_caps, uhw->outlen))
931                         resp.response_length += sizeof(resp.tso_caps);
932                 if (field_avail(typeof(resp), rss_caps, uhw->outlen))
933                         resp.response_length += sizeof(resp.rss_caps);
934         }
935
936         if (MLX5_CAP_GEN(mdev, ipoib_basic_offloads)) {
937                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
938                 props->device_cap_flags |= IB_DEVICE_UD_TSO;
939         }
940
941         if (MLX5_CAP_GEN(dev->mdev, rq_delay_drop) &&
942             MLX5_CAP_GEN(dev->mdev, general_notification_event) &&
943             raw_support)
944                 props->raw_packet_caps |= IB_RAW_PACKET_CAP_DELAY_DROP;
945
946         if (MLX5_CAP_GEN(mdev, ipoib_enhanced_offloads) &&
947             MLX5_CAP_IPOIB_ENHANCED(mdev, csum_cap))
948                 props->device_cap_flags |= IB_DEVICE_UD_IP_CSUM;
949
950         if (MLX5_CAP_GEN(dev->mdev, eth_net_offloads) &&
951             MLX5_CAP_ETH(dev->mdev, scatter_fcs) &&
952             raw_support) {
953                 /* Legacy bit to support old userspace libraries */
954                 props->device_cap_flags |= IB_DEVICE_RAW_SCATTER_FCS;
955                 props->raw_packet_caps |= IB_RAW_PACKET_CAP_SCATTER_FCS;
956         }
957
958         if (MLX5_CAP_DEV_MEM(mdev, memic)) {
959                 props->max_dm_size =
960                         MLX5_CAP_DEV_MEM(mdev, max_memic_size);
961         }
962
963         if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_BYPASS))
964                 props->device_cap_flags |= IB_DEVICE_MANAGED_FLOW_STEERING;
965
966         if (MLX5_CAP_GEN(mdev, end_pad))
967                 props->device_cap_flags |= IB_DEVICE_PCI_WRITE_END_PADDING;
968
969         props->vendor_part_id      = mdev->pdev->device;
970         props->hw_ver              = mdev->pdev->revision;
971
972         props->max_mr_size         = ~0ull;
973         props->page_size_cap       = ~(min_page_size - 1);
974         props->max_qp              = 1 << MLX5_CAP_GEN(mdev, log_max_qp);
975         props->max_qp_wr           = 1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
976         max_rq_sg =  MLX5_CAP_GEN(mdev, max_wqe_sz_rq) /
977                      sizeof(struct mlx5_wqe_data_seg);
978         max_sq_desc = min_t(int, MLX5_CAP_GEN(mdev, max_wqe_sz_sq), 512);
979         max_sq_sg = (max_sq_desc - sizeof(struct mlx5_wqe_ctrl_seg) -
980                      sizeof(struct mlx5_wqe_raddr_seg)) /
981                 sizeof(struct mlx5_wqe_data_seg);
982         props->max_send_sge = max_sq_sg;
983         props->max_recv_sge = max_rq_sg;
984         props->max_sge_rd          = MLX5_MAX_SGE_RD;
985         props->max_cq              = 1 << MLX5_CAP_GEN(mdev, log_max_cq);
986         props->max_cqe = (1 << MLX5_CAP_GEN(mdev, log_max_cq_sz)) - 1;
987         props->max_mr              = 1 << MLX5_CAP_GEN(mdev, log_max_mkey);
988         props->max_pd              = 1 << MLX5_CAP_GEN(mdev, log_max_pd);
989         props->max_qp_rd_atom      = 1 << MLX5_CAP_GEN(mdev, log_max_ra_req_qp);
990         props->max_qp_init_rd_atom = 1 << MLX5_CAP_GEN(mdev, log_max_ra_res_qp);
991         props->max_srq             = 1 << MLX5_CAP_GEN(mdev, log_max_srq);
992         props->max_srq_wr = (1 << MLX5_CAP_GEN(mdev, log_max_srq_sz)) - 1;
993         props->local_ca_ack_delay  = MLX5_CAP_GEN(mdev, local_ca_ack_delay);
994         props->max_res_rd_atom     = props->max_qp_rd_atom * props->max_qp;
995         props->max_srq_sge         = max_rq_sg - 1;
996         props->max_fast_reg_page_list_len =
997                 1 << MLX5_CAP_GEN(mdev, log_max_klm_list_size);
998         props->max_pi_fast_reg_page_list_len =
999                 props->max_fast_reg_page_list_len / 2;
1000         props->max_sgl_rd =
1001                 MLX5_CAP_GEN(mdev, max_sgl_for_optimized_performance);
1002         get_atomic_caps_qp(dev, props);
1003         props->masked_atomic_cap   = IB_ATOMIC_NONE;
1004         props->max_mcast_grp       = 1 << MLX5_CAP_GEN(mdev, log_max_mcg);
1005         props->max_mcast_qp_attach = MLX5_CAP_GEN(mdev, max_qp_mcg);
1006         props->max_total_mcast_qp_attach = props->max_mcast_qp_attach *
1007                                            props->max_mcast_grp;
1008         props->max_map_per_fmr = INT_MAX; /* no limit in ConnectIB */
1009         props->max_ah = INT_MAX;
1010         props->hca_core_clock = MLX5_CAP_GEN(mdev, device_frequency_khz);
1011         props->timestamp_mask = 0x7FFFFFFFFFFFFFFFULL;
1012
1013         if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
1014                 if (dev->odp_caps.general_caps & IB_ODP_SUPPORT)
1015                         props->device_cap_flags |= IB_DEVICE_ON_DEMAND_PAGING;
1016                 props->odp_caps = dev->odp_caps;
1017         }
1018
1019         if (MLX5_CAP_GEN(mdev, cd))
1020                 props->device_cap_flags |= IB_DEVICE_CROSS_CHANNEL;
1021
1022         if (mlx5_core_is_vf(mdev))
1023                 props->device_cap_flags |= IB_DEVICE_VIRTUAL_FUNCTION;
1024
1025         if (mlx5_ib_port_link_layer(ibdev, 1) ==
1026             IB_LINK_LAYER_ETHERNET && raw_support) {
1027                 props->rss_caps.max_rwq_indirection_tables =
1028                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt);
1029                 props->rss_caps.max_rwq_indirection_table_size =
1030                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rqt_size);
1031                 props->rss_caps.supported_qpts = 1 << IB_QPT_RAW_PACKET;
1032                 props->max_wq_type_rq =
1033                         1 << MLX5_CAP_GEN(dev->mdev, log_max_rq);
1034         }
1035
1036         if (MLX5_CAP_GEN(mdev, tag_matching)) {
1037                 props->tm_caps.max_num_tags =
1038                         (1 << MLX5_CAP_GEN(mdev, log_tag_matching_list_sz)) - 1;
1039                 props->tm_caps.max_ops =
1040                         1 << MLX5_CAP_GEN(mdev, log_max_qp_sz);
1041                 props->tm_caps.max_sge = MLX5_TM_MAX_SGE;
1042         }
1043
1044         if (MLX5_CAP_GEN(mdev, tag_matching) &&
1045             MLX5_CAP_GEN(mdev, rndv_offload_rc)) {
1046                 props->tm_caps.flags = IB_TM_CAP_RNDV_RC;
1047                 props->tm_caps.max_rndv_hdr_size = MLX5_TM_MAX_RNDV_MSG_SIZE;
1048         }
1049
1050         if (MLX5_CAP_GEN(dev->mdev, cq_moderation)) {
1051                 props->cq_caps.max_cq_moderation_count =
1052                                                 MLX5_MAX_CQ_COUNT;
1053                 props->cq_caps.max_cq_moderation_period =
1054                                                 MLX5_MAX_CQ_PERIOD;
1055         }
1056
1057         if (field_avail(typeof(resp), cqe_comp_caps, uhw->outlen)) {
1058                 resp.response_length += sizeof(resp.cqe_comp_caps);
1059
1060                 if (MLX5_CAP_GEN(dev->mdev, cqe_compression)) {
1061                         resp.cqe_comp_caps.max_num =
1062                                 MLX5_CAP_GEN(dev->mdev,
1063                                              cqe_compression_max_num);
1064
1065                         resp.cqe_comp_caps.supported_format =
1066                                 MLX5_IB_CQE_RES_FORMAT_HASH |
1067                                 MLX5_IB_CQE_RES_FORMAT_CSUM;
1068
1069                         if (MLX5_CAP_GEN(dev->mdev, mini_cqe_resp_stride_index))
1070                                 resp.cqe_comp_caps.supported_format |=
1071                                         MLX5_IB_CQE_RES_FORMAT_CSUM_STRIDX;
1072                 }
1073         }
1074
1075         if (field_avail(typeof(resp), packet_pacing_caps, uhw->outlen) &&
1076             raw_support) {
1077                 if (MLX5_CAP_QOS(mdev, packet_pacing) &&
1078                     MLX5_CAP_GEN(mdev, qos)) {
1079                         resp.packet_pacing_caps.qp_rate_limit_max =
1080                                 MLX5_CAP_QOS(mdev, packet_pacing_max_rate);
1081                         resp.packet_pacing_caps.qp_rate_limit_min =
1082                                 MLX5_CAP_QOS(mdev, packet_pacing_min_rate);
1083                         resp.packet_pacing_caps.supported_qpts |=
1084                                 1 << IB_QPT_RAW_PACKET;
1085                         if (MLX5_CAP_QOS(mdev, packet_pacing_burst_bound) &&
1086                             MLX5_CAP_QOS(mdev, packet_pacing_typical_size))
1087                                 resp.packet_pacing_caps.cap_flags |=
1088                                         MLX5_IB_PP_SUPPORT_BURST;
1089                 }
1090                 resp.response_length += sizeof(resp.packet_pacing_caps);
1091         }
1092
1093         if (field_avail(typeof(resp), mlx5_ib_support_multi_pkt_send_wqes,
1094                         uhw->outlen)) {
1095                 if (MLX5_CAP_ETH(mdev, multi_pkt_send_wqe))
1096                         resp.mlx5_ib_support_multi_pkt_send_wqes =
1097                                 MLX5_IB_ALLOW_MPW;
1098
1099                 if (MLX5_CAP_ETH(mdev, enhanced_multi_pkt_send_wqe))
1100                         resp.mlx5_ib_support_multi_pkt_send_wqes |=
1101                                 MLX5_IB_SUPPORT_EMPW;
1102
1103                 resp.response_length +=
1104                         sizeof(resp.mlx5_ib_support_multi_pkt_send_wqes);
1105         }
1106
1107         if (field_avail(typeof(resp), flags, uhw->outlen)) {
1108                 resp.response_length += sizeof(resp.flags);
1109
1110                 if (MLX5_CAP_GEN(mdev, cqe_compression_128))
1111                         resp.flags |=
1112                                 MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_COMP;
1113
1114                 if (MLX5_CAP_GEN(mdev, cqe_128_always))
1115                         resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_CQE_128B_PAD;
1116                 if (MLX5_CAP_GEN(mdev, qp_packet_based))
1117                         resp.flags |=
1118                                 MLX5_IB_QUERY_DEV_RESP_PACKET_BASED_CREDIT_MODE;
1119
1120                 resp.flags |= MLX5_IB_QUERY_DEV_RESP_FLAGS_SCAT2CQE_DCT;
1121         }
1122
1123         if (field_avail(typeof(resp), sw_parsing_caps,
1124                         uhw->outlen)) {
1125                 resp.response_length += sizeof(resp.sw_parsing_caps);
1126                 if (MLX5_CAP_ETH(mdev, swp)) {
1127                         resp.sw_parsing_caps.sw_parsing_offloads |=
1128                                 MLX5_IB_SW_PARSING;
1129
1130                         if (MLX5_CAP_ETH(mdev, swp_csum))
1131                                 resp.sw_parsing_caps.sw_parsing_offloads |=
1132                                         MLX5_IB_SW_PARSING_CSUM;
1133
1134                         if (MLX5_CAP_ETH(mdev, swp_lso))
1135                                 resp.sw_parsing_caps.sw_parsing_offloads |=
1136                                         MLX5_IB_SW_PARSING_LSO;
1137
1138                         if (resp.sw_parsing_caps.sw_parsing_offloads)
1139                                 resp.sw_parsing_caps.supported_qpts =
1140                                         BIT(IB_QPT_RAW_PACKET);
1141                 }
1142         }
1143
1144         if (field_avail(typeof(resp), striding_rq_caps, uhw->outlen) &&
1145             raw_support) {
1146                 resp.response_length += sizeof(resp.striding_rq_caps);
1147                 if (MLX5_CAP_GEN(mdev, striding_rq)) {
1148                         resp.striding_rq_caps.min_single_stride_log_num_of_bytes =
1149                                 MLX5_MIN_SINGLE_STRIDE_LOG_NUM_BYTES;
1150                         resp.striding_rq_caps.max_single_stride_log_num_of_bytes =
1151                                 MLX5_MAX_SINGLE_STRIDE_LOG_NUM_BYTES;
1152                         if (MLX5_CAP_GEN(dev->mdev, ext_stride_num_range))
1153                                 resp.striding_rq_caps
1154                                         .min_single_wqe_log_num_of_strides =
1155                                         MLX5_EXT_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1156                         else
1157                                 resp.striding_rq_caps
1158                                         .min_single_wqe_log_num_of_strides =
1159                                         MLX5_MIN_SINGLE_WQE_LOG_NUM_STRIDES;
1160                         resp.striding_rq_caps.max_single_wqe_log_num_of_strides =
1161                                 MLX5_MAX_SINGLE_WQE_LOG_NUM_STRIDES;
1162                         resp.striding_rq_caps.supported_qpts =
1163                                 BIT(IB_QPT_RAW_PACKET);
1164                 }
1165         }
1166
1167         if (field_avail(typeof(resp), tunnel_offloads_caps,
1168                         uhw->outlen)) {
1169                 resp.response_length += sizeof(resp.tunnel_offloads_caps);
1170                 if (MLX5_CAP_ETH(mdev, tunnel_stateless_vxlan))
1171                         resp.tunnel_offloads_caps |=
1172                                 MLX5_IB_TUNNELED_OFFLOADS_VXLAN;
1173                 if (MLX5_CAP_ETH(mdev, tunnel_stateless_geneve_rx))
1174                         resp.tunnel_offloads_caps |=
1175                                 MLX5_IB_TUNNELED_OFFLOADS_GENEVE;
1176                 if (MLX5_CAP_ETH(mdev, tunnel_stateless_gre))
1177                         resp.tunnel_offloads_caps |=
1178                                 MLX5_IB_TUNNELED_OFFLOADS_GRE;
1179                 if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
1180                     MLX5_FLEX_PROTO_CW_MPLS_GRE)
1181                         resp.tunnel_offloads_caps |=
1182                                 MLX5_IB_TUNNELED_OFFLOADS_MPLS_GRE;
1183                 if (MLX5_CAP_GEN(mdev, flex_parser_protocols) &
1184                     MLX5_FLEX_PROTO_CW_MPLS_UDP)
1185                         resp.tunnel_offloads_caps |=
1186                                 MLX5_IB_TUNNELED_OFFLOADS_MPLS_UDP;
1187         }
1188
1189         if (uhw->outlen) {
1190                 err = ib_copy_to_udata(uhw, &resp, resp.response_length);
1191
1192                 if (err)
1193                         return err;
1194         }
1195
1196         return 0;
1197 }
1198
1199 enum mlx5_ib_width {
1200         MLX5_IB_WIDTH_1X        = 1 << 0,
1201         MLX5_IB_WIDTH_2X        = 1 << 1,
1202         MLX5_IB_WIDTH_4X        = 1 << 2,
1203         MLX5_IB_WIDTH_8X        = 1 << 3,
1204         MLX5_IB_WIDTH_12X       = 1 << 4
1205 };
1206
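/*
 * Convert the link-width bitmask reported by firmware into an ib_port_attr
 * width, falling back to 4X if the value is unrecognized.
 */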
1207 static void translate_active_width(struct ib_device *ibdev, u8 active_width,
1208                                   u8 *ib_width)
1209 {
1210         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1211
1212         if (active_width & MLX5_IB_WIDTH_1X)
1213                 *ib_width = IB_WIDTH_1X;
1214         else if (active_width & MLX5_IB_WIDTH_2X)
1215                 *ib_width = IB_WIDTH_2X;
1216         else if (active_width & MLX5_IB_WIDTH_4X)
1217                 *ib_width = IB_WIDTH_4X;
1218         else if (active_width & MLX5_IB_WIDTH_8X)
1219                 *ib_width = IB_WIDTH_8X;
1220         else if (active_width & MLX5_IB_WIDTH_12X)
1221                 *ib_width = IB_WIDTH_12X;
1222         else {
1223                 mlx5_ib_dbg(dev, "Invalid active_width %d, setting width to default value: 4x\n",
1224                             (int)active_width);
1225                 *ib_width = IB_WIDTH_4X;
1226         }
1227
1228         return;
1229 }
1230
1231 static int mlx5_mtu_to_ib_mtu(int mtu)
1232 {
1233         switch (mtu) {
1234         case 256: return 1;
1235         case 512: return 2;
1236         case 1024: return 3;
1237         case 2048: return 4;
1238         case 4096: return 5;
1239         default:
1240                 pr_warn("invalid mtu\n");
1241                 return -1;
1242         }
1243 }
1244
1245 enum ib_max_vl_num {
1246         __IB_MAX_VL_0           = 1,
1247         __IB_MAX_VL_0_1         = 2,
1248         __IB_MAX_VL_0_3         = 3,
1249         __IB_MAX_VL_0_7         = 4,
1250         __IB_MAX_VL_0_14        = 5,
1251 };
1252
1253 enum mlx5_vl_hw_cap {
1254         MLX5_VL_HW_0    = 1,
1255         MLX5_VL_HW_0_1  = 2,
1256         MLX5_VL_HW_0_2  = 3,
1257         MLX5_VL_HW_0_3  = 4,
1258         MLX5_VL_HW_0_4  = 5,
1259         MLX5_VL_HW_0_5  = 6,
1260         MLX5_VL_HW_0_6  = 7,
1261         MLX5_VL_HW_0_7  = 8,
1262         MLX5_VL_HW_0_14 = 15
1263 };
1264
1265 static int translate_max_vl_num(struct ib_device *ibdev, u8 vl_hw_cap,
1266                                 u8 *max_vl_num)
1267 {
1268         switch (vl_hw_cap) {
1269         case MLX5_VL_HW_0:
1270                 *max_vl_num = __IB_MAX_VL_0;
1271                 break;
1272         case MLX5_VL_HW_0_1:
1273                 *max_vl_num = __IB_MAX_VL_0_1;
1274                 break;
1275         case MLX5_VL_HW_0_3:
1276                 *max_vl_num = __IB_MAX_VL_0_3;
1277                 break;
1278         case MLX5_VL_HW_0_7:
1279                 *max_vl_num = __IB_MAX_VL_0_7;
1280                 break;
1281         case MLX5_VL_HW_0_14:
1282                 *max_vl_num = __IB_MAX_VL_0_14;
1283                 break;
1284
1285         default:
1286                 return -EINVAL;
1287         }
1288
1289         return 0;
1290 }
1291
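/*
 * Query a native InfiniBand port: most attributes come from the HCA vport
 * context, with width/speed taken from PTYS and MTU/VL limits from the
 * port capability queries.
 */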
1292 static int mlx5_query_hca_port(struct ib_device *ibdev, u8 port,
1293                                struct ib_port_attr *props)
1294 {
1295         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1296         struct mlx5_core_dev *mdev = dev->mdev;
1297         struct mlx5_hca_vport_context *rep;
1298         u16 max_mtu;
1299         u16 oper_mtu;
1300         int err;
1301         u8 ib_link_width_oper;
1302         u8 vl_hw_cap;
1303
1304         rep = kzalloc(sizeof(*rep), GFP_KERNEL);
1305         if (!rep) {
1306                 err = -ENOMEM;
1307                 goto out;
1308         }
1309
1310         /* props being zeroed by the caller, avoid zeroing it here */
1311
1312         err = mlx5_query_hca_vport_context(mdev, 0, port, 0, rep);
1313         if (err)
1314                 goto out;
1315
1316         props->lid              = rep->lid;
1317         props->lmc              = rep->lmc;
1318         props->sm_lid           = rep->sm_lid;
1319         props->sm_sl            = rep->sm_sl;
1320         props->state            = rep->vport_state;
1321         props->phys_state       = rep->port_physical_state;
1322         props->port_cap_flags   = rep->cap_mask1;
1323         props->gid_tbl_len      = mlx5_get_gid_table_len(MLX5_CAP_GEN(mdev, gid_table_size));
1324         props->max_msg_sz       = 1 << MLX5_CAP_GEN(mdev, log_max_msg);
1325         props->pkey_tbl_len     = mlx5_to_sw_pkey_sz(MLX5_CAP_GEN(mdev, pkey_table_size));
1326         props->bad_pkey_cntr    = rep->pkey_violation_counter;
1327         props->qkey_viol_cntr   = rep->qkey_violation_counter;
1328         props->subnet_timeout   = rep->subnet_timeout;
1329         props->init_type_reply  = rep->init_type_reply;
1330
1331         if (props->port_cap_flags & IB_PORT_CAP_MASK2_SUP)
1332                 props->port_cap_flags2 = rep->cap_mask2;
1333
1334         err = mlx5_query_port_link_width_oper(mdev, &ib_link_width_oper, port);
1335         if (err)
1336                 goto out;
1337
1338         translate_active_width(ibdev, ib_link_width_oper, &props->active_width);
1339
1340         err = mlx5_query_port_ib_proto_oper(mdev, &props->active_speed, port);
1341         if (err)
1342                 goto out;
1343
1344         mlx5_query_port_max_mtu(mdev, &max_mtu, port);
1345
1346         props->max_mtu = mlx5_mtu_to_ib_mtu(max_mtu);
1347
1348         mlx5_query_port_oper_mtu(mdev, &oper_mtu, port);
1349
1350         props->active_mtu = mlx5_mtu_to_ib_mtu(oper_mtu);
1351
1352         err = mlx5_query_port_vl_hw_cap(mdev, &vl_hw_cap, port);
1353         if (err)
1354                 goto out;
1355
1356         err = translate_max_vl_num(ibdev, vl_hw_cap,
1357                                    &props->max_vl_num);
1358 out:
1359         kfree(rep);
1360         return err;
1361 }
1362
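     /*
      * Common query_port entry: dispatch to the MAD, HCA or NIC (RoCE)
      * query path according to the vport access method, then shrink
      * gid_tbl_len by the GIDs reserved on the native port (the master
      * port is used when this port isn't affiliated yet).
      */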
1363 int mlx5_ib_query_port(struct ib_device *ibdev, u8 port,
1364                        struct ib_port_attr *props)
1365 {
1366         unsigned int count;
1367         int ret;
1368
1369         switch (mlx5_get_vport_access_method(ibdev)) {
1370         case MLX5_VPORT_ACCESS_METHOD_MAD:
1371                 ret = mlx5_query_mad_ifc_port(ibdev, port, props);
1372                 break;
1373
1374         case MLX5_VPORT_ACCESS_METHOD_HCA:
1375                 ret = mlx5_query_hca_port(ibdev, port, props);
1376                 break;
1377
1378         case MLX5_VPORT_ACCESS_METHOD_NIC:
1379                 ret = mlx5_query_port_roce(ibdev, port, props);
1380                 break;
1381
1382         default:
1383                 ret = -EINVAL;
1384         }
1385
1386         if (!ret && props) {
1387                 struct mlx5_ib_dev *dev = to_mdev(ibdev);
1388                 struct mlx5_core_dev *mdev;
1389                 bool put_mdev = true;
1390
1391                 mdev = mlx5_ib_get_native_port_mdev(dev, port, NULL);
1392                 if (!mdev) {
1393                         /* If the port isn't affiliated yet, query the master.
1394                          * The master and slave will have the same values.
1395                          */
1396                         mdev = dev->mdev;
1397                         port = 1;
1398                         put_mdev = false;
1399                 }
1400                 count = mlx5_core_reserved_gids_count(mdev);
1401                 if (put_mdev)
1402                         mlx5_ib_put_native_port_mdev(dev, port);
1403                 props->gid_tbl_len -= count;
1404         }
1405         return ret;
1406 }
1407
1408 static int mlx5_ib_rep_query_port(struct ib_device *ibdev, u8 port,
1409                                   struct ib_port_attr *props)
1410 {
1411         int ret;
1412
1413         /* Only link layer == Ethernet is valid for representors,
1414          * and we always use port 1.
1415          */
1416         ret = mlx5_query_port_roce(ibdev, port, props);
1417         if (ret || !props)
1418                 return ret;
1419
1420         /* We don't support GIDs */
1421         props->gid_tbl_len = 0;
1422
1423         return ret;
1424 }
1425
1426 static int mlx5_ib_query_gid(struct ib_device *ibdev, u8 port, int index,
1427                              union ib_gid *gid)
1428 {
1429         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1430         struct mlx5_core_dev *mdev = dev->mdev;
1431
1432         switch (mlx5_get_vport_access_method(ibdev)) {
1433         case MLX5_VPORT_ACCESS_METHOD_MAD:
1434                 return mlx5_query_mad_ifc_gids(ibdev, port, index, gid);
1435
1436         case MLX5_VPORT_ACCESS_METHOD_HCA:
1437                 return mlx5_query_hca_vport_gid(mdev, 0, port, 0, index, gid);
1438
1439         default:
1440                 return -EINVAL;
1441         }
1442
1443 }
1444
1445 static int mlx5_query_hca_nic_pkey(struct ib_device *ibdev, u8 port,
1446                                    u16 index, u16 *pkey)
1447 {
1448         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1449         struct mlx5_core_dev *mdev;
1450         bool put_mdev = true;
1451         u8 mdev_port_num;
1452         int err;
1453
1454         mdev = mlx5_ib_get_native_port_mdev(dev, port, &mdev_port_num);
1455         if (!mdev) {
1456                 /* The port isn't affiliated yet; get the PKey from the master
1457                  * port. For RoCE, the PKey tables will be the same.
1458                  */
1459                 put_mdev = false;
1460                 mdev = dev->mdev;
1461                 mdev_port_num = 1;
1462         }
1463
1464         err = mlx5_query_hca_vport_pkey(mdev, 0, mdev_port_num, 0,
1465                                         index, pkey);
1466         if (put_mdev)
1467                 mlx5_ib_put_native_port_mdev(dev, port);
1468
1469         return err;
1470 }
1471
1472 static int mlx5_ib_query_pkey(struct ib_device *ibdev, u8 port, u16 index,
1473                               u16 *pkey)
1474 {
1475         switch (mlx5_get_vport_access_method(ibdev)) {
1476         case MLX5_VPORT_ACCESS_METHOD_MAD:
1477                 return mlx5_query_mad_ifc_pkey(ibdev, port, index, pkey);
1478
1479         case MLX5_VPORT_ACCESS_METHOD_HCA:
1480         case MLX5_VPORT_ACCESS_METHOD_NIC:
1481                 return mlx5_query_hca_nic_pkey(ibdev, port, index, pkey);
1482         default:
1483                 return -EINVAL;
1484         }
1485 }
1486
1487 static int mlx5_ib_modify_device(struct ib_device *ibdev, int mask,
1488                                  struct ib_device_modify *props)
1489 {
1490         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1491         struct mlx5_reg_node_desc in;
1492         struct mlx5_reg_node_desc out;
1493         int err;
1494
1495         if (mask & ~IB_DEVICE_MODIFY_NODE_DESC)
1496                 return -EOPNOTSUPP;
1497
1498         if (!(mask & IB_DEVICE_MODIFY_NODE_DESC))
1499                 return 0;
1500
1501         /*
1502          * If possible, pass the node description to the FW, so it can
1503          * generate a Trap 144 notification.  If the command fails, just ignore it.
1504          */
1505         memcpy(&in, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1506         err = mlx5_core_access_reg(dev->mdev, &in, sizeof(in), &out,
1507                                    sizeof(out), MLX5_REG_NODE_DESC, 0, 1);
1508         if (err)
1509                 return err;
1510
1511         memcpy(ibdev->node_desc, props->node_desc, IB_DEVICE_NODE_DESC_MAX);
1512
1513         return err;
1514 }
1515
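     /*
      * Read-modify-write of cap_mask1 in the HCA vport context.  Requests
      * touching bits the firmware does not allow to change (not set in
      * cap_mask1_perm) are rejected.
      */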
1516 static int set_port_caps_atomic(struct mlx5_ib_dev *dev, u8 port_num, u32 mask,
1517                                 u32 value)
1518 {
1519         struct mlx5_hca_vport_context ctx = {};
1520         struct mlx5_core_dev *mdev;
1521         u8 mdev_port_num;
1522         int err;
1523
1524         mdev = mlx5_ib_get_native_port_mdev(dev, port_num, &mdev_port_num);
1525         if (!mdev)
1526                 return -ENODEV;
1527
1528         err = mlx5_query_hca_vport_context(mdev, 0, mdev_port_num, 0, &ctx);
1529         if (err)
1530                 goto out;
1531
1532         if (~ctx.cap_mask1_perm & mask) {
1533                 mlx5_ib_warn(dev, "trying to change bitmask 0x%X but only 0x%X is supported\n",
1534                              mask, ctx.cap_mask1_perm);
1535                 err = -EINVAL;
1536                 goto out;
1537         }
1538
1539         ctx.cap_mask1 = value;
1540         ctx.cap_mask1_perm = mask;
1541         err = mlx5_core_modify_hca_vport_context(mdev, 0, mdev_port_num,
1542                                                  0, &ctx);
1543
1544 out:
1545         mlx5_ib_put_native_port_mdev(dev, port_num);
1546
1547         return err;
1548 }
1549
1550 static int mlx5_ib_modify_port(struct ib_device *ibdev, u8 port, int mask,
1551                                struct ib_port_modify *props)
1552 {
1553         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1554         struct ib_port_attr attr;
1555         u32 tmp;
1556         int err;
1557         u32 change_mask;
1558         u32 value;
1559         bool is_ib = (mlx5_ib_port_link_layer(ibdev, port) ==
1560                       IB_LINK_LAYER_INFINIBAND);
1561
1562         /* CM layer calls ib_modify_port() regardless of the link layer. For
1563          * Ethernet ports, qkey violations and port capabilities are meaningless.
1564          */
1565         if (!is_ib)
1566                 return 0;
1567
1568         if (MLX5_CAP_GEN(dev->mdev, ib_virt) && is_ib) {
1569                 change_mask = props->clr_port_cap_mask | props->set_port_cap_mask;
1570                 value = ~props->clr_port_cap_mask | props->set_port_cap_mask;
1571                 return set_port_caps_atomic(dev, port, change_mask, value);
1572         }
1573
1574         mutex_lock(&dev->cap_mask_mutex);
1575
1576         err = ib_query_port(ibdev, port, &attr);
1577         if (err)
1578                 goto out;
1579
1580         tmp = (attr.port_cap_flags | props->set_port_cap_mask) &
1581                 ~props->clr_port_cap_mask;
1582
1583         err = mlx5_set_port_caps(dev->mdev, port, tmp);
1584
1585 out:
1586         mutex_unlock(&dev->cap_mask_mutex);
1587         return err;
1588 }
1589
1590 static void print_lib_caps(struct mlx5_ib_dev *dev, u64 caps)
1591 {
1592         mlx5_ib_dbg(dev, "MLX5_LIB_CAP_4K_UAR = %s\n",
1593                     caps & MLX5_LIB_CAP_4K_UAR ? "y" : "n");
1594 }
1595
1596 static u16 calc_dynamic_bfregs(int uars_per_sys_page)
1597 {
1598         /* A large system page with non-4K UAR support might limit the dynamic size */
1599         if (uars_per_sys_page == 1  && PAGE_SIZE > 4096)
1600                 return MLX5_MIN_DYN_BFREGS;
1601
1602         return MLX5_MAX_DYN_BFREGS;
1603 }
1604
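     /*
      * Round the requested bfregs up to whole system pages and work out how
      * many static and dynamic system pages (UARs) the context needs.
      * Updates req->total_num_bfregs in place.
      */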
1605 static int calc_total_bfregs(struct mlx5_ib_dev *dev, bool lib_uar_4k,
1606                              struct mlx5_ib_alloc_ucontext_req_v2 *req,
1607                              struct mlx5_bfreg_info *bfregi)
1608 {
1609         int uars_per_sys_page;
1610         int bfregs_per_sys_page;
1611         int ref_bfregs = req->total_num_bfregs;
1612
1613         if (req->total_num_bfregs == 0)
1614                 return -EINVAL;
1615
1616         BUILD_BUG_ON(MLX5_MAX_BFREGS % MLX5_NON_FP_BFREGS_IN_PAGE);
1617         BUILD_BUG_ON(MLX5_MAX_BFREGS < MLX5_NON_FP_BFREGS_IN_PAGE);
1618
1619         if (req->total_num_bfregs > MLX5_MAX_BFREGS)
1620                 return -ENOMEM;
1621
1622         uars_per_sys_page = get_uars_per_sys_page(dev, lib_uar_4k);
1623         bfregs_per_sys_page = uars_per_sys_page * MLX5_NON_FP_BFREGS_PER_UAR;
1624         /* This holds the static allocation requested by the user */
1625         req->total_num_bfregs = ALIGN(req->total_num_bfregs, bfregs_per_sys_page);
1626         if (req->num_low_latency_bfregs > req->total_num_bfregs - 1)
1627                 return -EINVAL;
1628
1629         bfregi->num_static_sys_pages = req->total_num_bfregs / bfregs_per_sys_page;
1630         bfregi->num_dyn_bfregs = ALIGN(calc_dynamic_bfregs(uars_per_sys_page), bfregs_per_sys_page);
1631         bfregi->total_num_bfregs = req->total_num_bfregs + bfregi->num_dyn_bfregs;
1632         bfregi->num_sys_pages = bfregi->total_num_bfregs / bfregs_per_sys_page;
1633
1634         mlx5_ib_dbg(dev, "uar_4k: fw support %s, lib support %s, user requested %d bfregs, allocated %d, total bfregs %d, using %d sys pages\n",
1635                     MLX5_CAP_GEN(dev->mdev, uar_4k) ? "yes" : "no",
1636                     lib_uar_4k ? "yes" : "no", ref_bfregs,
1637                     req->total_num_bfregs, bfregi->total_num_bfregs,
1638                     bfregi->num_sys_pages);
1639
1640         return 0;
1641 }
1642
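     /*
      * Allocate a firmware UAR for every static system page of the context.
      * Dynamic pages are only marked invalid here; they are allocated on
      * demand from uar_mmap() via MLX5_IB_MMAP_ALLOC_WC.
      */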
1643 static int allocate_uars(struct mlx5_ib_dev *dev, struct mlx5_ib_ucontext *context)
1644 {
1645         struct mlx5_bfreg_info *bfregi;
1646         int err;
1647         int i;
1648
1649         bfregi = &context->bfregi;
1650         for (i = 0; i < bfregi->num_static_sys_pages; i++) {
1651                 err = mlx5_cmd_alloc_uar(dev->mdev, &bfregi->sys_pages[i]);
1652                 if (err)
1653                         goto error;
1654
1655                 mlx5_ib_dbg(dev, "allocated uar %d\n", bfregi->sys_pages[i]);
1656         }
1657
1658         for (i = bfregi->num_static_sys_pages; i < bfregi->num_sys_pages; i++)
1659                 bfregi->sys_pages[i] = MLX5_IB_INVALID_UAR_INDEX;
1660
1661         return 0;
1662
1663 error:
1664         for (--i; i >= 0; i--)
1665                 if (mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]))
1666                         mlx5_ib_warn(dev, "failed to free uar %d\n", i);
1667
1668         return err;
1669 }
1670
1671 static void deallocate_uars(struct mlx5_ib_dev *dev,
1672                             struct mlx5_ib_ucontext *context)
1673 {
1674         struct mlx5_bfreg_info *bfregi;
1675         int i;
1676
1677         bfregi = &context->bfregi;
1678         for (i = 0; i < bfregi->num_sys_pages; i++)
1679                 if (i < bfregi->num_static_sys_pages ||
1680                     bfregi->sys_pages[i] != MLX5_IB_INVALID_UAR_INDEX)
1681                         mlx5_cmd_free_uar(dev->mdev, bfregi->sys_pages[i]);
1682 }
1683
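     /*
      * Reference-count loopback users (transport domains and QPs).  Local
      * loopback is enabled once a second TD user or the first QP user
      * appears and stays on until mlx5_ib_disable_lb() drops the counts.
      */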
1684 int mlx5_ib_enable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1685 {
1686         int err = 0;
1687
1688         mutex_lock(&dev->lb.mutex);
1689         if (td)
1690                 dev->lb.user_td++;
1691         if (qp)
1692                 dev->lb.qps++;
1693
1694         if (dev->lb.user_td == 2 ||
1695             dev->lb.qps == 1) {
1696                 if (!dev->lb.enabled) {
1697                         err = mlx5_nic_vport_update_local_lb(dev->mdev, true);
1698                         dev->lb.enabled = true;
1699                 }
1700         }
1701
1702         mutex_unlock(&dev->lb.mutex);
1703
1704         return err;
1705 }
1706
1707 void mlx5_ib_disable_lb(struct mlx5_ib_dev *dev, bool td, bool qp)
1708 {
1709         mutex_lock(&dev->lb.mutex);
1710         if (td)
1711                 dev->lb.user_td--;
1712         if (qp)
1713                 dev->lb.qps--;
1714
1715         if (dev->lb.user_td == 1 &&
1716             dev->lb.qps == 0) {
1717                 if (dev->lb.enabled) {
1718                         mlx5_nic_vport_update_local_lb(dev->mdev, false);
1719                         dev->lb.enabled = false;
1720                 }
1721         }
1722
1723         mutex_unlock(&dev->lb.mutex);
1724 }
1725
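     /*
      * Allocate a transport domain for a user context.  On Ethernet ports
      * where firmware can disable local loopback, the TD is also counted as
      * a loopback user (see mlx5_ib_enable_lb()).
      */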
1726 static int mlx5_ib_alloc_transport_domain(struct mlx5_ib_dev *dev, u32 *tdn,
1727                                           u16 uid)
1728 {
1729         int err;
1730
1731         if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1732                 return 0;
1733
1734         err = mlx5_cmd_alloc_transport_domain(dev->mdev, tdn, uid);
1735         if (err)
1736                 return err;
1737
1738         if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
1739             (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
1740              !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
1741                 return err;
1742
1743         return mlx5_ib_enable_lb(dev, true, false);
1744 }
1745
1746 static void mlx5_ib_dealloc_transport_domain(struct mlx5_ib_dev *dev, u32 tdn,
1747                                              u16 uid)
1748 {
1749         if (!MLX5_CAP_GEN(dev->mdev, log_max_transport_domain))
1750                 return;
1751
1752         mlx5_cmd_dealloc_transport_domain(dev->mdev, tdn, uid);
1753
1754         if ((MLX5_CAP_GEN(dev->mdev, port_type) != MLX5_CAP_PORT_TYPE_ETH) ||
1755             (!MLX5_CAP_GEN(dev->mdev, disable_local_lb_uc) &&
1756              !MLX5_CAP_GEN(dev->mdev, disable_local_lb_mc)))
1757                 return;
1758
1759         mlx5_ib_disable_lb(dev, true, false);
1760 }
1761
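     /*
      * Allocate a user context: parse the v0/v2 request, size the bfreg
      * tables, allocate the static UARs, optionally create a DEVX uid and a
      * transport domain, then fill the extensible response (each optional
      * field grows resp.response_length only if udata has room for it).
      */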
1762 static int mlx5_ib_alloc_ucontext(struct ib_ucontext *uctx,
1763                                   struct ib_udata *udata)
1764 {
1765         struct ib_device *ibdev = uctx->device;
1766         struct mlx5_ib_dev *dev = to_mdev(ibdev);
1767         struct mlx5_ib_alloc_ucontext_req_v2 req = {};
1768         struct mlx5_ib_alloc_ucontext_resp resp = {};
1769         struct mlx5_core_dev *mdev = dev->mdev;
1770         struct mlx5_ib_ucontext *context = to_mucontext(uctx);
1771         struct mlx5_bfreg_info *bfregi;
1772         int ver;
1773         int err;
1774         size_t min_req_v2 = offsetof(struct mlx5_ib_alloc_ucontext_req_v2,
1775                                      max_cqe_version);
1776         u32 dump_fill_mkey;
1777         bool lib_uar_4k;
1778
1779         if (!dev->ib_active)
1780                 return -EAGAIN;
1781
1782         if (udata->inlen == sizeof(struct mlx5_ib_alloc_ucontext_req))
1783                 ver = 0;
1784         else if (udata->inlen >= min_req_v2)
1785                 ver = 2;
1786         else
1787                 return -EINVAL;
1788
1789         err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1790         if (err)
1791                 return err;
1792
1793         if (req.flags & ~MLX5_IB_ALLOC_UCTX_DEVX)
1794                 return -EOPNOTSUPP;
1795
1796         if (req.comp_mask || req.reserved0 || req.reserved1 || req.reserved2)
1797                 return -EOPNOTSUPP;
1798
1799         req.total_num_bfregs = ALIGN(req.total_num_bfregs,
1800                                     MLX5_NON_FP_BFREGS_PER_UAR);
1801         if (req.num_low_latency_bfregs > req.total_num_bfregs - 1)
1802                 return -EINVAL;
1803
1804         resp.qp_tab_size = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp);
1805         if (dev->wc_support)
1806                 resp.bf_reg_size = 1 << MLX5_CAP_GEN(dev->mdev, log_bf_reg_size);
1807         resp.cache_line_size = cache_line_size();
1808         resp.max_sq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_sq);
1809         resp.max_rq_desc_sz = MLX5_CAP_GEN(dev->mdev, max_wqe_sz_rq);
1810         resp.max_send_wqebb = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1811         resp.max_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_qp_sz);
1812         resp.max_srq_recv_wr = 1 << MLX5_CAP_GEN(dev->mdev, log_max_srq_sz);
1813         resp.cqe_version = min_t(__u8,
1814                                  (__u8)MLX5_CAP_GEN(dev->mdev, cqe_version),
1815                                  req.max_cqe_version);
1816         resp.log_uar_size = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1817                                 MLX5_ADAPTER_PAGE_SHIFT : PAGE_SHIFT;
1818         resp.num_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ?
1819                                         MLX5_CAP_GEN(dev->mdev, num_of_uars_per_page) : 1;
1820         resp.response_length = min(offsetof(typeof(resp), response_length) +
1821                                    sizeof(resp.response_length), udata->outlen);
1822
1823         if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_DEVICE) {
1824                 if (mlx5_get_flow_namespace(dev->mdev, MLX5_FLOW_NAMESPACE_EGRESS))
1825                         resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM;
1826                 if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_REQUIRED_METADATA)
1827                         resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_REQ_METADATA;
1828                 if (MLX5_CAP_FLOWTABLE(dev->mdev, flow_table_properties_nic_receive.ft_field_support.outer_esp_spi))
1829                         resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_SPI_STEERING;
1830                 if (mlx5_accel_ipsec_device_caps(dev->mdev) & MLX5_ACCEL_IPSEC_CAP_TX_IV_IS_ESN)
1831                         resp.flow_action_flags |= MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_TX_IV_IS_ESN;
1832                 /* MLX5_USER_ALLOC_UCONTEXT_FLOW_ACTION_FLAGS_ESP_AES_GCM_FULL_OFFLOAD is currently always 0 */
1833         }
1834
1835         lib_uar_4k = req.lib_caps & MLX5_LIB_CAP_4K_UAR;
1836         bfregi = &context->bfregi;
1837
1838         /* updates req->total_num_bfregs */
1839         err = calc_total_bfregs(dev, lib_uar_4k, &req, bfregi);
1840         if (err)
1841                 goto out_ctx;
1842
1843         mutex_init(&bfregi->lock);
1844         bfregi->lib_uar_4k = lib_uar_4k;
1845         bfregi->count = kcalloc(bfregi->total_num_bfregs, sizeof(*bfregi->count),
1846                                 GFP_KERNEL);
1847         if (!bfregi->count) {
1848                 err = -ENOMEM;
1849                 goto out_ctx;
1850         }
1851
1852         bfregi->sys_pages = kcalloc(bfregi->num_sys_pages,
1853                                     sizeof(*bfregi->sys_pages),
1854                                     GFP_KERNEL);
1855         if (!bfregi->sys_pages) {
1856                 err = -ENOMEM;
1857                 goto out_count;
1858         }
1859
1860         err = allocate_uars(dev, context);
1861         if (err)
1862                 goto out_sys_pages;
1863
1864         if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX) {
1865                 err = mlx5_ib_devx_create(dev, true);
1866                 if (err < 0)
1867                         goto out_uars;
1868                 context->devx_uid = err;
1869         }
1870
1871         err = mlx5_ib_alloc_transport_domain(dev, &context->tdn,
1872                                              context->devx_uid);
1873         if (err)
1874                 goto out_devx;
1875
1876         if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1877                 err = mlx5_cmd_dump_fill_mkey(dev->mdev, &dump_fill_mkey);
1878                 if (err)
1879                         goto out_mdev;
1880         }
1881
1882         INIT_LIST_HEAD(&context->db_page_list);
1883         mutex_init(&context->db_page_mutex);
1884
1885         resp.tot_bfregs = req.total_num_bfregs;
1886         resp.num_ports = dev->num_ports;
1887
1888         if (field_avail(typeof(resp), cqe_version, udata->outlen))
1889                 resp.response_length += sizeof(resp.cqe_version);
1890
1891         if (field_avail(typeof(resp), cmds_supp_uhw, udata->outlen)) {
1892                 resp.cmds_supp_uhw |= MLX5_USER_CMDS_SUPP_UHW_QUERY_DEVICE |
1893                                       MLX5_USER_CMDS_SUPP_UHW_CREATE_AH;
1894                 resp.response_length += sizeof(resp.cmds_supp_uhw);
1895         }
1896
1897         if (field_avail(typeof(resp), eth_min_inline, udata->outlen)) {
1898                 if (mlx5_ib_port_link_layer(ibdev, 1) == IB_LINK_LAYER_ETHERNET) {
1899                         mlx5_query_min_inline(dev->mdev, &resp.eth_min_inline);
1900                         resp.eth_min_inline++;
1901                 }
1902                 resp.response_length += sizeof(resp.eth_min_inline);
1903         }
1904
1905         if (field_avail(typeof(resp), clock_info_versions, udata->outlen)) {
1906                 if (mdev->clock_info)
1907                         resp.clock_info_versions = BIT(MLX5_IB_CLOCK_INFO_V1);
1908                 resp.response_length += sizeof(resp.clock_info_versions);
1909         }
1910
1911         /*
1912          * We don't want to expose information from the PCI bar that is located
1913          * after 4096 bytes, so if the arch only supports larger pages, let's
1914          * pretend we don't support reading the HCA's core clock. This is also
1915          * enforced by the mmap function.
1916          */
1917         if (field_avail(typeof(resp), hca_core_clock_offset, udata->outlen)) {
1918                 if (PAGE_SIZE <= 4096) {
1919                         resp.comp_mask |=
1920                                 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_CORE_CLOCK_OFFSET;
1921                         resp.hca_core_clock_offset =
1922                                 offsetof(struct mlx5_init_seg, internal_timer_h) % PAGE_SIZE;
1923                 }
1924                 resp.response_length += sizeof(resp.hca_core_clock_offset);
1925         }
1926
1927         if (field_avail(typeof(resp), log_uar_size, udata->outlen))
1928                 resp.response_length += sizeof(resp.log_uar_size);
1929
1930         if (field_avail(typeof(resp), num_uars_per_page, udata->outlen))
1931                 resp.response_length += sizeof(resp.num_uars_per_page);
1932
1933         if (field_avail(typeof(resp), num_dyn_bfregs, udata->outlen)) {
1934                 resp.num_dyn_bfregs = bfregi->num_dyn_bfregs;
1935                 resp.response_length += sizeof(resp.num_dyn_bfregs);
1936         }
1937
1938         if (field_avail(typeof(resp), dump_fill_mkey, udata->outlen)) {
1939                 if (MLX5_CAP_GEN(dev->mdev, dump_fill_mkey)) {
1940                         resp.dump_fill_mkey = dump_fill_mkey;
1941                         resp.comp_mask |=
1942                                 MLX5_IB_ALLOC_UCONTEXT_RESP_MASK_DUMP_FILL_MKEY;
1943                 }
1944                 resp.response_length += sizeof(resp.dump_fill_mkey);
1945         }
1946
1947         err = ib_copy_to_udata(udata, &resp, resp.response_length);
1948         if (err)
1949                 goto out_mdev;
1950
1951         bfregi->ver = ver;
1952         bfregi->num_low_latency_bfregs = req.num_low_latency_bfregs;
1953         context->cqe_version = resp.cqe_version;
1954         context->lib_caps = req.lib_caps;
1955         print_lib_caps(dev, context->lib_caps);
1956
1957         if (dev->lag_active) {
1958                 u8 port = mlx5_core_native_port_num(dev->mdev) - 1;
1959
1960                 atomic_set(&context->tx_port_affinity,
1961                            atomic_add_return(
1962                                    1, &dev->port[port].roce.tx_port_affinity));
1963         }
1964
1965         return 0;
1966
1967 out_mdev:
1968         mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1969 out_devx:
1970         if (req.flags & MLX5_IB_ALLOC_UCTX_DEVX)
1971                 mlx5_ib_devx_destroy(dev, context->devx_uid);
1972
1973 out_uars:
1974         deallocate_uars(dev, context);
1975
1976 out_sys_pages:
1977         kfree(bfregi->sys_pages);
1978
1979 out_count:
1980         kfree(bfregi->count);
1981
1982 out_ctx:
1983         return err;
1984 }
1985
1986 static void mlx5_ib_dealloc_ucontext(struct ib_ucontext *ibcontext)
1987 {
1988         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
1989         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
1990         struct mlx5_bfreg_info *bfregi;
1991
1992         bfregi = &context->bfregi;
1993         mlx5_ib_dealloc_transport_domain(dev, context->tdn, context->devx_uid);
1994
1995         if (context->devx_uid)
1996                 mlx5_ib_devx_destroy(dev, context->devx_uid);
1997
1998         deallocate_uars(dev, context);
1999         kfree(bfregi->sys_pages);
2000         kfree(bfregi->count);
2001 }
2002
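     /*
      * Translate a firmware UAR index into a PFN in the device BAR.  With
      * 4K UARs several firmware UARs share one system page, so the index is
      * scaled by the number of UARs per page.
      */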
2003 static phys_addr_t uar_index2pfn(struct mlx5_ib_dev *dev,
2004                                  int uar_idx)
2005 {
2006         int fw_uars_per_page;
2007
2008         fw_uars_per_page = MLX5_CAP_GEN(dev->mdev, uar_4k) ? MLX5_UARS_IN_PAGE : 1;
2009
2010         return (dev->mdev->bar_addr >> PAGE_SHIFT) + uar_idx / fw_uars_per_page;
2011 }
2012
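     /*
      * The mmap offset (vm_pgoff) encodes a command in the bits above
      * MLX5_IB_MMAP_CMD_SHIFT and an argument/index in the bits below it.
      */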
2013 static int get_command(unsigned long offset)
2014 {
2015         return (offset >> MLX5_IB_MMAP_CMD_SHIFT) & MLX5_IB_MMAP_CMD_MASK;
2016 }
2017
2018 static int get_arg(unsigned long offset)
2019 {
2020         return offset & ((1 << MLX5_IB_MMAP_CMD_SHIFT) - 1);
2021 }
2022
2023 static int get_index(unsigned long offset)
2024 {
2025         return get_arg(offset);
2026 }
2027
2028 /* Index resides in an extra byte to enable values larger than 255 */
2029 static int get_extended_index(unsigned long offset)
2030 {
2031         return get_arg(offset) | ((offset >> 16) & 0xff) << 8;
2032 }
2033
2034
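     /*
      * Intentionally empty: every mapping this driver creates goes through
      * rdma_user_mmap_io(), so the RDMA core zaps the VMAs on disassociate
      * and nothing is left for the driver to undo here.
      */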
2035 static void mlx5_ib_disassociate_ucontext(struct ib_ucontext *ibcontext)
2036 {
2037 }
2038
2039 static inline char *mmap_cmd2str(enum mlx5_ib_mmap_cmd cmd)
2040 {
2041         switch (cmd) {
2042         case MLX5_IB_MMAP_WC_PAGE:
2043                 return "WC";
2044         case MLX5_IB_MMAP_REGULAR_PAGE:
2045                 return "best effort WC";
2046         case MLX5_IB_MMAP_NC_PAGE:
2047                 return "NC";
2048         case MLX5_IB_MMAP_DEVICE_MEM:
2049                 return "Device Memory";
2050         default:
2051                 return NULL;
2052         }
2053 }
2054
2055 static int mlx5_ib_mmap_clock_info_page(struct mlx5_ib_dev *dev,
2056                                         struct vm_area_struct *vma,
2057                                         struct mlx5_ib_ucontext *context)
2058 {
2059         if ((vma->vm_end - vma->vm_start != PAGE_SIZE) ||
2060             !(vma->vm_flags & VM_SHARED))
2061                 return -EINVAL;
2062
2063         if (get_index(vma->vm_pgoff) != MLX5_IB_CLOCK_INFO_V1)
2064                 return -EOPNOTSUPP;
2065
2066         if (vma->vm_flags & (VM_WRITE | VM_EXEC))
2067                 return -EPERM;
2068         vma->vm_flags &= ~VM_MAYWRITE;
2069
2070         if (!dev->mdev->clock_info)
2071                 return -EOPNOTSUPP;
2072
2073         return vm_insert_page(vma, vma->vm_start,
2074                               virt_to_page(dev->mdev->clock_info));
2075 }
2076
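     /*
      * Map a UAR/Blue Flame page to user space.  For MLX5_IB_MMAP_ALLOC_WC a
      * fresh UAR is allocated from firmware and recorded in sys_pages on
      * success; on failure it is freed and the bfreg count is released.
      */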
2077 static int uar_mmap(struct mlx5_ib_dev *dev, enum mlx5_ib_mmap_cmd cmd,
2078                     struct vm_area_struct *vma,
2079                     struct mlx5_ib_ucontext *context)
2080 {
2081         struct mlx5_bfreg_info *bfregi = &context->bfregi;
2082         int err;
2083         unsigned long idx;
2084         phys_addr_t pfn;
2085         pgprot_t prot;
2086         u32 bfreg_dyn_idx = 0;
2087         u32 uar_index;
2088         int dyn_uar = (cmd == MLX5_IB_MMAP_ALLOC_WC);
2089         int max_valid_idx = dyn_uar ? bfregi->num_sys_pages :
2090                                 bfregi->num_static_sys_pages;
2091
2092         if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2093                 return -EINVAL;
2094
2095         if (dyn_uar)
2096                 idx = get_extended_index(vma->vm_pgoff) + bfregi->num_static_sys_pages;
2097         else
2098                 idx = get_index(vma->vm_pgoff);
2099
2100         if (idx >= max_valid_idx) {
2101                 mlx5_ib_warn(dev, "invalid uar index %lu, max=%d\n",
2102                              idx, max_valid_idx);
2103                 return -EINVAL;
2104         }
2105
2106         switch (cmd) {
2107         case MLX5_IB_MMAP_WC_PAGE:
2108         case MLX5_IB_MMAP_ALLOC_WC:
2109 /* Some architectures don't support WC memory */
2110 #if defined(CONFIG_X86)
2111                 if (!pat_enabled())
2112                         return -EPERM;
2113 #elif !(defined(CONFIG_PPC) || (defined(CONFIG_ARM) && defined(CONFIG_MMU)))
2114                         return -EPERM;
2115 #endif
2116         /* fall through */
2117         case MLX5_IB_MMAP_REGULAR_PAGE:
2118                 /* For MLX5_IB_MMAP_REGULAR_PAGE do the best effort to get WC */
2119                 prot = pgprot_writecombine(vma->vm_page_prot);
2120                 break;
2121         case MLX5_IB_MMAP_NC_PAGE:
2122                 prot = pgprot_noncached(vma->vm_page_prot);
2123                 break;
2124         default:
2125                 return -EINVAL;
2126         }
2127
2128         if (dyn_uar) {
2129                 int uars_per_page;
2130
2131                 uars_per_page = get_uars_per_sys_page(dev, bfregi->lib_uar_4k);
2132                 bfreg_dyn_idx = idx * (uars_per_page * MLX5_NON_FP_BFREGS_PER_UAR);
2133                 if (bfreg_dyn_idx >= bfregi->total_num_bfregs) {
2134                         mlx5_ib_warn(dev, "invalid bfreg_dyn_idx %u, max=%u\n",
2135                                      bfreg_dyn_idx, bfregi->total_num_bfregs);
2136                         return -EINVAL;
2137                 }
2138
2139                 mutex_lock(&bfregi->lock);
2140                 /* Fail if the UAR is already allocated; the first bfreg index of
2141                  * each page holds its count.
2142                  */
2143                 if (bfregi->count[bfreg_dyn_idx]) {
2144                         mlx5_ib_warn(dev, "wrong offset, idx %lu is busy, bfregn=%u\n", idx, bfreg_dyn_idx);
2145                         mutex_unlock(&bfregi->lock);
2146                         return -EINVAL;
2147                 }
2148
2149                 bfregi->count[bfreg_dyn_idx]++;
2150                 mutex_unlock(&bfregi->lock);
2151
2152                 err = mlx5_cmd_alloc_uar(dev->mdev, &uar_index);
2153                 if (err) {
2154                         mlx5_ib_warn(dev, "UAR alloc failed\n");
2155                         goto free_bfreg;
2156                 }
2157         } else {
2158                 uar_index = bfregi->sys_pages[idx];
2159         }
2160
2161         pfn = uar_index2pfn(dev, uar_index);
2162         mlx5_ib_dbg(dev, "uar idx 0x%lx, pfn %pa\n", idx, &pfn);
2163
2164         err = rdma_user_mmap_io(&context->ibucontext, vma, pfn, PAGE_SIZE,
2165                                 prot, NULL);
2166         if (err) {
2167                 mlx5_ib_err(dev,
2168                             "rdma_user_mmap_io failed with error=%d, mmap_cmd=%s\n",
2169                             err, mmap_cmd2str(cmd));
2170                 goto err;
2171         }
2172
2173         if (dyn_uar)
2174                 bfregi->sys_pages[idx] = uar_index;
2175         return 0;
2176
2177 err:
2178         if (!dyn_uar)
2179                 return err;
2180
2181         mlx5_cmd_free_uar(dev->mdev, uar_index);
2182
2183 free_bfreg:
2184         mlx5_ib_free_bfreg(dev, bfregi, bfreg_dyn_idx);
2185
2186         return err;
2187 }
2188
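     /*
      * Map device memory (MEMIC) to user space.  The whole requested range
      * must have been allocated by this context through alloc_dm, which is
      * verified against the dm_pages bitmap.
      */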
2189 static int dm_mmap(struct ib_ucontext *context, struct vm_area_struct *vma)
2190 {
2191         struct mlx5_ib_ucontext *mctx = to_mucontext(context);
2192         struct mlx5_ib_dev *dev = to_mdev(context->device);
2193         u16 page_idx = get_extended_index(vma->vm_pgoff);
2194         size_t map_size = vma->vm_end - vma->vm_start;
2195         u32 npages = map_size >> PAGE_SHIFT;
2196         phys_addr_t pfn;
2197
2198         if (find_next_zero_bit(mctx->dm_pages, page_idx + npages, page_idx) !=
2199             page_idx + npages)
2200                 return -EINVAL;
2201
2202         pfn = ((dev->mdev->bar_addr +
2203               MLX5_CAP64_DEV_MEM(dev->mdev, memic_bar_start_addr)) >>
2204               PAGE_SHIFT) +
2205               page_idx;
2206         return rdma_user_mmap_io(context, vma, pfn, map_size,
2207                                  pgprot_writecombine(vma->vm_page_prot),
2208                                  NULL);
2209 }
2210
2211 static int mlx5_ib_mmap(struct ib_ucontext *ibcontext, struct vm_area_struct *vma)
2212 {
2213         struct mlx5_ib_ucontext *context = to_mucontext(ibcontext);
2214         struct mlx5_ib_dev *dev = to_mdev(ibcontext->device);
2215         unsigned long command;
2216         phys_addr_t pfn;
2217
2218         command = get_command(vma->vm_pgoff);
2219         switch (command) {
2220         case MLX5_IB_MMAP_WC_PAGE:
2221         case MLX5_IB_MMAP_NC_PAGE:
2222         case MLX5_IB_MMAP_REGULAR_PAGE:
2223         case MLX5_IB_MMAP_ALLOC_WC:
2224                 return uar_mmap(dev, command, vma, context);
2225
2226         case MLX5_IB_MMAP_GET_CONTIGUOUS_PAGES:
2227                 return -ENOSYS;
2228
2229         case MLX5_IB_MMAP_CORE_CLOCK:
2230                 if (vma->vm_end - vma->vm_start != PAGE_SIZE)
2231                         return -EINVAL;
2232
2233                 if (vma->vm_flags & VM_WRITE)
2234                         return -EPERM;
2235                 vma->vm_flags &= ~VM_MAYWRITE;
2236
2237                 /* Don't expose information to user space that it shouldn't have */
2238                 if (PAGE_SIZE > 4096)
2239                         return -EOPNOTSUPP;
2240
2241                 pfn = (dev->mdev->iseg_base +
2242                        offsetof(struct mlx5_init_seg, internal_timer_h)) >>
2243                         PAGE_SHIFT;
2244                 return rdma_user_mmap_io(&context->ibucontext, vma, pfn,
2245                                          PAGE_SIZE,
2246                                          pgprot_noncached(vma->vm_page_prot),
2247                                          NULL);
2248         case MLX5_IB_MMAP_CLOCK_INFO:
2249                 return mlx5_ib_mmap_clock_info_page(dev, vma, context);
2250
2251         case MLX5_IB_MMAP_DEVICE_MEM:
2252                 return dm_mmap(ibcontext, vma);
2253
2254         default:
2255                 return -EINVAL;
2256         }
2257
2258         return 0;
2259 }
2260
2261 static inline int check_dm_type_support(struct mlx5_ib_dev *dev,
2262                                         u32 type)
2263 {
2264         switch (type) {
2265         case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2266                 if (!MLX5_CAP_DEV_MEM(dev->mdev, memic))
2267                         return -EOPNOTSUPP;
2268                 break;
2269         case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2270         case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2271                 if (!capable(CAP_SYS_RAWIO) ||
2272                     !capable(CAP_NET_RAW))
2273                         return -EPERM;
2274
2275                 if (!(MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev, sw_owner) ||
2276                       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, sw_owner)))
2277                         return -EOPNOTSUPP;
2278                 break;
2279         }
2280
2281         return 0;
2282 }
2283
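     /*
      * Allocate MEMIC device memory and return its page index and start
      * offset to user space; the pages are recorded in the context's
      * dm_pages bitmap so dm_mmap() can validate later mappings.
      */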
2284 static int handle_alloc_dm_memic(struct ib_ucontext *ctx,
2285                                  struct mlx5_ib_dm *dm,
2286                                  struct ib_dm_alloc_attr *attr,
2287                                  struct uverbs_attr_bundle *attrs)
2288 {
2289         struct mlx5_dm *dm_db = &to_mdev(ctx->device)->dm;
2290         u64 start_offset;
2291         u32 page_idx;
2292         int err;
2293
2294         dm->size = roundup(attr->length, MLX5_MEMIC_BASE_SIZE);
2295
2296         err = mlx5_cmd_alloc_memic(dm_db, &dm->dev_addr,
2297                                    dm->size, attr->alignment);
2298         if (err)
2299                 return err;
2300
2301         page_idx = (dm->dev_addr - pci_resource_start(dm_db->dev->pdev, 0) -
2302                     MLX5_CAP64_DEV_MEM(dm_db->dev, memic_bar_start_addr)) >>
2303                     PAGE_SHIFT;
2304
2305         err = uverbs_copy_to(attrs,
2306                              MLX5_IB_ATTR_ALLOC_DM_RESP_PAGE_INDEX,
2307                              &page_idx, sizeof(page_idx));
2308         if (err)
2309                 goto err_dealloc;
2310
2311         start_offset = dm->dev_addr & ~PAGE_MASK;
2312         err = uverbs_copy_to(attrs,
2313                              MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
2314                              &start_offset, sizeof(start_offset));
2315         if (err)
2316                 goto err_dealloc;
2317
2318         bitmap_set(to_mucontext(ctx)->dm_pages, page_idx,
2319                    DIV_ROUND_UP(dm->size, PAGE_SIZE));
2320
2321         return 0;
2322
2323 err_dealloc:
2324         mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
2325
2326         return err;
2327 }
2328
2329 static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
2330                                   struct mlx5_ib_dm *dm,
2331                                   struct ib_dm_alloc_attr *attr,
2332                                   struct uverbs_attr_bundle *attrs,
2333                                   int type)
2334 {
2335         struct mlx5_core_dev *dev = to_mdev(ctx->device)->mdev;
2336         u64 act_size;
2337         int err;
2338
2339         /* Allocation size must be a multiple of the basic block size
2340          * and a power of 2.
2341          */
2342         act_size = round_up(attr->length, MLX5_SW_ICM_BLOCK_SIZE(dev));
2343         act_size = roundup_pow_of_two(act_size);
2344
2345         dm->size = act_size;
2346         err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
2347                                    to_mucontext(ctx)->devx_uid, &dm->dev_addr,
2348                                    &dm->icm_dm.obj_id);
2349         if (err)
2350                 return err;
2351
2352         err = uverbs_copy_to(attrs,
2353                              MLX5_IB_ATTR_ALLOC_DM_RESP_START_OFFSET,
2354                              &dm->dev_addr, sizeof(dm->dev_addr));
2355         if (err)
2356                 mlx5_dm_sw_icm_dealloc(dev, type, dm->size,
2357                                        to_mucontext(ctx)->devx_uid, dm->dev_addr,
2358                                        dm->icm_dm.obj_id);
2359
2360         return err;
2361 }
2362
2363 struct ib_dm *mlx5_ib_alloc_dm(struct ib_device *ibdev,
2364                                struct ib_ucontext *context,
2365                                struct ib_dm_alloc_attr *attr,
2366                                struct uverbs_attr_bundle *attrs)
2367 {
2368         struct mlx5_ib_dm *dm;
2369         enum mlx5_ib_uapi_dm_type type;
2370         int err;
2371
2372         err = uverbs_get_const_default(&type, attrs,
2373                                        MLX5_IB_ATTR_ALLOC_DM_REQ_TYPE,
2374                                        MLX5_IB_UAPI_DM_TYPE_MEMIC);
2375         if (err)
2376                 return ERR_PTR(err);
2377
2378         mlx5_ib_dbg(to_mdev(ibdev), "alloc_dm req: dm_type=%d user_length=0x%llx log_alignment=%d\n",
2379                     type, attr->length, attr->alignment);
2380
2381         err = check_dm_type_support(to_mdev(ibdev), type);
2382         if (err)
2383                 return ERR_PTR(err);
2384
2385         dm = kzalloc(sizeof(*dm), GFP_KERNEL);
2386         if (!dm)
2387                 return ERR_PTR(-ENOMEM);
2388
2389         dm->type = type;
2390
2391         switch (type) {
2392         case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2393                 err = handle_alloc_dm_memic(context, dm,
2394                                             attr,
2395                                             attrs);
2396                 break;
2397         case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2398                 err = handle_alloc_dm_sw_icm(context, dm,
2399                                              attr, attrs,
2400                                              MLX5_SW_ICM_TYPE_STEERING);
2401                 break;
2402         case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2403                 err = handle_alloc_dm_sw_icm(context, dm,
2404                                              attr, attrs,
2405                                              MLX5_SW_ICM_TYPE_HEADER_MODIFY);
2406                 break;
2407         default:
2408                 err = -EOPNOTSUPP;
2409         }
2410
2411         if (err)
2412                 goto err_free;
2413
2414         return &dm->ibdm;
2415
2416 err_free:
2417         kfree(dm);
2418         return ERR_PTR(err);
2419 }
2420
2421 int mlx5_ib_dealloc_dm(struct ib_dm *ibdm, struct uverbs_attr_bundle *attrs)
2422 {
2423         struct mlx5_ib_ucontext *ctx = rdma_udata_to_drv_context(
2424                 &attrs->driver_udata, struct mlx5_ib_ucontext, ibucontext);
2425         struct mlx5_core_dev *dev = to_mdev(ibdm->device)->mdev;
2426         struct mlx5_dm *dm_db = &to_mdev(ibdm->device)->dm;
2427         struct mlx5_ib_dm *dm = to_mdm(ibdm);
2428         u32 page_idx;
2429         int ret;
2430
2431         switch (dm->type) {
2432         case MLX5_IB_UAPI_DM_TYPE_MEMIC:
2433                 ret = mlx5_cmd_dealloc_memic(dm_db, dm->dev_addr, dm->size);
2434                 if (ret)
2435                         return ret;
2436
2437                 page_idx = (dm->dev_addr - pci_resource_start(dev->pdev, 0) -
2438                             MLX5_CAP64_DEV_MEM(dev, memic_bar_start_addr)) >>
2439                             PAGE_SHIFT;
2440                 bitmap_clear(ctx->dm_pages, page_idx,
2441                              DIV_ROUND_UP(dm->size, PAGE_SIZE));
2442                 break;
2443         case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
2444                 ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_STEERING,
2445                                              dm->size, ctx->devx_uid, dm->dev_addr,
2446                                              dm->icm_dm.obj_id);
2447                 if (ret)
2448                         return ret;
2449                 break;
2450         case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
2451                 ret = mlx5_dm_sw_icm_dealloc(dev, MLX5_SW_ICM_TYPE_HEADER_MODIFY,
2452                                              dm->size, ctx->devx_uid, dm->dev_addr,
2453                                              dm->icm_dm.obj_id);
2454                 if (ret)
2455                         return ret;
2456                 break;
2457         default:
2458                 return -EOPNOTSUPP;
2459         }
2460
2461         kfree(dm);
2462
2463         return 0;
2464 }
2465
2466 static int mlx5_ib_alloc_pd(struct ib_pd *ibpd, struct ib_udata *udata)
2467 {
2468         struct mlx5_ib_pd *pd = to_mpd(ibpd);
2469         struct ib_device *ibdev = ibpd->device;
2470         struct mlx5_ib_alloc_pd_resp resp;
2471         int err;
2472         u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
2473         u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {};
2474         u16 uid = 0;
2475         struct mlx5_ib_ucontext *context = rdma_udata_to_drv_context(
2476                 udata, struct mlx5_ib_ucontext, ibucontext);
2477
2478         uid = context ? context->devx_uid : 0;
2479         MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
2480         MLX5_SET(alloc_pd_in, in, uid, uid);
2481         err = mlx5_cmd_exec(to_mdev(ibdev)->mdev, in, sizeof(in),
2482                             out, sizeof(out));
2483         if (err)
2484                 return err;
2485
2486         pd->pdn = MLX5_GET(alloc_pd_out, out, pd);
2487         pd->uid = uid;
2488         if (udata) {
2489                 resp.pdn = pd->pdn;
2490                 if (ib_copy_to_udata(udata, &resp, sizeof(resp))) {
2491                         mlx5_cmd_dealloc_pd(to_mdev(ibdev)->mdev, pd->pdn, uid);
2492                         return -EFAULT;
2493                 }
2494         }
2495
2496         return 0;
2497 }
2498
2499 static void mlx5_ib_dealloc_pd(struct ib_pd *pd, struct ib_udata *udata)
2500 {
2501         struct mlx5_ib_dev *mdev = to_mdev(pd->device);
2502         struct mlx5_ib_pd *mpd = to_mpd(pd);
2503
2504         mlx5_cmd_dealloc_pd(mdev->mdev, mpd->pdn, mpd->uid);
2505 }
2506
2507 enum {
2508         MATCH_CRITERIA_ENABLE_OUTER_BIT,
2509         MATCH_CRITERIA_ENABLE_MISC_BIT,
2510         MATCH_CRITERIA_ENABLE_INNER_BIT,
2511         MATCH_CRITERIA_ENABLE_MISC2_BIT
2512 };
2513
2514 #define HEADER_IS_ZERO(match_criteria, headers)                            \
2515         !(memchr_inv(MLX5_ADDR_OF(fte_match_param, match_criteria, headers), \
2516                     0, MLX5_FLD_SZ_BYTES(fte_match_param, headers)))       \
2517
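     /*
      * Build match_criteria_enable by checking which header groups (outer,
      * misc, inner, misc2) actually contain non-zero match criteria.
      */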
2518 static u8 get_match_criteria_enable(u32 *match_criteria)
2519 {
2520         u8 match_criteria_enable;
2521
2522         match_criteria_enable =
2523                 (!HEADER_IS_ZERO(match_criteria, outer_headers)) <<
2524                 MATCH_CRITERIA_ENABLE_OUTER_BIT;
2525         match_criteria_enable |=
2526                 (!HEADER_IS_ZERO(match_criteria, misc_parameters)) <<
2527                 MATCH_CRITERIA_ENABLE_MISC_BIT;
2528         match_criteria_enable |=
2529                 (!HEADER_IS_ZERO(match_criteria, inner_headers)) <<
2530                 MATCH_CRITERIA_ENABLE_INNER_BIT;
2531         match_criteria_enable |=
2532                 (!HEADER_IS_ZERO(match_criteria, misc_parameters_2)) <<
2533                 MATCH_CRITERIA_ENABLE_MISC2_BIT;
2534
2535         return match_criteria_enable;
2536 }
2537
2538 static int set_proto(void *outer_c, void *outer_v, u8 mask, u8 val)
2539 {
2540         u8 entry_mask;
2541         u8 entry_val;
2542         int err = 0;
2543
2544         if (!mask)
2545                 goto out;
2546
2547         entry_mask = MLX5_GET(fte_match_set_lyr_2_4, outer_c,
2548                               ip_protocol);
2549         entry_val = MLX5_GET(fte_match_set_lyr_2_4, outer_v,
2550                              ip_protocol);
2551         if (!entry_mask) {
2552                 MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_protocol, mask);
2553                 MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_protocol, val);
2554                 goto out;
2555         }
2556         /* Don't override an existing IP protocol */
2557         if (mask != entry_mask || val != entry_val)
2558                 err = -EINVAL;
2559 out:
2560         return err;
2561 }
2562
2563 static void set_flow_label(void *misc_c, void *misc_v, u32 mask, u32 val,
2564                            bool inner)
2565 {
2566         if (inner) {
2567                 MLX5_SET(fte_match_set_misc,
2568                          misc_c, inner_ipv6_flow_label, mask);
2569                 MLX5_SET(fte_match_set_misc,
2570                          misc_v, inner_ipv6_flow_label, val);
2571         } else {
2572                 MLX5_SET(fte_match_set_misc,
2573                          misc_c, outer_ipv6_flow_label, mask);
2574                 MLX5_SET(fte_match_set_misc,
2575                          misc_v, outer_ipv6_flow_label, val);
2576         }
2577 }
2578
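     /*
      * The ToS byte is split in the FTE layout: ip_ecn holds the low two
      * bits and ip_dscp the upper six, hence the shift by 2.
      */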
2579 static void set_tos(void *outer_c, void *outer_v, u8 mask, u8 val)
2580 {
2581         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_ecn, mask);
2582         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_ecn, val);
2583         MLX5_SET(fte_match_set_lyr_2_4, outer_c, ip_dscp, mask >> 2);
2584         MLX5_SET(fte_match_set_lyr_2_4, outer_v, ip_dscp, val >> 2);
2585 }
2586
2587 static int check_mpls_supp_fields(u32 field_support, const __be32 *set_mask)
2588 {
2589         if (MLX5_GET(fte_match_mpls, set_mask, mpls_label) &&
2590             !(field_support & MLX5_FIELD_SUPPORT_MPLS_LABEL))
2591                 return -EOPNOTSUPP;
2592
2593         if (MLX5_GET(fte_match_mpls, set_mask, mpls_exp) &&
2594             !(field_support & MLX5_FIELD_SUPPORT_MPLS_EXP))
2595                 return -EOPNOTSUPP;
2596
2597         if (MLX5_GET(fte_match_mpls, set_mask, mpls_s_bos) &&
2598             !(field_support & MLX5_FIELD_SUPPORT_MPLS_S_BOS))
2599                 return -EOPNOTSUPP;
2600
2601         if (MLX5_GET(fte_match_mpls, set_mask, mpls_ttl) &&
2602             !(field_support & MLX5_FIELD_SUPPORT_MPLS_TTL))
2603                 return -EOPNOTSUPP;
2604
2605         return 0;
2606 }
2607
2608 #define LAST_ETH_FIELD vlan_tag
2609 #define LAST_IB_FIELD sl
2610 #define LAST_IPV4_FIELD tos
2611 #define LAST_IPV6_FIELD traffic_class
2612 #define LAST_TCP_UDP_FIELD src_port
2613 #define LAST_TUNNEL_FIELD tunnel_id
2614 #define LAST_FLOW_TAG_FIELD tag_id
2615 #define LAST_DROP_FIELD size
2616 #define LAST_COUNTERS_FIELD counters
2617
2618 /* Field is the last supported field */
2619 #define FIELDS_NOT_SUPPORTED(filter, field)\
2620         memchr_inv((void *)&filter.field  +\
2621                    sizeof(filter.field), 0,\
2622                    sizeof(filter) -\
2623                    offsetof(typeof(filter), field) -\
2624                    sizeof(filter.field))
2625
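     /*
      * Fold an mlx5_ib_flow_action into a rule's mlx5_flow_act: ESP actions
      * become encrypt/decrypt, raw sub-types become modify header, decap or
      * packet reformat.  Conflicting duplicates are rejected.
      */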
2626 int parse_flow_flow_action(struct mlx5_ib_flow_action *maction,
2627                            bool is_egress,
2628                            struct mlx5_flow_act *action)
2629 {
2631         switch (maction->ib_action.type) {
2632         case IB_FLOW_ACTION_ESP:
2633                 if (action->action & (MLX5_FLOW_CONTEXT_ACTION_ENCRYPT |
2634                                       MLX5_FLOW_CONTEXT_ACTION_DECRYPT))
2635                         return -EINVAL;
2636                 /* Currently only AES_GCM keymat is supported by the driver */
2637                 action->esp_id = (uintptr_t)maction->esp_aes_gcm.ctx;
2638                 action->action |= is_egress ?
2639                         MLX5_FLOW_CONTEXT_ACTION_ENCRYPT :
2640                         MLX5_FLOW_CONTEXT_ACTION_DECRYPT;
2641                 return 0;
2642         case IB_FLOW_ACTION_UNSPECIFIED:
2643                 if (maction->flow_action_raw.sub_type ==
2644                     MLX5_IB_FLOW_ACTION_MODIFY_HEADER) {
2645                         if (action->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
2646                                 return -EINVAL;
2647                         action->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
2648                         action->modify_hdr =
2649                                 maction->flow_action_raw.modify_hdr;
2650                         return 0;
2651                 }
2652                 if (maction->flow_action_raw.sub_type ==
2653                     MLX5_IB_FLOW_ACTION_DECAP) {
2654                         if (action->action & MLX5_FLOW_CONTEXT_ACTION_DECAP)
2655                                 return -EINVAL;
2656                         action->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2657                         return 0;
2658                 }
2659                 if (maction->flow_action_raw.sub_type ==
2660                     MLX5_IB_FLOW_ACTION_PACKET_REFORMAT) {
2661                         if (action->action &
2662                             MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT)
2663                                 return -EINVAL;
2664                         action->action |=
2665                                 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
2666                         action->pkt_reformat =
2667                                 maction->flow_action_raw.pkt_reformat;
2668                         return 0;
2669                 }
2670                 /* fall through */
2671         default:
2672                 return -EOPNOTSUPP;
2673         }
2674 }
2675
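     /*
      * Translate a single ib_flow_spec into the mlx5 match criteria/value
      * buffers of @spec.  Inner vs. outer headers are selected by the
      * IB_FLOW_SPEC_INNER bit of the spec type.
      */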
2676 static int parse_flow_attr(struct mlx5_core_dev *mdev,
2677                            struct mlx5_flow_spec *spec,
2678                            const union ib_flow_spec *ib_spec,
2679                            const struct ib_flow_attr *flow_attr,
2680                            struct mlx5_flow_act *action, u32 prev_type)
2681 {
2682         struct mlx5_flow_context *flow_context = &spec->flow_context;
2683         u32 *match_c = spec->match_criteria;
2684         u32 *match_v = spec->match_value;
2685         void *misc_params_c = MLX5_ADDR_OF(fte_match_param, match_c,
2686                                            misc_parameters);
2687         void *misc_params_v = MLX5_ADDR_OF(fte_match_param, match_v,
2688                                            misc_parameters);
2689         void *misc_params2_c = MLX5_ADDR_OF(fte_match_param, match_c,
2690                                             misc_parameters_2);
2691         void *misc_params2_v = MLX5_ADDR_OF(fte_match_param, match_v,
2692                                             misc_parameters_2);
2693         void *headers_c;
2694         void *headers_v;
2695         int match_ipv;
2696         int ret;
2697
2698         if (ib_spec->type & IB_FLOW_SPEC_INNER) {
2699                 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
2700                                          inner_headers);
2701                 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
2702                                          inner_headers);
2703                 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2704                                         ft_field_support.inner_ip_version);
2705         } else {
2706                 headers_c = MLX5_ADDR_OF(fte_match_param, match_c,
2707                                          outer_headers);
2708                 headers_v = MLX5_ADDR_OF(fte_match_param, match_v,
2709                                          outer_headers);
2710                 match_ipv = MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2711                                         ft_field_support.outer_ip_version);
2712         }
2713
2714         switch (ib_spec->type & ~IB_FLOW_SPEC_INNER) {
2715         case IB_FLOW_SPEC_ETH:
2716                 if (FIELDS_NOT_SUPPORTED(ib_spec->eth.mask, LAST_ETH_FIELD))
2717                         return -EOPNOTSUPP;
2718
2719                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2720                                              dmac_47_16),
2721                                 ib_spec->eth.mask.dst_mac);
2722                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2723                                              dmac_47_16),
2724                                 ib_spec->eth.val.dst_mac);
2725
2726                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2727                                              smac_47_16),
2728                                 ib_spec->eth.mask.src_mac);
2729                 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2730                                              smac_47_16),
2731                                 ib_spec->eth.val.src_mac);
2732
2733                 if (ib_spec->eth.mask.vlan_tag) {
2734                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2735                                  cvlan_tag, 1);
2736                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2737                                  cvlan_tag, 1);
2738
2739                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2740                                  first_vid, ntohs(ib_spec->eth.mask.vlan_tag));
2741                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2742                                  first_vid, ntohs(ib_spec->eth.val.vlan_tag));
2743
2744                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2745                                  first_cfi,
2746                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 12);
2747                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2748                                  first_cfi,
2749                                  ntohs(ib_spec->eth.val.vlan_tag) >> 12);
2750
2751                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2752                                  first_prio,
2753                                  ntohs(ib_spec->eth.mask.vlan_tag) >> 13);
2754                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2755                                  first_prio,
2756                                  ntohs(ib_spec->eth.val.vlan_tag) >> 13);
2757                 }
2758                 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2759                          ethertype, ntohs(ib_spec->eth.mask.ether_type));
2760                 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2761                          ethertype, ntohs(ib_spec->eth.val.ether_type));
2762                 break;
2763         case IB_FLOW_SPEC_IPV4:
2764                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv4.mask, LAST_IPV4_FIELD))
2765                         return -EOPNOTSUPP;
2766
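                /*
                 * If the device can match on the L3 ip_version field, use
                 * it (mask 0xf, value IPv4); otherwise fall back to
                 * matching the ethertype against ETH_P_IP.
                 */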
2767                 if (match_ipv) {
2768                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2769                                  ip_version, 0xf);
2770                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2771                                  ip_version, MLX5_FS_IPV4_VERSION);
2772                 } else {
2773                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2774                                  ethertype, 0xffff);
2775                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2776                                  ethertype, ETH_P_IP);
2777                 }
2778
2779                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2780                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2781                        &ib_spec->ipv4.mask.src_ip,
2782                        sizeof(ib_spec->ipv4.mask.src_ip));
2783                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2784                                     src_ipv4_src_ipv6.ipv4_layout.ipv4),
2785                        &ib_spec->ipv4.val.src_ip,
2786                        sizeof(ib_spec->ipv4.val.src_ip));
2787                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2788                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2789                        &ib_spec->ipv4.mask.dst_ip,
2790                        sizeof(ib_spec->ipv4.mask.dst_ip));
2791                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2792                                     dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2793                        &ib_spec->ipv4.val.dst_ip,
2794                        sizeof(ib_spec->ipv4.val.dst_ip));
2795
2796                 set_tos(headers_c, headers_v,
2797                         ib_spec->ipv4.mask.tos, ib_spec->ipv4.val.tos);
2798
2799                 if (set_proto(headers_c, headers_v,
2800                               ib_spec->ipv4.mask.proto,
2801                               ib_spec->ipv4.val.proto))
2802                         return -EINVAL;
2803                 break;
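        /*
         * IPv6: same ip_version/ethertype scheme as IPv4 above, plus the
         * traffic class and next header via set_tos()/set_proto(), and the
         * flow label via set_flow_label() in the misc parameters.
         */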
2804         case IB_FLOW_SPEC_IPV6:
2805                 if (FIELDS_NOT_SUPPORTED(ib_spec->ipv6.mask, LAST_IPV6_FIELD))
2806                         return -EOPNOTSUPP;
2807
2808                 if (match_ipv) {
2809                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2810                                  ip_version, 0xf);
2811                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2812                                  ip_version, MLX5_FS_IPV6_VERSION);
2813                 } else {
2814                         MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2815                                  ethertype, 0xffff);
2816                         MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2817                                  ethertype, ETH_P_IPV6);
2818                 }
2819
2820                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2821                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2822                        &ib_spec->ipv6.mask.src_ip,
2823                        sizeof(ib_spec->ipv6.mask.src_ip));
2824                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2825                                     src_ipv4_src_ipv6.ipv6_layout.ipv6),
2826                        &ib_spec->ipv6.val.src_ip,
2827                        sizeof(ib_spec->ipv6.val.src_ip));
2828                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2829                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2830                        &ib_spec->ipv6.mask.dst_ip,
2831                        sizeof(ib_spec->ipv6.mask.dst_ip));
2832                 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2833                                     dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2834                        &ib_spec->ipv6.val.dst_ip,
2835                        sizeof(ib_spec->ipv6.val.dst_ip));
2836
2837                 set_tos(headers_c, headers_v,
2838                         ib_spec->ipv6.mask.traffic_class,
2839                         ib_spec->ipv6.val.traffic_class);
2840
2841                 if (set_proto(headers_c, headers_v,
2842                               ib_spec->ipv6.mask.next_hdr,
2843                               ib_spec->ipv6.val.next_hdr))
2844                         return -EINVAL;
2845
2846                 set_flow_label(misc_params_c, misc_params_v,
2847                                ntohl(ib_spec->ipv6.mask.flow_label),
2848                                ntohl(ib_spec->ipv6.val.flow_label),
2849                                ib_spec->type & IB_FLOW_SPEC_INNER);
2850                 break;
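        /*
         * ESP: only the SPI can be matched (in misc parameters); masking
         * on the sequence number is not supported.
         */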
2851         case IB_FLOW_SPEC_ESP:
2852                 if (ib_spec->esp.mask.seq)
2853                         return -EOPNOTSUPP;
2854
2855                 MLX5_SET(fte_match_set_misc, misc_params_c, outer_esp_spi,
2856                          ntohl(ib_spec->esp.mask.spi));
2857                 MLX5_SET(fte_match_set_misc, misc_params_v, outer_esp_spi,
2858                          ntohl(ib_spec->esp.val.spi));
2859                 break;
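        /*
         * TCP, and UDP below, pin ip_protocol to the respective protocol
         * and match the source/destination ports from the spec (carried in
         * network byte order, hence the ntohs() conversions).
         */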
2860         case IB_FLOW_SPEC_TCP:
2861                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
2862                                          LAST_TCP_UDP_FIELD))
2863                         return -EOPNOTSUPP;
2864
2865                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_TCP))
2866                         return -EINVAL;
2867
2868                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_sport,
2869                          ntohs(ib_spec->tcp_udp.mask.src_port));
2870                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_sport,
2871                          ntohs(ib_spec->tcp_udp.val.src_port));
2872
2873                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_dport,
2874                          ntohs(ib_spec->tcp_udp.mask.dst_port));
2875                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_dport,
2876                          ntohs(ib_spec->tcp_udp.val.dst_port));
2877                 break;
2878         case IB_FLOW_SPEC_UDP:
2879                 if (FIELDS_NOT_SUPPORTED(ib_spec->tcp_udp.mask,
2880                                          LAST_TCP_UDP_FIELD))
2881                         return -EOPNOTSUPP;
2882
2883                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_UDP))
2884                         return -EINVAL;
2885
2886                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_sport,
2887                          ntohs(ib_spec->tcp_udp.mask.src_port));
2888                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_sport,
2889                          ntohs(ib_spec->tcp_udp.val.src_port));
2890
2891                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, udp_dport,
2892                          ntohs(ib_spec->tcp_udp.mask.dst_port));
2893                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, udp_dport,
2894                          ntohs(ib_spec->tcp_udp.val.dst_port));
2895                 break;
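        /*
         * GRE: the checksum/key/sequence/version bits of the GRE header
         * cannot be matched; match the IP protocol, the GRE protocol field
         * and the key, which is stored at the NVGRE-key offset of the misc
         * parameters.
         */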
2896         case IB_FLOW_SPEC_GRE:
2897                 if (ib_spec->gre.mask.c_ks_res0_ver)
2898                         return -EOPNOTSUPP;
2899
2900                 if (set_proto(headers_c, headers_v, 0xff, IPPROTO_GRE))
2901                         return -EINVAL;
2902
2903                 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2904                          0xff);
2905                 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2906                          IPPROTO_GRE);
2907
2908                 MLX5_SET(fte_match_set_misc, misc_params_c, gre_protocol,
2909                          ntohs(ib_spec->gre.mask.protocol));
2910                 MLX5_SET(fte_match_set_misc, misc_params_v, gre_protocol,
2911                          ntohs(ib_spec->gre.val.protocol));
2912
2913                 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_c,
2914                                     gre_key.nvgre.hi),
2915                        &ib_spec->gre.mask.key,
2916                        sizeof(ib_spec->gre.mask.key));
2917                 memcpy(MLX5_ADDR_OF(fte_match_set_misc, misc_params_v,
2918                                     gre_key.nvgre.hi),
2919                        &ib_spec->gre.val.key,
2920                        sizeof(ib_spec->gre.val.key));
2921                 break;
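        /*
         * MPLS: where the label sits depends on the spec parsed just
         * before this one (prev_type): over UDP, over GRE, or as the first
         * label of the inner/outer headers.  Each variant has its own
         * misc_parameters_2 field and its own device capability, checked
         * via check_mpls_supp_fields() against the requested mask.
         */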
2922         case IB_FLOW_SPEC_MPLS:
2923                 switch (prev_type) {
2924                 case IB_FLOW_SPEC_UDP:
2925                         if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2926                                                    ft_field_support.outer_first_mpls_over_udp),
2927                                                    &ib_spec->mpls.mask.tag))
2928                                 return -EOPNOTSUPP;
2929
2930                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2931                                             outer_first_mpls_over_udp),
2932                                &ib_spec->mpls.val.tag,
2933                                sizeof(ib_spec->mpls.val.tag));
2934                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2935                                             outer_first_mpls_over_udp),
2936                                &ib_spec->mpls.mask.tag,
2937                                sizeof(ib_spec->mpls.mask.tag));
2938                         break;
2939                 case IB_FLOW_SPEC_GRE:
2940                         if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2941                                                    ft_field_support.outer_first_mpls_over_gre),
2942                                                    &ib_spec->mpls.mask.tag))
2943                                 return -EOPNOTSUPP;
2944
2945                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2946                                             outer_first_mpls_over_gre),
2947                                &ib_spec->mpls.val.tag,
2948                                sizeof(ib_spec->mpls.val.tag));
2949                         memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2950                                             outer_first_mpls_over_gre),
2951                                &ib_spec->mpls.mask.tag,
2952                                sizeof(ib_spec->mpls.mask.tag));
2953                         break;
2954                 default:
2955                         if (ib_spec->type & IB_FLOW_SPEC_INNER) {
2956                                 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2957                                                            ft_field_support.inner_first_mpls),
2958                                                            &ib_spec->mpls.mask.tag))
2959                                         return -EOPNOTSUPP;
2960
2961                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2962                                                     inner_first_mpls),
2963                                        &ib_spec->mpls.val.tag,
2964                                        sizeof(ib_spec->mpls.val.tag));
2965                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2966                                                     inner_first_mpls),
2967                                        &ib_spec->mpls.mask.tag,
2968                                        sizeof(ib_spec->mpls.mask.tag));
2969                         } else {
2970                                 if (check_mpls_supp_fields(MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
2971                                                            ft_field_support.outer_first_mpls),
2972                                                            &ib_spec->mpls.mask.tag))
2973                                         return -EOPNOTSUPP;
2974
2975                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_v,
2976                                                     outer_first_mpls),
2977                                        &ib_spec->mpls.val.tag,
2978                                        sizeof(ib_spec->mpls.val.tag));
2979                                 memcpy(MLX5_ADDR_OF(fte_match_set_misc2, misc_params2_c,
2980                                                     outer_first_mpls),
2981                                        &ib_spec->mpls.mask.tag,
2982                                        sizeof(ib_spec->mpls.mask.tag));
2983                         }
2984                 }
2985                 break;
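        /* VXLAN tunnel: match on the VNI (tunnel_id) in misc parameters. */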
2986         case IB_FLOW_SPEC_VXLAN_TUNNEL:
2987                 if (FIELDS_NOT_SUPPORTED(ib_spec->tunnel.mask,
2988                                          LAST_TUNNEL_FIELD))
2989                         return -EOPNOTSUPP;
2990
2991                 MLX5_SET(fte_match_set_misc, misc_params_c, vxlan_vni,
2992                          ntohl(ib_spec->tunnel.mask.tunnel_id));
2993                 MLX5_SET(fte_match_set_misc, misc_params_v, vxlan_vni,
2994                          ntohl(ib_spec->tunnel.val.tunnel_id));
2995                 break;
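        /*
         * Flow tag: carried in the flow context rather than the match
         * params; it must fit in 24 bits.
         */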
2996         case IB_FLOW_SPEC_ACTION_TAG:
2997                 if (FIELDS_NOT_SUPPORTED(ib_spec->flow_tag,
2998                                          LAST_FLOW_TAG_FIELD))
2999                         return -EOPNOTSUPP;
3000                 if (ib_spec->flow_tag.tag_id >= BIT(24))
3001                         return -EINVAL;
3002
3003                 flow_context->flow_tag = ib_spec->flow_tag.tag_id;
3004                 flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
3005                 break;
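        /* Drop: no match fields, just set the DROP bit in the flow action. */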
3006         case IB_FLOW_SPEC_ACTION_DROP:
3007                 if (FIELDS_NOT_SUPPORTED(ib_spec->drop,
3008                                          LAST_DROP_FIELD))
3009                         return -EOPNOTSUPP;
3010                 action->action |= MLX5_FLOW_CONTEXT_ACTION_DROP;
3011                 break;