drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2019 Mellanox Technologies. */
3
4 #include <linux/smp.h>
5 #include "dr_types.h"
6
7 #define QUEUE_SIZE 128
8 #define SIGNAL_PER_DIV_QUEUE 16
9 #define TH_NUMS_TO_DRAIN 2
10 #define DR_SEND_INFO_POOL_SIZE 1000
11
12 enum { CQ_OK = 0, CQ_EMPTY = -1, CQ_POLL_ERR = -2 };
13
14 struct dr_data_seg {
15         u64 addr;
16         u32 length;
17         u32 lkey;
18         unsigned int send_flags;
19 };
20
21 enum send_info_type {
22         WRITE_ICM = 0,
23         GTA_ARG   = 1,
24 };
25
26 struct postsend_info {
27         enum send_info_type type;
28         struct dr_data_seg write;
29         struct dr_data_seg read;
30         u64 remote_addr;
31         u32 rkey;
32 };
33
34 struct dr_qp_rtr_attr {
35         struct mlx5dr_cmd_gid_attr dgid_attr;
36         enum ib_mtu mtu;
37         u32 qp_num;
38         u16 port_num;
39         u8 min_rnr_timer;
40         u8 sgid_index;
41         u16 udp_src_port;
42         u8 fl:1;
43 };
44
45 struct dr_qp_rts_attr {
46         u8 timeout;
47         u8 retry_cnt;
48         u8 rnr_retry;
49 };
50
51 struct dr_qp_init_attr {
52         u32 cqn;
53         u32 pdn;
54         u32 max_send_wr;
55         struct mlx5_uars_page *uar;
56         u8 isolate_vl_tc:1;
57 };
58
59 struct mlx5dr_send_info_pool_obj {
60         struct mlx5dr_ste_send_info ste_send_info;
61         struct mlx5dr_send_info_pool *pool;
62         struct list_head list_node;
63 };
64
65 struct mlx5dr_send_info_pool {
66         struct list_head free_list;
67 };
68
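/* The send-info pool keeps a free list of pre-allocated mlx5dr_ste_send_info
 * objects, refilled in batches of DR_SEND_INFO_POOL_SIZE, so that callers do
 * not have to allocate them one at a time while building send lists.
 */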
69 static int dr_send_info_pool_fill(struct mlx5dr_send_info_pool *pool)
70 {
71         struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
72         int i;
73
74         for (i = 0; i < DR_SEND_INFO_POOL_SIZE; i++) {
75                 pool_obj = kzalloc(sizeof(*pool_obj), GFP_KERNEL);
76                 if (!pool_obj)
77                         goto clean_pool;
78
79                 pool_obj->pool = pool;
80                 list_add_tail(&pool_obj->list_node, &pool->free_list);
81         }
82
83         return 0;
84
85 clean_pool:
86         list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
87                 list_del(&pool_obj->list_node);
88                 kfree(pool_obj);
89         }
90
91         return -ENOMEM;
92 }
93
94 static void dr_send_info_pool_destroy(struct mlx5dr_send_info_pool *pool)
95 {
96         struct mlx5dr_send_info_pool_obj *pool_obj, *tmp_pool_obj;
97
98         list_for_each_entry_safe(pool_obj, tmp_pool_obj, &pool->free_list, list_node) {
99                 list_del(&pool_obj->list_node);
100                 kfree(pool_obj);
101         }
102
103         kfree(pool);
104 }
105
106 void mlx5dr_send_info_pool_destroy(struct mlx5dr_domain *dmn)
107 {
108         dr_send_info_pool_destroy(dmn->send_info_pool_tx);
109         dr_send_info_pool_destroy(dmn->send_info_pool_rx);
110 }
111
112 static struct mlx5dr_send_info_pool *dr_send_info_pool_create(void)
113 {
114         struct mlx5dr_send_info_pool *pool;
115         int ret;
116
117         pool = kzalloc(sizeof(*pool), GFP_KERNEL);
118         if (!pool)
119                 return NULL;
120
121         INIT_LIST_HEAD(&pool->free_list);
122
123         ret = dr_send_info_pool_fill(pool);
124         if (ret) {
125                 kfree(pool);
126                 return NULL;
127         }
128
129         return pool;
130 }
131
132 int mlx5dr_send_info_pool_create(struct mlx5dr_domain *dmn)
133 {
134         dmn->send_info_pool_rx = dr_send_info_pool_create();
135         if (!dmn->send_info_pool_rx)
136                 return -ENOMEM;
137
138         dmn->send_info_pool_tx = dr_send_info_pool_create();
139         if (!dmn->send_info_pool_tx) {
140                 dr_send_info_pool_destroy(dmn->send_info_pool_rx);
141                 return -ENOMEM;
142         }
143
144         return 0;
145 }
146
147 struct mlx5dr_ste_send_info
148 *mlx5dr_send_info_alloc(struct mlx5dr_domain *dmn,
149                         enum mlx5dr_domain_nic_type nic_type)
150 {
151         struct mlx5dr_send_info_pool_obj *pool_obj;
152         struct mlx5dr_send_info_pool *pool;
153         int ret;
154
155         pool = nic_type == DR_DOMAIN_NIC_TYPE_RX ? dmn->send_info_pool_rx :
156                                                    dmn->send_info_pool_tx;
157
158         if (unlikely(list_empty(&pool->free_list))) {
159                 ret = dr_send_info_pool_fill(pool);
160                 if (ret)
161                         return NULL;
162         }
163
164         pool_obj = list_first_entry_or_null(&pool->free_list,
165                                             struct mlx5dr_send_info_pool_obj,
166                                             list_node);
167
168         if (likely(pool_obj)) {
169                 list_del_init(&pool_obj->list_node);
170         } else {
171                 WARN_ONCE(!pool_obj, "Failed getting ste send info obj from pool");
172                 return NULL;
173         }
174
175         return &pool_obj->ste_send_info;
176 }
177
178 void mlx5dr_send_info_free(struct mlx5dr_ste_send_info *ste_send_info)
179 {
180         struct mlx5dr_send_info_pool_obj *pool_obj;
181
182         pool_obj = container_of(ste_send_info,
183                                 struct mlx5dr_send_info_pool_obj,
184                                 ste_send_info);
185
186         list_add(&pool_obj->list_node, &pool_obj->pool->free_list);
187 }
188
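/* Parse a single CQE: for a successful completion or a requester error the
 * SQ consumer counter is advanced past the WQE reported by the CQE, while a
 * responder error only bumps it by one. Returns CQ_OK only for a good CQE,
 * CQ_POLL_ERR otherwise.
 */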
189 static int dr_parse_cqe(struct mlx5dr_cq *dr_cq, struct mlx5_cqe64 *cqe64)
190 {
191         unsigned int idx;
192         u8 opcode;
193
194         opcode = get_cqe_opcode(cqe64);
195         if (opcode == MLX5_CQE_REQ_ERR) {
196                 idx = be16_to_cpu(cqe64->wqe_counter) &
197                         (dr_cq->qp->sq.wqe_cnt - 1);
198                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
199         } else if (opcode == MLX5_CQE_RESP_ERR) {
200                 ++dr_cq->qp->sq.cc;
201         } else {
202                 idx = be16_to_cpu(cqe64->wqe_counter) &
203                         (dr_cq->qp->sq.wqe_cnt - 1);
204                 dr_cq->qp->sq.cc = dr_cq->qp->sq.wqe_head[idx] + 1;
205
206                 return CQ_OK;
207         }
208
209         return CQ_POLL_ERR;
210 }
211
212 static int dr_cq_poll_one(struct mlx5dr_cq *dr_cq)
213 {
214         struct mlx5_cqe64 *cqe64;
215         int err;
216
217         cqe64 = mlx5_cqwq_get_cqe(&dr_cq->wq);
218         if (!cqe64) {
219                 if (unlikely(dr_cq->mdev->state ==
220                              MLX5_DEVICE_STATE_INTERNAL_ERROR)) {
221                         mlx5_core_dbg_once(dr_cq->mdev,
222                                            "Polling CQ while device is shutting down\n");
223                         return CQ_POLL_ERR;
224                 }
225                 return CQ_EMPTY;
226         }
227
228         mlx5_cqwq_pop(&dr_cq->wq);
229         err = dr_parse_cqe(dr_cq, cqe64);
230         mlx5_cqwq_update_db_record(&dr_cq->wq);
231
232         return err;
233 }
234
235 static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
236 {
237         int npolled;
238         int err = 0;
239
240         for (npolled = 0; npolled < ne; ++npolled) {
241                 err = dr_cq_poll_one(dr_cq);
242                 if (err != CQ_OK)
243                         break;
244         }
245
246         return err == CQ_POLL_ERR ? err : npolled;
247 }
248
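/* Create the RC QP that SW steering uses to write STEs into ICM memory:
 * allocate the cyclic work queue and the wqe_head tracking array, then
 * issue the CREATE_QP command with the given CQ, PD and UAR.
 */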
249 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
250                                          struct dr_qp_init_attr *attr)
251 {
252         u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
253         u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
254         struct mlx5_wq_param wqp;
255         struct mlx5dr_qp *dr_qp;
256         int inlen;
257         void *qpc;
258         void *in;
259         int err;
260
261         dr_qp = kzalloc(sizeof(*dr_qp), GFP_KERNEL);
262         if (!dr_qp)
263                 return NULL;
264
265         wqp.buf_numa_node = mdev->priv.numa_node;
266         wqp.db_numa_node = mdev->priv.numa_node;
267
268         dr_qp->rq.pc = 0;
269         dr_qp->rq.cc = 0;
270         dr_qp->rq.wqe_cnt = 256;
271         dr_qp->sq.pc = 0;
272         dr_qp->sq.cc = 0;
273         dr_qp->sq.head = 0;
274         dr_qp->sq.wqe_cnt = roundup_pow_of_two(attr->max_send_wr);
275
276         MLX5_SET(qpc, temp_qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
277         MLX5_SET(qpc, temp_qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
278         MLX5_SET(qpc, temp_qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
279         err = mlx5_wq_qp_create(mdev, &wqp, temp_qpc, &dr_qp->wq,
280                                 &dr_qp->wq_ctrl);
281         if (err) {
282                 mlx5_core_warn(mdev, "Can't create QP WQ\n");
283                 goto err_wq;
284         }
285
286         dr_qp->sq.wqe_head = kcalloc(dr_qp->sq.wqe_cnt,
287                                      sizeof(dr_qp->sq.wqe_head[0]),
288                                      GFP_KERNEL);
289
290         if (!dr_qp->sq.wqe_head) {
291                 mlx5_core_warn(mdev, "Can't allocate wqe head\n");
292                 goto err_wqe_head;
293         }
294
295         inlen = MLX5_ST_SZ_BYTES(create_qp_in) +
296                 MLX5_FLD_SZ_BYTES(create_qp_in, pas[0]) *
297                 dr_qp->wq_ctrl.buf.npages;
298         in = kvzalloc(inlen, GFP_KERNEL);
299         if (!in) {
300                 err = -ENOMEM;
301                 goto err_in;
302         }
303
304         qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
305         MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
306         MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
307         MLX5_SET(qpc, qpc, isolate_vl_tc, attr->isolate_vl_tc);
308         MLX5_SET(qpc, qpc, pd, attr->pdn);
309         MLX5_SET(qpc, qpc, uar_page, attr->uar->index);
310         MLX5_SET(qpc, qpc, log_page_size,
311                  dr_qp->wq_ctrl.buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
312         MLX5_SET(qpc, qpc, fre, 1);
313         MLX5_SET(qpc, qpc, rlky, 1);
314         MLX5_SET(qpc, qpc, cqn_snd, attr->cqn);
315         MLX5_SET(qpc, qpc, cqn_rcv, attr->cqn);
316         MLX5_SET(qpc, qpc, log_rq_stride, ilog2(MLX5_SEND_WQE_DS) - 4);
317         MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
318         MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
319         MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
320         MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
321         MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
322         if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
323                 MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
324         mlx5_fill_page_frag_array(&dr_qp->wq_ctrl.buf,
325                                   (__be64 *)MLX5_ADDR_OF(create_qp_in,
326                                                          in, pas));
327
328         MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
329         err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
330         dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
331         kvfree(in);
332         if (err)
333                 goto err_in;
334         dr_qp->uar = attr->uar;
335
336         return dr_qp;
337
338 err_in:
339         kfree(dr_qp->sq.wqe_head);
340 err_wqe_head:
341         mlx5_wq_destroy(&dr_qp->wq_ctrl);
342 err_wq:
343         kfree(dr_qp);
344         return NULL;
345 }
346
347 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
348                           struct mlx5dr_qp *dr_qp)
349 {
350         u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
351
352         MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
353         MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
354         mlx5_cmd_exec_in(mdev, destroy_qp, in);
355
356         kfree(dr_qp->sq.wqe_head);
357         mlx5_wq_destroy(&dr_qp->wq_ctrl);
358         kfree(dr_qp);
359 }
360
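/* Notify the HW about new WQEs: update the SQ doorbell record with the
 * current producer counter and then ring the doorbell by writing the
 * control segment to the UAR BlueFlame offset.
 */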
361 static void dr_cmd_notify_hw(struct mlx5dr_qp *dr_qp, void *ctrl)
362 {
363         dma_wmb();
364         *dr_qp->wq.sq.db = cpu_to_be32(dr_qp->sq.pc & 0xffff);
365
366         /* After the wmb() the HW is aware of the new work */
367         wmb();
368
369         mlx5_write64(ctrl, dr_qp->uar->map + MLX5_BF_OFFSET);
370 }
371
372 static void
373 dr_rdma_handle_flow_access_arg_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
374                                         u32 remote_addr,
375                                         struct dr_data_seg *data_seg,
376                                         int *size)
377 {
378         struct mlx5_wqe_header_modify_argument_update_seg *wq_arg_seg;
379         struct mlx5_wqe_flow_update_ctrl_seg *wq_flow_seg;
380
381         wq_ctrl->general_id = cpu_to_be32(remote_addr);
382         wq_flow_seg = (void *)(wq_ctrl + 1);
383
384         /* mlx5_wqe_flow_update_ctrl_seg - all reserved */
385         memset(wq_flow_seg, 0, sizeof(*wq_flow_seg));
386         wq_arg_seg = (void *)(wq_flow_seg + 1);
387
388         memcpy(wq_arg_seg->argument_list,
389                (void *)(uintptr_t)data_seg->addr,
390                data_seg->length);
391
392         *size = (sizeof(*wq_ctrl) +      /* WQE ctrl segment */
393                  sizeof(*wq_flow_seg) +  /* WQE flow update ctrl seg - reserved */
394                  sizeof(*wq_arg_seg)) /  /* WQE hdr modify arg seg - data */
395                 MLX5_SEND_WQE_DS;
396 }
397
398 static void
399 dr_rdma_handle_icm_write_segments(struct mlx5_wqe_ctrl_seg *wq_ctrl,
400                                   u64 remote_addr,
401                                   u32 rkey,
402                                   struct dr_data_seg *data_seg,
403                                   unsigned int *size)
404 {
405         struct mlx5_wqe_raddr_seg *wq_raddr;
406         struct mlx5_wqe_data_seg *wq_dseg;
407
408         wq_raddr = (void *)(wq_ctrl + 1);
409
410         wq_raddr->raddr = cpu_to_be64(remote_addr);
411         wq_raddr->rkey = cpu_to_be32(rkey);
412         wq_raddr->reserved = 0;
413
414         wq_dseg = (void *)(wq_raddr + 1);
415
416         wq_dseg->byte_count = cpu_to_be32(data_seg->length);
417         wq_dseg->lkey = cpu_to_be32(data_seg->lkey);
418         wq_dseg->addr = cpu_to_be64(data_seg->addr);
419
420         *size = (sizeof(*wq_ctrl) +    /* WQE ctrl segment */
421                  sizeof(*wq_dseg) +    /* WQE data segment */
422                  sizeof(*wq_raddr)) /  /* WQE remote addr segment */
423                 MLX5_SEND_WQE_DS;
424 }
425
426 static void dr_set_ctrl_seg(struct mlx5_wqe_ctrl_seg *wq_ctrl,
427                             struct dr_data_seg *data_seg)
428 {
429         wq_ctrl->signature = 0;
430         wq_ctrl->rsvd[0] = 0;
431         wq_ctrl->rsvd[1] = 0;
432         wq_ctrl->fm_ce_se = data_seg->send_flags & IB_SEND_SIGNALED ?
433                                 MLX5_WQE_CTRL_CQ_UPDATE : 0;
434         wq_ctrl->imm = 0;
435 }
436
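/* Build a single send WQE at the current SQ producer index: a control
 * segment followed by either RDMA read/write segments (remote address +
 * data segment) or flow table access argument segments, then advance the
 * producer counter and optionally ring the doorbell.
 */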
437 static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
438                              u32 rkey, struct dr_data_seg *data_seg,
439                              u32 opcode, bool notify_hw)
440 {
441         struct mlx5_wqe_ctrl_seg *wq_ctrl;
442         int opcode_mod = 0;
443         unsigned int size;
444         unsigned int idx;
445
446         idx = dr_qp->sq.pc & (dr_qp->sq.wqe_cnt - 1);
447
448         wq_ctrl = mlx5_wq_cyc_get_wqe(&dr_qp->wq.sq, idx);
449         dr_set_ctrl_seg(wq_ctrl, data_seg);
450
451         switch (opcode) {
452         case MLX5_OPCODE_RDMA_READ:
453         case MLX5_OPCODE_RDMA_WRITE:
454                 dr_rdma_handle_icm_write_segments(wq_ctrl, remote_addr,
455                                                   rkey, data_seg, &size);
456                 break;
457         case MLX5_OPCODE_FLOW_TBL_ACCESS:
458                 opcode_mod = MLX5_CMD_OP_MOD_UPDATE_HEADER_MODIFY_ARGUMENT;
459                 dr_rdma_handle_flow_access_arg_segments(wq_ctrl, remote_addr,
460                                                         data_seg, &size);
461                 break;
462         default:
463                 WARN(true, "illegal opcode %d", opcode);
464                 return;
465         }
466
467         /* --------------------------------------------------------
468          * |opcode_mod (8 bits)|wqe_index (16 bits)|opcode (8 bits)|
469          * --------------------------------------------------------
470          */
471         wq_ctrl->opmod_idx_opcode =
472                 cpu_to_be32((opcode_mod << 24) |
473                             ((dr_qp->sq.pc & 0xffff) << 8) |
474                             opcode);
475         wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
476
477         dr_qp->sq.pc += DIV_ROUND_UP(size * 16, MLX5_SEND_WQE_BB);
478         dr_qp->sq.wqe_head[idx] = dr_qp->sq.head++;
479
480         if (notify_hw)
481                 dr_cmd_notify_hw(dr_qp, wq_ctrl);
482 }
483
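/* Post the WQEs for one send_info: a WRITE_ICM request is posted as an
 * RDMA write of the data followed by an RDMA read into the dedicated sync
 * buffer, and only the last WQE rings the doorbell; a GTA_ARG request is
 * posted as a single FLOW_TBL_ACCESS WQE.
 */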
484 static void dr_post_send(struct mlx5dr_qp *dr_qp, struct postsend_info *send_info)
485 {
486         if (send_info->type == WRITE_ICM) {
487                 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
488                                  &send_info->write, MLX5_OPCODE_RDMA_WRITE, false);
489                 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
490                                  &send_info->read, MLX5_OPCODE_RDMA_READ, true);
491         } else { /* GTA_ARG */
492                 dr_rdma_segments(dr_qp, send_info->remote_addr, send_info->rkey,
493                                  &send_info->write, MLX5_OPCODE_FLOW_TBL_ACCESS, true);
494         }
495
496 }
497
498 /**
499  * mlx5dr_send_fill_and_append_ste_send_info: Fill an STE send info and
500  * append it to the send_list:
501  *
502  *     @ste:       The STE that this data is attached to
503  *     @size:      Size of the data to write
504  *     @offset:    Offset of the data from the start of the hw_ste entry
505  *     @data:      The data to write
506  *     @ste_info:  STE info to be sent with the send_list
507  *     @send_list: The list to append the STE info to
508  *     @copy_data: If true, the data is copied into ste_info because it is
509  *                 not backed up anywhere else (e.g. during re-hash).
510  *                 If false, the data may still be updated after it was
511  *                 added to the list.
512  */
513 void mlx5dr_send_fill_and_append_ste_send_info(struct mlx5dr_ste *ste, u16 size,
514                                                u16 offset, u8 *data,
515                                                struct mlx5dr_ste_send_info *ste_info,
516                                                struct list_head *send_list,
517                                                bool copy_data)
518 {
519         ste_info->size = size;
520         ste_info->ste = ste;
521         ste_info->offset = offset;
522
523         if (copy_data) {
524                 memcpy(ste_info->data_cont, data, size);
525                 ste_info->data = ste_info->data_cont;
526         } else {
527                 ste_info->data = data;
528         }
529
530         list_add_tail(&ste_info->send_list, send_list);
531 }
532
533 /* The function tries to consume one wc each time, unless the queue is full.
534  * In that case, which means that the HW is behind the SW by a full queue
535  * length, the function will drain the CQ until it is empty.
536  */
537 static int dr_handle_pending_wc(struct mlx5dr_domain *dmn,
538                                 struct mlx5dr_send_ring *send_ring)
539 {
540         bool is_drain = false;
541         int ne;
542
543         if (send_ring->pending_wqe < send_ring->signal_th)
544                 return 0;
545
546         /* The queue is full, start draining it */
547         if (send_ring->pending_wqe >=
548             dmn->send_ring->signal_th * TH_NUMS_TO_DRAIN)
549                 is_drain = true;
550
551         do {
552                 ne = dr_poll_cq(send_ring->cq, 1);
553                 if (unlikely(ne < 0)) {
554                         mlx5_core_warn_once(dmn->mdev, "SMFS QPN 0x%x is disabled/limited",
555                                             send_ring->qp->qpn);
556                         send_ring->err_state = true;
557                         return ne;
558                 } else if (ne == 1) {
559                         send_ring->pending_wqe -= send_ring->signal_th;
560                 }
561         } while (ne == 1 ||
562                  (is_drain && send_ring->pending_wqe  >= send_ring->signal_th));
563
564         return 0;
565 }
566
567 static void dr_fill_write_args_segs(struct mlx5dr_send_ring *send_ring,
568                                     struct postsend_info *send_info)
569 {
570         send_ring->pending_wqe++;
571
572         if (send_ring->pending_wqe % send_ring->signal_th == 0)
573                 send_info->write.send_flags |= IB_SEND_SIGNALED;
574         else
575                 send_info->write.send_flags = 0;
576 }
577
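/* Prepare the write and read segments for a WRITE_ICM post: payloads larger
 * than the max inline size are first copied into the pre-registered ring
 * buffer and sent from the ring MR; every signal_th-th pending WQE is marked
 * as signaled, and the companion read always targets the sync buffer MR.
 */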
578 static void dr_fill_write_icm_segs(struct mlx5dr_domain *dmn,
579                                    struct mlx5dr_send_ring *send_ring,
580                                    struct postsend_info *send_info)
581 {
582         u32 buff_offset;
583
584         if (send_info->write.length > dmn->info.max_inline_size) {
585                 buff_offset = (send_ring->tx_head &
586                                (dmn->send_ring->signal_th - 1)) *
587                               send_ring->max_post_send_size;
588                 /* Copy to ring mr */
589                 memcpy(send_ring->buf + buff_offset,
590                        (void *)(uintptr_t)send_info->write.addr,
591                        send_info->write.length);
592                 send_info->write.addr = (uintptr_t)send_ring->mr->dma_addr + buff_offset;
593                 send_info->write.lkey = send_ring->mr->mkey;
594
595                 send_ring->tx_head++;
596         }
597
598         send_ring->pending_wqe++;
599
600         if (send_ring->pending_wqe % send_ring->signal_th == 0)
601                 send_info->write.send_flags |= IB_SEND_SIGNALED;
602
603         send_ring->pending_wqe++;
604         send_info->read.length = send_info->write.length;
605
606         /* Read into dedicated sync buffer */
607         send_info->read.addr = (uintptr_t)send_ring->sync_mr->dma_addr;
608         send_info->read.lkey = send_ring->sync_mr->mkey;
609
610         if (send_ring->pending_wqe % send_ring->signal_th == 0)
611                 send_info->read.send_flags = IB_SEND_SIGNALED;
612         else
613                 send_info->read.send_flags = 0;
614 }
615
616 static void dr_fill_data_segs(struct mlx5dr_domain *dmn,
617                               struct mlx5dr_send_ring *send_ring,
618                               struct postsend_info *send_info)
619 {
620         if (send_info->type == WRITE_ICM)
621                 dr_fill_write_icm_segs(dmn, send_ring, send_info);
622         else /* args */
623                 dr_fill_write_args_segs(send_ring, send_info);
624 }
625
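/* Post one send_info on the domain send ring: skip posting if the device or
 * the QP is in an error state, reclaim completed WQEs once the pending count
 * crosses the signaling threshold, then fill the data segments and post the
 * WQEs under the send ring lock.
 */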
626 static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
627                                 struct postsend_info *send_info)
628 {
629         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
630         int ret;
631
632         if (unlikely(dmn->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR ||
633                      send_ring->err_state)) {
634                 mlx5_core_dbg_once(dmn->mdev,
635                                    "Skipping post send: QP err state: %d, device state: %d\n",
636                                    send_ring->err_state, dmn->mdev->state);
637                 return 0;
638         }
639
640         spin_lock(&send_ring->lock);
641
642         ret = dr_handle_pending_wc(dmn, send_ring);
643         if (ret)
644                 goto out_unlock;
645
646         dr_fill_data_segs(dmn, send_ring, send_info);
647         dr_post_send(send_ring->qp, send_info);
648
649 out_unlock:
650         spin_unlock(&send_ring->lock);
651         return ret;
652 }
653
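/* Work out how to copy a whole hash table chunk: split it into iterations
 * bounded by the ring's max post-send size, report the number of STEs per
 * iteration and allocate a bounce buffer of the proper size.
 */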
654 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
655                                    struct mlx5dr_ste_htbl *htbl,
656                                    u8 **data,
657                                    u32 *byte_size,
658                                    int *iterations,
659                                    int *num_stes)
660 {
661         u32 chunk_byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
662         int alloc_size;
663
664         if (chunk_byte_size > dmn->send_ring->max_post_send_size) {
665                 *iterations = chunk_byte_size / dmn->send_ring->max_post_send_size;
666                 *byte_size = dmn->send_ring->max_post_send_size;
667                 alloc_size = *byte_size;
668                 *num_stes = *byte_size / DR_STE_SIZE;
669         } else {
670                 *iterations = 1;
671                 *num_stes = mlx5dr_icm_pool_get_chunk_num_of_entries(htbl->chunk);
672                 alloc_size = *num_stes * DR_STE_SIZE;
673         }
674
675         *data = kvzalloc(alloc_size, GFP_KERNEL);
676         if (!*data)
677                 return -ENOMEM;
678
679         return 0;
680 }
681
682 /**
683  * mlx5dr_send_postsend_ste: Write @size bytes at @offset into the HW ICM.
684  *
685  *     @dmn:    Domain
686  *     @ste:    The STE struct that contains the data (at
687  *              least part of it)
688  *     @data:   The actual data to send
689  *     @size:   Size of the data to write
690  *     @offset: The offset from the ICM mapped data at which
691  *              to start writing, used to write only part of
692  *              the buffer.
693  *
694  * Return: 0 on success.
695  */
696 int mlx5dr_send_postsend_ste(struct mlx5dr_domain *dmn, struct mlx5dr_ste *ste,
697                              u8 *data, u16 size, u16 offset)
698 {
699         struct postsend_info send_info = {};
700
701         mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, data, size);
702
703         send_info.write.addr = (uintptr_t)data;
704         send_info.write.length = size;
705         send_info.write.lkey = 0;
706         send_info.remote_addr = mlx5dr_ste_get_mr_addr(ste) + offset;
707         send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(ste->htbl->chunk);
708
709         return dr_postsend_icm_data(dmn, &send_info);
710 }
711
712 int mlx5dr_send_postsend_htbl(struct mlx5dr_domain *dmn,
713                               struct mlx5dr_ste_htbl *htbl,
714                               u8 *formatted_ste, u8 *mask)
715 {
716         u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
717         int num_stes_per_iter;
718         int iterations;
719         u8 *data;
720         int ret;
721         int i;
722         int j;
723
724         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
725                                       &iterations, &num_stes_per_iter);
726         if (ret)
727                 return ret;
728
729         mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, formatted_ste, DR_STE_SIZE);
730
731         /* Send the data 'iterations' times */
732         for (i = 0; i < iterations; i++) {
733                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
734                 struct postsend_info send_info = {};
735
736                 /* Copy all STEs into the data buffer;
737                  * the bit_mask needs to be added as well
738                  */
739                 for (j = 0; j < num_stes_per_iter; j++) {
740                         struct mlx5dr_ste *ste = &htbl->chunk->ste_arr[ste_index + j];
741                         u32 ste_off = j * DR_STE_SIZE;
742
743                         if (mlx5dr_ste_is_not_used(ste)) {
744                                 memcpy(data + ste_off,
745                                        formatted_ste, DR_STE_SIZE);
746                         } else {
747                                 /* Copy data */
748                                 memcpy(data + ste_off,
749                                        htbl->chunk->hw_ste_arr +
750                                        DR_STE_SIZE_REDUCED * (ste_index + j),
751                                        DR_STE_SIZE_REDUCED);
752                                 /* Copy bit_mask */
753                                 memcpy(data + ste_off + DR_STE_SIZE_REDUCED,
754                                        mask, DR_STE_SIZE_MASK);
755                         /* Only when we have a mask do we need to re-arrange the STE */
756                                 mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx,
757                                                                 data + (j * DR_STE_SIZE),
758                                                                 DR_STE_SIZE);
759                         }
760                 }
761
762                 send_info.write.addr = (uintptr_t)data;
763                 send_info.write.length = byte_size;
764                 send_info.write.lkey = 0;
765                 send_info.remote_addr =
766                         mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
767                 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
768
769                 ret = dr_postsend_icm_data(dmn, &send_info);
770                 if (ret)
771                         goto out_free;
772         }
773
774 out_free:
775         kvfree(data);
776         return ret;
777 }
778
779 /* Initialize htbl with default STEs */
780 int mlx5dr_send_postsend_formatted_htbl(struct mlx5dr_domain *dmn,
781                                         struct mlx5dr_ste_htbl *htbl,
782                                         u8 *ste_init_data,
783                                         bool update_hw_ste)
784 {
785         u32 byte_size = mlx5dr_icm_pool_get_chunk_byte_size(htbl->chunk);
786         int iterations;
787         int num_stes;
788         u8 *copy_dst;
789         u8 *data;
790         int ret;
791         int i;
792
793         ret = dr_get_tbl_copy_details(dmn, htbl, &data, &byte_size,
794                                       &iterations, &num_stes);
795         if (ret)
796                 return ret;
797
798         if (update_hw_ste) {
799                 /* Copy the reduced STE to hash table ste_arr */
800                 for (i = 0; i < num_stes; i++) {
801                         copy_dst = htbl->chunk->hw_ste_arr + i * DR_STE_SIZE_REDUCED;
802                         memcpy(copy_dst, ste_init_data, DR_STE_SIZE_REDUCED);
803                 }
804         }
805
806         mlx5dr_ste_prepare_for_postsend(dmn->ste_ctx, ste_init_data, DR_STE_SIZE);
807
808         /* Copy the same STE throughout the data buffer */
809         for (i = 0; i < num_stes; i++) {
810                 copy_dst = data + i * DR_STE_SIZE;
811                 memcpy(copy_dst, ste_init_data, DR_STE_SIZE);
812         }
813
814         /* Send the data 'iterations' times */
815         for (i = 0; i < iterations; i++) {
816                 u32 ste_index = i * (byte_size / DR_STE_SIZE);
817                 struct postsend_info send_info = {};
818
819                 send_info.write.addr = (uintptr_t)data;
820                 send_info.write.length = byte_size;
821                 send_info.write.lkey = 0;
822                 send_info.remote_addr =
823                         mlx5dr_ste_get_mr_addr(htbl->chunk->ste_arr + ste_index);
824                 send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(htbl->chunk);
825
826                 ret = dr_postsend_icm_data(dmn, &send_info);
827                 if (ret)
828                         goto out_free;
829         }
830
831 out_free:
832         kvfree(data);
833         return ret;
834 }
835
836 int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
837                                 struct mlx5dr_action *action)
838 {
839         struct postsend_info send_info = {};
840
841         send_info.write.addr = (uintptr_t)action->rewrite->data;
842         send_info.write.length = action->rewrite->num_of_actions *
843                                  DR_MODIFY_ACTION_SIZE;
844         send_info.write.lkey = 0;
845         send_info.remote_addr =
846                 mlx5dr_icm_pool_get_chunk_mr_addr(action->rewrite->chunk);
847         send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(action->rewrite->chunk);
848
849         return dr_postsend_icm_data(dmn, &send_info);
850 }
851
852 int mlx5dr_send_postsend_pattern(struct mlx5dr_domain *dmn,
853                                  struct mlx5dr_icm_chunk *chunk,
854                                  u16 num_of_actions,
855                                  u8 *data)
856 {
857         struct postsend_info send_info = {};
858         int ret;
859
860         send_info.write.addr = (uintptr_t)data;
861         send_info.write.length = num_of_actions * DR_MODIFY_ACTION_SIZE;
862         send_info.remote_addr = mlx5dr_icm_pool_get_chunk_mr_addr(chunk);
863         send_info.rkey = mlx5dr_icm_pool_get_chunk_rkey(chunk);
864
865         ret = dr_postsend_icm_data(dmn, &send_info);
866         if (ret)
867                 return ret;
868
869         return 0;
870 }
871
872 int mlx5dr_send_postsend_args(struct mlx5dr_domain *dmn, u64 arg_id,
873                               u16 num_of_actions, u8 *actions_data)
874 {
875         int data_len, iter = 0, cur_sent;
876         u64 addr;
877         int ret;
878
879         addr = (uintptr_t)actions_data;
880         data_len = num_of_actions * DR_MODIFY_ACTION_SIZE;
881
882         do {
883                 struct postsend_info send_info = {};
884
885                 send_info.type = GTA_ARG;
886                 send_info.write.addr = addr;
887                 cur_sent = min_t(u32, data_len, DR_ACTION_CACHE_LINE_SIZE);
888                 send_info.write.length = cur_sent;
889                 send_info.write.lkey = 0;
890                 send_info.remote_addr = arg_id + iter;
891
892                 ret = dr_postsend_icm_data(dmn, &send_info);
893                 if (ret)
894                         goto out;
895
896                 iter++;
897                 addr += cur_sent;
898                 data_len -= cur_sent;
899         } while (data_len > 0);
900
901 out:
902         return ret;
903 }
904
905 static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
906                                  struct mlx5dr_qp *dr_qp,
907                                  int port)
908 {
909         u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
910         void *qpc;
911
912         qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
913
914         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, port);
915         MLX5_SET(qpc, qpc, pm_state, MLX5_QPC_PM_STATE_MIGRATED);
916         MLX5_SET(qpc, qpc, rre, 1);
917         MLX5_SET(qpc, qpc, rwe, 1);
918
919         MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
920         MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
921
922         return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
923 }
924
925 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
926                                     struct mlx5dr_qp *dr_qp,
927                                     struct dr_qp_rts_attr *attr)
928 {
929         u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
930         void *qpc;
931
932         qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
933
934         MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
935
936         MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
937         MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
938         MLX5_SET(qpc, qpc, primary_address_path.ack_timeout, 0x8); /* ~1ms */
939
940         MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
941         MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
942
943         return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
944 }
945
946 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
947                                      struct mlx5dr_qp *dr_qp,
948                                      struct dr_qp_rtr_attr *attr)
949 {
950         u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
951         void *qpc;
952
953         qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
954
955         MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
956
957         MLX5_SET(qpc, qpc, mtu, attr->mtu);
958         MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
959         MLX5_SET(qpc, qpc, remote_qpn, attr->qp_num);
960         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rmac_47_32),
961                attr->dgid_attr.mac, sizeof(attr->dgid_attr.mac));
962         memcpy(MLX5_ADDR_OF(qpc, qpc, primary_address_path.rgid_rip),
963                attr->dgid_attr.gid, sizeof(attr->dgid_attr.gid));
964         MLX5_SET(qpc, qpc, primary_address_path.src_addr_index,
965                  attr->sgid_index);
966
967         if (attr->dgid_attr.roce_ver == MLX5_ROCE_VERSION_2)
968                 MLX5_SET(qpc, qpc, primary_address_path.udp_sport,
969                          attr->udp_src_port);
970
971         MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
972         MLX5_SET(qpc, qpc, primary_address_path.fl, attr->fl);
973         MLX5_SET(qpc, qpc, min_rnr_nak, 1);
974
975         MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
976         MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
977
978         return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
979 }
980
981 static bool dr_send_allow_fl(struct mlx5dr_cmd_caps *caps)
982 {
983         /* Check whether RC RoCE QP creation with force loopback is allowed.
984          * There are two separate capability bits for this:
985          *  - force loopback when RoCE is enabled
986          *  - force loopback when RoCE is disabled
987          */
988         return ((caps->roce_caps.roce_en &&
989                  caps->roce_caps.fl_rc_qp_when_roce_enabled) ||
990                 (!caps->roce_caps.roce_en &&
991                  caps->roce_caps.fl_rc_qp_when_roce_disabled));
992 }
993
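/* Move the send QP through RST -> INIT -> RTR -> RTS. The QP is connected
 * to itself (remote_qpn is its own qpn), using force loopback when the
 * device allows it and GID index 0 otherwise.
 */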
994 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
995 {
996         struct mlx5dr_qp *dr_qp = dmn->send_ring->qp;
997         struct dr_qp_rts_attr rts_attr = {};
998         struct dr_qp_rtr_attr rtr_attr = {};
999         enum ib_mtu mtu = IB_MTU_1024;
1000         u16 gid_index = 0;
1001         int port = 1;
1002         int ret;
1003
1004         /* Init */
1005         ret = dr_modify_qp_rst2init(dmn->mdev, dr_qp, port);
1006         if (ret) {
1007                 mlx5dr_err(dmn, "Failed modify QP rst2init\n");
1008                 return ret;
1009         }
1010
1011         /* RTR */
1012         rtr_attr.mtu            = mtu;
1013         rtr_attr.qp_num         = dr_qp->qpn;
1014         rtr_attr.min_rnr_timer  = 12;
1015         rtr_attr.port_num       = port;
1016         rtr_attr.udp_src_port   = dmn->info.caps.roce_min_src_udp;
1017
1018         /* If QP creation with force loopback is allowed, then there
1019          * is no need for GID index when creating the QP.
1020          * Otherwise we query GID attributes and use GID index.
1021          */
1022         rtr_attr.fl = dr_send_allow_fl(&dmn->info.caps);
1023         if (!rtr_attr.fl) {
1024                 ret = mlx5dr_cmd_query_gid(dmn->mdev, port, gid_index,
1025                                            &rtr_attr.dgid_attr);
1026                 if (ret)
1027                         return ret;
1028
1029                 rtr_attr.sgid_index = gid_index;
1030         }
1031
1032         ret = dr_cmd_modify_qp_init2rtr(dmn->mdev, dr_qp, &rtr_attr);
1033         if (ret) {
1034                 mlx5dr_err(dmn, "Failed modify QP init2rtr\n");
1035                 return ret;
1036         }
1037
1038         /* RTS */
1039         rts_attr.timeout        = 14;
1040         rts_attr.retry_cnt      = 7;
1041         rts_attr.rnr_retry      = 7;
1042
1043         ret = dr_cmd_modify_qp_rtr2rts(dmn->mdev, dr_qp, &rts_attr);
1044         if (ret) {
1045                 mlx5dr_err(dmn, "Failed modify QP rtr2rts\n");
1046                 return ret;
1047         }
1048
1049         return 0;
1050 }
1051
1052 static void dr_cq_complete(struct mlx5_core_cq *mcq,
1053                            struct mlx5_eqe *eqe)
1054 {
1055         pr_err("CQ completion CQ: #%u\n", mcq->cqn);
1056 }
1057
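/* Create the CQ used in polling mode for the SW steering send QP: the CQEs
 * are initialized as invalid/HW-owned, the CQ is bound to a completion EQ
 * chosen from the current CPU, and the arm doorbell is set so that HW
 * doorbell recovery is not triggered.
 */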
1058 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
1059                                       struct mlx5_uars_page *uar,
1060                                       size_t ncqe)
1061 {
1062         u32 temp_cqc[MLX5_ST_SZ_DW(cqc)] = {};
1063         u32 out[MLX5_ST_SZ_DW(create_cq_out)];
1064         struct mlx5_wq_param wqp;
1065         struct mlx5_cqe64 *cqe;
1066         struct mlx5dr_cq *cq;
1067         int inlen, err, eqn;
1068         void *cqc, *in;
1069         __be64 *pas;
1070         int vector;
1071         u32 i;
1072
1073         cq = kzalloc(sizeof(*cq), GFP_KERNEL);
1074         if (!cq)
1075                 return NULL;
1076
1077         ncqe = roundup_pow_of_two(ncqe);
1078         MLX5_SET(cqc, temp_cqc, log_cq_size, ilog2(ncqe));
1079
1080         wqp.buf_numa_node = mdev->priv.numa_node;
1081         wqp.db_numa_node = mdev->priv.numa_node;
1082
1083         err = mlx5_cqwq_create(mdev, &wqp, temp_cqc, &cq->wq,
1084                                &cq->wq_ctrl);
1085         if (err)
1086                 goto out;
1087
1088         for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
1089                 cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
1090                 cqe->op_own = MLX5_CQE_INVALID << 4 | MLX5_CQE_OWNER_MASK;
1091         }
1092
1093         inlen = MLX5_ST_SZ_BYTES(create_cq_in) +
1094                 sizeof(u64) * cq->wq_ctrl.buf.npages;
1095         in = kvzalloc(inlen, GFP_KERNEL);
1096         if (!in)
1097                 goto err_cqwq;
1098
1099         vector = raw_smp_processor_id() % mlx5_comp_vectors_max(mdev);
1100         err = mlx5_comp_eqn_get(mdev, vector, &eqn);
1101         if (err) {
1102                 kvfree(in);
1103                 goto err_cqwq;
1104         }
1105
1106         cqc = MLX5_ADDR_OF(create_cq_in, in, cq_context);
1107         MLX5_SET(cqc, cqc, log_cq_size, ilog2(ncqe));
1108         MLX5_SET(cqc, cqc, c_eqn_or_apu_element, eqn);
1109         MLX5_SET(cqc, cqc, uar_page, uar->index);
1110         MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.buf.page_shift -
1111                  MLX5_ADAPTER_PAGE_SHIFT);
1112         MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
1113
1114         pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
1115         mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
1116
1117         cq->mcq.comp  = dr_cq_complete;
1118
1119         err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
1120         kvfree(in);
1121
1122         if (err)
1123                 goto err_cqwq;
1124
1125         cq->mcq.cqe_sz = 64;
1126         cq->mcq.set_ci_db = cq->wq_ctrl.db.db;
1127         cq->mcq.arm_db = cq->wq_ctrl.db.db + 1;
1128         *cq->mcq.set_ci_db = 0;
1129
1130         /* Set a non-zero value to prevent the HW from running db-recovery on
1131          * a CQ that is used in polling mode.
1132          */
1133         *cq->mcq.arm_db = cpu_to_be32(2 << 28);
1134
1135         cq->mcq.vector = 0;
1136         cq->mcq.uar = uar;
1137         cq->mdev = mdev;
1138
1139         return cq;
1140
1141 err_cqwq:
1142         mlx5_wq_destroy(&cq->wq_ctrl);
1143 out:
1144         kfree(cq);
1145         return NULL;
1146 }
1147
1148 static void dr_destroy_cq(struct mlx5_core_dev *mdev, struct mlx5dr_cq *cq)
1149 {
1150         mlx5_core_destroy_cq(mdev, &cq->mcq);
1151         mlx5_wq_destroy(&cq->wq_ctrl);
1152         kfree(cq);
1153 }
1154
1155 static int dr_create_mkey(struct mlx5_core_dev *mdev, u32 pdn, u32 *mkey)
1156 {
1157         u32 in[MLX5_ST_SZ_DW(create_mkey_in)] = {};
1158         void *mkc;
1159
1160         mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1161         MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
1162         MLX5_SET(mkc, mkc, a, 1);
1163         MLX5_SET(mkc, mkc, rw, 1);
1164         MLX5_SET(mkc, mkc, rr, 1);
1165         MLX5_SET(mkc, mkc, lw, 1);
1166         MLX5_SET(mkc, mkc, lr, 1);
1167
1168         MLX5_SET(mkc, mkc, pd, pdn);
1169         MLX5_SET(mkc, mkc, length64, 1);
1170         MLX5_SET(mkc, mkc, qpn, 0xffffff);
1171
1172         return mlx5_core_create_mkey(mdev, mkey, in, sizeof(in));
1173 }
1174
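/* Register a memory region for the send ring: DMA-map the buffer and create
 * a PA access-mode mkey (length64 set) on the given PD, so the QP can
 * reference the buffer by its DMA address and this mkey.
 */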
1175 static struct mlx5dr_mr *dr_reg_mr(struct mlx5_core_dev *mdev,
1176                                    u32 pdn, void *buf, size_t size)
1177 {
1178         struct mlx5dr_mr *mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1179         struct device *dma_device;
1180         dma_addr_t dma_addr;
1181         int err;
1182
1183         if (!mr)
1184                 return NULL;
1185
1186         dma_device = mlx5_core_dma_dev(mdev);
1187         dma_addr = dma_map_single(dma_device, buf, size,
1188                                   DMA_BIDIRECTIONAL);
1189         err = dma_mapping_error(dma_device, dma_addr);
1190         if (err) {
1191                 mlx5_core_warn(mdev, "Can't dma buf\n");
1192                 kfree(mr);
1193                 return NULL;
1194         }
1195
1196         err = dr_create_mkey(mdev, pdn, &mr->mkey);
1197         if (err) {
1198                 mlx5_core_warn(mdev, "Can't create mkey\n");
1199                 dma_unmap_single(dma_device, dma_addr, size,
1200                                  DMA_BIDIRECTIONAL);
1201                 kfree(mr);
1202                 return NULL;
1203         }
1204
1205         mr->dma_addr = dma_addr;
1206         mr->size = size;
1207         mr->addr = buf;
1208
1209         return mr;
1210 }
1211
1212 static void dr_dereg_mr(struct mlx5_core_dev *mdev, struct mlx5dr_mr *mr)
1213 {
1214         mlx5_core_destroy_mkey(mdev, mr->mkey);
1215         dma_unmap_single(mlx5_core_dma_dev(mdev), mr->dma_addr, mr->size,
1216                          DMA_BIDIRECTIONAL);
1217         kfree(mr);
1218 }
1219
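/* Allocate the per-domain send ring: a polling-mode CQ, an RC loopback QP
 * (with isolated VL when force loopback is supported), the data buffer and
 * its MR, and the sync buffer and its MR. The signaling threshold is
 * QUEUE_SIZE / SIGNAL_PER_DIV_QUEUE.
 */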
1220 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
1221 {
1222         struct dr_qp_init_attr init_attr = {};
1223         int cq_size;
1224         int size;
1225         int ret;
1226
1227         dmn->send_ring = kzalloc(sizeof(*dmn->send_ring), GFP_KERNEL);
1228         if (!dmn->send_ring)
1229                 return -ENOMEM;
1230
1231         cq_size = QUEUE_SIZE + 1;
1232         dmn->send_ring->cq = dr_create_cq(dmn->mdev, dmn->uar, cq_size);
1233         if (!dmn->send_ring->cq) {
1234                 mlx5dr_err(dmn, "Failed creating CQ\n");
1235                 ret = -ENOMEM;
1236                 goto free_send_ring;
1237         }
1238
1239         init_attr.cqn = dmn->send_ring->cq->mcq.cqn;
1240         init_attr.pdn = dmn->pdn;
1241         init_attr.uar = dmn->uar;
1242         init_attr.max_send_wr = QUEUE_SIZE;
1243
1244         /* Isolated VL is applicable only if force loopback is supported */
1245         if (dr_send_allow_fl(&dmn->info.caps))
1246                 init_attr.isolate_vl_tc = dmn->info.caps.isolate_vl_tc;
1247
1248         spin_lock_init(&dmn->send_ring->lock);
1249
1250         dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
1251         if (!dmn->send_ring->qp)  {
1252                 mlx5dr_err(dmn, "Failed creating QP\n");
1253                 ret = -ENOMEM;
1254                 goto clean_cq;
1255         }
1256
1257         dmn->send_ring->cq->qp = dmn->send_ring->qp;
1258
1259         dmn->info.max_send_wr = QUEUE_SIZE;
1260         dmn->info.max_inline_size = min(dmn->send_ring->qp->max_inline_data,
1261                                         DR_STE_SIZE);
1262
1263         dmn->send_ring->signal_th = dmn->info.max_send_wr /
1264                 SIGNAL_PER_DIV_QUEUE;
1265
1266         /* Prepare qp to be used */
1267         ret = dr_prepare_qp_to_rts(dmn);
1268         if (ret)
1269                 goto clean_qp;
1270
1271         dmn->send_ring->max_post_send_size =
1272                 mlx5dr_icm_pool_chunk_size_to_byte(DR_CHUNK_SIZE_1K,
1273                                                    DR_ICM_TYPE_STE);
1274
1275         /* Allocating the max size as a buffer for writing */
1276         size = dmn->send_ring->signal_th * dmn->send_ring->max_post_send_size;
1277         dmn->send_ring->buf = kzalloc(size, GFP_KERNEL);
1278         if (!dmn->send_ring->buf) {
1279                 ret = -ENOMEM;
1280                 goto clean_qp;
1281         }
1282
1283         dmn->send_ring->buf_size = size;
1284
1285         dmn->send_ring->mr = dr_reg_mr(dmn->mdev,
1286                                        dmn->pdn, dmn->send_ring->buf, size);
1287         if (!dmn->send_ring->mr) {
1288                 ret = -ENOMEM;
1289                 goto free_mem;
1290         }
1291
1292         dmn->send_ring->sync_buff = kzalloc(dmn->send_ring->max_post_send_size,
1293                                             GFP_KERNEL);
1294         if (!dmn->send_ring->sync_buff) {
1295                 ret = -ENOMEM;
1296                 goto clean_mr;
1297         }
1298
1299         dmn->send_ring->sync_mr = dr_reg_mr(dmn->mdev,
1300                                             dmn->pdn, dmn->send_ring->sync_buff,
1301                                             dmn->send_ring->max_post_send_size);
1302         if (!dmn->send_ring->sync_mr) {
1303                 ret = -ENOMEM;
1304                 goto free_sync_mem;
1305         }
1306
1307         return 0;
1308
1309 free_sync_mem:
1310         kfree(dmn->send_ring->sync_buff);
1311 clean_mr:
1312         dr_dereg_mr(dmn->mdev, dmn->send_ring->mr);
1313 free_mem:
1314         kfree(dmn->send_ring->buf);
1315 clean_qp:
1316         dr_destroy_qp(dmn->mdev, dmn->send_ring->qp);
1317 clean_cq:
1318         dr_destroy_cq(dmn->mdev, dmn->send_ring->cq);
1319 free_send_ring:
1320         kfree(dmn->send_ring);
1321
1322         return ret;
1323 }
1324
1325 void mlx5dr_send_ring_free(struct mlx5dr_domain *dmn,
1326                            struct mlx5dr_send_ring *send_ring)
1327 {
1328         dr_destroy_qp(dmn->mdev, send_ring->qp);
1329         dr_destroy_cq(dmn->mdev, send_ring->cq);
1330         dr_dereg_mr(dmn->mdev, send_ring->sync_mr);
1331         dr_dereg_mr(dmn->mdev, send_ring->mr);
1332         kfree(send_ring->buf);
1333         kfree(send_ring->sync_buff);
1334         kfree(send_ring);
1335 }
1336
1337 int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
1338 {
1339         struct mlx5dr_send_ring *send_ring = dmn->send_ring;
1340         struct postsend_info send_info = {};
1341         u8 data[DR_STE_SIZE];
1342         int num_of_sends_req;
1343         int ret;
1344         int i;
1345
1346         /* Sending this number of requests makes sure the queue will be drained */
1347         num_of_sends_req = send_ring->signal_th * TH_NUMS_TO_DRAIN / 2;
1348
1349         /* Send fake requests forcing the last to be signaled */
1350         send_info.write.addr = (uintptr_t)data;
1351         send_info.write.length = DR_STE_SIZE;
1352         send_info.write.lkey = 0;
1353         /* Using the sync_mr in order to write/read */
1354         send_info.remote_addr = (uintptr_t)send_ring->sync_mr->addr;
1355         send_info.rkey = send_ring->sync_mr->mkey;
1356
1357         for (i = 0; i < num_of_sends_req; i++) {
1358                 ret = dr_postsend_icm_data(dmn, &send_info);
1359                 if (ret)
1360                         return ret;
1361         }
1362
1363         spin_lock(&send_ring->lock);
1364         ret = dr_handle_pending_wc(dmn, send_ring);
1365         spin_unlock(&send_ring->lock);
1366
1367         return ret;
1368 }