1 // SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause
2
3 /* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
4 /* Copyright (c) 2008-2019, IBM Corporation */
5
6 #include <linux/errno.h>
7 #include <linux/types.h>
8 #include <linux/net.h>
9 #include <linux/scatterlist.h>
10 #include <linux/llist.h>
11 #include <asm/barrier.h>
12 #include <net/tcp.h>
13
14 #include "siw.h"
15 #include "siw_verbs.h"
16 #include "siw_mem.h"
17
18 static char siw_qp_state_to_string[SIW_QP_STATE_COUNT][sizeof "TERMINATE"] = {
19         [SIW_QP_STATE_IDLE] = "IDLE",
20         [SIW_QP_STATE_RTR] = "RTR",
21         [SIW_QP_STATE_RTS] = "RTS",
22         [SIW_QP_STATE_CLOSING] = "CLOSING",
23         [SIW_QP_STATE_TERMINATE] = "TERMINATE",
24         [SIW_QP_STATE_ERROR] = "ERROR"
25 };
26
27 /*
28  * iWARP (RDMAP, DDP and MPA) parameters as well as Softiwarp settings on a
29  * per-RDMAP message basis. Please keep the order of the initializers. All
30  * MPA lengths are initialized to the minimum packet size.
31  */
32 struct iwarp_msg_info iwarp_pktinfo[RDMAP_TERMINATE + 1] = {
33         { /* RDMAP_RDMA_WRITE */
34           .hdr_len = sizeof(struct iwarp_rdma_write),
35           .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_write) - 2),
36           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
37                                  cpu_to_be16(DDP_VERSION << 8) |
38                                  cpu_to_be16(RDMAP_VERSION << 6) |
39                                  cpu_to_be16(RDMAP_RDMA_WRITE),
40           .rx_data = siw_proc_write },
41         { /* RDMAP_RDMA_READ_REQ */
42           .hdr_len = sizeof(struct iwarp_rdma_rreq),
43           .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rreq) - 2),
44           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
45                                  cpu_to_be16(RDMAP_VERSION << 6) |
46                                  cpu_to_be16(RDMAP_RDMA_READ_REQ),
47           .rx_data = siw_proc_rreq },
48         { /* RDMAP_RDMA_READ_RESP */
49           .hdr_len = sizeof(struct iwarp_rdma_rresp),
50           .ctrl.mpa_len = htons(sizeof(struct iwarp_rdma_rresp) - 2),
51           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_TAGGED | DDP_FLAG_LAST |
52                                  cpu_to_be16(DDP_VERSION << 8) |
53                                  cpu_to_be16(RDMAP_VERSION << 6) |
54                                  cpu_to_be16(RDMAP_RDMA_READ_RESP),
55           .rx_data = siw_proc_rresp },
56         { /* RDMAP_SEND */
57           .hdr_len = sizeof(struct iwarp_send),
58           .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
59           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
60                                  cpu_to_be16(RDMAP_VERSION << 6) |
61                                  cpu_to_be16(RDMAP_SEND),
62           .rx_data = siw_proc_send },
63         { /* RDMAP_SEND_INVAL */
64           .hdr_len = sizeof(struct iwarp_send_inv),
65           .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
66           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
67                                  cpu_to_be16(RDMAP_VERSION << 6) |
68                                  cpu_to_be16(RDMAP_SEND_INVAL),
69           .rx_data = siw_proc_send },
70         { /* RDMAP_SEND_SE */
71           .hdr_len = sizeof(struct iwarp_send),
72           .ctrl.mpa_len = htons(sizeof(struct iwarp_send) - 2),
73           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
74                                  cpu_to_be16(RDMAP_VERSION << 6) |
75                                  cpu_to_be16(RDMAP_SEND_SE),
76           .rx_data = siw_proc_send },
77         { /* RDMAP_SEND_SE_INVAL */
78           .hdr_len = sizeof(struct iwarp_send_inv),
79           .ctrl.mpa_len = htons(sizeof(struct iwarp_send_inv) - 2),
80           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
81                                  cpu_to_be16(RDMAP_VERSION << 6) |
82                                  cpu_to_be16(RDMAP_SEND_SE_INVAL),
83           .rx_data = siw_proc_send },
84         { /* RDMAP_TERMINATE */
85           .hdr_len = sizeof(struct iwarp_terminate),
86           .ctrl.mpa_len = htons(sizeof(struct iwarp_terminate) - 2),
87           .ctrl.ddp_rdmap_ctrl = DDP_FLAG_LAST | cpu_to_be16(DDP_VERSION << 8) |
88                                  cpu_to_be16(RDMAP_VERSION << 6) |
89                                  cpu_to_be16(RDMAP_TERMINATE),
90           .rx_data = siw_proc_terminate }
91 };
92
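/*
 * TCP sk_data_ready() callback: resolves the socket's QP and, if the QP
 * is in RTS and receive processing is not suspended, pulls inbound data
 * off the socket via tcp_read_sock() with siw_tcp_rx_data() as the
 * receive actor.
 */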
93 void siw_qp_llp_data_ready(struct sock *sk)
94 {
95         struct siw_qp *qp;
96
97         read_lock(&sk->sk_callback_lock);
98
99         if (unlikely(!sk->sk_user_data || !sk_to_qp(sk)))
100                 goto done;
101
102         qp = sk_to_qp(sk);
103
104         if (likely(!qp->rx_stream.rx_suspend &&
105                    down_read_trylock(&qp->state_lock))) {
106                 read_descriptor_t rd_desc = { .arg.data = qp, .count = 1 };
107
108                 if (likely(qp->attrs.state == SIW_QP_STATE_RTS))
109                         /*
110                          * Implements data receive operation during
111                          * socket callback. TCP gracefully catches
112                          * the case where there is nothing to receive
113                          * (not calling siw_tcp_rx_data() then).
114                          */
115                         tcp_read_sock(sk, &rd_desc, siw_tcp_rx_data);
116
117                 up_read(&qp->state_lock);
118         } else {
119                 siw_dbg_qp(qp, "unable to process RX, suspend: %d\n",
120                            qp->rx_stream.rx_suspend);
121         }
122 done:
123         read_unlock(&sk->sk_callback_lock);
124 }
125
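/*
 * Called when the LLP (TCP) connection is gone: suspends RX and TX
 * processing, detaches the socket, updates the QP state, flushes SQ
 * and RQ, and drops the reference to the connection endpoint (CEP).
 */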
126 void siw_qp_llp_close(struct siw_qp *qp)
127 {
128         siw_dbg_qp(qp, "enter llp close, state = %s\n",
129                    siw_qp_state_to_string[qp->attrs.state]);
130
131         down_write(&qp->state_lock);
132
133         qp->rx_stream.rx_suspend = 1;
134         qp->tx_ctx.tx_suspend = 1;
135         qp->attrs.sk = NULL;
136
137         switch (qp->attrs.state) {
138         case SIW_QP_STATE_RTS:
139         case SIW_QP_STATE_RTR:
140         case SIW_QP_STATE_IDLE:
141         case SIW_QP_STATE_TERMINATE:
142                 qp->attrs.state = SIW_QP_STATE_ERROR;
143                 break;
144         /*
145          * SIW_QP_STATE_CLOSING:
146          *
147          * This is a forced close. Shall the QP be moved to
148          * ERROR or IDLE?
149          */
150         case SIW_QP_STATE_CLOSING:
151                 if (tx_wqe(qp)->wr_status == SIW_WR_IDLE)
152                         qp->attrs.state = SIW_QP_STATE_ERROR;
153                 else
154                         qp->attrs.state = SIW_QP_STATE_IDLE;
155                 break;
156
157         default:
158                 siw_dbg_qp(qp, "llp close: no state transition needed: %s\n",
159                            siw_qp_state_to_string[qp->attrs.state]);
160                 break;
161         }
162         siw_sq_flush(qp);
163         siw_rq_flush(qp);
164
165         /*
166          * Drop the reference to the closing CEP
167          */
168         if (qp->cep) {
169                 siw_cep_put(qp->cep);
170                 qp->cep = NULL;
171         }
172
173         up_write(&qp->state_lock);
174
175         siw_dbg_qp(qp, "llp close exit: state %s\n",
176                    siw_qp_state_to_string[qp->attrs.state]);
177 }
178
179 /*
180  * socket callback routine informing about newly available send space.
181  * Function schedules SQ work for processing SQ items.
182  */
183 void siw_qp_llp_write_space(struct sock *sk)
184 {
185         struct siw_cep *cep = sk_to_cep(sk);
186
187         cep->sk_write_space(sk);
188
189         if (!test_bit(SOCK_NOSPACE, &sk->sk_socket->flags))
190                 (void)siw_sq_start(cep->qp);
191 }
192
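/*
 * Allocate the inbound (IRQ) and outbound (ORQ) READ request queues.
 * Both sizes are rounded up to the next power of two.
 */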
193 static int siw_qp_readq_init(struct siw_qp *qp, int irq_size, int orq_size)
194 {
195         irq_size = roundup_pow_of_two(irq_size);
196         orq_size = roundup_pow_of_two(orq_size);
197
198         qp->attrs.irq_size = irq_size;
199         qp->attrs.orq_size = orq_size;
200
201         qp->irq = vzalloc(irq_size * sizeof(struct siw_sqe));
202         if (!qp->irq) {
203                 siw_dbg_qp(qp, "irq malloc for %d failed\n", irq_size);
204                 qp->attrs.irq_size = 0;
205                 return -ENOMEM;
206         }
207         qp->orq = vzalloc(orq_size * sizeof(struct siw_sqe));
208         if (!qp->orq) {
209                 siw_dbg_qp(qp, "orq malloc for %d failed\n", orq_size);
210                 qp->attrs.orq_size = 0;
211                 qp->attrs.irq_size = 0;
212                 vfree(qp->irq);
213                 return -ENOMEM;
214         }
215         siw_dbg_qp(qp, "ORD %d, IRD %d\n", orq_size, irq_size);
216         return 0;
217 }
218
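/*
 * Allocate and initialize the MPA CRC shash descriptors for the TX and
 * RX paths. Fails with -ENOENT if no CRC algorithm is available.
 */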
219 static int siw_qp_enable_crc(struct siw_qp *qp)
220 {
221         struct siw_rx_stream *c_rx = &qp->rx_stream;
222         struct siw_iwarp_tx *c_tx = &qp->tx_ctx;
223         int size;
224
225         if (siw_crypto_shash == NULL)
226                 return -ENOENT;
227
228         size = crypto_shash_descsize(siw_crypto_shash) +
229                 sizeof(struct shash_desc);
230
231         c_tx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
232         c_rx->mpa_crc_hd = kzalloc(size, GFP_KERNEL);
233         if (!c_tx->mpa_crc_hd || !c_rx->mpa_crc_hd) {
234                 kfree(c_tx->mpa_crc_hd);
235                 kfree(c_rx->mpa_crc_hd);
236                 c_tx->mpa_crc_hd = NULL;
237                 c_rx->mpa_crc_hd = NULL;
238                 return -ENOMEM;
239         }
240         c_tx->mpa_crc_hd->tfm = siw_crypto_shash;
241         c_rx->mpa_crc_hd->tfm = siw_crypto_shash;
242
243         return 0;
244 }
245
246 /*
247  * Send a non-signalled READ or WRITE to the peer side as negotiated
248  * with the MPAv2 P2P setup protocol. The work request is only created
249  * as a current active WR and does not consume Send Queue space.
250  *
251  * Caller must hold QP state lock.
252  */
253 int siw_qp_mpa_rts(struct siw_qp *qp, enum mpa_v2_ctrl ctrl)
254 {
255         struct siw_wqe *wqe = tx_wqe(qp);
256         unsigned long flags;
257         int rv = 0;
258
259         spin_lock_irqsave(&qp->sq_lock, flags);
260
261         if (unlikely(wqe->wr_status != SIW_WR_IDLE)) {
262                 spin_unlock_irqrestore(&qp->sq_lock, flags);
263                 return -EIO;
264         }
265         memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
266
267         wqe->wr_status = SIW_WR_QUEUED;
268         wqe->sqe.flags = 0;
269         wqe->sqe.num_sge = 1;
270         wqe->sqe.sge[0].length = 0;
271         wqe->sqe.sge[0].laddr = 0;
272         wqe->sqe.sge[0].lkey = 0;
273         /*
274          * While it must not be checked for inbound zero length
275          * READ/WRITE, some HW may treat STag 0 as special.
276          */
277         wqe->sqe.rkey = 1;
278         wqe->sqe.raddr = 0;
279         wqe->processed = 0;
280
281         if (ctrl & MPA_V2_RDMA_WRITE_RTR)
282                 wqe->sqe.opcode = SIW_OP_WRITE;
283         else if (ctrl & MPA_V2_RDMA_READ_RTR) {
284                 struct siw_sqe *rreq;
285
286                 wqe->sqe.opcode = SIW_OP_READ;
287
288                 spin_lock(&qp->orq_lock);
289
290                 rreq = orq_get_free(qp);
291                 if (rreq) {
292                         siw_read_to_orq(rreq, &wqe->sqe);
293                         qp->orq_put++;
294                 } else
295                         rv = -EIO;
296
297                 spin_unlock(&qp->orq_lock);
298         } else
299                 rv = -EINVAL;
300
301         if (rv)
302                 wqe->wr_status = SIW_WR_IDLE;
303
304         spin_unlock_irqrestore(&qp->sq_lock, flags);
305
306         if (!rv)
307                 rv = siw_sq_start(qp);
308
309         return rv;
310 }
311
312 /*
313  * Map memory access error to DDP tagged error
314  */
315 enum ddp_ecode siw_tagged_error(enum siw_access_state state)
316 {
317         switch (state) {
318         case E_STAG_INVALID:
319                 return DDP_ECODE_T_INVALID_STAG;
320         case E_BASE_BOUNDS:
321                 return DDP_ECODE_T_BASE_BOUNDS;
322         case E_PD_MISMATCH:
323                 return DDP_ECODE_T_STAG_NOT_ASSOC;
324         case E_ACCESS_PERM:
325                 /*
326                  * RFC 5041 (DDP) lacks an ecode for insufficient access
327                  * permissions. 'Invalid STag' seems to be the closest
328                  * match though.
329                  */
330                 return DDP_ECODE_T_INVALID_STAG;
331         default:
332                 WARN_ON(1);
333                 return DDP_ECODE_T_INVALID_STAG;
334         }
335 }
336
337 /*
338  * Map memory access error to RDMAP protection error
339  */
340 enum rdmap_ecode siw_rdmap_error(enum siw_access_state state)
341 {
342         switch (state) {
343         case E_STAG_INVALID:
344                 return RDMAP_ECODE_INVALID_STAG;
345         case E_BASE_BOUNDS:
346                 return RDMAP_ECODE_BASE_BOUNDS;
347         case E_PD_MISMATCH:
348                 return RDMAP_ECODE_STAG_NOT_ASSOC;
349         case E_ACCESS_PERM:
350                 return RDMAP_ECODE_ACCESS_RIGHTS;
351         default:
352                 return RDMAP_ECODE_UNSPECIFIED;
353         }
354 }
355
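/*
 * Record the cause for a TERMINATE message to be sent later by
 * siw_send_terminate(). Only the first error per QP is kept.
 */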
356 void siw_init_terminate(struct siw_qp *qp, enum term_elayer layer, u8 etype,
357                         u8 ecode, int in_tx)
358 {
359         if (!qp->term_info.valid) {
360                 memset(&qp->term_info, 0, sizeof(qp->term_info));
361                 qp->term_info.layer = layer;
362                 qp->term_info.etype = etype;
363                 qp->term_info.ecode = ecode;
364                 qp->term_info.in_tx = in_tx;
365                 qp->term_info.valid = 1;
366         }
367         siw_dbg_qp(qp, "init TERM: layer %d, type %d, code %d, in tx %s\n",
368                    layer, etype, ecode, in_tx ? "yes" : "no");
369 }
370
371 /*
372  * Send a TERMINATE message, as defined in RFCs 5040/5041/5044/6581.
373  * Sending TERMINATE messages is best effort - such messages
374  * can only be sent if the QP is still connected and it does
375  * not have another outbound message in progress, i.e. the
376  * TERMINATE message must not interfere with an incomplete current
377  * transmit operation.
378  */
379 void siw_send_terminate(struct siw_qp *qp)
380 {
381         struct kvec iov[3];
382         struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_EOR };
383         struct iwarp_terminate *term = NULL;
384         union iwarp_hdr *err_hdr = NULL;
385         struct socket *s = qp->attrs.sk;
386         struct siw_rx_stream *srx = &qp->rx_stream;
387         union iwarp_hdr *rx_hdr = &srx->hdr;
388         u32 crc = 0;
389         int num_frags, len_terminate, rv;
390
391         if (!qp->term_info.valid)
392                 return;
393
394         qp->term_info.valid = 0;
395
396         if (tx_wqe(qp)->wr_status == SIW_WR_INPROGRESS) {
397                 siw_dbg_qp(qp, "cannot send TERMINATE: op %d in progress\n",
398                            tx_type(tx_wqe(qp)));
399                 return;
400         }
401         if (!s && qp->cep)
402                 /* QP not yet in RTS. Take socket from connection endpoint */
403                 s = qp->cep->sock;
404
405         if (!s) {
406                 siw_dbg_qp(qp, "cannot send TERMINATE: not connected\n");
407                 return;
408         }
409
410         term = kzalloc(sizeof(*term), GFP_KERNEL);
411         if (!term)
412                 return;
413
414         term->ddp_qn = cpu_to_be32(RDMAP_UNTAGGED_QN_TERMINATE);
415         term->ddp_mo = 0;
416         term->ddp_msn = cpu_to_be32(1);
417
418         iov[0].iov_base = term;
419         iov[0].iov_len = sizeof(*term);
420
421         if ((qp->term_info.layer == TERM_ERROR_LAYER_DDP) ||
422             ((qp->term_info.layer == TERM_ERROR_LAYER_RDMAP) &&
423              (qp->term_info.etype != RDMAP_ETYPE_CATASTROPHIC))) {
424                 err_hdr = kzalloc(sizeof(*err_hdr), GFP_KERNEL);
425                 if (!err_hdr) {
426                         kfree(term);
427                         return;
428                 }
429         }
430         memcpy(&term->ctrl, &iwarp_pktinfo[RDMAP_TERMINATE].ctrl,
431                sizeof(struct iwarp_ctrl));
432
433         __rdmap_term_set_layer(term, qp->term_info.layer);
434         __rdmap_term_set_etype(term, qp->term_info.etype);
435         __rdmap_term_set_ecode(term, qp->term_info.ecode);
436
437         switch (qp->term_info.layer) {
438         case TERM_ERROR_LAYER_RDMAP:
439                 if (qp->term_info.etype == RDMAP_ETYPE_CATASTROPHIC)
440                         /* No additional DDP/RDMAP header to be included */
441                         break;
442
443                 if (qp->term_info.etype == RDMAP_ETYPE_REMOTE_PROTECTION) {
444                         /*
445                          * Complete RDMAP frame will get attached, and
446                          * DDP segment length is valid
447                          */
448                         term->flag_m = 1;
449                         term->flag_d = 1;
450                         term->flag_r = 1;
451
452                         if (qp->term_info.in_tx) {
453                                 struct iwarp_rdma_rreq *rreq;
454                                 struct siw_wqe *wqe = tx_wqe(qp);
455
456                                 /* Inbound RREQ error, detected during
457                                  * RRESP creation. Take state from
458                                  * current TX work queue element to
459                                  * reconstruct the peer's RREQ.
460                                  */
461                                 rreq = (struct iwarp_rdma_rreq *)err_hdr;
462
463                                 memcpy(&rreq->ctrl,
464                                        &iwarp_pktinfo[RDMAP_RDMA_READ_REQ].ctrl,
465                                        sizeof(struct iwarp_ctrl));
466
467                                 rreq->rsvd = 0;
468                                 rreq->ddp_qn =
469                                         htonl(RDMAP_UNTAGGED_QN_RDMA_READ);
470
471                                 /* Provide RREQ's MSN as kept aside */
472                                 rreq->ddp_msn = htonl(wqe->sqe.sge[0].length);
473
474                                 rreq->ddp_mo = htonl(wqe->processed);
475                                 rreq->sink_stag = htonl(wqe->sqe.rkey);
476                                 rreq->sink_to = cpu_to_be64(wqe->sqe.raddr);
477                                 rreq->read_size = htonl(wqe->sqe.sge[0].length);
478                                 rreq->source_stag = htonl(wqe->sqe.sge[0].lkey);
479                                 rreq->source_to =
480                                         cpu_to_be64(wqe->sqe.sge[0].laddr);
481
482                                 iov[1].iov_base = rreq;
483                                 iov[1].iov_len = sizeof(*rreq);
484
485                                 rx_hdr = (union iwarp_hdr *)rreq;
486                         } else {
487                                 /* Take RDMAP/DDP information from
488                                  * current (failed) inbound frame.
489                                  */
490                                 iov[1].iov_base = rx_hdr;
491
492                                 if (__rdmap_get_opcode(&rx_hdr->ctrl) ==
493                                     RDMAP_RDMA_READ_REQ)
494                                         iov[1].iov_len =
495                                                 sizeof(struct iwarp_rdma_rreq);
496                                 else /* SEND type */
497                                         iov[1].iov_len =
498                                                 sizeof(struct iwarp_send);
499                         }
500                 } else {
501                         /* Do not report DDP hdr information if packet
502                          * layout is unknown
503                          */
504                         if ((qp->term_info.ecode == RDMAP_ECODE_VERSION) ||
505                             (qp->term_info.ecode == RDMAP_ECODE_OPCODE))
506                                 break;
507
508                         iov[1].iov_base = rx_hdr;
509
510                         /* Only DDP frame will get attached */
511                         if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
512                                 iov[1].iov_len =
513                                         sizeof(struct iwarp_rdma_write);
514                         else
515                                 iov[1].iov_len = sizeof(struct iwarp_send);
516
517                         term->flag_m = 1;
518                         term->flag_d = 1;
519                 }
520                 term->ctrl.mpa_len = cpu_to_be16(iov[1].iov_len);
521                 break;
522
523         case TERM_ERROR_LAYER_DDP:
524                 /* Report an error encountered during DDP processing.
525                  * This can only happen as a result of inbound
526                  * DDP processing
527                  */
528
529                 /* Do not report DDP hdr information if packet
530                  * layout is unknown
531                  */
532                 if (((qp->term_info.etype == DDP_ETYPE_TAGGED_BUF) &&
533                      (qp->term_info.ecode == DDP_ECODE_T_VERSION)) ||
534                     ((qp->term_info.etype == DDP_ETYPE_UNTAGGED_BUF) &&
535                      (qp->term_info.ecode == DDP_ECODE_UT_VERSION)))
536                         break;
537
538                 iov[1].iov_base = rx_hdr;
539
540                 if (rx_hdr->ctrl.ddp_rdmap_ctrl & DDP_FLAG_TAGGED)
541                         iov[1].iov_len = sizeof(struct iwarp_ctrl_tagged);
542                 else
543                         iov[1].iov_len = sizeof(struct iwarp_ctrl_untagged);
544
545                 term->flag_m = 1;
546                 term->flag_d = 1;
547                 break;
548
549         default:
550                 break;
551         }
552         if (term->flag_m || term->flag_d || term->flag_r) {
553                 iov[2].iov_base = &crc;
554                 iov[2].iov_len = sizeof(crc);
555                 len_terminate = sizeof(*term) + iov[1].iov_len + MPA_CRC_SIZE;
556                 num_frags = 3;
557         } else {
558                 iov[1].iov_base = &crc;
559                 iov[1].iov_len = sizeof(crc);
560                 len_terminate = sizeof(*term) + MPA_CRC_SIZE;
561                 num_frags = 2;
562         }
563
564         /* Adjust DDP Segment Length parameter, if valid */
565         if (term->flag_m) {
566                 u32 real_ddp_len = be16_to_cpu(rx_hdr->ctrl.mpa_len);
567                 enum rdma_opcode op = __rdmap_get_opcode(&rx_hdr->ctrl);
568
569                 real_ddp_len -= iwarp_pktinfo[op].hdr_len - MPA_HDR_SIZE;
570                 rx_hdr->ctrl.mpa_len = cpu_to_be16(real_ddp_len);
571         }
572
573         term->ctrl.mpa_len =
574                 cpu_to_be16(len_terminate - (MPA_HDR_SIZE + MPA_CRC_SIZE));
575         if (qp->tx_ctx.mpa_crc_hd) {
576                 crypto_shash_init(qp->tx_ctx.mpa_crc_hd);
577                 if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
578                                         (u8 *)iov[0].iov_base,
579                                         iov[0].iov_len))
580                         goto out;
581
582                 if (num_frags == 3) {
583                         if (crypto_shash_update(qp->tx_ctx.mpa_crc_hd,
584                                                 (u8 *)iov[1].iov_base,
585                                                 iov[1].iov_len))
586                                 goto out;
587                 }
588                 crypto_shash_final(qp->tx_ctx.mpa_crc_hd, (u8 *)&crc);
589         }
590
591         rv = kernel_sendmsg(s, &msg, iov, num_frags, len_terminate);
592         siw_dbg_qp(qp, "sent TERM: %s, layer %d, type %d, code %d (%d bytes)\n",
593                    rv == len_terminate ? "success" : "failure",
594                    __rdmap_term_layer(term), __rdmap_term_etype(term),
595                    __rdmap_term_ecode(term), rv);
596 out:
597         kfree(term);
598         kfree(err_hdr);
599 }
600
601 /*
602  * Handle all attrs other than state
603  */
604 static void siw_qp_modify_nonstate(struct siw_qp *qp,
605                                    struct siw_qp_attrs *attrs,
606                                    enum siw_qp_attr_mask mask)
607 {
608         if (mask & SIW_QP_ATTR_ACCESS_FLAGS) {
609                 if (attrs->flags & SIW_RDMA_BIND_ENABLED)
610                         qp->attrs.flags |= SIW_RDMA_BIND_ENABLED;
611                 else
612                         qp->attrs.flags &= ~SIW_RDMA_BIND_ENABLED;
613
614                 if (attrs->flags & SIW_RDMA_WRITE_ENABLED)
615                         qp->attrs.flags |= SIW_RDMA_WRITE_ENABLED;
616                 else
617                         qp->attrs.flags &= ~SIW_RDMA_WRITE_ENABLED;
618
619                 if (attrs->flags & SIW_RDMA_READ_ENABLED)
620                         qp->attrs.flags |= SIW_RDMA_READ_ENABLED;
621                 else
622                         qp->attrs.flags &= ~SIW_RDMA_READ_ENABLED;
623         }
624 }
625
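/*
 * QP state transitions out of IDLE or RTR. The transition to RTS sets
 * up CRC handling, the DDP MSN counters and the READ request queues,
 * and attaches the connection's socket.
 */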
626 static int siw_qp_nextstate_from_idle(struct siw_qp *qp,
627                                       struct siw_qp_attrs *attrs,
628                                       enum siw_qp_attr_mask mask)
629 {
630         int rv = 0;
631
632         switch (attrs->state) {
633         case SIW_QP_STATE_RTS:
634                 if (attrs->flags & SIW_MPA_CRC) {
635                         rv = siw_qp_enable_crc(qp);
636                         if (rv)
637                                 break;
638                 }
639                 if (!(mask & SIW_QP_ATTR_LLP_HANDLE)) {
640                         siw_dbg_qp(qp, "no socket\n");
641                         rv = -EINVAL;
642                         break;
643                 }
644                 if (!(mask & SIW_QP_ATTR_MPA)) {
645                         siw_dbg_qp(qp, "no MPA\n");
646                         rv = -EINVAL;
647                         break;
648                 }
649                 /*
650                  * Initialize iWARP TX state
651                  */
652                 qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 0;
653                 qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 0;
654                 qp->tx_ctx.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 0;
655
656                 /*
657                  * Initialize iWARP RX state
658                  */
659                 qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_SEND] = 1;
660                 qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_RDMA_READ] = 1;
661                 qp->rx_stream.ddp_msn[RDMAP_UNTAGGED_QN_TERMINATE] = 1;
662
663                 /*
664                  * Initialize the inbound (IRQ) and outbound (ORQ) READ
665                  * request queues; the caller already checked the limits.
666                  */
667                 rv = siw_qp_readq_init(qp, attrs->irq_size,
668                                        attrs->orq_size);
669                 if (rv)
670                         break;
671
672                 qp->attrs.sk = attrs->sk;
673                 qp->attrs.state = SIW_QP_STATE_RTS;
674
675                 siw_dbg_qp(qp, "enter RTS: crc=%s, ord=%u, ird=%u\n",
676                            attrs->flags & SIW_MPA_CRC ? "y" : "n",
677                            qp->attrs.orq_size, qp->attrs.irq_size);
678                 break;
679
680         case SIW_QP_STATE_ERROR:
681                 siw_rq_flush(qp);
682                 qp->attrs.state = SIW_QP_STATE_ERROR;
683                 if (qp->cep) {
684                         siw_cep_put(qp->cep);
685                         qp->cep = NULL;
686                 }
687                 break;
688
689         default:
690                 break;
691         }
692         return rv;
693 }
694
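/*
 * QP state transitions out of RTS. Returns non-zero if the caller
 * shall drop the connection.
 */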
695 static int siw_qp_nextstate_from_rts(struct siw_qp *qp,
696                                      struct siw_qp_attrs *attrs)
697 {
698         int drop_conn = 0;
699
700         switch (attrs->state) {
701         case SIW_QP_STATE_CLOSING:
702                 /*
703                  * Verbs: move to IDLE if SQ and ORQ are empty.
704                  * Move to ERROR otherwise. But first of all we must
705                  * close the connection. So we keep CLOSING or ERROR
706                  * as a transient state, schedule connection drop work
707                  * and wait for the socket state change upcall to
708                  * come back closed.
709                  */
710                 if (tx_wqe(qp)->wr_status == SIW_WR_IDLE) {
711                         qp->attrs.state = SIW_QP_STATE_CLOSING;
712                 } else {
713                         qp->attrs.state = SIW_QP_STATE_ERROR;
714                         siw_sq_flush(qp);
715                 }
716                 siw_rq_flush(qp);
717
718                 drop_conn = 1;
719                 break;
720
721         case SIW_QP_STATE_TERMINATE:
722                 qp->attrs.state = SIW_QP_STATE_TERMINATE;
723
724                 siw_init_terminate(qp, TERM_ERROR_LAYER_RDMAP,
725                                    RDMAP_ETYPE_CATASTROPHIC,
726                                    RDMAP_ECODE_UNSPECIFIED, 1);
727                 drop_conn = 1;
728                 break;
729
730         case SIW_QP_STATE_ERROR:
731                 /*
732                  * This is an emergency close.
733                  *
734                  * Any in-progress transmit operation will get
735                  * cancelled.
736                  * This will likely result in a protocol failure,
737                  * if a TX operation is in transit. The caller
738                  * could unconditionally wait to give the current
739                  * operation a chance to complete.
740                  * Especially, how to handle the non-empty IRQ case?
741                  * The peer was asking for data transfer at a valid
742                  * point in time.
743                  */
744                 siw_sq_flush(qp);
745                 siw_rq_flush(qp);
746                 qp->attrs.state = SIW_QP_STATE_ERROR;
747                 drop_conn = 1;
748                 break;
749
750         default:
751                 break;
752         }
753         return drop_conn;
754 }
755
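/*
 * QP state transitions out of TERMINATE. Only the move to ERROR is
 * handled: flush the RQ and, if a WQE is still in progress, the SQ.
 */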
756 static void siw_qp_nextstate_from_term(struct siw_qp *qp,
757                                        struct siw_qp_attrs *attrs)
758 {
759         switch (attrs->state) {
760         case SIW_QP_STATE_ERROR:
761                 siw_rq_flush(qp);
762                 qp->attrs.state = SIW_QP_STATE_ERROR;
763
764                 if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
765                         siw_sq_flush(qp);
766                 break;
767
768         default:
769                 break;
770         }
771 }
772
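/*
 * QP state transitions out of CLOSING. Undefined transitions are
 * rejected with -ECONNABORTED.
 */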
773 static int siw_qp_nextstate_from_close(struct siw_qp *qp,
774                                        struct siw_qp_attrs *attrs)
775 {
776         int rv = 0;
777
778         switch (attrs->state) {
779         case SIW_QP_STATE_IDLE:
780                 WARN_ON(tx_wqe(qp)->wr_status != SIW_WR_IDLE);
781                 qp->attrs.state = SIW_QP_STATE_IDLE;
782                 break;
783
784         case SIW_QP_STATE_CLOSING:
785                 /*
786                  * The LLP may have already moved the QP to CLOSING
787                  * due to a graceful peer close initiation.
788                  */
789                 break;
790
791         case SIW_QP_STATE_ERROR:
792                 /*
793                  * QP was moved to CLOSING by LLP event
794                  * not yet seen by user.
795                  */
796                 qp->attrs.state = SIW_QP_STATE_ERROR;
797
798                 if (tx_wqe(qp)->wr_status != SIW_WR_IDLE)
799                         siw_sq_flush(qp);
800
801                 siw_rq_flush(qp);
802                 break;
803
804         default:
805                 siw_dbg_qp(qp, "state transition undefined: %s => %s\n",
806                            siw_qp_state_to_string[qp->attrs.state],
807                            siw_qp_state_to_string[attrs->state]);
808
809                 rv = -ECONNABORTED;
810         }
811         return rv;
812 }
813
814 /*
815  * Caller must hold qp->state_lock
816  */
817 int siw_qp_modify(struct siw_qp *qp, struct siw_qp_attrs *attrs,
818                   enum siw_qp_attr_mask mask)
819 {
820         int drop_conn = 0, rv = 0;
821
822         if (!mask)
823                 return 0;
824
825         siw_dbg_qp(qp, "state: %s => %s\n",
826                    siw_qp_state_to_string[qp->attrs.state],
827                    siw_qp_state_to_string[attrs->state]);
828
829         if (mask != SIW_QP_ATTR_STATE)
830                 siw_qp_modify_nonstate(qp, attrs, mask);
831
832         if (!(mask & SIW_QP_ATTR_STATE))
833                 return 0;
834
835         switch (qp->attrs.state) {
836         case SIW_QP_STATE_IDLE:
837         case SIW_QP_STATE_RTR:
838                 rv = siw_qp_nextstate_from_idle(qp, attrs, mask);
839                 break;
840
841         case SIW_QP_STATE_RTS:
842                 drop_conn = siw_qp_nextstate_from_rts(qp, attrs);
843                 break;
844
845         case SIW_QP_STATE_TERMINATE:
846                 siw_qp_nextstate_from_term(qp, attrs);
847                 break;
848
849         case SIW_QP_STATE_CLOSING:
850                 siw_qp_nextstate_from_close(qp, attrs);
851                 break;
852         default:
853                 break;
854         }
855         if (drop_conn)
856                 siw_qp_cm_drop(qp, 0);
857
858         return rv;
859 }
860
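/*
 * Copy the essentials of a READ request SQE into a free ORQ entry,
 * to allow matching the inbound READ response later.
 */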
861 void siw_read_to_orq(struct siw_sqe *rreq, struct siw_sqe *sqe)
862 {
863         rreq->id = sqe->id;
864         rreq->opcode = sqe->opcode;
865         rreq->sge[0].laddr = sqe->sge[0].laddr;
866         rreq->sge[0].length = sqe->sge[0].length;
867         rreq->sge[0].lkey = sqe->sge[0].lkey;
868         rreq->sge[1].lkey = sqe->sge[1].lkey;
869         rreq->flags = sqe->flags | SIW_WQE_VALID;
870         rreq->num_sge = 1;
871 }
872
873 /*
874  * Must be called with SQ locked.
875  * To avoid complete SQ starvation by constant inbound READ requests,
876  * the active IRQ will not be served once qp->irq_burst reaches
877  * SIW_IRQ_MAXBURST_SQ_ACTIVE, if the SQ has pending work.
878  */
879 int siw_activate_tx(struct siw_qp *qp)
880 {
881         struct siw_sqe *irqe, *sqe;
882         struct siw_wqe *wqe = tx_wqe(qp);
883         int rv = 1;
884
885         irqe = &qp->irq[qp->irq_get % qp->attrs.irq_size];
886
887         if (irqe->flags & SIW_WQE_VALID) {
888                 sqe = sq_get_next(qp);
889
890                 /*
891                  * Avoid local WQE processing starvation in case
892                  * of constant inbound READ request stream
893                  */
894                 if (sqe && ++qp->irq_burst >= SIW_IRQ_MAXBURST_SQ_ACTIVE) {
895                         qp->irq_burst = 0;
896                         goto skip_irq;
897                 }
898                 memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
899                 wqe->wr_status = SIW_WR_QUEUED;
900
901                 /* start READ RESPONSE */
902                 wqe->sqe.opcode = SIW_OP_READ_RESPONSE;
903                 wqe->sqe.flags = 0;
904                 if (irqe->num_sge) {
905                         wqe->sqe.num_sge = 1;
906                         wqe->sqe.sge[0].length = irqe->sge[0].length;
907                         wqe->sqe.sge[0].laddr = irqe->sge[0].laddr;
908                         wqe->sqe.sge[0].lkey = irqe->sge[0].lkey;
909                 } else {
910                         wqe->sqe.num_sge = 0;
911                 }
912
913                 /* Retain original RREQ's message sequence number for
914                  * potential error reporting cases.
915                  */
916                 wqe->sqe.sge[1].length = irqe->sge[1].length;
917
918                 wqe->sqe.rkey = irqe->rkey;
919                 wqe->sqe.raddr = irqe->raddr;
920
921                 wqe->processed = 0;
922                 qp->irq_get++;
923
924                 /* mark current IRQ entry free */
925                 smp_store_mb(irqe->flags, 0);
926
927                 goto out;
928         }
929         sqe = sq_get_next(qp);
930         if (sqe) {
931 skip_irq:
932                 memset(wqe->mem, 0, sizeof(*wqe->mem) * SIW_MAX_SGE);
933                 wqe->wr_status = SIW_WR_QUEUED;
934
935                 /* First copy SQE to kernel private memory */
936                 memcpy(&wqe->sqe, sqe, sizeof(*sqe));
937
938                 if (wqe->sqe.opcode >= SIW_NUM_OPCODES) {
939                         rv = -EINVAL;
940                         goto out;
941                 }
942                 if (wqe->sqe.flags & SIW_WQE_INLINE) {
943                         if (wqe->sqe.opcode != SIW_OP_SEND &&
944                             wqe->sqe.opcode != SIW_OP_WRITE) {
945                                 rv = -EINVAL;
946                                 goto out;
947                         }
948                         if (wqe->sqe.sge[0].length > SIW_MAX_INLINE) {
949                                 rv = -EINVAL;
950                                 goto out;
951                         }
952                         wqe->sqe.sge[0].laddr = (u64)&wqe->sqe.sge[1];
953                         wqe->sqe.sge[0].lkey = 0;
954                         wqe->sqe.num_sge = 1;
955                 }
956                 if (wqe->sqe.flags & SIW_WQE_READ_FENCE) {
957                         /* A READ cannot be fenced */
958                         if (unlikely(wqe->sqe.opcode == SIW_OP_READ ||
959                                      wqe->sqe.opcode ==
960                                              SIW_OP_READ_LOCAL_INV)) {
961                                 siw_dbg_qp(qp, "cannot fence read\n");
962                                 rv = -EINVAL;
963                                 goto out;
964                         }
965                         spin_lock(&qp->orq_lock);
966
967                         if (!siw_orq_empty(qp)) {
968                                 qp->tx_ctx.orq_fence = 1;
969                                 rv = 0;
970                         }
971                         spin_unlock(&qp->orq_lock);
972
973                 } else if (wqe->sqe.opcode == SIW_OP_READ ||
974                            wqe->sqe.opcode == SIW_OP_READ_LOCAL_INV) {
975                         struct siw_sqe *rreq;
976
977                         wqe->sqe.num_sge = 1;
978
979                         spin_lock(&qp->orq_lock);
980
981                         rreq = orq_get_free(qp);
982                         if (rreq) {
983                                 /*
984                                  * Make an immediate copy in ORQ to be ready
985                                  * to process loopback READ reply
986                                  */
987                                 siw_read_to_orq(rreq, &wqe->sqe);
988                                 qp->orq_put++;
989                         } else {
990                                 qp->tx_ctx.orq_fence = 1;
991                                 rv = 0;
992                         }
993                         spin_unlock(&qp->orq_lock);
994                 }
995
996                 /* Clear SQE, can be re-used by application */
997                 smp_store_mb(sqe->flags, 0);
998                 qp->sq_get++;
999         } else {
1000                 rv = 0;
1001         }
1002 out:
1003         if (unlikely(rv < 0)) {
1004                 siw_dbg_qp(qp, "error %d\n", rv);
1005                 wqe->wr_status = SIW_WR_IDLE;
1006         }
1007         return rv;
1008 }
1009
1010 /*
1011  * Check if current CQ state qualifies for calling CQ completion
1012  * handler. Must be called with CQ lock held.
1013  */
1014 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
1015 {
1016         u64 cq_notify;
1017
1018         if (!cq->base_cq.comp_handler)
1019                 return false;
1020
1021         cq_notify = READ_ONCE(*cq->notify);
1022
1023         if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
1024             ((cq_notify & SIW_NOTIFY_SOLICITED) &&
1025              (flags & SIW_WQE_SOLICITED))) {
1026                 /* dis-arm CQ */
1027                 smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
1028
1029                 return true;
1030         }
1031         return false;
1032 }
1033
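/*
 * Post a work completion for an SQE to the attached send CQ, if any,
 * and recycle the SQE. Calls the CQ completion handler if the CQ is
 * currently armed for notification. Returns -ENOMEM and raises a CQ
 * error event if the CQ is full.
 */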
1034 int siw_sqe_complete(struct siw_qp *qp, struct siw_sqe *sqe, u32 bytes,
1035                      enum siw_wc_status status)
1036 {
1037         struct siw_cq *cq = qp->scq;
1038         int rv = 0;
1039
1040         if (cq) {
1041                 u32 sqe_flags = sqe->flags;
1042                 struct siw_cqe *cqe;
1043                 u32 idx;
1044                 unsigned long flags;
1045
1046                 spin_lock_irqsave(&cq->lock, flags);
1047
1048                 idx = cq->cq_put % cq->num_cqe;
1049                 cqe = &cq->queue[idx];
1050
1051                 if (!READ_ONCE(cqe->flags)) {
1052                         bool notify;
1053
1054                         cqe->id = sqe->id;
1055                         cqe->opcode = sqe->opcode;
1056                         cqe->status = status;
1057                         cqe->imm_data = 0;
1058                         cqe->bytes = bytes;
1059
1060                         if (cq->kernel_verbs)
1061                                 cqe->base_qp = qp->ib_qp;
1062                         else
1063                                 cqe->qp_id = qp_id(qp);
1064
1065                         /* mark CQE valid for application */
1066                         WRITE_ONCE(cqe->flags, SIW_WQE_VALID);
1067                         /* recycle SQE */
1068                         smp_store_mb(sqe->flags, 0);
1069
1070                         cq->cq_put++;
1071                         notify = siw_cq_notify_now(cq, sqe_flags);
1072
1073                         spin_unlock_irqrestore(&cq->lock, flags);
1074
1075                         if (notify) {
1076                                 siw_dbg_cq(cq, "Call completion handler\n");
1077                                 cq->base_cq.comp_handler(&cq->base_cq,
1078                                                 cq->base_cq.cq_context);
1079                         }
1080                 } else {
1081                         spin_unlock_irqrestore(&cq->lock, flags);
1082                         rv = -ENOMEM;
1083                         siw_cq_event(cq, IB_EVENT_CQ_ERR);
1084                 }
1085         } else {
1086                 /* recycle SQE */
1087                 smp_store_mb(sqe->flags, 0);
1088         }
1089         return rv;
1090 }
1091
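/*
 * Post a work completion for an RQE to the attached receive CQ, if
 * any, and recycle the RQE. For kernel consumers, a remotely
 * invalidated STag is reported via the CQE flags.
 */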
1092 int siw_rqe_complete(struct siw_qp *qp, struct siw_rqe *rqe, u32 bytes,
1093                      u32 inval_stag, enum siw_wc_status status)
1094 {
1095         struct siw_cq *cq = qp->rcq;
1096         int rv = 0;
1097
1098         if (cq) {
1099                 struct siw_cqe *cqe;
1100                 u32 idx;
1101                 unsigned long flags;
1102
1103                 spin_lock_irqsave(&cq->lock, flags);
1104
1105                 idx = cq->cq_put % cq->num_cqe;
1106                 cqe = &cq->queue[idx];
1107
1108                 if (!READ_ONCE(cqe->flags)) {
1109                         bool notify;
1110                         u8 cqe_flags = SIW_WQE_VALID;
1111
1112                         cqe->id = rqe->id;
1113                         cqe->opcode = SIW_OP_RECEIVE;
1114                         cqe->status = status;
1115                         cqe->imm_data = 0;
1116                         cqe->bytes = bytes;
1117
1118                         if (cq->kernel_verbs) {
1119                                 cqe->base_qp = qp->ib_qp;
1120                                 if (inval_stag) {
1121                                         cqe_flags |= SIW_WQE_REM_INVAL;
1122                                         cqe->inval_stag = inval_stag;
1123                                 }
1124                         } else {
1125                                 cqe->qp_id = qp_id(qp);
1126                         }
1127                         /* mark CQE valid for application */
1128                         WRITE_ONCE(cqe->flags, cqe_flags);
1129                         /* recycle RQE */
1130                         smp_store_mb(rqe->flags, 0);
1131
1132                         cq->cq_put++;
1133                         notify = siw_cq_notify_now(cq, SIW_WQE_SIGNALLED);
1134
1135                         spin_unlock_irqrestore(&cq->lock, flags);
1136
1137                         if (notify) {
1138                                 siw_dbg_cq(cq, "Call completion handler\n");
1139                                 cq->base_cq.comp_handler(&cq->base_cq,
1140                                                 cq->base_cq.cq_context);
1141                         }
1142                 } else {
1143                         spin_unlock_irqrestore(&cq->lock, flags);
1144                         rv = -ENOMEM;
1145                         siw_cq_event(cq, IB_EVENT_CQ_ERR);
1146                 }
1147         } else {
1148                 /* recycle RQE */
1149                 smp_store_mb(rqe->flags, 0);
1150         }
1151         return rv;
1152 }
1153
1154 /*
1155  * siw_sq_flush()
1156  *
1157  * Flush SQ and ORQ entries to CQ.
1158  *
1159  * Must be called with QP state write lock held.
1160  * Therefore, SQ and ORQ lock must not be taken.
1161  */
1162 void siw_sq_flush(struct siw_qp *qp)
1163 {
1164         struct siw_sqe *sqe;
1165         struct siw_wqe *wqe = tx_wqe(qp);
1166         int async_event = 0;
1167
1168         /*
1169          * Start with completing any work currently on the ORQ
1170          */
1171         while (qp->attrs.orq_size) {
1172                 sqe = &qp->orq[qp->orq_get % qp->attrs.orq_size];
1173                 if (!READ_ONCE(sqe->flags))
1174                         break;
1175
1176                 if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1177                         break;
1178
1179                 WRITE_ONCE(sqe->flags, 0);
1180                 qp->orq_get++;
1181         }
1182         /*
1183          * Flush an in-progress WQE if present
1184          */
1185         if (wqe->wr_status != SIW_WR_IDLE) {
1186                 siw_dbg_qp(qp, "flush current SQE, type %d, status %d\n",
1187                            tx_type(wqe), wqe->wr_status);
1188
1189                 siw_wqe_put_mem(wqe, tx_type(wqe));
1190
1191                 if (tx_type(wqe) != SIW_OP_READ_RESPONSE &&
1192                     ((tx_type(wqe) != SIW_OP_READ &&
1193                       tx_type(wqe) != SIW_OP_READ_LOCAL_INV) ||
1194                      wqe->wr_status == SIW_WR_QUEUED))
1195                         /*
1196                          * An in-progress Read Request is already in
1197                          * the ORQ
1198                          */
1199                         siw_sqe_complete(qp, &wqe->sqe, wqe->bytes,
1200                                          SIW_WC_WR_FLUSH_ERR);
1201
1202                 wqe->wr_status = SIW_WR_IDLE;
1203         }
1204         /*
1205          * Flush the Send Queue
1206          */
1207         while (qp->attrs.sq_size) {
1208                 sqe = &qp->sendq[qp->sq_get % qp->attrs.sq_size];
1209                 if (!READ_ONCE(sqe->flags))
1210                         break;
1211
1212                 async_event = 1;
1213                 if (siw_sqe_complete(qp, sqe, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1214                         /*
1215                          * Shall IB_EVENT_SQ_DRAINED be suppressed if work
1216                          * completion fails?
1217                          */
1218                         break;
1219
1220                 WRITE_ONCE(sqe->flags, 0);
1221                 qp->sq_get++;
1222         }
1223         if (async_event)
1224                 siw_qp_event(qp, IB_EVENT_SQ_DRAINED);
1225 }
1226
1227 /*
1228  * siw_rq_flush()
1229  *
1230  * Flush recv queue entries to CQ. Also
1231  * takes care of pending active tagged and untagged
1232  * inbound transfers, which still hold references
1233  * to target memory.
1234  *
1235  * Must be called with QP state write lock held.
1236  * Therefore, RQ lock must not be taken.
1237  */
1238 void siw_rq_flush(struct siw_qp *qp)
1239 {
1240         struct siw_wqe *wqe = &qp->rx_untagged.wqe_active;
1241
1242         /*
1243          * Flush an in-progress untagged operation if present
1244          */
1245         if (wqe->wr_status != SIW_WR_IDLE) {
1246                 siw_dbg_qp(qp, "flush current rqe, type %d, status %d\n",
1247                            rx_type(wqe), wqe->wr_status);
1248
1249                 siw_wqe_put_mem(wqe, rx_type(wqe));
1250
1251                 if (rx_type(wqe) == SIW_OP_RECEIVE) {
1252                         siw_rqe_complete(qp, &wqe->rqe, wqe->bytes,
1253                                          0, SIW_WC_WR_FLUSH_ERR);
1254                 } else if (rx_type(wqe) != SIW_OP_READ &&
1255                            rx_type(wqe) != SIW_OP_READ_RESPONSE &&
1256                            rx_type(wqe) != SIW_OP_WRITE) {
1257                         siw_sqe_complete(qp, &wqe->sqe, 0, SIW_WC_WR_FLUSH_ERR);
1258                 }
1259                 wqe->wr_status = SIW_WR_IDLE;
1260         }
1261         wqe = &qp->rx_tagged.wqe_active;
1262
1263         if (wqe->wr_status != SIW_WR_IDLE) {
1264                 siw_wqe_put_mem(wqe, rx_type(wqe));
1265                 wqe->wr_status = SIW_WR_IDLE;
1266         }
1267         /*
1268          * Flush the Receive Queue
1269          */
1270         while (qp->attrs.rq_size) {
1271                 struct siw_rqe *rqe =
1272                         &qp->recvq[qp->rq_get % qp->attrs.rq_size];
1273
1274                 if (!READ_ONCE(rqe->flags))
1275                         break;
1276
1277                 if (siw_rqe_complete(qp, rqe, 0, 0, SIW_WC_WR_FLUSH_ERR) != 0)
1278                         break;
1279
1280                 WRITE_ONCE(rqe->flags, 0);
1281                 qp->rq_get++;
1282         }
1283 }
1284
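/*
 * Register the QP with the device: allocate a QP number from the
 * device's XArray and initialize the QP's reference count.
 */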
1285 int siw_qp_add(struct siw_device *sdev, struct siw_qp *qp)
1286 {
1287         int rv = xa_alloc(&sdev->qp_xa, &qp->ib_qp->qp_num, qp, xa_limit_32b,
1288                           GFP_KERNEL);
1289
1290         if (!rv) {
1291                 kref_init(&qp->ref);
1292                 qp->sdev = sdev;
1293                 qp->qp_num = qp->ib_qp->qp_num;
1294                 siw_dbg_qp(qp, "new QP\n");
1295         }
1296         return rv;
1297 }
1298
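/*
 * kref release function, called when the last QP reference is dropped:
 * removes the QP from the device XArray and device list and frees its
 * work queue memory.
 */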
1299 void siw_free_qp(struct kref *ref)
1300 {
1301         struct siw_qp *found, *qp = container_of(ref, struct siw_qp, ref);
1302         struct siw_device *sdev = qp->sdev;
1303         unsigned long flags;
1304
1305         if (qp->cep)
1306                 siw_cep_put(qp->cep);
1307
1308         found = xa_erase(&sdev->qp_xa, qp_id(qp));
1309         WARN_ON(found != qp);
1310         spin_lock_irqsave(&sdev->lock, flags);
1311         list_del(&qp->devq);
1312         spin_unlock_irqrestore(&sdev->lock, flags);
1313
1314         vfree(qp->sendq);
1315         vfree(qp->recvq);
1316         vfree(qp->irq);
1317         vfree(qp->orq);
1318
1319         siw_put_tx_cpu(qp->tx_cpu);
1320
1321         atomic_dec(&sdev->num_qp);
1322         siw_dbg_qp(qp, "free QP\n");
1323         kfree_rcu(qp, rcu);
1324 }