drivers/infiniband/sw/rdmavt/qp.c
1 /*
2  * Copyright(c) 2016, 2017 Intel Corporation.
3  *
4  * This file is provided under a dual BSD/GPLv2 license.  When using or
5  * redistributing this file, you may do so under either license.
6  *
7  * GPL LICENSE SUMMARY
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of version 2 of the GNU General Public License as
11  * published by the Free Software Foundation.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16  * General Public License for more details.
17  *
18  * BSD LICENSE
19  *
20  * Redistribution and use in source and binary forms, with or without
21  * modification, are permitted provided that the following conditions
22  * are met:
23  *
24  *  - Redistributions of source code must retain the above copyright
25  *    notice, this list of conditions and the following disclaimer.
26  *  - Redistributions in binary form must reproduce the above copyright
27  *    notice, this list of conditions and the following disclaimer in
28  *    the documentation and/or other materials provided with the
29  *    distribution.
30  *  - Neither the name of Intel Corporation nor the names of its
31  *    contributors may be used to endorse or promote products derived
32  *    from this software without specific prior written permission.
33  *
34  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
35  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
36  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
37  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
38  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
39  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
40  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
41  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
42  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
43  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
44  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
45  *
46  */
47
48 #include <linux/hash.h>
49 #include <linux/bitops.h>
50 #include <linux/lockdep.h>
51 #include <linux/vmalloc.h>
52 #include <linux/slab.h>
53 #include <rdma/ib_verbs.h>
54 #include <rdma/ib_hdrs.h>
55 #include "qp.h"
56 #include "vt.h"
57 #include "trace.h"
58
59 static void rvt_rc_timeout(unsigned long arg);
60
61 /*
62  * Convert the AETH RNR timeout code into the number of microseconds.
63  */
64 static const u32 ib_rvt_rnr_table[32] = {
65         655360, /* 00: 655.36 */
66         10,     /* 01:    .01 */
67         20,     /* 02:    .02 */
68         30,     /* 03:    .03 */
69         40,     /* 04:    .04 */
70         60,     /* 05:    .06 */
71         80,     /* 06:    .08 */
72         120,    /* 07:    .12 */
73         160,    /* 08:    .16 */
74         240,    /* 09:    .24 */
75         320,    /* 0A:    .32 */
76         480,    /* 0B:    .48 */
77         640,    /* 0C:    .64 */
78         960,    /* 0D:    .96 */
79         1280,   /* 0E:   1.28 */
80         1920,   /* 0F:   1.92 */
81         2560,   /* 10:   2.56 */
82         3840,   /* 11:   3.84 */
83         5120,   /* 12:   5.12 */
84         7680,   /* 13:   7.68 */
85         10240,  /* 14:  10.24 */
86         15360,  /* 15:  15.36 */
87         20480,  /* 16:  20.48 */
88         30720,  /* 17:  30.72 */
89         40960,  /* 18:  40.96 */
90         61440,  /* 19:  61.44 */
91         81920,  /* 1A:  81.92 */
92         122880, /* 1B: 122.88 */
93         163840, /* 1C: 163.84 */
94         245760, /* 1D: 245.76 */
95         327680, /* 1E: 327.68 */
96         491520  /* 1F: 491.52 */
97 };
98
99 /*
100  * Note that it is OK to post send work requests in the SQE and ERR
101  * states; rvt_do_send() will process them and generate error
102  * completions as per IB 1.2 C10-96.
103  */
104 const int ib_rvt_state_ops[IB_QPS_ERR + 1] = {
105         [IB_QPS_RESET] = 0,
106         [IB_QPS_INIT] = RVT_POST_RECV_OK,
107         [IB_QPS_RTR] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK,
108         [IB_QPS_RTS] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
109             RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK |
110             RVT_PROCESS_NEXT_SEND_OK,
111         [IB_QPS_SQD] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
112             RVT_POST_SEND_OK | RVT_PROCESS_SEND_OK,
113         [IB_QPS_SQE] = RVT_POST_RECV_OK | RVT_PROCESS_RECV_OK |
114             RVT_POST_SEND_OK | RVT_FLUSH_SEND,
115         [IB_QPS_ERR] = RVT_POST_RECV_OK | RVT_FLUSH_RECV |
116             RVT_POST_SEND_OK | RVT_FLUSH_SEND,
117 };
118 EXPORT_SYMBOL(ib_rvt_state_ops);
119
120 static void get_map_page(struct rvt_qpn_table *qpt,
121                          struct rvt_qpn_map *map)
122 {
123         unsigned long page = get_zeroed_page(GFP_KERNEL);
124
125         /*
126          * Free the page if someone raced with us installing it.
127          */
128
129         spin_lock(&qpt->lock);
130         if (map->page)
131                 free_page(page);
132         else
133                 map->page = (void *)page;
134         spin_unlock(&qpt->lock);
135 }
136
137 /**
138  * init_qpn_table - initialize the QP number table for a device
     * @rdi: rvt dev struct
139  * @qpt: the QPN table
140  */
141 static int init_qpn_table(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt)
142 {
143         u32 offset, i;
144         struct rvt_qpn_map *map;
145         int ret = 0;
146
147         if (!(rdi->dparms.qpn_res_end >= rdi->dparms.qpn_res_start))
148                 return -EINVAL;
149
150         spin_lock_init(&qpt->lock);
151
152         qpt->last = rdi->dparms.qpn_start;
153         qpt->incr = rdi->dparms.qpn_inc << rdi->dparms.qos_shift;
154
155         /*
156  * Drivers may want some QPs beyond what we need for verbs, so let them use
157  * our qpn table. No need for two. Let's go ahead and mark the bitmaps
158          * for those. The reserved range must be *after* the range which verbs
159          * will pick from.
160          */
161
162         /* Figure out number of bit maps needed before reserved range */
163         qpt->nmaps = rdi->dparms.qpn_res_start / RVT_BITS_PER_PAGE;
164
165         /* This should always be zero */
166         offset = rdi->dparms.qpn_res_start & RVT_BITS_PER_PAGE_MASK;
167
168         /* Starting with the first reserved bit map */
169         map = &qpt->map[qpt->nmaps];
170
171         rvt_pr_info(rdi, "Reserving QPNs from 0x%x to 0x%x for non-verbs use\n",
172                     rdi->dparms.qpn_res_start, rdi->dparms.qpn_res_end);
173         for (i = rdi->dparms.qpn_res_start; i <= rdi->dparms.qpn_res_end; i++) {
174                 if (!map->page) {
175                         get_map_page(qpt, map);
176                         if (!map->page) {
177                                 ret = -ENOMEM;
178                                 break;
179                         }
180                 }
181                 set_bit(offset, map->page);
182                 offset++;
183                 if (offset == RVT_BITS_PER_PAGE) {
184                         /* next page */
185                         qpt->nmaps++;
186                         map++;
187                         offset = 0;
188                 }
189         }
190         return ret;
191 }
192
193 /**
194  * free_qpn_table - free the QP number table for a device
195  * @qpt: the QPN table
196  */
197 static void free_qpn_table(struct rvt_qpn_table *qpt)
198 {
199         int i;
200
201         for (i = 0; i < ARRAY_SIZE(qpt->map); i++)
202                 free_page((unsigned long)qpt->map[i].page);
203 }
204
205 /**
206  * rvt_driver_qp_init - Init driver qp resources
207  * @rdi: rvt dev structure
208  *
209  * Return: 0 on success
210  */
211 int rvt_driver_qp_init(struct rvt_dev_info *rdi)
212 {
213         int i;
214         int ret = -ENOMEM;
215
216         if (!rdi->dparms.qp_table_size)
217                 return -EINVAL;
218
219         /*
220          * If the driver is not doing any QP allocation then make sure it is
221          * providing the necessary QP functions.
222          */
223         if (!rdi->driver_f.free_all_qps ||
224             !rdi->driver_f.qp_priv_alloc ||
225             !rdi->driver_f.qp_priv_free ||
226             !rdi->driver_f.notify_qp_reset ||
227             !rdi->driver_f.notify_restart_rc)
228                 return -EINVAL;
229
230         /* allocate parent object */
231         rdi->qp_dev = kzalloc_node(sizeof(*rdi->qp_dev), GFP_KERNEL,
232                                    rdi->dparms.node);
233         if (!rdi->qp_dev)
234                 return -ENOMEM;
235
236         /* allocate hash table */
237         rdi->qp_dev->qp_table_size = rdi->dparms.qp_table_size;
238         rdi->qp_dev->qp_table_bits = ilog2(rdi->dparms.qp_table_size);
239         rdi->qp_dev->qp_table =
240                 kmalloc_node(rdi->qp_dev->qp_table_size *
241                              sizeof(*rdi->qp_dev->qp_table),
242                              GFP_KERNEL, rdi->dparms.node);
243         if (!rdi->qp_dev->qp_table)
244                 goto no_qp_table;
245
246         for (i = 0; i < rdi->qp_dev->qp_table_size; i++)
247                 RCU_INIT_POINTER(rdi->qp_dev->qp_table[i], NULL);
248
249         spin_lock_init(&rdi->qp_dev->qpt_lock);
250
251         /* initialize qpn map */
252         if (init_qpn_table(rdi, &rdi->qp_dev->qpn_table))
253                 goto fail_table;
254
255         spin_lock_init(&rdi->n_qps_lock);
256
257         return 0;
258
259 fail_table:
260         kfree(rdi->qp_dev->qp_table);
261         free_qpn_table(&rdi->qp_dev->qpn_table);
262
263 no_qp_table:
264         kfree(rdi->qp_dev);
265
266         return ret;
267 }
268
269 /**
270  * rvt_free_all_qps - check for QPs still in use
271  * @rdi: rvt device info structure
272  *
273  * There should not be any QPs still in use.
274  * Free memory for table.
275  */
276 static unsigned rvt_free_all_qps(struct rvt_dev_info *rdi)
277 {
278         unsigned long flags;
279         struct rvt_qp *qp;
280         unsigned n, qp_inuse = 0;
281         spinlock_t *ql; /* work around too long line below */
282
283         if (rdi->driver_f.free_all_qps)
284                 qp_inuse = rdi->driver_f.free_all_qps(rdi);
285
286         qp_inuse += rvt_mcast_tree_empty(rdi);
287
288         if (!rdi->qp_dev)
289                 return qp_inuse;
290
291         ql = &rdi->qp_dev->qpt_lock;
292         spin_lock_irqsave(ql, flags);
293         for (n = 0; n < rdi->qp_dev->qp_table_size; n++) {
294                 qp = rcu_dereference_protected(rdi->qp_dev->qp_table[n],
295                                                lockdep_is_held(ql));
296                 RCU_INIT_POINTER(rdi->qp_dev->qp_table[n], NULL);
297
298                 for (; qp; qp = rcu_dereference_protected(qp->next,
299                                                           lockdep_is_held(ql)))
300                         qp_inuse++;
301         }
302         spin_unlock_irqrestore(ql, flags);
303         synchronize_rcu();
304         return qp_inuse;
305 }
306
307 /**
308  * rvt_qp_exit - clean up qps on device exit
309  * @rdi: rvt dev structure
310  *
311  * Check for qp leaks and free resources.
312  */
313 void rvt_qp_exit(struct rvt_dev_info *rdi)
314 {
315         u32 qps_inuse = rvt_free_all_qps(rdi);
316
317         if (qps_inuse)
318                 rvt_pr_err(rdi, "QP memory leak! %u still in use\n",
319                            qps_inuse);
320         if (!rdi->qp_dev)
321                 return;
322
323         kfree(rdi->qp_dev->qp_table);
324         free_qpn_table(&rdi->qp_dev->qpn_table);
325         kfree(rdi->qp_dev);
326 }
327
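/* Translate a bitmap page (map) plus bit offset back into a QP number. */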
328 static inline unsigned mk_qpn(struct rvt_qpn_table *qpt,
329                               struct rvt_qpn_map *map, unsigned off)
330 {
331         return (map - qpt->map) * RVT_BITS_PER_PAGE + off;
332 }
333
334 /**
335  * alloc_qpn - Allocate the next available qpn or zero/one for QP type
336  *             IB_QPT_SMI/IB_QPT_GSI
337  * @rdi: rvt device info structure
338  * @qpt: queue pair number table pointer
     * @type: the QP type
339  * @port_num: IB port number, 1 based, comes from core
340  *
341  * Return: The queue pair number on success, otherwise a negative errno
342  */
343 static int alloc_qpn(struct rvt_dev_info *rdi, struct rvt_qpn_table *qpt,
344                      enum ib_qp_type type, u8 port_num)
345 {
346         u32 i, offset, max_scan, qpn;
347         struct rvt_qpn_map *map;
348         u32 ret;
349
350         if (rdi->driver_f.alloc_qpn)
351                 return rdi->driver_f.alloc_qpn(rdi, qpt, type, port_num);
352
353         if (type == IB_QPT_SMI || type == IB_QPT_GSI) {
354                 unsigned n;
355
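                /*
                 * QP0 (SMI) and QP1 (GSI) are fixed QPNs; two bits per port in
                 * qpt->flags track whether each has already been handed out.
                 */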
356                 ret = type == IB_QPT_GSI;
357                 n = 1 << (ret + 2 * (port_num - 1));
358                 spin_lock(&qpt->lock);
359                 if (qpt->flags & n)
360                         ret = -EINVAL;
361                 else
362                         qpt->flags |= n;
363                 spin_unlock(&qpt->lock);
364                 goto bail;
365         }
366
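        /* Start the search just past the last allocated QPN, stepping by qpt->incr */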
367         qpn = qpt->last + qpt->incr;
368         if (qpn >= RVT_QPN_MAX)
369                 qpn = qpt->incr | ((qpt->last & 1) ^ 1);
370         /* offset carries bit 0 */
371         offset = qpn & RVT_BITS_PER_PAGE_MASK;
372         map = &qpt->map[qpn / RVT_BITS_PER_PAGE];
373         max_scan = qpt->nmaps - !offset;
374         for (i = 0;;) {
375                 if (unlikely(!map->page)) {
376                         get_map_page(qpt, map);
377                         if (unlikely(!map->page))
378                                 break;
379                 }
380                 do {
381                         if (!test_and_set_bit(offset, map->page)) {
382                                 qpt->last = qpn;
383                                 ret = qpn;
384                                 goto bail;
385                         }
386                         offset += qpt->incr;
387                         /*
388                          * This qpn might be bogus if offset >= BITS_PER_PAGE.
389                          * That is OK.   It gets re-assigned below
390                          */
391                         qpn = mk_qpn(qpt, map, offset);
392                 } while (offset < RVT_BITS_PER_PAGE && qpn < RVT_QPN_MAX);
393                 /*
394                  * In order to keep the number of pages allocated to a
395                  * minimum, we scan all the existing pages before increasing
396                  * the size of the bitmap table.
397                  */
398                 if (++i > max_scan) {
399                         if (qpt->nmaps == RVT_QPNMAP_ENTRIES)
400                                 break;
401                         map = &qpt->map[qpt->nmaps++];
402                         /* start at incr with current bit 0 */
403                         offset = qpt->incr | (offset & 1);
404                 } else if (map < &qpt->map[qpt->nmaps]) {
405                         ++map;
406                         /* start at incr with current bit 0 */
407                         offset = qpt->incr | (offset & 1);
408                 } else {
409                         map = &qpt->map[0];
410                         /* wrap to first map page, invert bit 0 */
411                         offset = qpt->incr | ((offset & 1) ^ 1);
412                 }
413                 /* there can be no set bits in low-order QoS bits */
414                 WARN_ON(offset & (BIT(rdi->dparms.qos_shift) - 1));
415                 qpn = mk_qpn(qpt, map, offset);
416         }
417
418         ret = -ENOMEM;
419
420 bail:
421         return ret;
422 }
423
424 static void free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
425 {
426         struct rvt_qpn_map *map;
427
428         map = qpt->map + qpn / RVT_BITS_PER_PAGE;
429         if (map->page)
430                 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
431 }
432
433 /**
434  * rvt_clear_mr_refs - Drop held mr refs
435  * @qp: rvt qp data structure
436  * @clr_sends: If the send side should be cleared or not
437  */
438 static void rvt_clear_mr_refs(struct rvt_qp *qp, int clr_sends)
439 {
440         unsigned n;
441         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
442
443         if (test_and_clear_bit(RVT_R_REWIND_SGE, &qp->r_aflags))
444                 rvt_put_ss(&qp->s_rdma_read_sge);
445
446         rvt_put_ss(&qp->r_sge);
447
448         if (clr_sends) {
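                /* Drop SGE MR references (and UD AH references) for each send WQE that has not completed */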
449                 while (qp->s_last != qp->s_head) {
450                         struct rvt_swqe *wqe = rvt_get_swqe_ptr(qp, qp->s_last);
451                         unsigned i;
452
453                         for (i = 0; i < wqe->wr.num_sge; i++) {
454                                 struct rvt_sge *sge = &wqe->sg_list[i];
455
456                                 rvt_put_mr(sge->mr);
457                         }
458                         if (qp->ibqp.qp_type == IB_QPT_UD ||
459                             qp->ibqp.qp_type == IB_QPT_SMI ||
460                             qp->ibqp.qp_type == IB_QPT_GSI)
461                                 atomic_dec(&ibah_to_rvtah(
462                                                 wqe->ud_wr.ah)->refcount);
463                         if (++qp->s_last >= qp->s_size)
464                                 qp->s_last = 0;
465                         smp_wmb(); /* see qp_set_savail */
466                 }
467                 if (qp->s_rdma_mr) {
468                         rvt_put_mr(qp->s_rdma_mr);
469                         qp->s_rdma_mr = NULL;
470                 }
471         }
472
473         if (qp->ibqp.qp_type != IB_QPT_RC)
474                 return;
475
476         for (n = 0; n < rvt_max_atomic(rdi); n++) {
477                 struct rvt_ack_entry *e = &qp->s_ack_queue[n];
478
479                 if (e->rdma_sge.mr) {
480                         rvt_put_mr(e->rdma_sge.mr);
481                         e->rdma_sge.mr = NULL;
482                 }
483         }
484 }
485
486 /**
487  * rvt_remove_qp - remove qp from table
488  * @rdi: rvt dev struct
489  * @qp: qp to remove
490  *
491  * Remove the QP from the table so it can't be found asynchronously by
492  * the receive routine.
493  */
494 static void rvt_remove_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
495 {
496         struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
497         u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
498         unsigned long flags;
499         int removed = 1;
500
501         spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
502
503         if (rcu_dereference_protected(rvp->qp[0],
504                         lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
505                 RCU_INIT_POINTER(rvp->qp[0], NULL);
506         } else if (rcu_dereference_protected(rvp->qp[1],
507                         lockdep_is_held(&rdi->qp_dev->qpt_lock)) == qp) {
508                 RCU_INIT_POINTER(rvp->qp[1], NULL);
509         } else {
510                 struct rvt_qp *q;
511                 struct rvt_qp __rcu **qpp;
512
513                 removed = 0;
514                 qpp = &rdi->qp_dev->qp_table[n];
515                 for (; (q = rcu_dereference_protected(*qpp,
516                         lockdep_is_held(&rdi->qp_dev->qpt_lock))) != NULL;
517                         qpp = &q->next) {
518                         if (q == qp) {
519                                 RCU_INIT_POINTER(*qpp,
520                                      rcu_dereference_protected(qp->next,
521                                      lockdep_is_held(&rdi->qp_dev->qpt_lock)));
522                                 removed = 1;
523                                 trace_rvt_qpremove(qp, n);
524                                 break;
525                         }
526                 }
527         }
528
529         spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
530         if (removed) {
531                 synchronize_rcu();
532                 rvt_put_qp(qp);
533         }
534 }
535
536 /**
537  * rvt_init_qp - initialize the QP state to the reset state
538  * @qp: the QP to init or reinit
539  * @type: the QP type
540  *
541  * This function is called from both rvt_create_qp() and
542  * rvt_reset_qp().   The difference is that the reset path
543  * must hold the necessary locks to protect against concurrent
544  * access.
545  */
546 static void rvt_init_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
547                         enum ib_qp_type type)
548 {
549         qp->remote_qpn = 0;
550         qp->qkey = 0;
551         qp->qp_access_flags = 0;
552         qp->s_flags &= RVT_S_SIGNAL_REQ_WR;
553         qp->s_hdrwords = 0;
554         qp->s_wqe = NULL;
555         qp->s_draining = 0;
556         qp->s_next_psn = 0;
557         qp->s_last_psn = 0;
558         qp->s_sending_psn = 0;
559         qp->s_sending_hpsn = 0;
560         qp->s_psn = 0;
561         qp->r_psn = 0;
562         qp->r_msn = 0;
563         if (type == IB_QPT_RC) {
564                 qp->s_state = IB_OPCODE_RC_SEND_LAST;
565                 qp->r_state = IB_OPCODE_RC_SEND_LAST;
566         } else {
567                 qp->s_state = IB_OPCODE_UC_SEND_LAST;
568                 qp->r_state = IB_OPCODE_UC_SEND_LAST;
569         }
570         qp->s_ack_state = IB_OPCODE_RC_ACKNOWLEDGE;
571         qp->r_nak_state = 0;
572         qp->r_aflags = 0;
573         qp->r_flags = 0;
574         qp->s_head = 0;
575         qp->s_tail = 0;
576         qp->s_cur = 0;
577         qp->s_acked = 0;
578         qp->s_last = 0;
579         qp->s_ssn = 1;
580         qp->s_lsn = 0;
581         qp->s_mig_state = IB_MIG_MIGRATED;
582         qp->r_head_ack_queue = 0;
583         qp->s_tail_ack_queue = 0;
584         qp->s_num_rd_atomic = 0;
585         if (qp->r_rq.wq) {
586                 qp->r_rq.wq->head = 0;
587                 qp->r_rq.wq->tail = 0;
588         }
589         qp->r_sge.num_sge = 0;
590         atomic_set(&qp->s_reserved_used, 0);
591 }
592
593 /**
594  * rvt_reset_qp - initialize the QP state to the reset state
595  * @qp: the QP to reset
596  * @type: the QP type
597  *
598  * r_lock, s_hlock, and s_lock are required to be held by the caller
599  */
600 static void rvt_reset_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp,
601                          enum ib_qp_type type)
602         __must_hold(&qp->s_lock)
603         __must_hold(&qp->s_hlock)
604         __must_hold(&qp->r_lock)
605 {
606         lockdep_assert_held(&qp->r_lock);
607         lockdep_assert_held(&qp->s_hlock);
608         lockdep_assert_held(&qp->s_lock);
609         if (qp->state != IB_QPS_RESET) {
610                 qp->state = IB_QPS_RESET;
611
612                 /* Let drivers flush their waitlist */
613                 rdi->driver_f.flush_qp_waiters(qp);
614                 rvt_stop_rc_timers(qp);
615                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_ANY_WAIT);
616                 spin_unlock(&qp->s_lock);
617                 spin_unlock(&qp->s_hlock);
618                 spin_unlock_irq(&qp->r_lock);
619
620                 /* Stop the send queue and the retry timer */
621                 rdi->driver_f.stop_send_queue(qp);
622                 rvt_del_timers_sync(qp);
623                 /* Wait for things to stop */
624                 rdi->driver_f.quiesce_qp(qp);
625
626                 /* take qp out of the hash and wait for it to be unused */
627                 rvt_remove_qp(rdi, qp);
628                 wait_event(qp->wait, !atomic_read(&qp->refcount));
629
630                 /* grab the lock b/c it was locked at call time */
631                 spin_lock_irq(&qp->r_lock);
632                 spin_lock(&qp->s_hlock);
633                 spin_lock(&qp->s_lock);
634
635                 rvt_clear_mr_refs(qp, 1);
636                 /*
637                  * Let the driver do any tear down or re-init it needs to for
638                  * a qp that has been reset
639                  */
640                 rdi->driver_f.notify_qp_reset(qp);
641         }
642         rvt_init_qp(rdi, qp, type);
643         lockdep_assert_held(&qp->r_lock);
644         lockdep_assert_held(&qp->s_hlock);
645         lockdep_assert_held(&qp->s_lock);
646 }
647
648 /**
649  * rvt_create_qp - create a queue pair for a device
650  * @ibpd: the protection domain whose device we create the queue pair for
651  * @init_attr: the attributes of the queue pair
652  * @udata: user data for libibverbs.so
653  *
654  * Queue pair creation is mostly an rvt issue. However, drivers have their own
655  * unique idea of what queue pair numbers mean. For instance there is a reserved
656  * range for PSM.
657  *
658  * Return: the queue pair on success, otherwise returns an errno.
659  *
660  * Called by the ib_create_qp() core verbs function.
661  */
662 struct ib_qp *rvt_create_qp(struct ib_pd *ibpd,
663                             struct ib_qp_init_attr *init_attr,
664                             struct ib_udata *udata)
665 {
666         struct rvt_qp *qp;
667         int err;
668         struct rvt_swqe *swq = NULL;
669         size_t sz;
670         size_t sg_list_sz;
671         struct ib_qp *ret = ERR_PTR(-ENOMEM);
672         struct rvt_dev_info *rdi = ib_to_rvt(ibpd->device);
673         void *priv = NULL;
674         size_t sqsize;
675
676         if (!rdi)
677                 return ERR_PTR(-EINVAL);
678
679         if (init_attr->cap.max_send_sge > rdi->dparms.props.max_sge ||
680             init_attr->cap.max_send_wr > rdi->dparms.props.max_qp_wr ||
681             init_attr->create_flags)
682                 return ERR_PTR(-EINVAL);
683
684         /* Check receive queue parameters if no SRQ is specified. */
685         if (!init_attr->srq) {
686                 if (init_attr->cap.max_recv_sge > rdi->dparms.props.max_sge ||
687                     init_attr->cap.max_recv_wr > rdi->dparms.props.max_qp_wr)
688                         return ERR_PTR(-EINVAL);
689
690                 if (init_attr->cap.max_send_sge +
691                     init_attr->cap.max_send_wr +
692                     init_attr->cap.max_recv_sge +
693                     init_attr->cap.max_recv_wr == 0)
694                         return ERR_PTR(-EINVAL);
695         }
696         sqsize =
697                 init_attr->cap.max_send_wr + 1 +
698                 rdi->dparms.reserved_operations;
699         switch (init_attr->qp_type) {
700         case IB_QPT_SMI:
701         case IB_QPT_GSI:
702                 if (init_attr->port_num == 0 ||
703                     init_attr->port_num > ibpd->device->phys_port_cnt)
704                         return ERR_PTR(-EINVAL);
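                /* SMI/GSI fall through to the common allocation path below */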
705         case IB_QPT_UC:
706         case IB_QPT_RC:
707         case IB_QPT_UD:
708                 sz = sizeof(struct rvt_sge) *
709                         init_attr->cap.max_send_sge +
710                         sizeof(struct rvt_swqe);
711                 swq = vzalloc_node(sqsize * sz, rdi->dparms.node);
712                 if (!swq)
713                         return ERR_PTR(-ENOMEM);
714
715                 sz = sizeof(*qp);
716                 sg_list_sz = 0;
717                 if (init_attr->srq) {
718                         struct rvt_srq *srq = ibsrq_to_rvtsrq(init_attr->srq);
719
720                         if (srq->rq.max_sge > 1)
721                                 sg_list_sz = sizeof(*qp->r_sg_list) *
722                                         (srq->rq.max_sge - 1);
723                 } else if (init_attr->cap.max_recv_sge > 1)
724                         sg_list_sz = sizeof(*qp->r_sg_list) *
725                                 (init_attr->cap.max_recv_sge - 1);
726                 qp = kzalloc_node(sz + sg_list_sz, GFP_KERNEL,
727                                   rdi->dparms.node);
728                 if (!qp)
729                         goto bail_swq;
730
731                 RCU_INIT_POINTER(qp->next, NULL);
732                 if (init_attr->qp_type == IB_QPT_RC) {
733                         qp->s_ack_queue =
734                                 kzalloc_node(
735                                         sizeof(*qp->s_ack_queue) *
736                                          rvt_max_atomic(rdi),
737                                         GFP_KERNEL,
738                                         rdi->dparms.node);
739                         if (!qp->s_ack_queue)
740                                 goto bail_qp;
741                 }
742                 /* initialize timers needed for rc qp */
743                 setup_timer(&qp->s_timer, rvt_rc_timeout, (unsigned long)qp);
744                 hrtimer_init(&qp->s_rnr_timer, CLOCK_MONOTONIC,
745                              HRTIMER_MODE_REL);
746                 qp->s_rnr_timer.function = rvt_rc_rnr_retry;
747
748                 /*
749                  * Driver needs to set up its private QP structure and do any
750                  * initialization that is needed.
751                  */
752                 priv = rdi->driver_f.qp_priv_alloc(rdi, qp);
753                 if (IS_ERR(priv)) {
754                         ret = priv;
755                         goto bail_qp;
756                 }
757                 qp->priv = priv;
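                /* IBTA local ACK timeout is 4.096 usec * 2^timeout; convert to jiffies */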
758                 qp->timeout_jiffies =
759                         usecs_to_jiffies((4096UL * (1UL << qp->timeout)) /
760                                 1000UL);
761                 if (init_attr->srq) {
762                         sz = 0;
763                 } else {
764                         qp->r_rq.size = init_attr->cap.max_recv_wr + 1;
765                         qp->r_rq.max_sge = init_attr->cap.max_recv_sge;
766                         sz = (sizeof(struct ib_sge) * qp->r_rq.max_sge) +
767                                 sizeof(struct rvt_rwqe);
768                         if (udata)
769                                 qp->r_rq.wq = vmalloc_user(
770                                                 sizeof(struct rvt_rwq) +
771                                                 qp->r_rq.size * sz);
772                         else
773                                 qp->r_rq.wq = vzalloc_node(
774                                                 sizeof(struct rvt_rwq) +
775                                                 qp->r_rq.size * sz,
776                                                 rdi->dparms.node);
777                         if (!qp->r_rq.wq)
778                                 goto bail_driver_priv;
779                 }
780
781                 /*
782                  * ib_create_qp() will initialize qp->ibqp
783                  * except for qp->ibqp.qp_num.
784                  */
785                 spin_lock_init(&qp->r_lock);
786                 spin_lock_init(&qp->s_hlock);
787                 spin_lock_init(&qp->s_lock);
788                 spin_lock_init(&qp->r_rq.lock);
789                 atomic_set(&qp->refcount, 0);
790                 atomic_set(&qp->local_ops_pending, 0);
791                 init_waitqueue_head(&qp->wait);
792                 init_timer(&qp->s_timer);
793                 qp->s_timer.data = (unsigned long)qp;
794                 INIT_LIST_HEAD(&qp->rspwait);
795                 qp->state = IB_QPS_RESET;
796                 qp->s_wq = swq;
797                 qp->s_size = sqsize;
798                 qp->s_avail = init_attr->cap.max_send_wr;
799                 qp->s_max_sge = init_attr->cap.max_send_sge;
800                 if (init_attr->sq_sig_type == IB_SIGNAL_REQ_WR)
801                         qp->s_flags = RVT_S_SIGNAL_REQ_WR;
802
803                 err = alloc_qpn(rdi, &rdi->qp_dev->qpn_table,
804                                 init_attr->qp_type,
805                                 init_attr->port_num);
806                 if (err < 0) {
807                         ret = ERR_PTR(err);
808                         goto bail_rq_wq;
809                 }
810                 qp->ibqp.qp_num = err;
811                 qp->port_num = init_attr->port_num;
812                 rvt_init_qp(rdi, qp, init_attr->qp_type);
813                 break;
814
815         default:
816                 /* Don't support raw QPs */
817                 return ERR_PTR(-EINVAL);
818         }
819
820         init_attr->cap.max_inline_data = 0;
821
822         /*
823          * Return the address of the RWQ as the offset to mmap.
824          * See rvt_mmap() for details.
825          */
826         if (udata && udata->outlen >= sizeof(__u64)) {
827                 if (!qp->r_rq.wq) {
828                         __u64 offset = 0;
829
830                         err = ib_copy_to_udata(udata, &offset,
831                                                sizeof(offset));
832                         if (err) {
833                                 ret = ERR_PTR(err);
834                                 goto bail_qpn;
835                         }
836                 } else {
837                         u32 s = sizeof(struct rvt_rwq) + qp->r_rq.size * sz;
838
839                         qp->ip = rvt_create_mmap_info(rdi, s,
840                                                       ibpd->uobject->context,
841                                                       qp->r_rq.wq);
842                         if (!qp->ip) {
843                                 ret = ERR_PTR(-ENOMEM);
844                                 goto bail_qpn;
845                         }
846
847                         err = ib_copy_to_udata(udata, &qp->ip->offset,
848                                                sizeof(qp->ip->offset));
849                         if (err) {
850                                 ret = ERR_PTR(err);
851                                 goto bail_ip;
852                         }
853                 }
854                 qp->pid = current->pid;
855         }
856
857         spin_lock(&rdi->n_qps_lock);
858         if (rdi->n_qps_allocated == rdi->dparms.props.max_qp) {
859                 spin_unlock(&rdi->n_qps_lock);
860                 ret = ERR_PTR(-ENOMEM);
861                 goto bail_ip;
862         }
863
864         rdi->n_qps_allocated++;
865         /*
866          * Maintain a busy_jiffies variable that will be added to the timeout
867          * period in mod_retry_timer and add_retry_timer. This busy jiffies
868          * is scaled by the number of rc qps created for the device to reduce
869          * the number of timeouts occurring when there is a large number of
870          * qps. busy_jiffies is incremented every rc qp scaling interval.
871          * The scaling interval is selected based on extensive performance
872          * evaluation of targeted workloads.
873          */
874         if (init_attr->qp_type == IB_QPT_RC) {
875                 rdi->n_rc_qps++;
876                 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
877         }
878         spin_unlock(&rdi->n_qps_lock);
879
880         if (qp->ip) {
881                 spin_lock_irq(&rdi->pending_lock);
882                 list_add(&qp->ip->pending_mmaps, &rdi->pending_mmaps);
883                 spin_unlock_irq(&rdi->pending_lock);
884         }
885
886         ret = &qp->ibqp;
887
888         /*
889  * We have our QP and it's good; now keep track of what types of opcodes
890          * can be processed on this QP. We do this by keeping track of what the
891          * 3 high order bits of the opcode are.
892          */
893         switch (init_attr->qp_type) {
894         case IB_QPT_SMI:
895         case IB_QPT_GSI:
896         case IB_QPT_UD:
897                 qp->allowed_ops = IB_OPCODE_UD;
898                 break;
899         case IB_QPT_RC:
900                 qp->allowed_ops = IB_OPCODE_RC;
901                 break;
902         case IB_QPT_UC:
903                 qp->allowed_ops = IB_OPCODE_UC;
904                 break;
905         default:
906                 ret = ERR_PTR(-EINVAL);
907                 goto bail_ip;
908         }
909
910         return ret;
911
912 bail_ip:
913         if (qp->ip)
914                 kref_put(&qp->ip->ref, rvt_release_mmap_info);
915
916 bail_qpn:
917         free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
918
919 bail_rq_wq:
920         if (!qp->ip)
921                 vfree(qp->r_rq.wq);
922
923 bail_driver_priv:
924         rdi->driver_f.qp_priv_free(rdi, qp);
925
926 bail_qp:
927         kfree(qp->s_ack_queue);
928         kfree(qp);
929
930 bail_swq:
931         vfree(swq);
932
933         return ret;
934 }
935
936 /**
937  * rvt_error_qp - put a QP into the error state
938  * @qp: the QP to put into the error state
939  * @err: the receive completion error to signal if a RWQE is active
940  *
941  * Flushes both send and receive work queues.
942  *
943  * Return: true if last WQE event should be generated.
944  * The QP r_lock and s_lock should be held and interrupts disabled.
945  * If we are already in error state, just return.
946  */
947 int rvt_error_qp(struct rvt_qp *qp, enum ib_wc_status err)
948 {
949         struct ib_wc wc;
950         int ret = 0;
951         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
952
953         lockdep_assert_held(&qp->r_lock);
954         lockdep_assert_held(&qp->s_lock);
955         if (qp->state == IB_QPS_ERR || qp->state == IB_QPS_RESET)
956                 goto bail;
957
958         qp->state = IB_QPS_ERR;
959
960         if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
961                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
962                 del_timer(&qp->s_timer);
963         }
964
965         if (qp->s_flags & RVT_S_ANY_WAIT_SEND)
966                 qp->s_flags &= ~RVT_S_ANY_WAIT_SEND;
967
968         rdi->driver_f.notify_error_qp(qp);
969
970         /* Schedule the sending tasklet to drain the send work queue. */
971         if (ACCESS_ONCE(qp->s_last) != qp->s_head)
972                 rdi->driver_f.schedule_send(qp);
973
974         rvt_clear_mr_refs(qp, 0);
975
976         memset(&wc, 0, sizeof(wc));
977         wc.qp = &qp->ibqp;
978         wc.opcode = IB_WC_RECV;
979
980         if (test_and_clear_bit(RVT_R_WRID_VALID, &qp->r_aflags)) {
981                 wc.wr_id = qp->r_wr_id;
982                 wc.status = err;
983                 rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
984         }
985         wc.status = IB_WC_WR_FLUSH_ERR;
986
987         if (qp->r_rq.wq) {
988                 struct rvt_rwq *wq;
989                 u32 head;
990                 u32 tail;
991
992                 spin_lock(&qp->r_rq.lock);
993
994                 /* sanity check pointers before trusting them */
995                 wq = qp->r_rq.wq;
996                 head = wq->head;
997                 if (head >= qp->r_rq.size)
998                         head = 0;
999                 tail = wq->tail;
1000                 if (tail >= qp->r_rq.size)
1001                         tail = 0;
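                /* Complete each posted but unconsumed RWQE with a flush error */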
1002                 while (tail != head) {
1003                         wc.wr_id = rvt_get_rwqe_ptr(&qp->r_rq, tail)->wr_id;
1004                         if (++tail >= qp->r_rq.size)
1005                                 tail = 0;
1006                         rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1007                 }
1008                 wq->tail = tail;
1009
1010                 spin_unlock(&qp->r_rq.lock);
1011         } else if (qp->ibqp.event_handler) {
1012                 ret = 1;
1013         }
1014
1015 bail:
1016         return ret;
1017 }
1018 EXPORT_SYMBOL(rvt_error_qp);
1019
1020 /*
1021  * Put the QP into the hash table.
1022  * The hash table holds a reference to the QP.
1023  */
1024 static void rvt_insert_qp(struct rvt_dev_info *rdi, struct rvt_qp *qp)
1025 {
1026         struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
1027         unsigned long flags;
1028
1029         rvt_get_qp(qp);
1030         spin_lock_irqsave(&rdi->qp_dev->qpt_lock, flags);
1031
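        /* QP0 and QP1 are kept in the per-port array; all other QPs hash into qp_table */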
1032         if (qp->ibqp.qp_num <= 1) {
1033                 rcu_assign_pointer(rvp->qp[qp->ibqp.qp_num], qp);
1034         } else {
1035                 u32 n = hash_32(qp->ibqp.qp_num, rdi->qp_dev->qp_table_bits);
1036
1037                 qp->next = rdi->qp_dev->qp_table[n];
1038                 rcu_assign_pointer(rdi->qp_dev->qp_table[n], qp);
1039                 trace_rvt_qpinsert(qp, n);
1040         }
1041
1042         spin_unlock_irqrestore(&rdi->qp_dev->qpt_lock, flags);
1043 }
1044
1045 /**
1046  * rvt_modify_qp - modify the attributes of a queue pair
1047  * @ibqp: the queue pair whose attributes we're modifying
1048  * @attr: the new attributes
1049  * @attr_mask: the mask of attributes to modify
1050  * @udata: user data for libibverbs.so
1051  *
1052  * Return: 0 on success, otherwise returns an errno.
1053  */
1054 int rvt_modify_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1055                   int attr_mask, struct ib_udata *udata)
1056 {
1057         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1058         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1059         enum ib_qp_state cur_state, new_state;
1060         struct ib_event ev;
1061         int lastwqe = 0;
1062         int mig = 0;
1063         int pmtu = 0; /* for gcc warning only */
1064         enum rdma_link_layer link;
1065
1066         link = rdma_port_get_link_layer(ibqp->device, qp->port_num);
1067
1068         spin_lock_irq(&qp->r_lock);
1069         spin_lock(&qp->s_hlock);
1070         spin_lock(&qp->s_lock);
1071
1072         cur_state = attr_mask & IB_QP_CUR_STATE ?
1073                 attr->cur_qp_state : qp->state;
1074         new_state = attr_mask & IB_QP_STATE ? attr->qp_state : cur_state;
1075
1076         if (!ib_modify_qp_is_ok(cur_state, new_state, ibqp->qp_type,
1077                                 attr_mask, link))
1078                 goto inval;
1079
1080         if (rdi->driver_f.check_modify_qp &&
1081             rdi->driver_f.check_modify_qp(qp, attr, attr_mask, udata))
1082                 goto inval;
1083
1084         if (attr_mask & IB_QP_AV) {
1085                 if (rdma_ah_get_dlid(&attr->ah_attr) >=
1086                     be16_to_cpu(IB_MULTICAST_LID_BASE))
1087                         goto inval;
1088                 if (rvt_check_ah(qp->ibqp.device, &attr->ah_attr))
1089                         goto inval;
1090         }
1091
1092         if (attr_mask & IB_QP_ALT_PATH) {
1093                 if (rdma_ah_get_dlid(&attr->alt_ah_attr) >=
1094                     be16_to_cpu(IB_MULTICAST_LID_BASE))
1095                         goto inval;
1096                 if (rvt_check_ah(qp->ibqp.device, &attr->alt_ah_attr))
1097                         goto inval;
1098                 if (attr->alt_pkey_index >= rvt_get_npkeys(rdi))
1099                         goto inval;
1100         }
1101
1102         if (attr_mask & IB_QP_PKEY_INDEX)
1103                 if (attr->pkey_index >= rvt_get_npkeys(rdi))
1104                         goto inval;
1105
1106         if (attr_mask & IB_QP_MIN_RNR_TIMER)
1107                 if (attr->min_rnr_timer > 31)
1108                         goto inval;
1109
1110         if (attr_mask & IB_QP_PORT)
1111                 if (qp->ibqp.qp_type == IB_QPT_SMI ||
1112                     qp->ibqp.qp_type == IB_QPT_GSI ||
1113                     attr->port_num == 0 ||
1114                     attr->port_num > ibqp->device->phys_port_cnt)
1115                         goto inval;
1116
1117         if (attr_mask & IB_QP_DEST_QPN)
1118                 if (attr->dest_qp_num > RVT_QPN_MASK)
1119                         goto inval;
1120
1121         if (attr_mask & IB_QP_RETRY_CNT)
1122                 if (attr->retry_cnt > 7)
1123                         goto inval;
1124
1125         if (attr_mask & IB_QP_RNR_RETRY)
1126                 if (attr->rnr_retry > 7)
1127                         goto inval;
1128
1129         /*
1130          * Don't allow invalid path_mtu values.  OK to set greater
1131          * than the active mtu (or even the max_cap, if we have tuned
1132          * that to a small mtu).  We'll set qp->path_mtu
1133          * to the lesser of the requested attribute mtu and the active mtu,
1134          * for packetizing messages.
1135          * Note that the QP port has to be set in INIT and MTU in RTR.
1136          */
1137         if (attr_mask & IB_QP_PATH_MTU) {
1138                 pmtu = rdi->driver_f.get_pmtu_from_attr(rdi, qp, attr);
1139                 if (pmtu < 0)
1140                         goto inval;
1141         }
1142
1143         if (attr_mask & IB_QP_PATH_MIG_STATE) {
1144                 if (attr->path_mig_state == IB_MIG_REARM) {
1145                         if (qp->s_mig_state == IB_MIG_ARMED)
1146                                 goto inval;
1147                         if (new_state != IB_QPS_RTS)
1148                                 goto inval;
1149                 } else if (attr->path_mig_state == IB_MIG_MIGRATED) {
1150                         if (qp->s_mig_state == IB_MIG_REARM)
1151                                 goto inval;
1152                         if (new_state != IB_QPS_RTS && new_state != IB_QPS_SQD)
1153                                 goto inval;
1154                         if (qp->s_mig_state == IB_MIG_ARMED)
1155                                 mig = 1;
1156                 } else {
1157                         goto inval;
1158                 }
1159         }
1160
1161         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1162                 if (attr->max_dest_rd_atomic > rdi->dparms.max_rdma_atomic)
1163                         goto inval;
1164
1165         switch (new_state) {
1166         case IB_QPS_RESET:
1167                 if (qp->state != IB_QPS_RESET)
1168                         rvt_reset_qp(rdi, qp, ibqp->qp_type);
1169                 break;
1170
1171         case IB_QPS_RTR:
1172                 /* Allow event to re-trigger if QP set to RTR more than once */
1173                 qp->r_flags &= ~RVT_R_COMM_EST;
1174                 qp->state = new_state;
1175                 break;
1176
1177         case IB_QPS_SQD:
1178                 qp->s_draining = qp->s_last != qp->s_cur;
1179                 qp->state = new_state;
1180                 break;
1181
1182         case IB_QPS_SQE:
1183                 if (qp->ibqp.qp_type == IB_QPT_RC)
1184                         goto inval;
1185                 qp->state = new_state;
1186                 break;
1187
1188         case IB_QPS_ERR:
1189                 lastwqe = rvt_error_qp(qp, IB_WC_WR_FLUSH_ERR);
1190                 break;
1191
1192         default:
1193                 qp->state = new_state;
1194                 break;
1195         }
1196
1197         if (attr_mask & IB_QP_PKEY_INDEX)
1198                 qp->s_pkey_index = attr->pkey_index;
1199
1200         if (attr_mask & IB_QP_PORT)
1201                 qp->port_num = attr->port_num;
1202
1203         if (attr_mask & IB_QP_DEST_QPN)
1204                 qp->remote_qpn = attr->dest_qp_num;
1205
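        /* Reset all send-side PSN tracking to the requested starting PSN */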
1206         if (attr_mask & IB_QP_SQ_PSN) {
1207                 qp->s_next_psn = attr->sq_psn & rdi->dparms.psn_modify_mask;
1208                 qp->s_psn = qp->s_next_psn;
1209                 qp->s_sending_psn = qp->s_next_psn;
1210                 qp->s_last_psn = qp->s_next_psn - 1;
1211                 qp->s_sending_hpsn = qp->s_last_psn;
1212         }
1213
1214         if (attr_mask & IB_QP_RQ_PSN)
1215                 qp->r_psn = attr->rq_psn & rdi->dparms.psn_modify_mask;
1216
1217         if (attr_mask & IB_QP_ACCESS_FLAGS)
1218                 qp->qp_access_flags = attr->qp_access_flags;
1219
1220         if (attr_mask & IB_QP_AV) {
1221                 qp->remote_ah_attr = attr->ah_attr;
1222                 qp->s_srate = rdma_ah_get_static_rate(&attr->ah_attr);
1223                 qp->srate_mbps = ib_rate_to_mbps(qp->s_srate);
1224         }
1225
1226         if (attr_mask & IB_QP_ALT_PATH) {
1227                 qp->alt_ah_attr = attr->alt_ah_attr;
1228                 qp->s_alt_pkey_index = attr->alt_pkey_index;
1229         }
1230
1231         if (attr_mask & IB_QP_PATH_MIG_STATE) {
1232                 qp->s_mig_state = attr->path_mig_state;
1233                 if (mig) {
1234                         qp->remote_ah_attr = qp->alt_ah_attr;
1235                         qp->port_num = rdma_ah_get_port_num(&qp->alt_ah_attr);
1236                         qp->s_pkey_index = qp->s_alt_pkey_index;
1237                 }
1238         }
1239
1240         if (attr_mask & IB_QP_PATH_MTU) {
1241                 qp->pmtu = rdi->driver_f.mtu_from_qp(rdi, qp, pmtu);
1242                 qp->path_mtu = rdi->driver_f.mtu_to_path_mtu(qp->pmtu);
1243                 qp->log_pmtu = ilog2(qp->pmtu);
1244         }
1245
1246         if (attr_mask & IB_QP_RETRY_CNT) {
1247                 qp->s_retry_cnt = attr->retry_cnt;
1248                 qp->s_retry = attr->retry_cnt;
1249         }
1250
1251         if (attr_mask & IB_QP_RNR_RETRY) {
1252                 qp->s_rnr_retry_cnt = attr->rnr_retry;
1253                 qp->s_rnr_retry = attr->rnr_retry;
1254         }
1255
1256         if (attr_mask & IB_QP_MIN_RNR_TIMER)
1257                 qp->r_min_rnr_timer = attr->min_rnr_timer;
1258
1259         if (attr_mask & IB_QP_TIMEOUT) {
1260                 qp->timeout = attr->timeout;
1261                 qp->timeout_jiffies = rvt_timeout_to_jiffies(qp->timeout);
1262         }
1263
1264         if (attr_mask & IB_QP_QKEY)
1265                 qp->qkey = attr->qkey;
1266
1267         if (attr_mask & IB_QP_MAX_DEST_RD_ATOMIC)
1268                 qp->r_max_rd_atomic = attr->max_dest_rd_atomic;
1269
1270         if (attr_mask & IB_QP_MAX_QP_RD_ATOMIC)
1271                 qp->s_max_rd_atomic = attr->max_rd_atomic;
1272
1273         if (rdi->driver_f.modify_qp)
1274                 rdi->driver_f.modify_qp(qp, attr, attr_mask, udata);
1275
1276         spin_unlock(&qp->s_lock);
1277         spin_unlock(&qp->s_hlock);
1278         spin_unlock_irq(&qp->r_lock);
1279
1280         if (cur_state == IB_QPS_RESET && new_state == IB_QPS_INIT)
1281                 rvt_insert_qp(rdi, qp);
1282
1283         if (lastwqe) {
1284                 ev.device = qp->ibqp.device;
1285                 ev.element.qp = &qp->ibqp;
1286                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1287                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1288         }
1289         if (mig) {
1290                 ev.device = qp->ibqp.device;
1291                 ev.element.qp = &qp->ibqp;
1292                 ev.event = IB_EVENT_PATH_MIG;
1293                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1294         }
1295         return 0;
1296
1297 inval:
1298         spin_unlock(&qp->s_lock);
1299         spin_unlock(&qp->s_hlock);
1300         spin_unlock_irq(&qp->r_lock);
1301         return -EINVAL;
1302 }
1303
1304 /** rvt_free_qpn - Free a qpn from the bit map
1305  * @qpt: QP table
1306  * @qpn: queue pair number to free
1307  */
1308 static void rvt_free_qpn(struct rvt_qpn_table *qpt, u32 qpn)
1309 {
1310         struct rvt_qpn_map *map;
1311
1312         map = qpt->map + qpn / RVT_BITS_PER_PAGE;
1313         if (map->page)
1314                 clear_bit(qpn & RVT_BITS_PER_PAGE_MASK, map->page);
1315 }
1316
1317 /**
1318  * rvt_destroy_qp - destroy a queue pair
1319  * @ibqp: the queue pair to destroy
1320  *
1321  * Note that this can be called while the QP is actively sending or
1322  * receiving!
1323  *
1324  * Return: 0 on success.
1325  */
1326 int rvt_destroy_qp(struct ib_qp *ibqp)
1327 {
1328         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1329         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1330
1331         spin_lock_irq(&qp->r_lock);
1332         spin_lock(&qp->s_hlock);
1333         spin_lock(&qp->s_lock);
1334         rvt_reset_qp(rdi, qp, ibqp->qp_type);
1335         spin_unlock(&qp->s_lock);
1336         spin_unlock(&qp->s_hlock);
1337         spin_unlock_irq(&qp->r_lock);
1338
1339         /* qpn is now available for use again */
1340         rvt_free_qpn(&rdi->qp_dev->qpn_table, qp->ibqp.qp_num);
1341
1342         spin_lock(&rdi->n_qps_lock);
1343         rdi->n_qps_allocated--;
1344         if (qp->ibqp.qp_type == IB_QPT_RC) {
1345                 rdi->n_rc_qps--;
1346                 rdi->busy_jiffies = rdi->n_rc_qps / RC_QP_SCALING_INTERVAL;
1347         }
1348         spin_unlock(&rdi->n_qps_lock);
1349
1350         if (qp->ip)
1351                 kref_put(&qp->ip->ref, rvt_release_mmap_info);
1352         else
1353                 vfree(qp->r_rq.wq);
1354         vfree(qp->s_wq);
1355         rdi->driver_f.qp_priv_free(rdi, qp);
1356         kfree(qp->s_ack_queue);
1357         kfree(qp);
1358         return 0;
1359 }
1360
1361 /**
1362  * rvt_query_qp - query an ibqp
1363  * @ibqp: IB qp to query
1364  * @attr: attr struct to fill in
1365  * @attr_mask: attr mask ignored
1366  * @init_attr: struct to fill in
1367  *
1368  * Return: always 0
1369  */
1370 int rvt_query_qp(struct ib_qp *ibqp, struct ib_qp_attr *attr,
1371                  int attr_mask, struct ib_qp_init_attr *init_attr)
1372 {
1373         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1374         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1375
1376         attr->qp_state = qp->state;
1377         attr->cur_qp_state = attr->qp_state;
1378         attr->path_mtu = qp->path_mtu;
1379         attr->path_mig_state = qp->s_mig_state;
1380         attr->qkey = qp->qkey;
1381         attr->rq_psn = qp->r_psn & rdi->dparms.psn_mask;
1382         attr->sq_psn = qp->s_next_psn & rdi->dparms.psn_mask;
1383         attr->dest_qp_num = qp->remote_qpn;
1384         attr->qp_access_flags = qp->qp_access_flags;
1385         attr->cap.max_send_wr = qp->s_size - 1 -
1386                 rdi->dparms.reserved_operations;
1387         attr->cap.max_recv_wr = qp->ibqp.srq ? 0 : qp->r_rq.size - 1;
1388         attr->cap.max_send_sge = qp->s_max_sge;
1389         attr->cap.max_recv_sge = qp->r_rq.max_sge;
1390         attr->cap.max_inline_data = 0;
1391         attr->ah_attr = qp->remote_ah_attr;
1392         attr->alt_ah_attr = qp->alt_ah_attr;
1393         attr->pkey_index = qp->s_pkey_index;
1394         attr->alt_pkey_index = qp->s_alt_pkey_index;
1395         attr->en_sqd_async_notify = 0;
1396         attr->sq_draining = qp->s_draining;
1397         attr->max_rd_atomic = qp->s_max_rd_atomic;
1398         attr->max_dest_rd_atomic = qp->r_max_rd_atomic;
1399         attr->min_rnr_timer = qp->r_min_rnr_timer;
1400         attr->port_num = qp->port_num;
1401         attr->timeout = qp->timeout;
1402         attr->retry_cnt = qp->s_retry_cnt;
1403         attr->rnr_retry = qp->s_rnr_retry_cnt;
1404         attr->alt_port_num =
1405                 rdma_ah_get_port_num(&qp->alt_ah_attr);
1406         attr->alt_timeout = qp->alt_timeout;
1407
1408         init_attr->event_handler = qp->ibqp.event_handler;
1409         init_attr->qp_context = qp->ibqp.qp_context;
1410         init_attr->send_cq = qp->ibqp.send_cq;
1411         init_attr->recv_cq = qp->ibqp.recv_cq;
1412         init_attr->srq = qp->ibqp.srq;
1413         init_attr->cap = attr->cap;
1414         if (qp->s_flags & RVT_S_SIGNAL_REQ_WR)
1415                 init_attr->sq_sig_type = IB_SIGNAL_REQ_WR;
1416         else
1417                 init_attr->sq_sig_type = IB_SIGNAL_ALL_WR;
1418         init_attr->qp_type = qp->ibqp.qp_type;
1419         init_attr->port_num = qp->port_num;
1420         return 0;
1421 }
1422
1423 /**
1424  * rvt_post_recv - post a receive on a QP
1425  * @ibqp: the QP to post the receive on
1426  * @wr: the WR to post
1427  * @bad_wr: the first bad WR is put here
1428  *
1429  * This may be called from interrupt context.
1430  *
1431  * Return: 0 on success otherwise errno
1432  */
1433 int rvt_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr,
1434                   struct ib_recv_wr **bad_wr)
1435 {
1436         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1437         struct rvt_rwq *wq = qp->r_rq.wq;
1438         unsigned long flags;
1439         int qp_err_flush = (ib_rvt_state_ops[qp->state] & RVT_FLUSH_RECV) &&
1440                                 !qp->ibqp.srq;
1441
1442         /* Check that state is OK to post receive. */
1443         if (!(ib_rvt_state_ops[qp->state] & RVT_POST_RECV_OK) || !wq) {
1444                 *bad_wr = wr;
1445                 return -EINVAL;
1446         }
1447
1448         for (; wr; wr = wr->next) {
1449                 struct rvt_rwqe *wqe;
1450                 u32 next;
1451                 int i;
1452
1453                 if ((unsigned)wr->num_sge > qp->r_rq.max_sge) {
1454                         *bad_wr = wr;
1455                         return -EINVAL;
1456                 }
1457
1458                 spin_lock_irqsave(&qp->r_rq.lock, flags);
1459                 next = wq->head + 1;
1460                 if (next >= qp->r_rq.size)
1461                         next = 0;
1462                 if (next == wq->tail) {
1463                         spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1464                         *bad_wr = wr;
1465                         return -ENOMEM;
1466                 }
1467                 if (unlikely(qp_err_flush)) {
1468                         struct ib_wc wc;
1469
1470                         memset(&wc, 0, sizeof(wc));
1471                         wc.qp = &qp->ibqp;
1472                         wc.opcode = IB_WC_RECV;
1473                         wc.wr_id = wr->wr_id;
1474                         wc.status = IB_WC_WR_FLUSH_ERR;
1475                         rvt_cq_enter(ibcq_to_rvtcq(qp->ibqp.recv_cq), &wc, 1);
1476                 } else {
1477                         wqe = rvt_get_rwqe_ptr(&qp->r_rq, wq->head);
1478                         wqe->wr_id = wr->wr_id;
1479                         wqe->num_sge = wr->num_sge;
1480                         for (i = 0; i < wr->num_sge; i++)
1481                                 wqe->sg_list[i] = wr->sg_list[i];
1482                         /*
1483                          * Make sure queue entry is written
1484                          * before the head index.
1485                          */
1486                         smp_wmb();
1487                         wq->head = next;
1488                 }
1489                 spin_unlock_irqrestore(&qp->r_rq.lock, flags);
1490         }
1491         return 0;
1492 }
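/*
 * Illustrative sketch only, not part of the driver: a consumer reaches
 * rvt_post_recv() through the ib_post_recv() verb.  Posting a
 * single-SGE receive might look roughly like the snippet below, where
 * dma_addr, len, lkey and cookie are assumed to come from the caller's
 * own buffer setup.
 *
 *	struct ib_sge sge = {
 *		.addr   = dma_addr,
 *		.length = len,
 *		.lkey   = lkey,
 *	};
 *	struct ib_recv_wr wr = {
 *		.wr_id   = cookie,
 *		.sg_list = &sge,
 *		.num_sge = 1,
 *	};
 *	struct ib_recv_wr *bad_wr;
 *	int ret = ib_post_recv(ibqp, &wr, &bad_wr);
 */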
1493
1494 /**
1495  * rvt_qp_valid_operation - validate post send wr request
1496  * @qp - the qp
1497  * @post_parms - the post send table for the driver
1498  * @wr - the work request
1499  *
1500  * The routine validates the operation based on the
1501  * validation table and returns the length of the operation,
1502  * which can extend beyond the ib_send_wr.  Operation-dependent
1503  * flags key the atomic operation validation.
1504  *
1505  * There is an exception for UD qps that validates the pd and
1506  * overrides the length to include the additional UD specific
1507  * length.
1508  *
1509  * Returns a negative error or the length of the work request
1510  * for building the swqe.
1511  */
1512 static inline int rvt_qp_valid_operation(
1513         struct rvt_qp *qp,
1514         const struct rvt_operation_params *post_parms,
1515         struct ib_send_wr *wr)
1516 {
1517         int len;
1518
1519         if (wr->opcode >= RVT_OPERATION_MAX || !post_parms[wr->opcode].length)
1520                 return -EINVAL;
1521         if (!(post_parms[wr->opcode].qpt_support & BIT(qp->ibqp.qp_type)))
1522                 return -EINVAL;
1523         if ((post_parms[wr->opcode].flags & RVT_OPERATION_PRIV) &&
1524             ibpd_to_rvtpd(qp->ibqp.pd)->user)
1525                 return -EINVAL;
1526         if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC_SGE &&
1527             (wr->num_sge == 0 ||
1528              wr->sg_list[0].length < sizeof(u64) ||
1529              wr->sg_list[0].addr & (sizeof(u64) - 1)))
1530                 return -EINVAL;
1531         if (post_parms[wr->opcode].flags & RVT_OPERATION_ATOMIC &&
1532             !qp->s_max_rd_atomic)
1533                 return -EINVAL;
1534         len = post_parms[wr->opcode].length;
1535         /* UD specific */
1536         if (qp->ibqp.qp_type != IB_QPT_UC &&
1537             qp->ibqp.qp_type != IB_QPT_RC) {
1538                 if (qp->ibqp.pd != ud_wr(wr)->ah->pd)
1539                         return -EINVAL;
1540                 len = sizeof(struct ib_ud_wr);
1541         }
1542         return len;
1543 }
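/*
 * For orientation, a hypothetical driver post_parms table entry (the
 * struct rvt_operation_params fields length, qpt_support and flags are
 * what the checks above consult; the specific opcodes and flags a
 * driver fills in are its own choice) might look like:
 *
 *	[IB_WR_SEND] = {
 *		.length = sizeof(struct ib_send_wr),
 *		.qpt_support = BIT(IB_QPT_UD) | BIT(IB_QPT_UC) | BIT(IB_QPT_RC),
 *	},
 *	[IB_WR_ATOMIC_CMP_AND_SWP] = {
 *		.length = sizeof(struct ib_atomic_wr),
 *		.qpt_support = BIT(IB_QPT_RC),
 *		.flags = RVT_OPERATION_ATOMIC | RVT_OPERATION_ATOMIC_SGE,
 *	},
 */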
1544
1545 /**
1546  * rvt_qp_is_avail - determine queue capacity
1547  * @qp - the qp
1548  * @rdi - the rdmavt device
1549  * @reserved_op - true if this is a reserved operation
1550  *
1551  * This assumes the s_hlock is held but the s_last
1552  * qp variable is uncontrolled.
1553  *
1554  * For non-reserved operations, qp->s_avail
1555  * may be changed.
1556  *
1557  * The return value is zero or -ENOMEM.
1558  */
1559 static inline int rvt_qp_is_avail(
1560         struct rvt_qp *qp,
1561         struct rvt_dev_info *rdi,
1562         bool reserved_op)
1563 {
1564         u32 slast;
1565         u32 avail;
1566         u32 reserved_used;
1567
1568         /* see rvt_qp_wqe_unreserve() */
1569         smp_mb__before_atomic();
1570         reserved_used = atomic_read(&qp->s_reserved_used);
1571         if (unlikely(reserved_op)) {
1572                 /* see rvt_qp_wqe_unreserve() */
1573                 smp_mb__before_atomic();
1574                 if (reserved_used >= rdi->dparms.reserved_operations)
1575                         return -ENOMEM;
1576                 return 0;
1577         }
1578         /* non-reserved operations */
1579         if (likely(qp->s_avail))
1580                 return 0;
1581         smp_read_barrier_depends(); /* see rc.c */
1582         slast = ACCESS_ONCE(qp->s_last);
1583         if (qp->s_head >= slast)
1584                 avail = qp->s_size - (qp->s_head - slast);
1585         else
1586                 avail = slast - qp->s_head;
1587
1588         /* see rvt_qp_wqe_unreserve() */
1589         smp_mb__before_atomic();
1590         reserved_used = atomic_read(&qp->s_reserved_used);
1591         avail = avail - 1 -
1592                 (rdi->dparms.reserved_operations - reserved_used);
1593         /* ensure we don't assign a negative s_avail */
1594         if ((s32)avail <= 0)
1595                 return -ENOMEM;
1596         qp->s_avail = avail;
1597         if (WARN_ON(qp->s_avail >
1598                     (qp->s_size - 1 - rdi->dparms.reserved_operations)))
1599                 rvt_pr_err(rdi,
1600                            "More avail entries than QP RB size.\nQP: %u, size: %u, avail: %u\nhead: %u, tail: %u, cur: %u, acked: %u, last: %u",
1601                            qp->ibqp.qp_num, qp->s_size, qp->s_avail,
1602                            qp->s_head, qp->s_tail, qp->s_cur,
1603                            qp->s_acked, qp->s_last);
1604         return 0;
1605 }
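/*
 * Worked example of the accounting in rvt_qp_is_avail() (numbers are
 * illustrative): with s_size = 32, s_head = 10, s_last = 5,
 * reserved_operations = 2 and reserved_used = 0, the ring has
 * 32 - (10 - 5) = 27 open slots; one slot always stays empty to tell
 * full from empty and two are held back for reserved operations, so
 * s_avail becomes 27 - 1 - 2 = 24.
 */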
1606
1607 /**
1608  * rvt_post_one_wr - post one RC, UC, or UD send work request
1609  * @qp: the QP to post on
1610  * @wr: the work request to send
1611  */
1612 static int rvt_post_one_wr(struct rvt_qp *qp,
1613                            struct ib_send_wr *wr,
1614                            int *call_send)
1615 {
1616         struct rvt_swqe *wqe;
1617         u32 next;
1618         int i;
1619         int j;
1620         int acc;
1621         struct rvt_lkey_table *rkt;
1622         struct rvt_pd *pd;
1623         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
1624         u8 log_pmtu;
1625         int ret;
1626         size_t cplen;
1627         bool reserved_op;
1628         int local_ops_delayed = 0;
1629
1630         BUILD_BUG_ON(IB_QPT_MAX >= (sizeof(u32) * BITS_PER_BYTE));
1631
1632         /* IB spec says that num_sge == 0 is OK. */
1633         if (unlikely(wr->num_sge > qp->s_max_sge))
1634                 return -EINVAL;
1635
1636         ret = rvt_qp_valid_operation(qp, rdi->post_parms, wr);
1637         if (ret < 0)
1638                 return ret;
1639         cplen = ret;
1640
1641         /*
1642          * Local operations include fast register and local invalidate.
1643          * Fast register needs to be processed immediately because the
1644          * registered lkey may be used by following work requests and the
1645          * lkey needs to be valid at the time those requests are posted.
1646          * Local invalidate can be processed immediately if fencing is
1647          * not required and no previous local invalidate ops are pending.
1648          * Signaled local operations that have been processed immediately
1649          * need to have requests with "completion only" flags set posted
1650          * to the send queue in order to generate completions.
1651          */
1652         if ((rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL)) {
1653                 switch (wr->opcode) {
1654                 case IB_WR_REG_MR:
1655                         ret = rvt_fast_reg_mr(qp,
1656                                               reg_wr(wr)->mr,
1657                                               reg_wr(wr)->key,
1658                                               reg_wr(wr)->access);
1659                         if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1660                                 return ret;
1661                         break;
1662                 case IB_WR_LOCAL_INV:
1663                         if ((wr->send_flags & IB_SEND_FENCE) ||
1664                             atomic_read(&qp->local_ops_pending)) {
1665                                 local_ops_delayed = 1;
1666                         } else {
1667                                 ret = rvt_invalidate_rkey(
1668                                         qp, wr->ex.invalidate_rkey);
1669                                 if (ret || !(wr->send_flags & IB_SEND_SIGNALED))
1670                                         return ret;
1671                         }
1672                         break;
1673                 default:
1674                         return -EINVAL;
1675                 }
1676         }
1677
1678         reserved_op = rdi->post_parms[wr->opcode].flags &
1679                         RVT_OPERATION_USE_RESERVE;
1680         /* check for avail */
1681         ret = rvt_qp_is_avail(qp, rdi, reserved_op);
1682         if (ret)
1683                 return ret;
1684         next = qp->s_head + 1;
1685         if (next >= qp->s_size)
1686                 next = 0;
1687
1688         rkt = &rdi->lkey_table;
1689         pd = ibpd_to_rvtpd(qp->ibqp.pd);
1690         wqe = rvt_get_swqe_ptr(qp, qp->s_head);
1691
1692         /* cplen has length from above */
1693         memcpy(&wqe->wr, wr, cplen);
1694
1695         wqe->length = 0;
1696         j = 0;
1697         if (wr->num_sge) {
1698                 acc = wr->opcode >= IB_WR_RDMA_READ ?
1699                         IB_ACCESS_LOCAL_WRITE : 0;
1700                 for (i = 0; i < wr->num_sge; i++) {
1701                         u32 length = wr->sg_list[i].length;
1702                         int ok;
1703
1704                         if (length == 0)
1705                                 continue;
1706                         ok = rvt_lkey_ok(rkt, pd, &wqe->sg_list[j],
1707                                          &wr->sg_list[i], acc);
1708                         if (!ok) {
1709                                 ret = -EINVAL;
1710                                 goto bail_inval_free;
1711                         }
1712                         wqe->length += length;
1713                         j++;
1714                 }
1715                 wqe->wr.num_sge = j;
1716         }
1717
1718         /* general part of wqe valid - allow for driver checks */
1719         if (rdi->driver_f.check_send_wqe) {
1720                 ret = rdi->driver_f.check_send_wqe(qp, wqe);
1721                 if (ret < 0)
1722                         goto bail_inval_free;
1723                 if (ret)
1724                         *call_send = ret;
1725         }
1726
1727         log_pmtu = qp->log_pmtu;
1728         if (qp->ibqp.qp_type != IB_QPT_UC &&
1729             qp->ibqp.qp_type != IB_QPT_RC) {
1730                 struct rvt_ah *ah = ibah_to_rvtah(wqe->ud_wr.ah);
1731
1732                 log_pmtu = ah->log_pmtu;
1733                 atomic_inc(&ibah_to_rvtah(ud_wr(wr)->ah)->refcount);
1734         }
1735
1736         if (rdi->post_parms[wr->opcode].flags & RVT_OPERATION_LOCAL) {
1737                 if (local_ops_delayed)
1738                         atomic_inc(&qp->local_ops_pending);
1739                 else
1740                         wqe->wr.send_flags |= RVT_SEND_COMPLETION_ONLY;
1741                 wqe->ssn = 0;
1742                 wqe->psn = 0;
1743                 wqe->lpsn = 0;
1744         } else {
1745                 wqe->ssn = qp->s_ssn++;
1746                 wqe->psn = qp->s_next_psn;
1747                 wqe->lpsn = wqe->psn +
1748                                 (wqe->length ?
1749                                         ((wqe->length - 1) >> log_pmtu) :
1750                                         0);
1751                 qp->s_next_psn = wqe->lpsn + 1;
1752         }
1753         if (unlikely(reserved_op)) {
1754                 wqe->wr.send_flags |= RVT_SEND_RESERVE_USED;
1755                 rvt_qp_wqe_reserve(qp, wqe);
1756         } else {
1757                 wqe->wr.send_flags &= ~RVT_SEND_RESERVE_USED;
1758                 qp->s_avail--;
1759         }
1760         trace_rvt_post_one_wr(qp, wqe);
1761         smp_wmb(); /* see request builders */
1762         qp->s_head = next;
1763
1764         return 0;
1765
1766 bail_inval_free:
1767         /* release mr holds */
1768         while (j) {
1769                 struct rvt_sge *sge = &wqe->sg_list[--j];
1770
1771                 rvt_put_mr(sge->mr);
1772         }
1773         return ret;
1774 }
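/*
 * Note on the PSN span set up in rvt_post_one_wr() (illustrative
 * numbers): a non-local WQE of length 8192 on a QP whose path MTU is
 * 4096 bytes (log_pmtu = 12) spans two packets, so
 * lpsn = psn + ((8192 - 1) >> 12) = psn + 1, and s_next_psn advances
 * to lpsn + 1.
 */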
1775
1776 /**
1777  * rvt_post_send - post a send on a QP
1778  * @ibqp: the QP to post the send on
1779  * @wr: the list of work requests to post
1780  * @bad_wr: the first bad WR is put here
1781  *
1782  * This may be called from interrupt context.
1783  *
1784  * Return: 0 on success else errno
1785  */
1786 int rvt_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr,
1787                   struct ib_send_wr **bad_wr)
1788 {
1789         struct rvt_qp *qp = ibqp_to_rvtqp(ibqp);
1790         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1791         unsigned long flags = 0;
1792         int call_send;
1793         unsigned nreq = 0;
1794         int err = 0;
1795
1796         spin_lock_irqsave(&qp->s_hlock, flags);
1797
1798         /*
1799          * Ensure QP state is such that we can send. If not bail out early,
1800          * there is no need to do this every time we post a send.
1801          */
1802         if (unlikely(!(ib_rvt_state_ops[qp->state] & RVT_POST_SEND_OK))) {
1803                 spin_unlock_irqrestore(&qp->s_hlock, flags);
1804                 return -EINVAL;
1805         }
1806
1807         /*
1808          * If the send queue is empty, and we only have a single WR then just go
1809          * ahead and kick the send engine into gear. Otherwise we will always
1810          * just schedule the send to happen later.
1811          */
1812         call_send = qp->s_head == ACCESS_ONCE(qp->s_last) && !wr->next;
1813
1814         for (; wr; wr = wr->next) {
1815                 err = rvt_post_one_wr(qp, wr, &call_send);
1816                 if (unlikely(err)) {
1817                         *bad_wr = wr;
1818                         goto bail;
1819                 }
1820                 nreq++;
1821         }
1822 bail:
1823         spin_unlock_irqrestore(&qp->s_hlock, flags);
1824         if (nreq) {
1825                 if (call_send)
1826                         rdi->driver_f.do_send(qp);
1827                 else
1828                         rdi->driver_f.schedule_send_no_lock(qp);
1829         }
1830         return err;
1831 }
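/*
 * Illustrative sketch only: consumers reach rvt_post_send() through the
 * ib_post_send() verb.  A single signaled SEND, assuming a caller-owned
 * sge and cookie, might be posted roughly as:
 *
 *	struct ib_send_wr wr = {
 *		.wr_id      = cookie,
 *		.sg_list    = &sge,
 *		.num_sge    = 1,
 *		.opcode     = IB_WR_SEND,
 *		.send_flags = IB_SEND_SIGNALED,
 *	};
 *	struct ib_send_wr *bad_wr;
 *	int ret = ib_post_send(ibqp, &wr, &bad_wr);
 *
 * With an otherwise empty send queue and no chained WRs, the path above
 * calls the driver's do_send directly instead of scheduling the send
 * for later.
 */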
1832
1833 /**
1834  * rvt_post_srq_receive - post a receive on a shared receive queue
1835  * @ibsrq: the SRQ to post the receive on
1836  * @wr: the list of work requests to post
1837  * @bad_wr: the first WR to cause a problem is put here
1838  *
1839  * This may be called from interrupt context.
1840  *
1841  * Return: 0 on success else errno
1842  */
1843 int rvt_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr,
1844                       struct ib_recv_wr **bad_wr)
1845 {
1846         struct rvt_srq *srq = ibsrq_to_rvtsrq(ibsrq);
1847         struct rvt_rwq *wq;
1848         unsigned long flags;
1849
1850         for (; wr; wr = wr->next) {
1851                 struct rvt_rwqe *wqe;
1852                 u32 next;
1853                 int i;
1854
1855                 if ((unsigned)wr->num_sge > srq->rq.max_sge) {
1856                         *bad_wr = wr;
1857                         return -EINVAL;
1858                 }
1859
1860                 spin_lock_irqsave(&srq->rq.lock, flags);
1861                 wq = srq->rq.wq;
1862                 next = wq->head + 1;
1863                 if (next >= srq->rq.size)
1864                         next = 0;
1865                 if (next == wq->tail) {
1866                         spin_unlock_irqrestore(&srq->rq.lock, flags);
1867                         *bad_wr = wr;
1868                         return -ENOMEM;
1869                 }
1870
1871                 wqe = rvt_get_rwqe_ptr(&srq->rq, wq->head);
1872                 wqe->wr_id = wr->wr_id;
1873                 wqe->num_sge = wr->num_sge;
1874                 for (i = 0; i < wr->num_sge; i++)
1875                         wqe->sg_list[i] = wr->sg_list[i];
1876                 /* Make sure queue entry is written before the head index. */
1877                 smp_wmb();
1878                 wq->head = next;
1879                 spin_unlock_irqrestore(&srq->rq.lock, flags);
1880         }
1881         return 0;
1882 }
1883
1884 /**
1885  * rvt_comm_est - handle trap with QP established
1886  * @qp: the QP
1887  */
1888 void rvt_comm_est(struct rvt_qp *qp)
1889 {
1890         qp->r_flags |= RVT_R_COMM_EST;
1891         if (qp->ibqp.event_handler) {
1892                 struct ib_event ev;
1893
1894                 ev.device = qp->ibqp.device;
1895                 ev.element.qp = &qp->ibqp;
1896                 ev.event = IB_EVENT_COMM_EST;
1897                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1898         }
1899 }
1900 EXPORT_SYMBOL(rvt_comm_est);
1901
1902 void rvt_rc_error(struct rvt_qp *qp, enum ib_wc_status err)
1903 {
1904         unsigned long flags;
1905         int lastwqe;
1906
1907         spin_lock_irqsave(&qp->s_lock, flags);
1908         lastwqe = rvt_error_qp(qp, err);
1909         spin_unlock_irqrestore(&qp->s_lock, flags);
1910
1911         if (lastwqe) {
1912                 struct ib_event ev;
1913
1914                 ev.device = qp->ibqp.device;
1915                 ev.element.qp = &qp->ibqp;
1916                 ev.event = IB_EVENT_QP_LAST_WQE_REACHED;
1917                 qp->ibqp.event_handler(&ev, qp->ibqp.qp_context);
1918         }
1919 }
1920 EXPORT_SYMBOL(rvt_rc_error);
1921
1922 /*
1923  *  rvt_rnr_tbl_to_usec - return the RNR timeout in usec for an index
1924  *  @index - the index
1925  *  return usec from an index into ib_rvt_rnr_table
1926  */
1927 unsigned long rvt_rnr_tbl_to_usec(u32 index)
1928 {
1929         return ib_rvt_rnr_table[(index & IB_AETH_CREDIT_MASK)];
1930 }
1931 EXPORT_SYMBOL(rvt_rnr_tbl_to_usec);
1932
1933 static inline unsigned long rvt_aeth_to_usec(u32 aeth)
1934 {
1935         return ib_rvt_rnr_table[(aeth >> IB_AETH_CREDIT_SHIFT) &
1936                                   IB_AETH_CREDIT_MASK];
1937 }
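/*
 * Example of the decode above: an AETH whose credit field carries RNR
 * NAK timer code 0x0C maps through ib_rvt_rnr_table[0x0C] to 640 usec
 * (0.64 ms).
 */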
1938
1939 /*
1940  *  rvt_add_retry_timer - add/start a retry timer
1941  *  @qp - the QP
1942  *  add a retry timer on the QP
1943  */
1944 void rvt_add_retry_timer(struct rvt_qp *qp)
1945 {
1946         struct ib_qp *ibqp = &qp->ibqp;
1947         struct rvt_dev_info *rdi = ib_to_rvt(ibqp->device);
1948
1949         lockdep_assert_held(&qp->s_lock);
1950         qp->s_flags |= RVT_S_TIMER;
1951         /* 4.096 usec. * (1 << qp->timeout) */
1952         qp->s_timer.expires = jiffies + qp->timeout_jiffies +
1953                              rdi->busy_jiffies;
1954         add_timer(&qp->s_timer);
1955 }
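/*
 * Example of the expiry above (illustrative value): with qp->timeout
 * == 14, timeout_jiffies corresponds to about
 * 4.096 usec * (1 << 14) ~= 67 ms, plus the device's busy_jiffies
 * adjustment.
 */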
1956 EXPORT_SYMBOL(rvt_add_retry_timer);
1957
1958 /**
1959  * rvt_add_rnr_timer - add/start an rnr timer
1960  * @qp - the QP
1961  * @aeth - aeth of RNR timeout, simulated aeth for loopback
1962  * add an rnr timer on the QP
1963  */
1964 void rvt_add_rnr_timer(struct rvt_qp *qp, u32 aeth)
1965 {
1966         u32 to;
1967
1968         lockdep_assert_held(&qp->s_lock);
1969         qp->s_flags |= RVT_S_WAIT_RNR;
1970         to = rvt_aeth_to_usec(aeth);
1971         hrtimer_start(&qp->s_rnr_timer,
1972                       ns_to_ktime(1000 * to), HRTIMER_MODE_REL);
1973 }
1974 EXPORT_SYMBOL(rvt_add_rnr_timer);
1975
1976 /**
1977  * rvt_stop_rc_timers - stop all timers
1978  * @qp - the QP
1979  * stop any pending timers
1980  */
1981 void rvt_stop_rc_timers(struct rvt_qp *qp)
1982 {
1983         lockdep_assert_held(&qp->s_lock);
1984         /* Remove QP from all timers */
1985         if (qp->s_flags & (RVT_S_TIMER | RVT_S_WAIT_RNR)) {
1986                 qp->s_flags &= ~(RVT_S_TIMER | RVT_S_WAIT_RNR);
1987                 del_timer(&qp->s_timer);
1988                 hrtimer_try_to_cancel(&qp->s_rnr_timer);
1989         }
1990 }
1991 EXPORT_SYMBOL(rvt_stop_rc_timers);
1992
1993 /**
1994  * rvt_stop_rnr_timer - stop an rnr timer
1995  * @qp - the QP
1996  *
1997  * stop an rnr timer and return if the timer
1998  * had been pending.
1999  */
2000 static int rvt_stop_rnr_timer(struct rvt_qp *qp)
2001 {
2002         int rval = 0;
2003
2004         lockdep_assert_held(&qp->s_lock);
2005         /* Remove QP from rnr timer */
2006         if (qp->s_flags & RVT_S_WAIT_RNR) {
2007                 qp->s_flags &= ~RVT_S_WAIT_RNR;
2008                 rval = hrtimer_try_to_cancel(&qp->s_rnr_timer);
2009         }
2010         return rval;
2011 }
2012
2013 /**
2014  * rvt_del_timers_sync - wait for any timeout routines to exit
2015  * @qp - the QP
2016  */
2017 void rvt_del_timers_sync(struct rvt_qp *qp)
2018 {
2019         del_timer_sync(&qp->s_timer);
2020         hrtimer_cancel(&qp->s_rnr_timer);
2021 }
2022 EXPORT_SYMBOL(rvt_del_timers_sync);
2023
2024 /**
2025  * rvt_rc_timeout - this is called from s_timer for missing responses
2026  */
2027 static void rvt_rc_timeout(unsigned long arg)
2028 {
2029         struct rvt_qp *qp = (struct rvt_qp *)arg;
2030         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2031         unsigned long flags;
2032
2033         spin_lock_irqsave(&qp->r_lock, flags);
2034         spin_lock(&qp->s_lock);
2035         if (qp->s_flags & RVT_S_TIMER) {
2036                 struct rvt_ibport *rvp = rdi->ports[qp->port_num - 1];
2037
2038                 qp->s_flags &= ~RVT_S_TIMER;
2039                 rvp->n_rc_timeouts++;
2040                 del_timer(&qp->s_timer);
2041                 trace_rvt_rc_timeout(qp, qp->s_last_psn + 1);
2042                 if (rdi->driver_f.notify_restart_rc)
2043                         rdi->driver_f.notify_restart_rc(qp,
2044                                                         qp->s_last_psn + 1,
2045                                                         1);
2046                 rdi->driver_f.schedule_send(qp);
2047         }
2048         spin_unlock(&qp->s_lock);
2049         spin_unlock_irqrestore(&qp->r_lock, flags);
2050 }
2051
2052 /*
2053  * This is called from s_rnr_timer for RNR timeouts.
2054  */
2055 enum hrtimer_restart rvt_rc_rnr_retry(struct hrtimer *t)
2056 {
2057         struct rvt_qp *qp = container_of(t, struct rvt_qp, s_rnr_timer);
2058         struct rvt_dev_info *rdi = ib_to_rvt(qp->ibqp.device);
2059         unsigned long flags;
2060
2061         spin_lock_irqsave(&qp->s_lock, flags);
2062         rvt_stop_rnr_timer(qp);
2063         rdi->driver_f.schedule_send(qp);
2064         spin_unlock_irqrestore(&qp->s_lock, flags);
2065         return HRTIMER_NORESTART;
2066 }
2067 EXPORT_SYMBOL(rvt_rc_rnr_retry);