/*
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/module.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/string.h>
#include <linux/parser.h>
#include <linux/random.h>
#include <linux/jiffies.h>
#include <linux/lockdep.h>
#include <linux/inet.h>
#include <rdma/ib_cache.h>

#include <linux/atomic.h>

#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_dbg.h>
#include <scsi/scsi_tcq.h>
#include <scsi/srp.h>
#include <scsi/scsi_transport_srp.h>

#include "ib_srp.h"

#define DRV_NAME        "ib_srp"
#define PFX             DRV_NAME ": "

MODULE_AUTHOR("Roland Dreier");
MODULE_DESCRIPTION("InfiniBand SCSI RDMA Protocol initiator");
MODULE_LICENSE("Dual BSD/GPL");

#if !defined(CONFIG_DYNAMIC_DEBUG)
#define DEFINE_DYNAMIC_DEBUG_METADATA(name, fmt)
#define DYNAMIC_DEBUG_BRANCH(descriptor) false
#endif

static unsigned int srp_sg_tablesize;
static unsigned int cmd_sg_entries;
static unsigned int indirect_sg_entries;
static bool allow_ext_sg;
static bool prefer_fr = true;
static bool register_always = true;
static bool never_register;
static int topspin_workarounds = 1;

module_param(srp_sg_tablesize, uint, 0444);
MODULE_PARM_DESC(srp_sg_tablesize, "Deprecated name for cmd_sg_entries");

module_param(cmd_sg_entries, uint, 0444);
MODULE_PARM_DESC(cmd_sg_entries,
                 "Default number of gather/scatter entries in the SRP command (default is 12, max 255)");

module_param(indirect_sg_entries, uint, 0444);
MODULE_PARM_DESC(indirect_sg_entries,
                 "Default max number of gather/scatter entries (default is 12, max is " __stringify(SG_MAX_SEGMENTS) ")");

module_param(allow_ext_sg, bool, 0444);
MODULE_PARM_DESC(allow_ext_sg,
                 "Default behavior when there are more than cmd_sg_entries S/G entries after mapping; fails the request when false (default false)");

module_param(topspin_workarounds, int, 0444);
MODULE_PARM_DESC(topspin_workarounds,
                 "Enable workarounds for Topspin/Cisco SRP target bugs if != 0");

module_param(prefer_fr, bool, 0444);
MODULE_PARM_DESC(prefer_fr,
                 "Whether to use fast registration if both FMR and fast registration are supported");

module_param(register_always, bool, 0444);
MODULE_PARM_DESC(register_always,
                 "Use memory registration even for contiguous memory regions");

module_param(never_register, bool, 0444);
MODULE_PARM_DESC(never_register, "Never register memory");

static const struct kernel_param_ops srp_tmo_ops;

static int srp_reconnect_delay = 10;
module_param_cb(reconnect_delay, &srp_tmo_ops, &srp_reconnect_delay,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reconnect_delay, "Time between successive reconnect attempts");

static int srp_fast_io_fail_tmo = 15;
module_param_cb(fast_io_fail_tmo, &srp_tmo_ops, &srp_fast_io_fail_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(fast_io_fail_tmo,
                 "Number of seconds between the observation of a transport"
                 " layer error and failing all I/O. \"off\" means that this"
                 " functionality is disabled.");

static int srp_dev_loss_tmo = 600;
module_param_cb(dev_loss_tmo, &srp_tmo_ops, &srp_dev_loss_tmo,
                S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(dev_loss_tmo,
                 "Maximum number of seconds that the SRP transport should"
                 " insulate transport layer errors. After this time has been"
                 " exceeded the SCSI host is removed. Should be"
                 " between 1 and " __stringify(SCSI_DEVICE_BLOCK_MAX_TIMEOUT)
                 " if fast_io_fail_tmo has not been set. \"off\" means that"
                 " this functionality is disabled.");

static unsigned ch_count;
module_param(ch_count, uint, 0444);
MODULE_PARM_DESC(ch_count,
                 "Number of RDMA channels to use for communication with an SRP target. Using more than one channel improves performance if the HCA supports multiple completion vectors. The default value is the minimum of four times the number of online CPU sockets and the number of completion vectors supported by the HCA.");

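/*
 * Illustrative example (not part of the driver): the parameters above are
 * read-only at runtime (mode 0444) and are therefore set at module load
 * time, e.g.:
 *
 *   modprobe ib_srp cmd_sg_entries=32 indirect_sg_entries=128 ch_count=4
 *
 * Boolean parameters such as register_always accept Y/N.
 */
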
static void srp_add_one(struct ib_device *device);
static void srp_remove_one(struct ib_device *device, void *client_data);
static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc);
static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
                const char *opname);
static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
                             const struct ib_cm_event *event);
static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
                               struct rdma_cm_event *event);

static struct scsi_transport_template *ib_srp_transport_template;
static struct workqueue_struct *srp_remove_wq;

static struct ib_client srp_client = {
        .name   = "srp",
        .add    = srp_add_one,
        .remove = srp_remove_one
};

static struct ib_sa_client srp_sa_client;

static int srp_tmo_get(char *buffer, const struct kernel_param *kp)
{
        int tmo = *(int *)kp->arg;

        if (tmo >= 0)
                return sprintf(buffer, "%d", tmo);
        else
                return sprintf(buffer, "off");
}

static int srp_tmo_set(const char *val, const struct kernel_param *kp)
{
        int tmo, res;

        res = srp_parse_tmo(&tmo, val);
        if (res)
                goto out;

        if (kp->arg == &srp_reconnect_delay)
                res = srp_tmo_valid(tmo, srp_fast_io_fail_tmo,
                                    srp_dev_loss_tmo);
        else if (kp->arg == &srp_fast_io_fail_tmo)
                res = srp_tmo_valid(srp_reconnect_delay, tmo, srp_dev_loss_tmo);
        else
                res = srp_tmo_valid(srp_reconnect_delay, srp_fast_io_fail_tmo,
                                    tmo);
        if (res)
                goto out;
        *(int *)kp->arg = tmo;

out:
        return res;
}

static const struct kernel_param_ops srp_tmo_ops = {
        .get = srp_tmo_get,
        .set = srp_tmo_set,
};
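
/*
 * Example (illustrative, not part of the driver): because the three timeout
 * parameters above are declared with module_param_cb() and mode
 * S_IRUGO | S_IWUSR, root can read and update them at runtime through
 * sysfs, with srp_tmo_set() validating each new value:
 *
 *   cat /sys/module/ib_srp/parameters/reconnect_delay
 *   echo 20  > /sys/module/ib_srp/parameters/reconnect_delay
 *   echo off > /sys/module/ib_srp/parameters/fast_io_fail_tmo
 */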

static inline struct srp_target_port *host_to_target(struct Scsi_Host *host)
{
        return (struct srp_target_port *) host->hostdata;
}

static const char *srp_target_info(struct Scsi_Host *host)
{
        return host_to_target(host)->target_name;
}

static int srp_target_is_topspin(struct srp_target_port *target)
{
        static const u8 topspin_oui[3] = { 0x00, 0x05, 0xad };
        static const u8 cisco_oui[3]   = { 0x00, 0x1b, 0x0d };

        return topspin_workarounds &&
                (!memcmp(&target->ioc_guid, topspin_oui, sizeof topspin_oui) ||
                 !memcmp(&target->ioc_guid, cisco_oui, sizeof cisco_oui));
}

static struct srp_iu *srp_alloc_iu(struct srp_host *host, size_t size,
                                   gfp_t gfp_mask,
                                   enum dma_data_direction direction)
{
        struct srp_iu *iu;

        iu = kmalloc(sizeof *iu, gfp_mask);
        if (!iu)
                goto out;

        iu->buf = kzalloc(size, gfp_mask);
        if (!iu->buf)
                goto out_free_iu;

        iu->dma = ib_dma_map_single(host->srp_dev->dev, iu->buf, size,
                                    direction);
        if (ib_dma_mapping_error(host->srp_dev->dev, iu->dma))
                goto out_free_buf;

        iu->size      = size;
        iu->direction = direction;

        return iu;

out_free_buf:
        kfree(iu->buf);
out_free_iu:
        kfree(iu);
out:
        return NULL;
}

static void srp_free_iu(struct srp_host *host, struct srp_iu *iu)
{
        if (!iu)
                return;

        ib_dma_unmap_single(host->srp_dev->dev, iu->dma, iu->size,
                            iu->direction);
        kfree(iu->buf);
        kfree(iu);
}

static void srp_qp_event(struct ib_event *event, void *context)
{
        pr_debug("QP event %s (%d)\n",
                 ib_event_msg(event->event), event->event);
}

static int srp_init_ib_qp(struct srp_target_port *target,
                          struct ib_qp *qp)
{
        struct ib_qp_attr *attr;
        int ret;

        attr = kmalloc(sizeof *attr, GFP_KERNEL);
        if (!attr)
                return -ENOMEM;

        ret = ib_find_cached_pkey(target->srp_host->srp_dev->dev,
                                  target->srp_host->port,
                                  be16_to_cpu(target->ib_cm.pkey),
                                  &attr->pkey_index);
        if (ret)
                goto out;

        attr->qp_state        = IB_QPS_INIT;
        attr->qp_access_flags = (IB_ACCESS_REMOTE_READ |
                                 IB_ACCESS_REMOTE_WRITE);
        attr->port_num        = target->srp_host->port;

        ret = ib_modify_qp(qp, attr,
                           IB_QP_STATE          |
                           IB_QP_PKEY_INDEX     |
                           IB_QP_ACCESS_FLAGS   |
                           IB_QP_PORT);

out:
        kfree(attr);
        return ret;
}

static int srp_new_ib_cm_id(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct ib_cm_id *new_cm_id;

        new_cm_id = ib_create_cm_id(target->srp_host->srp_dev->dev,
                                    srp_ib_cm_handler, ch);
        if (IS_ERR(new_cm_id))
                return PTR_ERR(new_cm_id);

        if (ch->ib_cm.cm_id)
                ib_destroy_cm_id(ch->ib_cm.cm_id);
        ch->ib_cm.cm_id = new_cm_id;
        if (rdma_cap_opa_ah(target->srp_host->srp_dev->dev,
                            target->srp_host->port))
                ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_OPA;
        else
                ch->ib_cm.path.rec_type = SA_PATH_REC_TYPE_IB;
        ch->ib_cm.path.sgid = target->sgid;
        ch->ib_cm.path.dgid = target->ib_cm.orig_dgid;
        ch->ib_cm.path.pkey = target->ib_cm.pkey;
        ch->ib_cm.path.service_id = target->ib_cm.service_id;

        return 0;
}

static int srp_new_rdma_cm_id(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct rdma_cm_id *new_cm_id;
        int ret;

        new_cm_id = rdma_create_id(target->net, srp_rdma_cm_handler, ch,
                                   RDMA_PS_TCP, IB_QPT_RC);
        if (IS_ERR(new_cm_id)) {
                ret = PTR_ERR(new_cm_id);
                new_cm_id = NULL;
                goto out;
        }

        init_completion(&ch->done);
        ret = rdma_resolve_addr(new_cm_id, target->rdma_cm.src_specified ?
                                (struct sockaddr *)&target->rdma_cm.src : NULL,
                                (struct sockaddr *)&target->rdma_cm.dst,
                                SRP_PATH_REC_TIMEOUT_MS);
        if (ret) {
                pr_err("No route available from %pIS to %pIS (%d)\n",
                       &target->rdma_cm.src, &target->rdma_cm.dst, ret);
                goto out;
        }
        ret = wait_for_completion_interruptible(&ch->done);
        if (ret < 0)
                goto out;

        ret = ch->status;
        if (ret) {
                pr_err("Resolving address %pIS failed (%d)\n",
                       &target->rdma_cm.dst, ret);
                goto out;
        }

        swap(ch->rdma_cm.cm_id, new_cm_id);

out:
        if (new_cm_id)
                rdma_destroy_id(new_cm_id);

        return ret;
}

static int srp_new_cm_id(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;

        return target->using_rdma_cm ? srp_new_rdma_cm_id(ch) :
                srp_new_ib_cm_id(ch);
}

static struct ib_fmr_pool *srp_alloc_fmr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_fmr_pool_param fmr_param;

        memset(&fmr_param, 0, sizeof(fmr_param));
        fmr_param.pool_size         = target->mr_pool_size;
        fmr_param.dirty_watermark   = fmr_param.pool_size / 4;
        fmr_param.cache             = 1;
        fmr_param.max_pages_per_fmr = dev->max_pages_per_mr;
        fmr_param.page_shift        = ilog2(dev->mr_page_size);
        fmr_param.access            = (IB_ACCESS_LOCAL_WRITE |
                                       IB_ACCESS_REMOTE_WRITE |
                                       IB_ACCESS_REMOTE_READ);

        return ib_create_fmr_pool(dev->pd, &fmr_param);
}

/**
 * srp_destroy_fr_pool() - free the resources owned by a pool
 * @pool: Fast registration pool to be destroyed.
 */
static void srp_destroy_fr_pool(struct srp_fr_pool *pool)
{
        int i;
        struct srp_fr_desc *d;

        if (!pool)
                return;

        for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
                if (d->mr)
                        ib_dereg_mr(d->mr);
        }
        kfree(pool);
}

/**
 * srp_create_fr_pool() - allocate and initialize a pool for fast registration
 * @device:            IB device to allocate fast registration descriptors for.
 * @pd:                Protection domain associated with the FR descriptors.
 * @pool_size:         Number of descriptors to allocate.
 * @max_page_list_len: Maximum fast registration work request page list length.
 */
static struct srp_fr_pool *srp_create_fr_pool(struct ib_device *device,
                                              struct ib_pd *pd, int pool_size,
                                              int max_page_list_len)
{
        struct srp_fr_pool *pool;
        struct srp_fr_desc *d;
        struct ib_mr *mr;
        int i, ret = -EINVAL;
        enum ib_mr_type mr_type;

        if (pool_size <= 0)
                goto err;
        ret = -ENOMEM;
        pool = kzalloc(sizeof(struct srp_fr_pool) +
                       pool_size * sizeof(struct srp_fr_desc), GFP_KERNEL);
        if (!pool)
                goto err;
        pool->size = pool_size;
        pool->max_page_list_len = max_page_list_len;
        spin_lock_init(&pool->lock);
        INIT_LIST_HEAD(&pool->free_list);

        if (device->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG)
                mr_type = IB_MR_TYPE_SG_GAPS;
        else
                mr_type = IB_MR_TYPE_MEM_REG;

        for (i = 0, d = &pool->desc[0]; i < pool->size; i++, d++) {
                mr = ib_alloc_mr(pd, mr_type, max_page_list_len);
                if (IS_ERR(mr)) {
                        ret = PTR_ERR(mr);
                        if (ret == -ENOMEM)
                                pr_info("%s: ib_alloc_mr() failed. Try to reduce max_cmd_per_lun, max_sect or ch_count\n",
                                        dev_name(&device->dev));
                        goto destroy_pool;
                }
                d->mr = mr;
                list_add_tail(&d->entry, &pool->free_list);
        }

out:
        return pool;

destroy_pool:
        srp_destroy_fr_pool(pool);

err:
        pool = ERR_PTR(ret);
        goto out;
}

/**
 * srp_fr_pool_get() - obtain a descriptor suitable for fast registration
 * @pool: Pool to obtain descriptor from.
 */
static struct srp_fr_desc *srp_fr_pool_get(struct srp_fr_pool *pool)
{
        struct srp_fr_desc *d = NULL;
        unsigned long flags;

        spin_lock_irqsave(&pool->lock, flags);
        if (!list_empty(&pool->free_list)) {
                d = list_first_entry(&pool->free_list, typeof(*d), entry);
                list_del(&d->entry);
        }
        spin_unlock_irqrestore(&pool->lock, flags);

        return d;
}

/**
 * srp_fr_pool_put() - put an FR descriptor back in the free list
 * @pool: Pool the descriptor was allocated from.
 * @desc: Pointer to an array of fast registration descriptor pointers.
 * @n:    Number of descriptors to put back.
 *
 * Note: The caller must already have queued an invalidation request for
 * desc->mr->rkey before calling this function.
 */
static void srp_fr_pool_put(struct srp_fr_pool *pool, struct srp_fr_desc **desc,
                            int n)
{
        unsigned long flags;
        int i;

        spin_lock_irqsave(&pool->lock, flags);
        for (i = 0; i < n; i++)
                list_add(&desc[i]->entry, &pool->free_list);
        spin_unlock_irqrestore(&pool->lock, flags);
}

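/*
 * Typical FR descriptor lifecycle in this driver (sketch added for clarity;
 * see srp_map_finish_fr() and srp_unmap_data() below for the real code):
 *
 *   desc = srp_fr_pool_get(pool);            // take a free descriptor
 *   ib_map_mr_sg(desc->mr, ...);             // map the S/G list onto the MR
 *   post IB_WR_REG_MR for desc->mr->rkey;    // register the memory
 *   ... use the rkey in SRP buffer descriptors ...
 *   post IB_WR_LOCAL_INV for the rkey;       // invalidate before reuse
 *   srp_fr_pool_put(pool, &desc, 1);         // return it to the free list
 */
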
static struct srp_fr_pool *srp_alloc_fr_pool(struct srp_target_port *target)
{
        struct srp_device *dev = target->srp_host->srp_dev;

        return srp_create_fr_pool(dev->dev, dev->pd, target->mr_pool_size,
                                  dev->max_pages_per_mr);
}

/**
 * srp_destroy_qp() - destroy an RDMA queue pair
 * @ch: SRP RDMA channel.
 *
 * Drain the qp before destroying it. This prevents the receive completion
 * handler from accessing the queue pair while it is being destroyed.
 */
static void srp_destroy_qp(struct srp_rdma_ch *ch)
{
        spin_lock_irq(&ch->lock);
        ib_process_cq_direct(ch->send_cq, -1);
        spin_unlock_irq(&ch->lock);

        ib_drain_qp(ch->qp);
        ib_destroy_qp(ch->qp);
}

static int srp_create_ch_ib(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_qp_init_attr *init_attr;
        struct ib_cq *recv_cq, *send_cq;
        struct ib_qp *qp;
        struct ib_fmr_pool *fmr_pool = NULL;
        struct srp_fr_pool *fr_pool = NULL;
        const int m = 1 + dev->use_fast_reg * target->mr_per_cmd * 2;
        int ret;

        init_attr = kzalloc(sizeof *init_attr, GFP_KERNEL);
        if (!init_attr)
                return -ENOMEM;

        /* queue_size + 1 for ib_drain_rq() */
        recv_cq = ib_alloc_cq(dev->dev, ch, target->queue_size + 1,
                                ch->comp_vector, IB_POLL_SOFTIRQ);
        if (IS_ERR(recv_cq)) {
                ret = PTR_ERR(recv_cq);
                goto err;
        }

        send_cq = ib_alloc_cq(dev->dev, ch, m * target->queue_size,
                                ch->comp_vector, IB_POLL_DIRECT);
        if (IS_ERR(send_cq)) {
                ret = PTR_ERR(send_cq);
                goto err_recv_cq;
        }

        init_attr->event_handler       = srp_qp_event;
        init_attr->cap.max_send_wr     = m * target->queue_size;
        init_attr->cap.max_recv_wr     = target->queue_size + 1;
        init_attr->cap.max_recv_sge    = 1;
        init_attr->cap.max_send_sge    = 1;
        init_attr->sq_sig_type         = IB_SIGNAL_REQ_WR;
        init_attr->qp_type             = IB_QPT_RC;
        init_attr->send_cq             = send_cq;
        init_attr->recv_cq             = recv_cq;

        if (target->using_rdma_cm) {
                ret = rdma_create_qp(ch->rdma_cm.cm_id, dev->pd, init_attr);
                qp = ch->rdma_cm.cm_id->qp;
        } else {
                qp = ib_create_qp(dev->pd, init_attr);
                if (!IS_ERR(qp)) {
                        ret = srp_init_ib_qp(target, qp);
                        if (ret)
                                ib_destroy_qp(qp);
                } else {
                        ret = PTR_ERR(qp);
                }
        }
        if (ret) {
                pr_err("QP creation failed for dev %s: %d\n",
                       dev_name(&dev->dev->dev), ret);
                goto err_send_cq;
        }

        if (dev->use_fast_reg) {
                fr_pool = srp_alloc_fr_pool(target);
                if (IS_ERR(fr_pool)) {
                        ret = PTR_ERR(fr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
        } else if (dev->use_fmr) {
                fmr_pool = srp_alloc_fmr_pool(target);
                if (IS_ERR(fmr_pool)) {
                        ret = PTR_ERR(fmr_pool);
                        shost_printk(KERN_WARNING, target->scsi_host, PFX
                                     "FMR pool allocation failed (%d)\n", ret);
                        goto err_qp;
                }
        }

        if (ch->qp)
                srp_destroy_qp(ch);
        if (ch->recv_cq)
                ib_free_cq(ch->recv_cq);
        if (ch->send_cq)
                ib_free_cq(ch->send_cq);

        ch->qp = qp;
        ch->recv_cq = recv_cq;
        ch->send_cq = send_cq;

        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
                ch->fr_pool = fr_pool;
        } else if (dev->use_fmr) {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
                ch->fmr_pool = fmr_pool;
        }

        kfree(init_attr);
        return 0;

err_qp:
        if (target->using_rdma_cm)
                rdma_destroy_qp(ch->rdma_cm.cm_id);
        else
                ib_destroy_qp(qp);

err_send_cq:
        ib_free_cq(send_cq);

err_recv_cq:
        ib_free_cq(recv_cq);

err:
        kfree(init_attr);
        return ret;
}

/*
 * Note: this function may be called without srp_alloc_iu_bufs() having been
 * invoked. Hence the ch->[rt]x_ring checks.
 */
static void srp_free_ch_ib(struct srp_target_port *target,
                           struct srp_rdma_ch *ch)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        int i;

        if (!ch->target)
                return;

        if (target->using_rdma_cm) {
                if (ch->rdma_cm.cm_id) {
                        rdma_destroy_id(ch->rdma_cm.cm_id);
                        ch->rdma_cm.cm_id = NULL;
                }
        } else {
                if (ch->ib_cm.cm_id) {
                        ib_destroy_cm_id(ch->ib_cm.cm_id);
                        ch->ib_cm.cm_id = NULL;
                }
        }

        /* If srp_new_cm_id() succeeded but srp_create_ch_ib() did not, return. */
        if (!ch->qp)
                return;

        if (dev->use_fast_reg) {
                if (ch->fr_pool)
                        srp_destroy_fr_pool(ch->fr_pool);
        } else if (dev->use_fmr) {
                if (ch->fmr_pool)
                        ib_destroy_fmr_pool(ch->fmr_pool);
        }

        srp_destroy_qp(ch);
        ib_free_cq(ch->send_cq);
        ib_free_cq(ch->recv_cq);

        /*
         * Prevent the SCSI error handler from using this channel after it
         * has been freed: the error handler may keep trying to perform
         * recovery actions even after scsi_remove_host() has returned.
         */
        ch->target = NULL;

        ch->qp = NULL;
        ch->send_cq = ch->recv_cq = NULL;

        if (ch->rx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->rx_ring[i]);
                kfree(ch->rx_ring);
                ch->rx_ring = NULL;
        }
        if (ch->tx_ring) {
                for (i = 0; i < target->queue_size; ++i)
                        srp_free_iu(target->srp_host, ch->tx_ring[i]);
                kfree(ch->tx_ring);
                ch->tx_ring = NULL;
        }
}

static void srp_path_rec_completion(int status,
                                    struct sa_path_rec *pathrec,
                                    void *ch_ptr)
{
        struct srp_rdma_ch *ch = ch_ptr;
        struct srp_target_port *target = ch->target;

        ch->status = status;
        if (status)
                shost_printk(KERN_ERR, target->scsi_host,
                             PFX "Got failed path rec status %d\n", status);
        else
                ch->ib_cm.path = *pathrec;
        complete(&ch->done);
}

static int srp_ib_lookup_path(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        int ret;

        ch->ib_cm.path.numb_path = 1;

        init_completion(&ch->done);

        ch->ib_cm.path_query_id = ib_sa_path_rec_get(&srp_sa_client,
                                               target->srp_host->srp_dev->dev,
                                               target->srp_host->port,
                                               &ch->ib_cm.path,
                                               IB_SA_PATH_REC_SERVICE_ID |
                                               IB_SA_PATH_REC_DGID       |
                                               IB_SA_PATH_REC_SGID       |
                                               IB_SA_PATH_REC_NUMB_PATH  |
                                               IB_SA_PATH_REC_PKEY,
                                               SRP_PATH_REC_TIMEOUT_MS,
                                               GFP_KERNEL,
                                               srp_path_rec_completion,
                                               ch, &ch->ib_cm.path_query);
        if (ch->ib_cm.path_query_id < 0)
                return ch->ib_cm.path_query_id;

        ret = wait_for_completion_interruptible(&ch->done);
        if (ret < 0)
                return ret;

        if (ch->status < 0)
                shost_printk(KERN_WARNING, target->scsi_host,
                             PFX "Path record query failed: sgid %pI6, dgid %pI6, pkey %#04x, service_id %#16llx\n",
                             ch->ib_cm.path.sgid.raw, ch->ib_cm.path.dgid.raw,
                             be16_to_cpu(target->ib_cm.pkey),
                             be64_to_cpu(target->ib_cm.service_id));

        return ch->status;
}

static int srp_rdma_lookup_path(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        int ret;

        init_completion(&ch->done);

        ret = rdma_resolve_route(ch->rdma_cm.cm_id, SRP_PATH_REC_TIMEOUT_MS);
        if (ret)
                return ret;

        wait_for_completion_interruptible(&ch->done);

        if (ch->status != 0)
                shost_printk(KERN_WARNING, target->scsi_host,
                             PFX "Path resolution failed\n");

        return ch->status;
}

static int srp_lookup_path(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;

        return target->using_rdma_cm ? srp_rdma_lookup_path(ch) :
                srp_ib_lookup_path(ch);
}

static u8 srp_get_subnet_timeout(struct srp_host *host)
{
        struct ib_port_attr attr;
        int ret;
        u8 subnet_timeout = 18;

        ret = ib_query_port(host->srp_dev->dev, host->port, &attr);
        if (ret == 0)
                subnet_timeout = attr.subnet_timeout;

        if (unlikely(subnet_timeout < 15))
                pr_warn("%s: subnet timeout %d may cause SRP login to fail.\n",
                        dev_name(&host->srp_dev->dev->dev), subnet_timeout);

        return subnet_timeout;
}

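/*
 * Note (added for clarity, not from the original source): IB timeout fields
 * are exponents over a 4.096 us base, so the "subnet_timeout + 2" used for
 * the CM response timeouts in srp_send_req() below presumably grants the
 * target roughly four times the subnet packet lifetime to respond.
 */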
static int srp_send_req(struct srp_rdma_ch *ch, bool multich)
{
        struct srp_target_port *target = ch->target;
        struct {
                struct rdma_conn_param    rdma_param;
                struct srp_login_req_rdma rdma_req;
                struct ib_cm_req_param    ib_param;
                struct srp_login_req      ib_req;
        } *req = NULL;
        char *ipi, *tpi;
        int status;

        req = kzalloc(sizeof *req, GFP_KERNEL);
        if (!req)
                return -ENOMEM;

        req->ib_param.flow_control = 1;
        req->ib_param.retry_count = target->tl_retry_count;

        /*
         * Pick some arbitrary defaults here; we could make these
         * module parameters if anyone cared about setting them.
         */
        req->ib_param.responder_resources = 4;
        req->ib_param.rnr_retry_count = 7;
        req->ib_param.max_cm_retries = 15;

        req->ib_req.opcode = SRP_LOGIN_REQ;
        req->ib_req.tag = 0;
        req->ib_req.req_it_iu_len = cpu_to_be32(target->max_iu_len);
        req->ib_req.req_buf_fmt = cpu_to_be16(SRP_BUF_FORMAT_DIRECT |
                                              SRP_BUF_FORMAT_INDIRECT);
        req->ib_req.req_flags = (multich ? SRP_MULTICHAN_MULTI :
                                 SRP_MULTICHAN_SINGLE);

        if (target->using_rdma_cm) {
                req->rdma_param.flow_control = req->ib_param.flow_control;
                req->rdma_param.responder_resources =
                        req->ib_param.responder_resources;
                req->rdma_param.initiator_depth = req->ib_param.initiator_depth;
                req->rdma_param.retry_count = req->ib_param.retry_count;
                req->rdma_param.rnr_retry_count = req->ib_param.rnr_retry_count;
                req->rdma_param.private_data = &req->rdma_req;
                req->rdma_param.private_data_len = sizeof(req->rdma_req);

                req->rdma_req.opcode = req->ib_req.opcode;
                req->rdma_req.tag = req->ib_req.tag;
                req->rdma_req.req_it_iu_len = req->ib_req.req_it_iu_len;
                req->rdma_req.req_buf_fmt = req->ib_req.req_buf_fmt;
                req->rdma_req.req_flags = req->ib_req.req_flags;

                ipi = req->rdma_req.initiator_port_id;
                tpi = req->rdma_req.target_port_id;
        } else {
                u8 subnet_timeout;

                subnet_timeout = srp_get_subnet_timeout(target->srp_host);

                req->ib_param.primary_path = &ch->ib_cm.path;
                req->ib_param.alternate_path = NULL;
                req->ib_param.service_id = target->ib_cm.service_id;
                get_random_bytes(&req->ib_param.starting_psn, 4);
                req->ib_param.starting_psn &= 0xffffff;
                req->ib_param.qp_num = ch->qp->qp_num;
                req->ib_param.qp_type = ch->qp->qp_type;
                req->ib_param.local_cm_response_timeout = subnet_timeout + 2;
                req->ib_param.remote_cm_response_timeout = subnet_timeout + 2;
                req->ib_param.private_data = &req->ib_req;
                req->ib_param.private_data_len = sizeof(req->ib_req);

                ipi = req->ib_req.initiator_port_id;
                tpi = req->ib_req.target_port_id;
        }

        /*
         * In the published SRP specification (draft rev. 16a), the
         * port identifier format is 8 bytes of ID extension followed
         * by 8 bytes of GUID.  Older drafts put the two halves in the
         * opposite order, so that the GUID comes first.
         *
         * Targets conforming to these obsolete drafts can be
         * recognized by the I/O Class they report.
         */
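
        /*
         * Layout sketch (added for clarity, mirroring the memcpy()s below):
         *
         *   rev. 16a: port ID = [ ID extension (8 bytes) | GUID (8 bytes) ]
         *   rev. 10:  port ID = [ GUID (8 bytes) | ID extension (8 bytes) ]
         */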
        if (target->io_class == SRP_REV10_IB_IO_CLASS) {
                memcpy(ipi,     &target->sgid.global.interface_id, 8);
                memcpy(ipi + 8, &target->initiator_ext, 8);
                memcpy(tpi,     &target->ioc_guid, 8);
                memcpy(tpi + 8, &target->id_ext, 8);
        } else {
                memcpy(ipi,     &target->initiator_ext, 8);
                memcpy(ipi + 8, &target->sgid.global.interface_id, 8);
                memcpy(tpi,     &target->id_ext, 8);
                memcpy(tpi + 8, &target->ioc_guid, 8);
        }

        /*
         * Topspin/Cisco SRP targets will reject our login unless we
         * zero out the first 8 bytes of our initiator port ID and set
         * the second 8 bytes to the local node GUID.
         */
        if (srp_target_is_topspin(target)) {
                shost_printk(KERN_DEBUG, target->scsi_host,
                             PFX "Topspin/Cisco initiator port ID workaround "
                             "activated for target GUID %016llx\n",
                             be64_to_cpu(target->ioc_guid));
                memset(ipi, 0, 8);
                memcpy(ipi + 8, &target->srp_host->srp_dev->dev->node_guid, 8);
        }

        if (target->using_rdma_cm)
                status = rdma_connect(ch->rdma_cm.cm_id, &req->rdma_param);
        else
                status = ib_send_cm_req(ch->ib_cm.cm_id, &req->ib_param);

        kfree(req);

        return status;
}

static bool srp_queue_remove_work(struct srp_target_port *target)
{
        bool changed = false;

        spin_lock_irq(&target->lock);
        if (target->state != SRP_TARGET_REMOVED) {
                target->state = SRP_TARGET_REMOVED;
                changed = true;
        }
        spin_unlock_irq(&target->lock);

        if (changed)
                queue_work(srp_remove_wq, &target->remove_work);

        return changed;
}

static void srp_disconnect_target(struct srp_target_port *target)
{
        struct srp_rdma_ch *ch;
        int i, ret;

        /* XXX should send SRP_I_LOGOUT request */

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                ch->connected = false;
                ret = 0;
                if (target->using_rdma_cm) {
                        if (ch->rdma_cm.cm_id)
                                rdma_disconnect(ch->rdma_cm.cm_id);
                } else {
                        if (ch->ib_cm.cm_id)
                                ret = ib_send_cm_dreq(ch->ib_cm.cm_id,
                                                      NULL, 0);
                }
                if (ret < 0) {
                        shost_printk(KERN_DEBUG, target->scsi_host,
                                     PFX "Sending CM DREQ failed\n");
                }
        }
}

static void srp_free_req_data(struct srp_target_port *target,
                              struct srp_rdma_ch *ch)
{
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        struct srp_request *req;
        int i;

        if (!ch->req_ring)
                return;

        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
                if (dev->use_fast_reg) {
                        kfree(req->fr_list);
                } else {
                        kfree(req->fmr_list);
                        kfree(req->map_page);
                }
                if (req->indirect_dma_addr) {
                        ib_dma_unmap_single(ibdev, req->indirect_dma_addr,
                                            target->indirect_size,
                                            DMA_TO_DEVICE);
                }
                kfree(req->indirect_desc);
        }

        kfree(ch->req_ring);
        ch->req_ring = NULL;
}

static int srp_alloc_req_data(struct srp_rdma_ch *ch)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *srp_dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
        struct srp_request *req;
        void *mr_list;
        dma_addr_t dma_addr;
        int i, ret = -ENOMEM;

        ch->req_ring = kcalloc(target->req_ring_size, sizeof(*ch->req_ring),
                               GFP_KERNEL);
        if (!ch->req_ring)
                goto out;

        for (i = 0; i < target->req_ring_size; ++i) {
                req = &ch->req_ring[i];
                mr_list = kmalloc_array(target->mr_per_cmd, sizeof(void *),
                                        GFP_KERNEL);
                if (!mr_list)
                        goto out;
                if (srp_dev->use_fast_reg) {
                        req->fr_list = mr_list;
                } else {
                        req->fmr_list = mr_list;
                        req->map_page = kmalloc_array(srp_dev->max_pages_per_mr,
                                                      sizeof(void *),
                                                      GFP_KERNEL);
                        if (!req->map_page)
                                goto out;
                }
                req->indirect_desc = kmalloc(target->indirect_size, GFP_KERNEL);
                if (!req->indirect_desc)
                        goto out;

                dma_addr = ib_dma_map_single(ibdev, req->indirect_desc,
                                             target->indirect_size,
                                             DMA_TO_DEVICE);
                if (ib_dma_mapping_error(ibdev, dma_addr))
                        goto out;

                req->indirect_dma_addr = dma_addr;
        }
        ret = 0;

out:
        return ret;
}

/**
 * srp_del_scsi_host_attr() - Remove attributes defined in the host template.
 * @shost: SCSI host whose attributes to remove from sysfs.
 *
 * Note: Any attributes defined in the host template that did not exist
 * before this function was invoked will be ignored.
 */
static void srp_del_scsi_host_attr(struct Scsi_Host *shost)
{
        struct device_attribute **attr;

        for (attr = shost->hostt->shost_attrs; attr && *attr; ++attr)
                device_remove_file(&shost->shost_dev, *attr);
}

static void srp_remove_target(struct srp_target_port *target)
{
        struct srp_rdma_ch *ch;
        int i;

        WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

        srp_del_scsi_host_attr(target->scsi_host);
        srp_rport_get(target->rport);
        srp_remove_host(target->scsi_host);
        scsi_remove_host(target->scsi_host);
        srp_stop_rport_timers(target->rport);
        srp_disconnect_target(target);
        kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_ch_ib(target, ch);
        }
        cancel_work_sync(&target->tl_err_work);
        srp_rport_put(target->rport);
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_req_data(target, ch);
        }
        kfree(target->ch);
        target->ch = NULL;

        spin_lock(&target->srp_host->target_lock);
        list_del(&target->list);
        spin_unlock(&target->srp_host->target_lock);

        scsi_host_put(target->scsi_host);
}

static void srp_remove_work(struct work_struct *work)
{
        struct srp_target_port *target =
                container_of(work, struct srp_target_port, remove_work);

        WARN_ON_ONCE(target->state != SRP_TARGET_REMOVED);

        srp_remove_target(target);
}

static void srp_rport_delete(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;

        srp_queue_remove_work(target);
}

/**
 * srp_connected_ch() - number of connected channels
 * @target: SRP target port.
 */
static int srp_connected_ch(struct srp_target_port *target)
{
        int i, c = 0;

        for (i = 0; i < target->ch_count; i++)
                c += target->ch[i].connected;

        return c;
}

static int srp_connect_ch(struct srp_rdma_ch *ch, bool multich)
{
        struct srp_target_port *target = ch->target;
        int ret;

        WARN_ON_ONCE(!multich && srp_connected_ch(target) > 0);

        ret = srp_lookup_path(ch);
        if (ret)
                goto out;

        while (1) {
                init_completion(&ch->done);
                ret = srp_send_req(ch, multich);
                if (ret)
                        goto out;
                ret = wait_for_completion_interruptible(&ch->done);
                if (ret < 0)
                        goto out;

                /*
                 * The CM event handling code will set status to
                 * SRP_PORT_REDIRECT if we get a port redirect REJ
                 * back, or SRP_DLID_REDIRECT if we get a lid/qp
                 * redirect REJ back.
                 */
                ret = ch->status;
                switch (ret) {
                case 0:
                        ch->connected = true;
                        goto out;

                case SRP_PORT_REDIRECT:
                        ret = srp_lookup_path(ch);
                        if (ret)
                                goto out;
                        break;

                case SRP_DLID_REDIRECT:
                        break;

                case SRP_STALE_CONN:
                        shost_printk(KERN_ERR, target->scsi_host, PFX
                                     "giving up on stale connection\n");
                        ret = -ECONNRESET;
                        goto out;

                default:
                        goto out;
                }
        }

out:
        return ret <= 0 ? ret : -ENODEV;
}

static void srp_inv_rkey_err_done(struct ib_cq *cq, struct ib_wc *wc)
{
        srp_handle_qp_err(cq, wc, "INV RKEY");
}

static int srp_inv_rkey(struct srp_request *req, struct srp_rdma_ch *ch,
                u32 rkey)
{
        struct ib_send_wr wr = {
                .opcode             = IB_WR_LOCAL_INV,
                .next               = NULL,
                .num_sge            = 0,
                .send_flags         = 0,
                .ex.invalidate_rkey = rkey,
        };

        wr.wr_cqe = &req->reg_cqe;
        req->reg_cqe.done = srp_inv_rkey_err_done;
        return ib_post_send(ch->qp, &wr, NULL);
}

static void srp_unmap_data(struct scsi_cmnd *scmnd,
                           struct srp_rdma_ch *ch,
                           struct srp_request *req)
{
        struct srp_target_port *target = ch->target;
        struct srp_device *dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = dev->dev;
        int i, res;

        if (!scsi_sglist(scmnd) ||
            (scmnd->sc_data_direction != DMA_TO_DEVICE &&
             scmnd->sc_data_direction != DMA_FROM_DEVICE))
                return;

        if (dev->use_fast_reg) {
                struct srp_fr_desc **pfr;

                for (i = req->nmdesc, pfr = req->fr_list; i > 0; i--, pfr++) {
                        res = srp_inv_rkey(req, ch, (*pfr)->mr->rkey);
                        if (res < 0) {
                                shost_printk(KERN_ERR, target->scsi_host, PFX
                                  "Queueing INV WR for rkey %#x failed (%d)\n",
                                  (*pfr)->mr->rkey, res);
                                queue_work(system_long_wq,
                                           &target->tl_err_work);
                        }
                }
                if (req->nmdesc)
                        srp_fr_pool_put(ch->fr_pool, req->fr_list,
                                        req->nmdesc);
        } else if (dev->use_fmr) {
                struct ib_pool_fmr **pfmr;

                for (i = req->nmdesc, pfmr = req->fmr_list; i > 0; i--, pfmr++)
                        ib_fmr_pool_unmap(*pfmr);
        }

        ib_dma_unmap_sg(ibdev, scsi_sglist(scmnd), scsi_sg_count(scmnd),
                        scmnd->sc_data_direction);
}

/**
 * srp_claim_req - Take ownership of the scmnd associated with a request.
 * @ch: SRP RDMA channel.
 * @req: SRP request.
 * @sdev: If not NULL, only take ownership for this SCSI device.
 * @scmnd: If NULL, take ownership of @req->scmnd. If not NULL, only take
 *         ownership of @req->scmnd if it equals @scmnd.
 *
 * Return value:
 * Either NULL or a pointer to the SCSI command the caller became owner of.
 */
static struct scsi_cmnd *srp_claim_req(struct srp_rdma_ch *ch,
                                       struct srp_request *req,
                                       struct scsi_device *sdev,
                                       struct scsi_cmnd *scmnd)
{
        unsigned long flags;

        spin_lock_irqsave(&ch->lock, flags);
        if (req->scmnd &&
            (!sdev || req->scmnd->device == sdev) &&
            (!scmnd || req->scmnd == scmnd)) {
                scmnd = req->scmnd;
                req->scmnd = NULL;
        } else {
                scmnd = NULL;
        }
        spin_unlock_irqrestore(&ch->lock, flags);

        return scmnd;
}

/**
 * srp_free_req() - Unmap data and adjust ch->req_lim.
 * @ch:     SRP RDMA channel.
 * @req:    Request to be freed.
 * @scmnd:  SCSI command associated with @req.
 * @req_lim_delta: Amount to be added to @target->req_lim.
 */
static void srp_free_req(struct srp_rdma_ch *ch, struct srp_request *req,
                         struct scsi_cmnd *scmnd, s32 req_lim_delta)
{
        unsigned long flags;

        srp_unmap_data(scmnd, ch, req);

        spin_lock_irqsave(&ch->lock, flags);
        ch->req_lim += req_lim_delta;
        spin_unlock_irqrestore(&ch->lock, flags);
}

static void srp_finish_req(struct srp_rdma_ch *ch, struct srp_request *req,
                           struct scsi_device *sdev, int result)
{
        struct scsi_cmnd *scmnd = srp_claim_req(ch, req, sdev, NULL);

        if (scmnd) {
                srp_free_req(ch, req, scmnd, 0);
                scmnd->result = result;
                scmnd->scsi_done(scmnd);
        }
}

static void srp_terminate_io(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;
        struct srp_rdma_ch *ch;
        struct Scsi_Host *shost = target->scsi_host;
        struct scsi_device *sdev;
        int i, j;

        /*
         * Invoking srp_terminate_io() while srp_queuecommand() is running
         * is not safe. Hence the warning statement below.
         */
        shost_for_each_device(sdev, shost)
                WARN_ON_ONCE(sdev->request_queue->request_fn_active);

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];

                for (j = 0; j < target->req_ring_size; ++j) {
                        struct srp_request *req = &ch->req_ring[j];

                        srp_finish_req(ch, req, NULL,
                                       DID_TRANSPORT_FAILFAST << 16);
                }
        }
}

/*
 * It is up to the caller to ensure that srp_rport_reconnect() calls are
 * serialized and that no concurrent srp_queuecommand(), srp_abort(),
 * srp_reset_device() or srp_reset_host() calls will occur while this function
 * is in progress. One way to realize that is not to call this function
 * directly but to call srp_reconnect_rport() instead since that last function
 * serializes calls of this function via rport->mutex and also blocks
 * srp_queuecommand() calls before invoking this function.
 */
static int srp_rport_reconnect(struct srp_rport *rport)
{
        struct srp_target_port *target = rport->lld_data;
        struct srp_rdma_ch *ch;
        int i, j, ret = 0;
        bool multich = false;

        srp_disconnect_target(target);

        if (target->state == SRP_TARGET_SCANNING)
                return -ENODEV;

        /*
         * Now get a new local CM ID so that we avoid confusing the target in
         * case things are really fouled up. Doing so also ensures that all CM
         * callbacks will have finished before a new QP is allocated.
         */
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                ret += srp_new_cm_id(ch);
        }
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                for (j = 0; j < target->req_ring_size; ++j) {
                        struct srp_request *req = &ch->req_ring[j];

                        srp_finish_req(ch, req, NULL, DID_RESET << 16);
                }
        }
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                /*
                 * Whether or not creating a new CM ID succeeded, create a new
                 * QP. This guarantees that all completion callback function
                 * invocations have finished before request resetting starts.
                 */
                ret += srp_create_ch_ib(ch);

                INIT_LIST_HEAD(&ch->free_tx);
                for (j = 0; j < target->queue_size; ++j)
                        list_add(&ch->tx_ring[j]->list, &ch->free_tx);
        }

        target->qp_in_error = false;

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                if (ret)
                        break;
                ret = srp_connect_ch(ch, multich);
                multich = true;
        }

        if (ret == 0)
                shost_printk(KERN_INFO, target->scsi_host,
                             PFX "reconnect succeeded\n");

        return ret;
}

static void srp_map_desc(struct srp_map_state *state, dma_addr_t dma_addr,
                         unsigned int dma_len, u32 rkey)
{
        struct srp_direct_buf *desc = state->desc;

        WARN_ON_ONCE(!dma_len);

        desc->va = cpu_to_be64(dma_addr);
        desc->key = cpu_to_be32(rkey);
        desc->len = cpu_to_be32(dma_len);

        state->total_len += dma_len;
        state->desc++;
        state->ndesc++;
}
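
/*
 * Wire format note (added for clarity): srp_map_desc() above fills in an
 * SRP direct data buffer descriptor, which carries a 64-bit virtual address,
 * a 32-bit rkey and a 32-bit length, all in big-endian byte order (hence the
 * cpu_to_be*() conversions).
 */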
1440
1441 static int srp_map_finish_fmr(struct srp_map_state *state,
1442                               struct srp_rdma_ch *ch)
1443 {
1444         struct srp_target_port *target = ch->target;
1445         struct srp_device *dev = target->srp_host->srp_dev;
1446         struct ib_pool_fmr *fmr;
1447         u64 io_addr = 0;
1448
1449         if (state->fmr.next >= state->fmr.end) {
1450                 shost_printk(KERN_ERR, ch->target->scsi_host,
1451                              PFX "Out of MRs (mr_per_cmd = %d)\n",
1452                              ch->target->mr_per_cmd);
1453                 return -ENOMEM;
1454         }
1455
1456         WARN_ON_ONCE(!dev->use_fmr);
1457
1458         if (state->npages == 0)
1459                 return 0;
1460
1461         if (state->npages == 1 && target->global_rkey) {
1462                 srp_map_desc(state, state->base_dma_addr, state->dma_len,
1463                              target->global_rkey);
1464                 goto reset_state;
1465         }
1466
1467         fmr = ib_fmr_pool_map_phys(ch->fmr_pool, state->pages,
1468                                    state->npages, io_addr);
1469         if (IS_ERR(fmr))
1470                 return PTR_ERR(fmr);
1471
1472         *state->fmr.next++ = fmr;
1473         state->nmdesc++;
1474
1475         srp_map_desc(state, state->base_dma_addr & ~dev->mr_page_mask,
1476                      state->dma_len, fmr->fmr->rkey);
1477
1478 reset_state:
1479         state->npages = 0;
1480         state->dma_len = 0;
1481
1482         return 0;
1483 }
1484
1485 static void srp_reg_mr_err_done(struct ib_cq *cq, struct ib_wc *wc)
1486 {
1487         srp_handle_qp_err(cq, wc, "FAST REG");
1488 }
1489
1490 /*
1491  * Map up to sg_nents elements of state->sg, where *sg_offset_p is the offset
1492  * at which to start in the first element. If sg_offset_p != NULL then
1493  * *sg_offset_p is updated to the offset in state->sg[retval] of the first
1494  * byte that has not yet been mapped.
1495  */
1496 static int srp_map_finish_fr(struct srp_map_state *state,
1497                              struct srp_request *req,
1498                              struct srp_rdma_ch *ch, int sg_nents,
1499                              unsigned int *sg_offset_p)
1500 {
1501         struct srp_target_port *target = ch->target;
1502         struct srp_device *dev = target->srp_host->srp_dev;
1503         struct ib_reg_wr wr;
1504         struct srp_fr_desc *desc;
1505         u32 rkey;
1506         int n, err;
1507
1508         if (state->fr.next >= state->fr.end) {
1509                 shost_printk(KERN_ERR, ch->target->scsi_host,
1510                              PFX "Out of MRs (mr_per_cmd = %d)\n",
1511                              ch->target->mr_per_cmd);
1512                 return -ENOMEM;
1513         }
1514
1515         WARN_ON_ONCE(!dev->use_fast_reg);
1516
1517         if (sg_nents == 1 && target->global_rkey) {
1518                 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
1519
1520                 srp_map_desc(state, sg_dma_address(state->sg) + sg_offset,
1521                              sg_dma_len(state->sg) - sg_offset,
1522                              target->global_rkey);
1523                 if (sg_offset_p)
1524                         *sg_offset_p = 0;
1525                 return 1;
1526         }
1527
1528         desc = srp_fr_pool_get(ch->fr_pool);
1529         if (!desc)
1530                 return -ENOMEM;
1531
1532         rkey = ib_inc_rkey(desc->mr->rkey);
1533         ib_update_fast_reg_key(desc->mr, rkey);
1534
1535         n = ib_map_mr_sg(desc->mr, state->sg, sg_nents, sg_offset_p,
1536                          dev->mr_page_size);
1537         if (unlikely(n < 0)) {
1538                 srp_fr_pool_put(ch->fr_pool, &desc, 1);
1539                 pr_debug("%s: ib_map_mr_sg(%d, %d) returned %d.\n",
1540                          dev_name(&req->scmnd->device->sdev_gendev), sg_nents,
1541                          sg_offset_p ? *sg_offset_p : -1, n);
1542                 return n;
1543         }
1544
1545         WARN_ON_ONCE(desc->mr->length == 0);
1546
1547         req->reg_cqe.done = srp_reg_mr_err_done;
1548
1549         wr.wr.next = NULL;
1550         wr.wr.opcode = IB_WR_REG_MR;
1551         wr.wr.wr_cqe = &req->reg_cqe;
1552         wr.wr.num_sge = 0;
1553         wr.wr.send_flags = 0;
1554         wr.mr = desc->mr;
1555         wr.key = desc->mr->rkey;
1556         wr.access = (IB_ACCESS_LOCAL_WRITE |
1557                      IB_ACCESS_REMOTE_READ |
1558                      IB_ACCESS_REMOTE_WRITE);
1559
1560         *state->fr.next++ = desc;
1561         state->nmdesc++;
1562
1563         srp_map_desc(state, desc->mr->iova,
1564                      desc->mr->length, desc->mr->rkey);
1565
1566         err = ib_post_send(ch->qp, &wr.wr, NULL);
1567         if (unlikely(err)) {
1568                 WARN_ON_ONCE(err == -ENOMEM);
1569                 return err;
1570         }
1571
1572         return n;
1573 }
1574
1575 static int srp_map_sg_entry(struct srp_map_state *state,
1576                             struct srp_rdma_ch *ch,
1577                             struct scatterlist *sg)
1578 {
1579         struct srp_target_port *target = ch->target;
1580         struct srp_device *dev = target->srp_host->srp_dev;
1581         struct ib_device *ibdev = dev->dev;
1582         dma_addr_t dma_addr = ib_sg_dma_address(ibdev, sg);
1583         unsigned int dma_len = ib_sg_dma_len(ibdev, sg);
1584         unsigned int len = 0;
1585         int ret;
1586
1587         WARN_ON_ONCE(!dma_len);
1588
1589         while (dma_len) {
1590                 unsigned offset = dma_addr & ~dev->mr_page_mask;
1591
1592                 if (state->npages == dev->max_pages_per_mr ||
1593                     (state->npages > 0 && offset != 0)) {
1594                         ret = srp_map_finish_fmr(state, ch);
1595                         if (ret)
1596                                 return ret;
1597                 }
1598
1599                 len = min_t(unsigned int, dma_len, dev->mr_page_size - offset);
1600
1601                 if (!state->npages)
1602                         state->base_dma_addr = dma_addr;
1603                 state->pages[state->npages++] = dma_addr & dev->mr_page_mask;
1604                 state->dma_len += len;
1605                 dma_addr += len;
1606                 dma_len -= len;
1607         }
1608
1609         /*
1610          * If the end of the MR is not on a page boundary then we need to
1611          * close it out and start a new one -- we can only merge at page
1612          * boundaries.
1613          */
1614         ret = 0;
1615         if ((dma_addr & ~dev->mr_page_mask) != 0)
1616                 ret = srp_map_finish_fmr(state, ch);
1617         return ret;
1618 }
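
/*
 * Worked example, assuming mr_page_size == 4096: an S/G element with DMA
 * address 0x3100 and length 0x2100 is recorded as the pages 0x3000,
 * 0x4000 and 0x5000 with base_dma_addr == 0x3100. Since the element ends
 * at 0x5200, which is not a page boundary, the mapping is closed out by
 * the final srp_map_finish_fmr() call above.
 */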
1619
1620 static int srp_map_sg_fmr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1621                           struct srp_request *req, struct scatterlist *scat,
1622                           int count)
1623 {
1624         struct scatterlist *sg;
1625         int i, ret;
1626
1627         state->pages = req->map_page;
1628         state->fmr.next = req->fmr_list;
1629         state->fmr.end = req->fmr_list + ch->target->mr_per_cmd;
1630
1631         for_each_sg(scat, sg, count, i) {
1632                 ret = srp_map_sg_entry(state, ch, sg);
1633                 if (ret)
1634                         return ret;
1635         }
1636
1637         ret = srp_map_finish_fmr(state, ch);
1638         if (ret)
1639                 return ret;
1640
1641         return 0;
1642 }
1643
1644 static int srp_map_sg_fr(struct srp_map_state *state, struct srp_rdma_ch *ch,
1645                          struct srp_request *req, struct scatterlist *scat,
1646                          int count)
1647 {
1648         unsigned int sg_offset = 0;
1649
1650         state->fr.next = req->fr_list;
1651         state->fr.end = req->fr_list + ch->target->mr_per_cmd;
1652         state->sg = scat;
1653
1654         if (count == 0)
1655                 return 0;
1656
1657         while (count) {
1658                 int i, n;
1659
1660                 n = srp_map_finish_fr(state, req, ch, count, &sg_offset);
1661                 if (unlikely(n < 0))
1662                         return n;
1663
1664                 count -= n;
1665                 for (i = 0; i < n; i++)
1666                         state->sg = sg_next(state->sg);
1667         }
1668
1669         return 0;
1670 }
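
/*
 * For example, if count == 5 S/G elements are passed in and one memory
 * region can only cover the first three of them, srp_map_finish_fr()
 * returns n == 3; the loop above then advances state->sg by three
 * elements and retries with count == 2, issuing one registration per
 * consumed MR.
 */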
1671
1672 static int srp_map_sg_dma(struct srp_map_state *state, struct srp_rdma_ch *ch,
1673                           struct srp_request *req, struct scatterlist *scat,
1674                           int count)
1675 {
1676         struct srp_target_port *target = ch->target;
1677         struct srp_device *dev = target->srp_host->srp_dev;
1678         struct scatterlist *sg;
1679         int i;
1680
1681         for_each_sg(scat, sg, count, i) {
1682                 srp_map_desc(state, ib_sg_dma_address(dev->dev, sg),
1683                              ib_sg_dma_len(dev->dev, sg),
1684                              target->global_rkey);
1685         }
1686
1687         return 0;
1688 }
1689
1690 /*
1691  * Register the indirect data buffer descriptor with the HCA.
1692  *
1693  * Note: since the indirect data buffer descriptor has been allocated with
1694  * kmalloc() it is guaranteed that this buffer is a physically contiguous
1695  * memory buffer.
1696  */
1697 static int srp_map_idb(struct srp_rdma_ch *ch, struct srp_request *req,
1698                        void **next_mr, void **end_mr, u32 idb_len,
1699                        __be32 *idb_rkey)
1700 {
1701         struct srp_target_port *target = ch->target;
1702         struct srp_device *dev = target->srp_host->srp_dev;
1703         struct srp_map_state state;
1704         struct srp_direct_buf idb_desc;
1705         u64 idb_pages[1];
1706         struct scatterlist idb_sg[1];
1707         int ret;
1708
1709         memset(&state, 0, sizeof(state));
1710         memset(&idb_desc, 0, sizeof(idb_desc));
1711         state.gen.next = next_mr;
1712         state.gen.end = end_mr;
1713         state.desc = &idb_desc;
1714         state.base_dma_addr = req->indirect_dma_addr;
1715         state.dma_len = idb_len;
1716
1717         if (dev->use_fast_reg) {
1718                 state.sg = idb_sg;
1719                 sg_init_one(idb_sg, req->indirect_desc, idb_len);
1720                 idb_sg->dma_address = req->indirect_dma_addr; /* hack! */
1721 #ifdef CONFIG_NEED_SG_DMA_LENGTH
1722                 idb_sg->dma_length = idb_sg->length;          /* hack^2 */
1723 #endif
1724                 ret = srp_map_finish_fr(&state, req, ch, 1, NULL);
1725                 if (ret < 0)
1726                         return ret;
1727                 WARN_ON_ONCE(ret < 1);
1728         } else if (dev->use_fmr) {
1729                 state.pages = idb_pages;
1730                 state.pages[0] = (req->indirect_dma_addr &
1731                                   dev->mr_page_mask);
1732                 state.npages = 1;
1733                 ret = srp_map_finish_fmr(&state, ch);
1734                 if (ret < 0)
1735                         return ret;
1736         } else {
1737                 return -EINVAL;
1738         }
1739
1740         *idb_rkey = idb_desc.key;
1741
1742         return 0;
1743 }
1744
1745 static void srp_check_mapping(struct srp_map_state *state,
1746                               struct srp_rdma_ch *ch, struct srp_request *req,
1747                               struct scatterlist *scat, int count)
1748 {
1749         struct srp_device *dev = ch->target->srp_host->srp_dev;
1750         struct srp_fr_desc **pfr;
1751         u64 desc_len = 0, mr_len = 0;
1752         int i;
1753
1754         for (i = 0; i < state->ndesc; i++)
1755                 desc_len += be32_to_cpu(req->indirect_desc[i].len);
1756         if (dev->use_fast_reg)
1757                 for (i = 0, pfr = req->fr_list; i < state->nmdesc; i++, pfr++)
1758                         mr_len += (*pfr)->mr->length;
1759         else if (dev->use_fmr)
1760                 for (i = 0; i < state->nmdesc; i++)
1761                         mr_len += be32_to_cpu(req->indirect_desc[i].len);
1762         if (desc_len != scsi_bufflen(req->scmnd) ||
1763             mr_len > scsi_bufflen(req->scmnd))
1764                 pr_err("Inconsistent: scsi len %d <> desc len %lld <> mr len %lld; ndesc %d; nmdesc = %d\n",
1765                        scsi_bufflen(req->scmnd), desc_len, mr_len,
1766                        state->ndesc, state->nmdesc);
1767 }
1768
1769 /**
1770  * srp_map_data() - map SCSI data buffer onto an SRP request
1771  * @scmnd: SCSI command to map
1772  * @ch: SRP RDMA channel
1773  * @req: SRP request
1774  *
1775  * Returns the length in bytes of the SRP_CMD IU or a negative value if
1776  * mapping failed.
1777  */
1778 static int srp_map_data(struct scsi_cmnd *scmnd, struct srp_rdma_ch *ch,
1779                         struct srp_request *req)
1780 {
1781         struct srp_target_port *target = ch->target;
1782         struct scatterlist *scat;
1783         struct srp_cmd *cmd = req->cmd->buf;
1784         int len, nents, count, ret;
1785         struct srp_device *dev;
1786         struct ib_device *ibdev;
1787         struct srp_map_state state;
1788         struct srp_indirect_buf *indirect_hdr;
1789         u32 idb_len, table_len;
1790         __be32 idb_rkey;
1791         u8 fmt;
1792
1793         if (!scsi_sglist(scmnd) || scmnd->sc_data_direction == DMA_NONE)
1794                 return sizeof (struct srp_cmd);
1795
1796         if (scmnd->sc_data_direction != DMA_FROM_DEVICE &&
1797             scmnd->sc_data_direction != DMA_TO_DEVICE) {
1798                 shost_printk(KERN_WARNING, target->scsi_host,
1799                              PFX "Unhandled data direction %d\n",
1800                              scmnd->sc_data_direction);
1801                 return -EINVAL;
1802         }
1803
1804         nents = scsi_sg_count(scmnd);
1805         scat  = scsi_sglist(scmnd);
1806
1807         dev = target->srp_host->srp_dev;
1808         ibdev = dev->dev;
1809
1810         count = ib_dma_map_sg(ibdev, scat, nents, scmnd->sc_data_direction);
1811         if (unlikely(count == 0))
1812                 return -EIO;
1813
1814         fmt = SRP_DATA_DESC_DIRECT;
1815         len = sizeof (struct srp_cmd) + sizeof (struct srp_direct_buf);
1816
1817         if (count == 1 && target->global_rkey) {
1818                 /*
1819                  * The midlayer only generated a single gather/scatter
1820                  * entry, or DMA mapping coalesced everything to a
1821                  * single entry.  So a direct descriptor along with
1822                  * the DMA MR suffices.
1823                  */
1824                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1825
1826                 buf->va  = cpu_to_be64(ib_sg_dma_address(ibdev, scat));
1827                 buf->key = cpu_to_be32(target->global_rkey);
1828                 buf->len = cpu_to_be32(ib_sg_dma_len(ibdev, scat));
1829
1830                 req->nmdesc = 0;
1831                 goto map_complete;
1832         }
1833
1834         /*
1835          * We have more than one scatter/gather entry, so build our indirect
1836          * descriptor table, trying to merge as many entries as we can.
1837          */
1838         indirect_hdr = (void *) cmd->add_data;
1839
1840         ib_dma_sync_single_for_cpu(ibdev, req->indirect_dma_addr,
1841                                    target->indirect_size, DMA_TO_DEVICE);
1842
1843         memset(&state, 0, sizeof(state));
1844         state.desc = req->indirect_desc;
1845         if (dev->use_fast_reg)
1846                 ret = srp_map_sg_fr(&state, ch, req, scat, count);
1847         else if (dev->use_fmr)
1848                 ret = srp_map_sg_fmr(&state, ch, req, scat, count);
1849         else
1850                 ret = srp_map_sg_dma(&state, ch, req, scat, count);
1851         req->nmdesc = state.nmdesc;
1852         if (ret < 0)
1853                 goto unmap;
1854
1855         {
1856                 DEFINE_DYNAMIC_DEBUG_METADATA(ddm,
1857                         "Memory mapping consistency check");
1858                 if (DYNAMIC_DEBUG_BRANCH(ddm))
1859                         srp_check_mapping(&state, ch, req, scat, count);
1860         }
1861
1862         /* We've mapped the request; now pull as much of the indirect
1863          * descriptor table as we can into the command buffer. If this
1864          * target is not using an external indirect table, we are
1865          * guaranteed to fit into the command, as the SCSI layer won't
1866          * give us more S/G entries than we allow.
1867          */
1868         if (state.ndesc == 1) {
1869                 /*
1870                  * Memory registration collapsed the sg-list into one entry,
1871                  * so use a direct descriptor.
1872                  */
1873                 struct srp_direct_buf *buf = (void *) cmd->add_data;
1874
1875                 *buf = req->indirect_desc[0];
1876                 goto map_complete;
1877         }
1878
1879         if (unlikely(target->cmd_sg_cnt < state.ndesc &&
1880                                                 !target->allow_ext_sg)) {
1881                 shost_printk(KERN_ERR, target->scsi_host,
1882                              "Could not fit S/G list into SRP_CMD\n");
1883                 ret = -EIO;
1884                 goto unmap;
1885         }
1886
1887         count = min(state.ndesc, target->cmd_sg_cnt);
1888         table_len = state.ndesc * sizeof (struct srp_direct_buf);
1889         idb_len = sizeof(struct srp_indirect_buf) + table_len;
1890
1891         fmt = SRP_DATA_DESC_INDIRECT;
1892         len = sizeof(struct srp_cmd) + sizeof (struct srp_indirect_buf);
1893         len += count * sizeof (struct srp_direct_buf);
1894
1895         memcpy(indirect_hdr->desc_list, req->indirect_desc,
1896                count * sizeof (struct srp_direct_buf));
1897
1898         if (!target->global_rkey) {
1899                 ret = srp_map_idb(ch, req, state.gen.next, state.gen.end,
1900                                   idb_len, &idb_rkey);
1901                 if (ret < 0)
1902                         goto unmap;
1903                 req->nmdesc++;
1904         } else {
1905                 idb_rkey = cpu_to_be32(target->global_rkey);
1906         }
1907
1908         indirect_hdr->table_desc.va = cpu_to_be64(req->indirect_dma_addr);
1909         indirect_hdr->table_desc.key = idb_rkey;
1910         indirect_hdr->table_desc.len = cpu_to_be32(table_len);
1911         indirect_hdr->len = cpu_to_be32(state.total_len);
1912
1913         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1914                 cmd->data_out_desc_cnt = count;
1915         else
1916                 cmd->data_in_desc_cnt = count;
1917
1918         ib_dma_sync_single_for_device(ibdev, req->indirect_dma_addr, table_len,
1919                                       DMA_TO_DEVICE);
1920
1921 map_complete:
1922         if (scmnd->sc_data_direction == DMA_TO_DEVICE)
1923                 cmd->buf_fmt = fmt << 4;
1924         else
1925                 cmd->buf_fmt = fmt;
1926
1927         return len;
1928
1929 unmap:
1930         srp_unmap_data(scmnd, ch, req);
1931         if (ret == -ENOMEM && req->nmdesc >= target->mr_pool_size)
1932                 ret = -E2BIG;
1933         return ret;
1934 }
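
/*
 * For example, a mapping that produced state.ndesc == 5 descriptors on a
 * target with cmd_sg_cnt >= 5 and no global rkey yields an indirect
 * SRP_CMD IU of sizeof(struct srp_cmd) + sizeof(struct srp_indirect_buf) +
 * 5 * sizeof(struct srp_direct_buf) bytes, with all five descriptors
 * copied into the command and the table itself registered via
 * srp_map_idb().
 */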
1935
1936 /*
1937  * Return an IU, and possibly a credit, to the free pool
1938  */
1939 static void srp_put_tx_iu(struct srp_rdma_ch *ch, struct srp_iu *iu,
1940                           enum srp_iu_type iu_type)
1941 {
1942         unsigned long flags;
1943
1944         spin_lock_irqsave(&ch->lock, flags);
1945         list_add(&iu->list, &ch->free_tx);
1946         if (iu_type != SRP_IU_RSP)
1947                 ++ch->req_lim;
1948         spin_unlock_irqrestore(&ch->lock, flags);
1949 }
1950
1951 /*
1952  * Must be called with ch->lock held to protect req_lim and free_tx.
1953  * If the IU is not sent, it must be returned using srp_put_tx_iu().
1954  *
1955  * Note:
1956  * An upper limit for the number of allocated information units for each
1957  * request type is:
1958  * - SRP_IU_CMD: SRP_CMD_SQ_SIZE, since the SCSI mid-layer never queues
1959  *   more than Scsi_Host.can_queue requests.
1960  * - SRP_IU_TSK_MGMT: SRP_TSK_MGMT_SQ_SIZE.
1961  * - SRP_IU_RSP: 1, since a conforming SRP target never sends more than
1962  *   one unanswered SRP request to an initiator.
1963  */
1964 static struct srp_iu *__srp_get_tx_iu(struct srp_rdma_ch *ch,
1965                                       enum srp_iu_type iu_type)
1966 {
1967         struct srp_target_port *target = ch->target;
1968         s32 rsv = (iu_type == SRP_IU_TSK_MGMT) ? 0 : SRP_TSK_MGMT_SQ_SIZE;
1969         struct srp_iu *iu;
1970
1971         lockdep_assert_held(&ch->lock);
1972
1973         ib_process_cq_direct(ch->send_cq, -1);
1974
1975         if (list_empty(&ch->free_tx))
1976                 return NULL;
1977
1978         /* Initiator responses to target requests do not consume credits */
1979         if (iu_type != SRP_IU_RSP) {
1980                 if (ch->req_lim <= rsv) {
1981                         ++target->zero_req_lim;
1982                         return NULL;
1983                 }
1984
1985                 --ch->req_lim;
1986         }
1987
1988         iu = list_first_entry(&ch->free_tx, struct srp_iu, list);
1989         list_del(&iu->list);
1990         return iu;
1991 }
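
/*
 * Example of the credit reservation above: with ch->req_lim ==
 * SRP_TSK_MGMT_SQ_SIZE + 1, one SRP_IU_CMD allocation succeeds and leaves
 * req_lim == SRP_TSK_MGMT_SQ_SIZE; the next SRP_IU_CMD request then fails
 * the req_lim <= rsv test, while an SRP_IU_TSK_MGMT allocation (rsv == 0)
 * would still succeed.
 */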
1992
1993 /*
1994  * Note: if this function is called from inside ib_drain_sq() then it will
1995  * be called without ch->lock being held. If ib_drain_sq() dequeues a WQE
1996  * with status IB_WC_SUCCESS then that's a bug.
1997  */
1998 static void srp_send_done(struct ib_cq *cq, struct ib_wc *wc)
1999 {
2000         struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2001         struct srp_rdma_ch *ch = cq->cq_context;
2002
2003         if (unlikely(wc->status != IB_WC_SUCCESS)) {
2004                 srp_handle_qp_err(cq, wc, "SEND");
2005                 return;
2006         }
2007
2008         lockdep_assert_held(&ch->lock);
2009
2010         list_add(&iu->list, &ch->free_tx);
2011 }
2012
2013 static int srp_post_send(struct srp_rdma_ch *ch, struct srp_iu *iu, int len)
2014 {
2015         struct srp_target_port *target = ch->target;
2016         struct ib_sge list;
2017         struct ib_send_wr wr;
2018
2019         list.addr   = iu->dma;
2020         list.length = len;
2021         list.lkey   = target->lkey;
2022
2023         iu->cqe.done = srp_send_done;
2024
2025         wr.next       = NULL;
2026         wr.wr_cqe     = &iu->cqe;
2027         wr.sg_list    = &list;
2028         wr.num_sge    = 1;
2029         wr.opcode     = IB_WR_SEND;
2030         wr.send_flags = IB_SEND_SIGNALED;
2031
2032         return ib_post_send(ch->qp, &wr, NULL);
2033 }
2034
2035 static int srp_post_recv(struct srp_rdma_ch *ch, struct srp_iu *iu)
2036 {
2037         struct srp_target_port *target = ch->target;
2038         struct ib_recv_wr wr;
2039         struct ib_sge list;
2040
2041         list.addr   = iu->dma;
2042         list.length = iu->size;
2043         list.lkey   = target->lkey;
2044
2045         iu->cqe.done = srp_recv_done;
2046
2047         wr.next     = NULL;
2048         wr.wr_cqe   = &iu->cqe;
2049         wr.sg_list  = &list;
2050         wr.num_sge  = 1;
2051
2052         return ib_post_recv(ch->qp, &wr, NULL);
2053 }
2054
2055 static void srp_process_rsp(struct srp_rdma_ch *ch, struct srp_rsp *rsp)
2056 {
2057         struct srp_target_port *target = ch->target;
2058         struct srp_request *req;
2059         struct scsi_cmnd *scmnd;
2060         unsigned long flags;
2061
2062         if (unlikely(rsp->tag & SRP_TAG_TSK_MGMT)) {
2063                 spin_lock_irqsave(&ch->lock, flags);
2064                 ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2065                 if (rsp->tag == ch->tsk_mgmt_tag) {
2066                         ch->tsk_mgmt_status = -1;
2067                         if (be32_to_cpu(rsp->resp_data_len) >= 4)
2068                                 ch->tsk_mgmt_status = rsp->data[3];
2069                         complete(&ch->tsk_mgmt_done);
2070                 } else {
2071                         shost_printk(KERN_ERR, target->scsi_host,
2072                                      "Received tsk mgmt response too late for tag %#llx\n",
2073                                      rsp->tag);
2074                 }
2075                 spin_unlock_irqrestore(&ch->lock, flags);
2076         } else {
2077                 scmnd = scsi_host_find_tag(target->scsi_host, rsp->tag);
2078                 if (scmnd && scmnd->host_scribble) {
2079                         req = (void *)scmnd->host_scribble;
2080                         scmnd = srp_claim_req(ch, req, NULL, scmnd);
2081                 } else {
2082                         scmnd = NULL;
2083                 }
2084                 if (!scmnd) {
2085                         shost_printk(KERN_ERR, target->scsi_host,
2086                                      "Null scmnd for RSP w/tag %#016llx received on ch %td / QP %#x\n",
2087                                      rsp->tag, ch - target->ch, ch->qp->qp_num);
2088
2089                         spin_lock_irqsave(&ch->lock, flags);
2090                         ch->req_lim += be32_to_cpu(rsp->req_lim_delta);
2091                         spin_unlock_irqrestore(&ch->lock, flags);
2092
2093                         return;
2094                 }
2095                 scmnd->result = rsp->status;
2096
2097                 if (rsp->flags & SRP_RSP_FLAG_SNSVALID) {
2098                         memcpy(scmnd->sense_buffer, rsp->data +
2099                                be32_to_cpu(rsp->resp_data_len),
2100                                min_t(int, be32_to_cpu(rsp->sense_data_len),
2101                                      SCSI_SENSE_BUFFERSIZE));
2102                 }
2103
2104                 if (unlikely(rsp->flags & SRP_RSP_FLAG_DIUNDER))
2105                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_in_res_cnt));
2106                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DIOVER))
2107                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_in_res_cnt));
2108                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOUNDER))
2109                         scsi_set_resid(scmnd, be32_to_cpu(rsp->data_out_res_cnt));
2110                 else if (unlikely(rsp->flags & SRP_RSP_FLAG_DOOVER))
2111                         scsi_set_resid(scmnd, -be32_to_cpu(rsp->data_out_res_cnt));
2112
2113                 srp_free_req(ch, req, scmnd,
2114                              be32_to_cpu(rsp->req_lim_delta));
2115
2116                 scmnd->host_scribble = NULL;
2117                 scmnd->scsi_done(scmnd);
2118         }
2119 }
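
/*
 * Residual example: a 4096-byte read that is answered with
 * SRP_RSP_FLAG_DIUNDER set and data_in_res_cnt == 1024 transferred only
 * 3072 bytes, so scsi_set_resid() records a residual of 1024 bytes; the
 * *OVER flags report a negative residual to signal overflow instead.
 */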
2120
2121 static int srp_response_common(struct srp_rdma_ch *ch, s32 req_delta,
2122                                void *rsp, int len)
2123 {
2124         struct srp_target_port *target = ch->target;
2125         struct ib_device *dev = target->srp_host->srp_dev->dev;
2126         unsigned long flags;
2127         struct srp_iu *iu;
2128         int err;
2129
2130         spin_lock_irqsave(&ch->lock, flags);
2131         ch->req_lim += req_delta;
2132         iu = __srp_get_tx_iu(ch, SRP_IU_RSP);
2133         spin_unlock_irqrestore(&ch->lock, flags);
2134
2135         if (!iu) {
2136                 shost_printk(KERN_ERR, target->scsi_host, PFX
2137                              "no IU available to send response\n");
2138                 return 1;
2139         }
2140
2141         ib_dma_sync_single_for_cpu(dev, iu->dma, len, DMA_TO_DEVICE);
2142         memcpy(iu->buf, rsp, len);
2143         ib_dma_sync_single_for_device(dev, iu->dma, len, DMA_TO_DEVICE);
2144
2145         err = srp_post_send(ch, iu, len);
2146         if (err) {
2147                 shost_printk(KERN_ERR, target->scsi_host, PFX
2148                              "unable to post response: %d\n", err);
2149                 srp_put_tx_iu(ch, iu, SRP_IU_RSP);
2150         }
2151
2152         return err;
2153 }
2154
2155 static void srp_process_cred_req(struct srp_rdma_ch *ch,
2156                                  struct srp_cred_req *req)
2157 {
2158         struct srp_cred_rsp rsp = {
2159                 .opcode = SRP_CRED_RSP,
2160                 .tag = req->tag,
2161         };
2162         s32 delta = be32_to_cpu(req->req_lim_delta);
2163
2164         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2165                 shost_printk(KERN_ERR, ch->target->scsi_host, PFX
2166                              "problems processing SRP_CRED_REQ\n");
2167 }
2168
2169 static void srp_process_aer_req(struct srp_rdma_ch *ch,
2170                                 struct srp_aer_req *req)
2171 {
2172         struct srp_target_port *target = ch->target;
2173         struct srp_aer_rsp rsp = {
2174                 .opcode = SRP_AER_RSP,
2175                 .tag = req->tag,
2176         };
2177         s32 delta = be32_to_cpu(req->req_lim_delta);
2178
2179         shost_printk(KERN_ERR, target->scsi_host, PFX
2180                      "ignoring AER for LUN %llu\n", scsilun_to_int(&req->lun));
2181
2182         if (srp_response_common(ch, delta, &rsp, sizeof(rsp)))
2183                 shost_printk(KERN_ERR, target->scsi_host, PFX
2184                              "problems processing SRP_AER_REQ\n");
2185 }
2186
2187 static void srp_recv_done(struct ib_cq *cq, struct ib_wc *wc)
2188 {
2189         struct srp_iu *iu = container_of(wc->wr_cqe, struct srp_iu, cqe);
2190         struct srp_rdma_ch *ch = cq->cq_context;
2191         struct srp_target_port *target = ch->target;
2192         struct ib_device *dev = target->srp_host->srp_dev->dev;
2193         int res;
2194         u8 opcode;
2195
2196         if (unlikely(wc->status != IB_WC_SUCCESS)) {
2197                 srp_handle_qp_err(cq, wc, "RECV");
2198                 return;
2199         }
2200
2201         ib_dma_sync_single_for_cpu(dev, iu->dma, ch->max_ti_iu_len,
2202                                    DMA_FROM_DEVICE);
2203
2204         opcode = *(u8 *) iu->buf;
2205
2206         if (0) {
2207                 shost_printk(KERN_ERR, target->scsi_host,
2208                              PFX "recv completion, opcode 0x%02x\n", opcode);
2209                 print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 8, 1,
2210                                iu->buf, wc->byte_len, true);
2211         }
2212
2213         switch (opcode) {
2214         case SRP_RSP:
2215                 srp_process_rsp(ch, iu->buf);
2216                 break;
2217
2218         case SRP_CRED_REQ:
2219                 srp_process_cred_req(ch, iu->buf);
2220                 break;
2221
2222         case SRP_AER_REQ:
2223                 srp_process_aer_req(ch, iu->buf);
2224                 break;
2225
2226         case SRP_T_LOGOUT:
2227                 /* XXX Handle target logout */
2228                 shost_printk(KERN_WARNING, target->scsi_host,
2229                              PFX "Got target logout request\n");
2230                 break;
2231
2232         default:
2233                 shost_printk(KERN_WARNING, target->scsi_host,
2234                              PFX "Unhandled SRP opcode 0x%02x\n", opcode);
2235                 break;
2236         }
2237
2238         ib_dma_sync_single_for_device(dev, iu->dma, ch->max_ti_iu_len,
2239                                       DMA_FROM_DEVICE);
2240
2241         res = srp_post_recv(ch, iu);
2242         if (res != 0)
2243                 shost_printk(KERN_ERR, target->scsi_host,
2244                              PFX "Recv failed with error code %d\n", res);
2245 }
2246
2247 /**
2248  * srp_tl_err_work() - handle a transport layer error
2249  * @work: Work structure embedded in an SRP target port.
2250  *
2251  * Note: This function may get invoked before the rport has been created,
2252  * hence the target->rport test.
2253  */
2254 static void srp_tl_err_work(struct work_struct *work)
2255 {
2256         struct srp_target_port *target;
2257
2258         target = container_of(work, struct srp_target_port, tl_err_work);
2259         if (target->rport)
2260                 srp_start_tl_fail_timers(target->rport);
2261 }
2262
2263 static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
2264                 const char *opname)
2265 {
2266         struct srp_rdma_ch *ch = cq->cq_context;
2267         struct srp_target_port *target = ch->target;
2268
2269         if (ch->connected && !target->qp_in_error) {
2270                 shost_printk(KERN_ERR, target->scsi_host,
2271                              PFX "failed %s status %s (%d) for CQE %p\n",
2272                              opname, ib_wc_status_msg(wc->status), wc->status,
2273                              wc->wr_cqe);
2274                 queue_work(system_long_wq, &target->tl_err_work);
2275         }
2276         target->qp_in_error = true;
2277 }
2278
2279 static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
2280 {
2281         struct srp_target_port *target = host_to_target(shost);
2282         struct srp_rport *rport = target->rport;
2283         struct srp_rdma_ch *ch;
2284         struct srp_request *req;
2285         struct srp_iu *iu;
2286         struct srp_cmd *cmd;
2287         struct ib_device *dev;
2288         unsigned long flags;
2289         u32 tag;
2290         u16 idx;
2291         int len, ret;
2292         const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
2293
2294         /*
2295          * The SCSI EH thread is the only context from which srp_queuecommand()
2296          * can get invoked for blocked devices (SDEV_BLOCK /
2297          * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
2298          * locking the rport mutex if invoked from inside the SCSI EH.
2299          */
2300         if (in_scsi_eh)
2301                 mutex_lock(&rport->mutex);
2302
2303         scmnd->result = srp_chkready(target->rport);
2304         if (unlikely(scmnd->result))
2305                 goto err;
2306
2307         WARN_ON_ONCE(scmnd->request->tag < 0);
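        /*
         * blk_mq_unique_tag() packs the hardware queue index into the
         * upper 16 bits and the per-queue tag into the lower 16 bits, so
         * e.g. a unique tag of 0x0003002a selects ch[3] and
         * req_ring[0x2a] below.
         */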
2308         tag = blk_mq_unique_tag(scmnd->request);
2309         ch = &target->ch[blk_mq_unique_tag_to_hwq(tag)];
2310         idx = blk_mq_unique_tag_to_tag(tag);
2311         WARN_ONCE(idx >= target->req_ring_size, "%s: tag %#x: idx %d >= %d\n",
2312                   dev_name(&shost->shost_gendev), tag, idx,
2313                   target->req_ring_size);
2314
2315         spin_lock_irqsave(&ch->lock, flags);
2316         iu = __srp_get_tx_iu(ch, SRP_IU_CMD);
2317         spin_unlock_irqrestore(&ch->lock, flags);
2318
2319         if (!iu)
2320                 goto err;
2321
2322         req = &ch->req_ring[idx];
2323         dev = target->srp_host->srp_dev->dev;
2324         ib_dma_sync_single_for_cpu(dev, iu->dma, target->max_iu_len,
2325                                    DMA_TO_DEVICE);
2326
2327         scmnd->host_scribble = (void *) req;
2328
2329         cmd = iu->buf;
2330         memset(cmd, 0, sizeof *cmd);
2331
2332         cmd->opcode = SRP_CMD;
2333         int_to_scsilun(scmnd->device->lun, &cmd->lun);
2334         cmd->tag    = tag;
2335         memcpy(cmd->cdb, scmnd->cmnd, scmnd->cmd_len);
2336
2337         req->scmnd    = scmnd;
2338         req->cmd      = iu;
2339
2340         len = srp_map_data(scmnd, ch, req);
2341         if (len < 0) {
2342                 shost_printk(KERN_ERR, target->scsi_host,
2343                              PFX "Failed to map data (%d)\n", len);
2344                 /*
2345                  * If we ran out of memory descriptors (-ENOMEM) because an
2346                  * application is queuing many requests with more than
2347                  * max_pages_per_mr sg-list elements, tell the SCSI mid-layer
2348                  * to reduce queue depth temporarily.
2349                  */
2350                 scmnd->result = len == -ENOMEM ?
2351                         DID_OK << 16 | QUEUE_FULL << 1 : DID_ERROR << 16;
2352                 goto err_iu;
2353         }
2354
2355         ib_dma_sync_single_for_device(dev, iu->dma, target->max_iu_len,
2356                                       DMA_TO_DEVICE);
2357
2358         if (srp_post_send(ch, iu, len)) {
2359                 shost_printk(KERN_ERR, target->scsi_host, PFX "Send failed\n");
2360                 goto err_unmap;
2361         }
2362
2363         ret = 0;
2364
2365 unlock_rport:
2366         if (in_scsi_eh)
2367                 mutex_unlock(&rport->mutex);
2368
2369         return ret;
2370
2371 err_unmap:
2372         srp_unmap_data(scmnd, ch, req);
2373
2374 err_iu:
2375         srp_put_tx_iu(ch, iu, SRP_IU_CMD);
2376
2377         /*
2378          * Ensure that the loops that iterate over the request ring cannot
2379          * encounter a dangling SCSI command pointer.
2380          */
2381         req->scmnd = NULL;
2382
2383 err:
2384         if (scmnd->result) {
2385                 scmnd->scsi_done(scmnd);
2386                 ret = 0;
2387         } else {
2388                 ret = SCSI_MLQUEUE_HOST_BUSY;
2389         }
2390
2391         goto unlock_rport;
2392 }
2393
2394 /*
2395  * Note: the resources allocated in this function are freed in
2396  * srp_free_ch_ib().
2397  */
2398 static int srp_alloc_iu_bufs(struct srp_rdma_ch *ch)
2399 {
2400         struct srp_target_port *target = ch->target;
2401         int i;
2402
2403         ch->rx_ring = kcalloc(target->queue_size, sizeof(*ch->rx_ring),
2404                               GFP_KERNEL);
2405         if (!ch->rx_ring)
2406                 goto err_no_ring;
2407         ch->tx_ring = kcalloc(target->queue_size, sizeof(*ch->tx_ring),
2408                               GFP_KERNEL);
2409         if (!ch->tx_ring)
2410                 goto err_no_ring;
2411
2412         for (i = 0; i < target->queue_size; ++i) {
2413                 ch->rx_ring[i] = srp_alloc_iu(target->srp_host,
2414                                               ch->max_ti_iu_len,
2415                                               GFP_KERNEL, DMA_FROM_DEVICE);
2416                 if (!ch->rx_ring[i])
2417                         goto err;
2418         }
2419
2420         for (i = 0; i < target->queue_size; ++i) {
2421                 ch->tx_ring[i] = srp_alloc_iu(target->srp_host,
2422                                               target->max_iu_len,
2423                                               GFP_KERNEL, DMA_TO_DEVICE);
2424                 if (!ch->tx_ring[i])
2425                         goto err;
2426
2427                 list_add(&ch->tx_ring[i]->list, &ch->free_tx);
2428         }
2429
2430         return 0;
2431
2432 err:
2433         for (i = 0; i < target->queue_size; ++i) {
2434                 srp_free_iu(target->srp_host, ch->rx_ring[i]);
2435                 srp_free_iu(target->srp_host, ch->tx_ring[i]);
2436         }
2437
2438
2439 err_no_ring:
2440         kfree(ch->tx_ring);
2441         ch->tx_ring = NULL;
2442         kfree(ch->rx_ring);
2443         ch->rx_ring = NULL;
2444
2445         return -ENOMEM;
2446 }
2447
2448 static uint32_t srp_compute_rq_tmo(struct ib_qp_attr *qp_attr, int attr_mask)
2449 {
2450         uint64_t T_tr_ns, max_compl_time_ms;
2451         uint32_t rq_tmo_jiffies;
2452
2453         /*
2454          * According to section 11.2.4.2 in the IBTA spec (Modify Queue Pair,
2455          * table 91), both the QP timeout and the retry count have to be set
2456  * for RC QPs during the RTR to RTS transition.
2457          */
2458         WARN_ON_ONCE((attr_mask & (IB_QP_TIMEOUT | IB_QP_RETRY_CNT)) !=
2459                      (IB_QP_TIMEOUT | IB_QP_RETRY_CNT));
2460
2461         /*
2462          * Set target->rq_tmo_jiffies to one second more than the largest time
2463          * it can take before an error completion is generated. See also
2464          * C9-140..142 in the IBTA spec for more information about how to
2465          * convert the QP Local ACK Timeout value to nanoseconds.
2466          */
2467         T_tr_ns = 4096 * (1ULL << qp_attr->timeout);
2468         max_compl_time_ms = qp_attr->retry_cnt * 4 * T_tr_ns;
2469         do_div(max_compl_time_ms, NSEC_PER_MSEC);
2470         rq_tmo_jiffies = msecs_to_jiffies(max_compl_time_ms + 1000);
2471
2472         return rq_tmo_jiffies;
2473 }
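
/*
 * Worked example: for qp_attr->timeout == 14 and qp_attr->retry_cnt == 7,
 * T_tr_ns = 4096 * 2^14 = 67,108,864 ns, so the largest completion time
 * is 7 * 4 * T_tr_ns ~= 1879 ms and the returned timeout is
 * msecs_to_jiffies(1879 + 1000), i.e. about 2.9 seconds.
 */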
2474
2475 static void srp_cm_rep_handler(struct ib_cm_id *cm_id,
2476                                const struct srp_login_rsp *lrsp,
2477                                struct srp_rdma_ch *ch)
2478 {
2479         struct srp_target_port *target = ch->target;
2480         struct ib_qp_attr *qp_attr = NULL;
2481         int attr_mask = 0;
2482         int ret = 0;
2483         int i;
2484
2485         if (lrsp->opcode == SRP_LOGIN_RSP) {
2486                 ch->max_ti_iu_len = be32_to_cpu(lrsp->max_ti_iu_len);
2487                 ch->req_lim       = be32_to_cpu(lrsp->req_lim_delta);
2488
2489                 /*
2490                  * Reserve credits for task management so we don't
2491                  * bounce requests back to the SCSI mid-layer.
2492                  */
2493                 target->scsi_host->can_queue
2494                         = min(ch->req_lim - SRP_TSK_MGMT_SQ_SIZE,
2495                               target->scsi_host->can_queue);
2496                 target->scsi_host->cmd_per_lun
2497                         = min_t(int, target->scsi_host->can_queue,
2498                                 target->scsi_host->cmd_per_lun);
2499         } else {
2500                 shost_printk(KERN_WARNING, target->scsi_host,
2501                              PFX "Unhandled RSP opcode %#x\n", lrsp->opcode);
2502                 ret = -ECONNRESET;
2503                 goto error;
2504         }
2505
2506         if (!ch->rx_ring) {
2507                 ret = srp_alloc_iu_bufs(ch);
2508                 if (ret)
2509                         goto error;
2510         }
2511
2512         for (i = 0; i < target->queue_size; i++) {
2513                 struct srp_iu *iu = ch->rx_ring[i];
2514
2515                 ret = srp_post_recv(ch, iu);
2516                 if (ret)
2517                         goto error;
2518         }
2519
2520         if (!target->using_rdma_cm) {
2521                 ret = -ENOMEM;
2522                 qp_attr = kmalloc(sizeof(*qp_attr), GFP_KERNEL);
2523                 if (!qp_attr)
2524                         goto error;
2525
2526                 qp_attr->qp_state = IB_QPS_RTR;
2527                 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2528                 if (ret)
2529                         goto error_free;
2530
2531                 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2532                 if (ret)
2533                         goto error_free;
2534
2535                 qp_attr->qp_state = IB_QPS_RTS;
2536                 ret = ib_cm_init_qp_attr(cm_id, qp_attr, &attr_mask);
2537                 if (ret)
2538                         goto error_free;
2539
2540                 target->rq_tmo_jiffies = srp_compute_rq_tmo(qp_attr, attr_mask);
2541
2542                 ret = ib_modify_qp(ch->qp, qp_attr, attr_mask);
2543                 if (ret)
2544                         goto error_free;
2545
2546                 ret = ib_send_cm_rtu(cm_id, NULL, 0);
2547         }
2548
2549 error_free:
2550         kfree(qp_attr);
2551
2552 error:
2553         ch->status = ret;
2554 }
2555
2556 static void srp_ib_cm_rej_handler(struct ib_cm_id *cm_id,
2557                                   const struct ib_cm_event *event,
2558                                   struct srp_rdma_ch *ch)
2559 {
2560         struct srp_target_port *target = ch->target;
2561         struct Scsi_Host *shost = target->scsi_host;
2562         struct ib_class_port_info *cpi;
2563         int opcode;
2564         u16 dlid;
2565
2566         switch (event->param.rej_rcvd.reason) {
2567         case IB_CM_REJ_PORT_CM_REDIRECT:
2568                 cpi = event->param.rej_rcvd.ari;
2569                 dlid = be16_to_cpu(cpi->redirect_lid);
2570                 sa_path_set_dlid(&ch->ib_cm.path, dlid);
2571                 ch->ib_cm.path.pkey = cpi->redirect_pkey;
2572                 cm_id->remote_cm_qpn = be32_to_cpu(cpi->redirect_qp) & 0x00ffffff;
2573                 memcpy(ch->ib_cm.path.dgid.raw, cpi->redirect_gid, 16);
2574
2575                 ch->status = dlid ? SRP_DLID_REDIRECT : SRP_PORT_REDIRECT;
2576                 break;
2577
2578         case IB_CM_REJ_PORT_REDIRECT:
2579                 if (srp_target_is_topspin(target)) {
2580                         union ib_gid *dgid = &ch->ib_cm.path.dgid;
2581
2582                         /*
2583                          * Topspin/Cisco SRP gateways incorrectly send
2584                          * reject reason code 25 when they mean 24
2585                          * (port redirect).
2586                          */
2587                         memcpy(dgid->raw, event->param.rej_rcvd.ari, 16);
2588
2589                         shost_printk(KERN_DEBUG, shost,
2590                                      PFX "Topspin/Cisco redirect to target port GID %016llx%016llx\n",
2591                                      be64_to_cpu(dgid->global.subnet_prefix),
2592                                      be64_to_cpu(dgid->global.interface_id));
2593
2594                         ch->status = SRP_PORT_REDIRECT;
2595                 } else {
2596                         shost_printk(KERN_WARNING, shost,
2597                                      "  REJ reason: IB_CM_REJ_PORT_REDIRECT\n");
2598                         ch->status = -ECONNRESET;
2599                 }
2600                 break;
2601
2602         case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2603                 shost_printk(KERN_WARNING, shost,
2604                             "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2605                 ch->status = -ECONNRESET;
2606                 break;
2607
2608         case IB_CM_REJ_CONSUMER_DEFINED:
2609                 opcode = *(u8 *) event->private_data;
2610                 if (opcode == SRP_LOGIN_REJ) {
2611                         struct srp_login_rej *rej = event->private_data;
2612                         u32 reason = be32_to_cpu(rej->reason);
2613
2614                         if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2615                                 shost_printk(KERN_WARNING, shost,
2616                                              PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2617                         else
2618                                 shost_printk(KERN_WARNING, shost, PFX
2619                                              "SRP LOGIN from %pI6 to %pI6 REJECTED, reason 0x%08x\n",
2620                                              target->sgid.raw,
2621                                              target->ib_cm.orig_dgid.raw,
2622                                              reason);
2623                 } else
2624                         shost_printk(KERN_WARNING, shost,
2625                                      "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED,"
2626                                      " opcode 0x%02x\n", opcode);
2627                 ch->status = -ECONNRESET;
2628                 break;
2629
2630         case IB_CM_REJ_STALE_CONN:
2631                 shost_printk(KERN_WARNING, shost, "  REJ reason: stale connection\n");
2632                 ch->status = SRP_STALE_CONN;
2633                 break;
2634
2635         default:
2636                 shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2637                              event->param.rej_rcvd.reason);
2638                 ch->status = -ECONNRESET;
2639         }
2640 }
2641
2642 static int srp_ib_cm_handler(struct ib_cm_id *cm_id,
2643                              const struct ib_cm_event *event)
2644 {
2645         struct srp_rdma_ch *ch = cm_id->context;
2646         struct srp_target_port *target = ch->target;
2647         int comp = 0;
2648
2649         switch (event->event) {
2650         case IB_CM_REQ_ERROR:
2651                 shost_printk(KERN_DEBUG, target->scsi_host,
2652                              PFX "Sending CM REQ failed\n");
2653                 comp = 1;
2654                 ch->status = -ECONNRESET;
2655                 break;
2656
2657         case IB_CM_REP_RECEIVED:
2658                 comp = 1;
2659                 srp_cm_rep_handler(cm_id, event->private_data, ch);
2660                 break;
2661
2662         case IB_CM_REJ_RECEIVED:
2663                 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2664                 comp = 1;
2665
2666                 srp_ib_cm_rej_handler(cm_id, event, ch);
2667                 break;
2668
2669         case IB_CM_DREQ_RECEIVED:
2670                 shost_printk(KERN_WARNING, target->scsi_host,
2671                              PFX "DREQ received - connection closed\n");
2672                 ch->connected = false;
2673                 if (ib_send_cm_drep(cm_id, NULL, 0))
2674                         shost_printk(KERN_ERR, target->scsi_host,
2675                                      PFX "Sending CM DREP failed\n");
2676                 queue_work(system_long_wq, &target->tl_err_work);
2677                 break;
2678
2679         case IB_CM_TIMEWAIT_EXIT:
2680                 shost_printk(KERN_ERR, target->scsi_host,
2681                              PFX "connection closed\n");
2682                 comp = 1;
2683
2684                 ch->status = 0;
2685                 break;
2686
2687         case IB_CM_MRA_RECEIVED:
2688         case IB_CM_DREQ_ERROR:
2689         case IB_CM_DREP_RECEIVED:
2690                 break;
2691
2692         default:
2693                 shost_printk(KERN_WARNING, target->scsi_host,
2694                              PFX "Unhandled CM event %d\n", event->event);
2695                 break;
2696         }
2697
2698         if (comp)
2699                 complete(&ch->done);
2700
2701         return 0;
2702 }
2703
2704 static void srp_rdma_cm_rej_handler(struct srp_rdma_ch *ch,
2705                                     struct rdma_cm_event *event)
2706 {
2707         struct srp_target_port *target = ch->target;
2708         struct Scsi_Host *shost = target->scsi_host;
2709         int opcode;
2710
2711         switch (event->status) {
2712         case IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID:
2713                 shost_printk(KERN_WARNING, shost,
2714                             "  REJ reason: IB_CM_REJ_DUPLICATE_LOCAL_COMM_ID\n");
2715                 ch->status = -ECONNRESET;
2716                 break;
2717
2718         case IB_CM_REJ_CONSUMER_DEFINED:
2719                 opcode = *(u8 *) event->param.conn.private_data;
2720                 if (opcode == SRP_LOGIN_REJ) {
2721                         struct srp_login_rej *rej =
2722                                 (struct srp_login_rej *)
2723                                 event->param.conn.private_data;
2724                         u32 reason = be32_to_cpu(rej->reason);
2725
2726                         if (reason == SRP_LOGIN_REJ_REQ_IT_IU_LENGTH_TOO_LARGE)
2727                                 shost_printk(KERN_WARNING, shost,
2728                                              PFX "SRP_LOGIN_REJ: requested max_it_iu_len too large\n");
2729                         else
2730                                 shost_printk(KERN_WARNING, shost,
2731                                             PFX "SRP LOGIN REJECTED, reason 0x%08x\n", reason);
2732                 } else {
2733                         shost_printk(KERN_WARNING, shost,
2734                                      "  REJ reason: IB_CM_REJ_CONSUMER_DEFINED, opcode 0x%02x\n",
2735                                      opcode);
2736                 }
2737                 ch->status = -ECONNRESET;
2738                 break;
2739
2740         case IB_CM_REJ_STALE_CONN:
2741                 shost_printk(KERN_WARNING, shost,
2742                              "  REJ reason: stale connection\n");
2743                 ch->status = SRP_STALE_CONN;
2744                 break;
2745
2746         default:
2747                 shost_printk(KERN_WARNING, shost, "  REJ reason 0x%x\n",
2748                              event->status);
2749                 ch->status = -ECONNRESET;
2750                 break;
2751         }
2752 }
2753
2754 static int srp_rdma_cm_handler(struct rdma_cm_id *cm_id,
2755                                struct rdma_cm_event *event)
2756 {
2757         struct srp_rdma_ch *ch = cm_id->context;
2758         struct srp_target_port *target = ch->target;
2759         int comp = 0;
2760
2761         switch (event->event) {
2762         case RDMA_CM_EVENT_ADDR_RESOLVED:
2763                 ch->status = 0;
2764                 comp = 1;
2765                 break;
2766
2767         case RDMA_CM_EVENT_ADDR_ERROR:
2768                 ch->status = -ENXIO;
2769                 comp = 1;
2770                 break;
2771
2772         case RDMA_CM_EVENT_ROUTE_RESOLVED:
2773                 ch->status = 0;
2774                 comp = 1;
2775                 break;
2776
2777         case RDMA_CM_EVENT_ROUTE_ERROR:
2778         case RDMA_CM_EVENT_UNREACHABLE:
2779                 ch->status = -EHOSTUNREACH;
2780                 comp = 1;
2781                 break;
2782
2783         case RDMA_CM_EVENT_CONNECT_ERROR:
2784                 shost_printk(KERN_DEBUG, target->scsi_host,
2785                              PFX "Sending CM REQ failed\n");
2786                 comp = 1;
2787                 ch->status = -ECONNRESET;
2788                 break;
2789
2790         case RDMA_CM_EVENT_ESTABLISHED:
2791                 comp = 1;
2792                 srp_cm_rep_handler(NULL, event->param.conn.private_data, ch);
2793                 break;
2794
2795         case RDMA_CM_EVENT_REJECTED:
2796                 shost_printk(KERN_DEBUG, target->scsi_host, PFX "REJ received\n");
2797                 comp = 1;
2798
2799                 srp_rdma_cm_rej_handler(ch, event);
2800                 break;
2801
2802         case RDMA_CM_EVENT_DISCONNECTED:
2803                 if (ch->connected) {
2804                         shost_printk(KERN_WARNING, target->scsi_host,
2805                                      PFX "received DREQ\n");
2806                         rdma_disconnect(ch->rdma_cm.cm_id);
2807                         comp = 1;
2808                         ch->status = 0;
2809                         queue_work(system_long_wq, &target->tl_err_work);
2810                 }
2811                 break;
2812
2813         case RDMA_CM_EVENT_TIMEWAIT_EXIT:
2814                 shost_printk(KERN_ERR, target->scsi_host,
2815                              PFX "connection closed\n");
2816
2817                 comp = 1;
2818                 ch->status = 0;
2819                 break;
2820
2821         default:
2822                 shost_printk(KERN_WARNING, target->scsi_host,
2823                              PFX "Unhandled CM event %d\n", event->event);
2824                 break;
2825         }
2826
2827         if (comp)
2828                 complete(&ch->done);
2829
2830         return 0;
2831 }
2832
2833 /**
2834  * srp_change_queue_depth - set the device queue depth
2835  * @sdev: scsi device struct
2836  * @qdepth: requested queue depth
2837  *
2838  * Returns queue depth.
2839  */
2840 static int
2841 srp_change_queue_depth(struct scsi_device *sdev, int qdepth)
2842 {
2843         if (!sdev->tagged_supported)
2844                 qdepth = 1;
2845         return scsi_change_queue_depth(sdev, qdepth);
2846 }
2847
2848 static int srp_send_tsk_mgmt(struct srp_rdma_ch *ch, u64 req_tag, u64 lun,
2849                              u8 func, u8 *status)
2850 {
2851         struct srp_target_port *target = ch->target;
2852         struct srp_rport *rport = target->rport;
2853         struct ib_device *dev = target->srp_host->srp_dev->dev;
2854         struct srp_iu *iu;
2855         struct srp_tsk_mgmt *tsk_mgmt;
2856         int res;
2857
2858         if (!ch->connected || target->qp_in_error)
2859                 return -1;
2860
2861         /*
2862          * Lock the rport mutex to prevent srp_create_ch_ib() from being
2863          * invoked while a task management function is being sent.
2864          */
2865         mutex_lock(&rport->mutex);
2866         spin_lock_irq(&ch->lock);
2867         iu = __srp_get_tx_iu(ch, SRP_IU_TSK_MGMT);
2868         spin_unlock_irq(&ch->lock);
2869
2870         if (!iu) {
2871                 mutex_unlock(&rport->mutex);
2872
2873                 return -1;
2874         }
2875
2876         ib_dma_sync_single_for_cpu(dev, iu->dma, sizeof *tsk_mgmt,
2877                                    DMA_TO_DEVICE);
2878         tsk_mgmt = iu->buf;
2879         memset(tsk_mgmt, 0, sizeof *tsk_mgmt);
2880
2881         tsk_mgmt->opcode        = SRP_TSK_MGMT;
2882         int_to_scsilun(lun, &tsk_mgmt->lun);
2883         tsk_mgmt->tsk_mgmt_func = func;
2884         tsk_mgmt->task_tag      = req_tag;
2885
        spin_lock_irq(&ch->lock);
        ch->tsk_mgmt_tag = (ch->tsk_mgmt_tag + 1) | SRP_TAG_TSK_MGMT;
        tsk_mgmt->tag = ch->tsk_mgmt_tag;
        spin_unlock_irq(&ch->lock);

        init_completion(&ch->tsk_mgmt_done);

        ib_dma_sync_single_for_device(dev, iu->dma, sizeof *tsk_mgmt,
                                      DMA_TO_DEVICE);
        if (srp_post_send(ch, iu, sizeof(*tsk_mgmt))) {
                srp_put_tx_iu(ch, iu, SRP_IU_TSK_MGMT);
                mutex_unlock(&rport->mutex);

                return -1;
        }
        res = wait_for_completion_timeout(&ch->tsk_mgmt_done,
                                        msecs_to_jiffies(SRP_ABORT_TIMEOUT_MS));
        if (res > 0 && status)
                *status = ch->tsk_mgmt_status;
        mutex_unlock(&rport->mutex);

        WARN_ON_ONCE(res < 0);

        return res > 0 ? 0 : -1;
}

static int srp_abort(struct scsi_cmnd *scmnd)
{
        struct srp_target_port *target = host_to_target(scmnd->device->host);
        struct srp_request *req = (struct srp_request *) scmnd->host_scribble;
        u32 tag;
        u16 ch_idx;
        struct srp_rdma_ch *ch;
        int ret;

        shost_printk(KERN_ERR, target->scsi_host, PFX "SRP abort called\n");

        if (!req)
                return SUCCESS;
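        /*
         * blk_mq_unique_tag() packs the hardware-queue index into the upper
         * 16 bits and the per-queue tag into the lower 16 bits; the queue
         * index recovered below selects the RDMA channel on which the
         * command was submitted.
         */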
        tag = blk_mq_unique_tag(scmnd->request);
        ch_idx = blk_mq_unique_tag_to_hwq(tag);
        if (WARN_ON_ONCE(ch_idx >= target->ch_count))
                return SUCCESS;
        ch = &target->ch[ch_idx];
        if (!srp_claim_req(ch, req, NULL, scmnd))
                return SUCCESS;
        shost_printk(KERN_ERR, target->scsi_host,
                     PFX "Sending SRP abort for tag %#x\n", tag);
        if (srp_send_tsk_mgmt(ch, tag, scmnd->device->lun,
                              SRP_TSK_ABORT_TASK, NULL) == 0)
                ret = SUCCESS;
        else if (target->rport->state == SRP_RPORT_LOST)
                ret = FAST_IO_FAIL;
        else
                ret = FAILED;
        if (ret == SUCCESS) {
                srp_free_req(ch, req, scmnd, 0);
                scmnd->result = DID_ABORT << 16;
                scmnd->scsi_done(scmnd);
        }

        return ret;
}

static int srp_reset_device(struct scsi_cmnd *scmnd)
{
        struct srp_target_port *target = host_to_target(scmnd->device->host);
        struct srp_rdma_ch *ch;
        int i, j;
        u8 status;

        shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_device called\n");

        ch = &target->ch[0];
        if (srp_send_tsk_mgmt(ch, SRP_TAG_NO_REQ, scmnd->device->lun,
                              SRP_TSK_LUN_RESET, &status))
                return FAILED;
        if (status)
                return FAILED;

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                for (j = 0; j < target->req_ring_size; ++j) {
                        struct srp_request *req = &ch->req_ring[j];

                        srp_finish_req(ch, req, scmnd->device, DID_RESET << 16);
                }
        }

        return SUCCESS;
}

static int srp_reset_host(struct scsi_cmnd *scmnd)
{
        struct srp_target_port *target = host_to_target(scmnd->device->host);

        shost_printk(KERN_ERR, target->scsi_host, PFX "SRP reset_host called\n");

        return srp_reconnect_rport(target->rport) == 0 ? SUCCESS : FAILED;
}

static int srp_target_alloc(struct scsi_target *starget)
{
        struct Scsi_Host *shost = dev_to_shost(starget->dev.parent);
        struct srp_target_port *target = host_to_target(shost);

        if (target->target_can_queue)
                starget->can_queue = target->target_can_queue;
        return 0;
}

static int srp_slave_alloc(struct scsi_device *sdev)
{
        struct Scsi_Host *shost = sdev->host;
        struct srp_target_port *target = host_to_target(shost);
        struct srp_device *srp_dev = target->srp_host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;

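        /*
         * If the HCA cannot register SG lists that contain gaps, ask the
         * block layer to split requests on ~mr_page_mask boundaries so that
         * every S/G element after the first starts on an HCA page boundary.
         */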
        if (!(ibdev->attrs.device_cap_flags & IB_DEVICE_SG_GAPS_REG))
                blk_queue_virt_boundary(sdev->request_queue,
                                        ~srp_dev->mr_page_mask);

        return 0;
}

static int srp_slave_configure(struct scsi_device *sdev)
{
        struct Scsi_Host *shost = sdev->host;
        struct srp_target_port *target = host_to_target(shost);
        struct request_queue *q = sdev->request_queue;
        unsigned long timeout;

        if (sdev->type == TYPE_DISK) {
                timeout = max_t(unsigned, 30 * HZ, target->rq_tmo_jiffies);
                blk_queue_rq_timeout(q, timeout);
        }

        return 0;
}

static ssize_t show_id_ext(struct device *dev, struct device_attribute *attr,
                           char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->id_ext));
}

static ssize_t show_ioc_guid(struct device *dev, struct device_attribute *attr,
                             char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "0x%016llx\n", be64_to_cpu(target->ioc_guid));
}

static ssize_t show_service_id(struct device *dev,
                               struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        if (target->using_rdma_cm)
                return -ENOENT;
        return sprintf(buf, "0x%016llx\n",
                       be64_to_cpu(target->ib_cm.service_id));
}

static ssize_t show_pkey(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        if (target->using_rdma_cm)
                return -ENOENT;
        return sprintf(buf, "0x%04x\n", be16_to_cpu(target->ib_cm.pkey));
}

static ssize_t show_sgid(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%pI6\n", target->sgid.raw);
}

static ssize_t show_dgid(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));
        struct srp_rdma_ch *ch = &target->ch[0];

        if (target->using_rdma_cm)
                return -ENOENT;
        return sprintf(buf, "%pI6\n", ch->ib_cm.path.dgid.raw);
}

static ssize_t show_orig_dgid(struct device *dev,
                              struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        if (target->using_rdma_cm)
                return -ENOENT;
        return sprintf(buf, "%pI6\n", target->ib_cm.orig_dgid.raw);
}

static ssize_t show_req_lim(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));
        struct srp_rdma_ch *ch;
        int i, req_lim = INT_MAX;

        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                req_lim = min(req_lim, ch->req_lim);
        }
        return sprintf(buf, "%d\n", req_lim);
}

static ssize_t show_zero_req_lim(struct device *dev,
                                 struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%d\n", target->zero_req_lim);
}

static ssize_t show_local_ib_port(struct device *dev,
                                  struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%d\n", target->srp_host->port);
}

static ssize_t show_local_ib_device(struct device *dev,
                                    struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%s\n", target->srp_host->srp_dev->dev->name);
}

static ssize_t show_ch_count(struct device *dev, struct device_attribute *attr,
                             char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%d\n", target->ch_count);
}

static ssize_t show_comp_vector(struct device *dev,
                                struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%d\n", target->comp_vector);
}

static ssize_t show_tl_retry_count(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%d\n", target->tl_retry_count);
}

static ssize_t show_cmd_sg_entries(struct device *dev,
                                   struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%u\n", target->cmd_sg_cnt);
}

static ssize_t show_allow_ext_sg(struct device *dev,
                                 struct device_attribute *attr, char *buf)
{
        struct srp_target_port *target = host_to_target(class_to_shost(dev));

        return sprintf(buf, "%s\n", target->allow_ext_sg ? "true" : "false");
}

static DEVICE_ATTR(id_ext,          S_IRUGO, show_id_ext,          NULL);
static DEVICE_ATTR(ioc_guid,        S_IRUGO, show_ioc_guid,        NULL);
static DEVICE_ATTR(service_id,      S_IRUGO, show_service_id,      NULL);
static DEVICE_ATTR(pkey,            S_IRUGO, show_pkey,            NULL);
static DEVICE_ATTR(sgid,            S_IRUGO, show_sgid,            NULL);
static DEVICE_ATTR(dgid,            S_IRUGO, show_dgid,            NULL);
static DEVICE_ATTR(orig_dgid,       S_IRUGO, show_orig_dgid,       NULL);
static DEVICE_ATTR(req_lim,         S_IRUGO, show_req_lim,         NULL);
static DEVICE_ATTR(zero_req_lim,    S_IRUGO, show_zero_req_lim,    NULL);
static DEVICE_ATTR(local_ib_port,   S_IRUGO, show_local_ib_port,   NULL);
static DEVICE_ATTR(local_ib_device, S_IRUGO, show_local_ib_device, NULL);
static DEVICE_ATTR(ch_count,        S_IRUGO, show_ch_count,        NULL);
static DEVICE_ATTR(comp_vector,     S_IRUGO, show_comp_vector,     NULL);
static DEVICE_ATTR(tl_retry_count,  S_IRUGO, show_tl_retry_count,  NULL);
static DEVICE_ATTR(cmd_sg_entries,  S_IRUGO, show_cmd_sg_entries,  NULL);
static DEVICE_ATTR(allow_ext_sg,    S_IRUGO, show_allow_ext_sg,    NULL);

static struct device_attribute *srp_host_attrs[] = {
        &dev_attr_id_ext,
        &dev_attr_ioc_guid,
        &dev_attr_service_id,
        &dev_attr_pkey,
        &dev_attr_sgid,
        &dev_attr_dgid,
        &dev_attr_orig_dgid,
        &dev_attr_req_lim,
        &dev_attr_zero_req_lim,
        &dev_attr_local_ib_port,
        &dev_attr_local_ib_device,
        &dev_attr_ch_count,
        &dev_attr_comp_vector,
        &dev_attr_tl_retry_count,
        &dev_attr_cmd_sg_entries,
        &dev_attr_allow_ext_sg,
        NULL
};
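/*
 * The attributes above appear under the SCSI host sysfs directory and can be
 * read directly; e.g. (host number and value are illustrative only):
 *
 *     $ cat /sys/class/scsi_host/host7/id_ext
 *     0x0002c90300a14886
 */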

static struct scsi_host_template srp_template = {
        .module                         = THIS_MODULE,
        .name                           = "InfiniBand SRP initiator",
        .proc_name                      = DRV_NAME,
        .target_alloc                   = srp_target_alloc,
        .slave_alloc                    = srp_slave_alloc,
        .slave_configure                = srp_slave_configure,
        .info                           = srp_target_info,
        .queuecommand                   = srp_queuecommand,
        .change_queue_depth             = srp_change_queue_depth,
        .eh_timed_out                   = srp_timed_out,
        .eh_abort_handler               = srp_abort,
        .eh_device_reset_handler        = srp_reset_device,
        .eh_host_reset_handler          = srp_reset_host,
        .skip_settle_delay              = true,
        .sg_tablesize                   = SRP_DEF_SG_TABLESIZE,
        .can_queue                      = SRP_DEFAULT_CMD_SQ_SIZE,
        .this_id                        = -1,
        .cmd_per_lun                    = SRP_DEFAULT_CMD_SQ_SIZE,
        .use_clustering                 = ENABLE_CLUSTERING,
        .shost_attrs                    = srp_host_attrs,
        .track_queue_depth              = 1,
};
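/*
 * Note on the error handlers wired up above: the SCSI midlayer escalates
 * from srp_abort() through srp_reset_device() to srp_reset_host(), so each
 * handler only has to deal with failures the previous one could not resolve.
 */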

static int srp_sdev_count(struct Scsi_Host *host)
{
        struct scsi_device *sdev;
        int c = 0;

        shost_for_each_device(sdev, host)
                c++;

        return c;
}

/*
 * Return values:
 * < 0 upon failure. Caller is responsible for SRP target port cleanup.
 * 0 and target->state == SRP_TARGET_REMOVED if asynchronous target port
 *    removal has been scheduled.
 * 0 and target->state != SRP_TARGET_REMOVED upon success.
 */
static int srp_add_target(struct srp_host *host, struct srp_target_port *target)
{
        struct srp_rport_identifiers ids;
        struct srp_rport *rport;

        target->state = SRP_TARGET_SCANNING;
        sprintf(target->target_name, "SRP.T10:%016llX",
                be64_to_cpu(target->id_ext));

        if (scsi_add_host(target->scsi_host, host->srp_dev->dev->dev.parent))
                return -ENODEV;

        memcpy(ids.port_id, &target->id_ext, 8);
        memcpy(ids.port_id + 8, &target->ioc_guid, 8);
        ids.roles = SRP_RPORT_ROLE_TARGET;
        rport = srp_rport_add(target->scsi_host, &ids);
        if (IS_ERR(rport)) {
                scsi_remove_host(target->scsi_host);
                return PTR_ERR(rport);
        }

        rport->lld_data = target;
        target->rport = rport;

        spin_lock(&host->target_lock);
        list_add_tail(&target->list, &host->target_list);
        spin_unlock(&host->target_lock);

        scsi_scan_target(&target->scsi_host->shost_gendev,
                         0, target->scsi_id, SCAN_WILD_CARD, SCSI_SCAN_INITIAL);

        if (srp_connected_ch(target) < target->ch_count ||
            target->qp_in_error) {
                shost_printk(KERN_INFO, target->scsi_host,
                             PFX "SCSI scan failed - removing SCSI host\n");
                srp_queue_remove_work(target);
                goto out;
        }

        pr_debug("%s: SCSI scan succeeded - detected %d LUNs\n",
                 dev_name(&target->scsi_host->shost_gendev),
                 srp_sdev_count(target->scsi_host));

        spin_lock_irq(&target->lock);
        if (target->state == SRP_TARGET_SCANNING)
                target->state = SRP_TARGET_LIVE;
        spin_unlock_irq(&target->lock);

out:
        return 0;
}

static void srp_release_dev(struct device *dev)
{
        struct srp_host *host =
                container_of(dev, struct srp_host, dev);

        complete(&host->released);
}

static struct class srp_class = {
        .name    = "infiniband_srp",
        .dev_release = srp_release_dev
};

/**
 * srp_conn_unique() - check whether the connection to a target is unique
 * @host:   SRP host.
 * @target: SRP target port.
 */
static bool srp_conn_unique(struct srp_host *host,
                            struct srp_target_port *target)
{
        struct srp_target_port *t;
        bool ret = false;

        if (target->state == SRP_TARGET_REMOVED)
                goto out;

        ret = true;

        spin_lock(&host->target_lock);
        list_for_each_entry(t, &host->target_list, list) {
                if (t != target &&
                    target->id_ext == t->id_ext &&
                    target->ioc_guid == t->ioc_guid &&
                    target->initiator_ext == t->initiator_ext) {
                        ret = false;
                        break;
                }
        }
        spin_unlock(&host->target_lock);

out:
        return ret;
}

/*
 * Target ports are added by writing
 *
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,dgid=<dest GID>,
 *     pkey=<P_Key>,service_id=<service ID>
 * or
 *     id_ext=<SRP ID ext>,ioc_guid=<SRP IOC GUID>,
 *     [src=<IPv4 address>,]dest=<IPv4 address>:<port number>
 *
 * to the add_target sysfs attribute.
 */
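/*
 * For example (all identifiers below are illustrative rather than taken from
 * real hardware):
 *
 *     echo "id_ext=200500A0B81146A1,ioc_guid=0002c90200402bd4,dgid=fe800000000000000002c90200402bd5,pkey=ffff,service_id=200500a0b81146a1" > /sys/class/infiniband_srp/srp-mlx4_0-1/add_target
 */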
enum {
        SRP_OPT_ERR             = 0,
        SRP_OPT_ID_EXT          = 1 << 0,
        SRP_OPT_IOC_GUID        = 1 << 1,
        SRP_OPT_DGID            = 1 << 2,
        SRP_OPT_PKEY            = 1 << 3,
        SRP_OPT_SERVICE_ID      = 1 << 4,
        SRP_OPT_MAX_SECT        = 1 << 5,
        SRP_OPT_MAX_CMD_PER_LUN = 1 << 6,
        SRP_OPT_IO_CLASS        = 1 << 7,
        SRP_OPT_INITIATOR_EXT   = 1 << 8,
        SRP_OPT_CMD_SG_ENTRIES  = 1 << 9,
        SRP_OPT_ALLOW_EXT_SG    = 1 << 10,
        SRP_OPT_SG_TABLESIZE    = 1 << 11,
        SRP_OPT_COMP_VECTOR     = 1 << 12,
        SRP_OPT_TL_RETRY_COUNT  = 1 << 13,
        SRP_OPT_QUEUE_SIZE      = 1 << 14,
        SRP_OPT_IP_SRC          = 1 << 15,
        SRP_OPT_IP_DEST         = 1 << 16,
        SRP_OPT_TARGET_CAN_QUEUE = 1 << 17,
};

static unsigned int srp_opt_mandatory[] = {
        SRP_OPT_ID_EXT          |
        SRP_OPT_IOC_GUID        |
        SRP_OPT_DGID            |
        SRP_OPT_PKEY            |
        SRP_OPT_SERVICE_ID,
        SRP_OPT_ID_EXT          |
        SRP_OPT_IOC_GUID        |
        SRP_OPT_IP_DEST,
};

static const match_table_t srp_opt_tokens = {
        { SRP_OPT_ID_EXT,               "id_ext=%s"             },
        { SRP_OPT_IOC_GUID,             "ioc_guid=%s"           },
        { SRP_OPT_DGID,                 "dgid=%s"               },
        { SRP_OPT_PKEY,                 "pkey=%x"               },
        { SRP_OPT_SERVICE_ID,           "service_id=%s"         },
        { SRP_OPT_MAX_SECT,             "max_sect=%d"           },
        { SRP_OPT_MAX_CMD_PER_LUN,      "max_cmd_per_lun=%d"    },
        { SRP_OPT_TARGET_CAN_QUEUE,     "target_can_queue=%d"   },
        { SRP_OPT_IO_CLASS,             "io_class=%x"           },
        { SRP_OPT_INITIATOR_EXT,        "initiator_ext=%s"      },
        { SRP_OPT_CMD_SG_ENTRIES,       "cmd_sg_entries=%u"     },
        { SRP_OPT_ALLOW_EXT_SG,         "allow_ext_sg=%u"       },
        { SRP_OPT_SG_TABLESIZE,         "sg_tablesize=%u"       },
        { SRP_OPT_COMP_VECTOR,          "comp_vector=%u"        },
        { SRP_OPT_TL_RETRY_COUNT,       "tl_retry_count=%u"     },
        { SRP_OPT_QUEUE_SIZE,           "queue_size=%d"         },
        { SRP_OPT_IP_SRC,               "src=%s"                },
        { SRP_OPT_IP_DEST,              "dest=%s"               },
        { SRP_OPT_ERR,                  NULL                    }
};

/**
 * srp_parse_in - parse an IP address and port number combination
 * @net: network namespace in which to resolve the address
 * @sa: output address
 * @addr_port_str: IP address and port number combination to parse
 *
 * Parse the following address formats:
 * - IPv4: <ip_address>:<port>, e.g. 1.2.3.4:5.
 * - IPv6: \[<ipv6_address>\]:<port>, e.g. [1::2:3%4]:5.
 */
static int srp_parse_in(struct net *net, struct sockaddr_storage *sa,
                        const char *addr_port_str)
{
        char *addr_end, *addr = kstrdup(addr_port_str, GFP_KERNEL);
        char *port_str;
        int ret;

        if (!addr)
                return -ENOMEM;
        port_str = strrchr(addr, ':');
        if (!port_str) {
                kfree(addr);
                return -EINVAL;
        }
        *port_str++ = '\0';
        ret = inet_pton_with_scope(net, AF_INET, addr, port_str, sa);
        if (ret && addr[0]) {
                addr_end = addr + strlen(addr) - 1;
                if (addr[0] == '[' && *addr_end == ']') {
                        *addr_end = '\0';
                        ret = inet_pton_with_scope(net, AF_INET6, addr + 1,
                                                   port_str, sa);
                }
        }
        kfree(addr);
        pr_debug("%s -> %pISpfsc\n", addr_port_str, sa);
        return ret;
}
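/*
 * Illustrative srp_parse_in() inputs: "192.168.1.2:5555" yields an AF_INET
 * address, "[fe80::1%2]:5555" yields an AF_INET6 address with scope ID 2,
 * and "fe80::1:5555" without brackets is rejected because everything after
 * the last ':' is taken as the port number.
 */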

static int srp_parse_options(struct net *net, const char *buf,
                             struct srp_target_port *target)
{
        char *options, *sep_opt;
        char *p;
        substring_t args[MAX_OPT_ARGS];
        unsigned long long ull;
        int opt_mask = 0;
        int token;
        int ret = -EINVAL;
        int i;

        options = kstrdup(buf, GFP_KERNEL);
        if (!options)
                return -ENOMEM;

        sep_opt = options;
        while ((p = strsep(&sep_opt, ",\n")) != NULL) {
                if (!*p)
                        continue;

                token = match_token(p, srp_opt_tokens, args);
                opt_mask |= token;

                switch (token) {
                case SRP_OPT_ID_EXT:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = kstrtoull(p, 16, &ull);
                        if (ret) {
                                pr_warn("invalid id_ext parameter '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->id_ext = cpu_to_be64(ull);
                        kfree(p);
                        break;

                case SRP_OPT_IOC_GUID:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = kstrtoull(p, 16, &ull);
                        if (ret) {
                                pr_warn("invalid ioc_guid parameter '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->ioc_guid = cpu_to_be64(ull);
                        kfree(p);
                        break;

                case SRP_OPT_DGID:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        if (strlen(p) != 32) {
                                pr_warn("bad dest GID parameter '%s'\n", p);
                                kfree(p);
                                ret = -EINVAL;
                                goto out;
                        }

                        ret = hex2bin(target->ib_cm.orig_dgid.raw, p, 16);
                        kfree(p);
                        if (ret < 0)
                                goto out;
                        break;

                case SRP_OPT_PKEY:
                        if (match_hex(args, &token)) {
                                pr_warn("bad P_Key parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->ib_cm.pkey = cpu_to_be16(token);
                        break;

                case SRP_OPT_SERVICE_ID:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = kstrtoull(p, 16, &ull);
                        if (ret) {
                                pr_warn("bad service_id parameter '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->ib_cm.service_id = cpu_to_be64(ull);
                        kfree(p);
                        break;

                case SRP_OPT_IP_SRC:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = srp_parse_in(net, &target->rdma_cm.src.ss, p);
                        if (ret < 0) {
                                pr_warn("bad source parameter '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->rdma_cm.src_specified = true;
                        kfree(p);
                        break;

                case SRP_OPT_IP_DEST:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = srp_parse_in(net, &target->rdma_cm.dst.ss, p);
                        if (ret < 0) {
                                pr_warn("bad dest parameter '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->using_rdma_cm = true;
                        kfree(p);
                        break;

                case SRP_OPT_MAX_SECT:
                        if (match_int(args, &token)) {
                                pr_warn("bad max sect parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->scsi_host->max_sectors = token;
                        break;

                case SRP_OPT_QUEUE_SIZE:
                        if (match_int(args, &token) || token < 1) {
                                pr_warn("bad queue_size parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->scsi_host->can_queue = token;
                        target->queue_size = token + SRP_RSP_SQ_SIZE +
                                             SRP_TSK_MGMT_SQ_SIZE;
                        if (!(opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
                                target->scsi_host->cmd_per_lun = token;
                        break;

                case SRP_OPT_MAX_CMD_PER_LUN:
                        if (match_int(args, &token) || token < 1) {
                                pr_warn("bad max cmd_per_lun parameter '%s'\n",
                                        p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->scsi_host->cmd_per_lun = token;
                        break;

                case SRP_OPT_TARGET_CAN_QUEUE:
                        if (match_int(args, &token) || token < 1) {
                                pr_warn("bad max target_can_queue parameter '%s'\n",
                                        p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->target_can_queue = token;
                        break;

                case SRP_OPT_IO_CLASS:
                        if (match_hex(args, &token)) {
                                pr_warn("bad IO class parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        if (token != SRP_REV10_IB_IO_CLASS &&
                            token != SRP_REV16A_IB_IO_CLASS) {
                                pr_warn("unknown IO class parameter value %x specified (use %x or %x).\n",
                                        token, SRP_REV10_IB_IO_CLASS,
                                        SRP_REV16A_IB_IO_CLASS);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->io_class = token;
                        break;

                case SRP_OPT_INITIATOR_EXT:
                        p = match_strdup(args);
                        if (!p) {
                                ret = -ENOMEM;
                                goto out;
                        }
                        ret = kstrtoull(p, 16, &ull);
                        if (ret) {
                                pr_warn("bad initiator_ext value '%s'\n", p);
                                kfree(p);
                                goto out;
                        }
                        target->initiator_ext = cpu_to_be64(ull);
                        kfree(p);
                        break;

                case SRP_OPT_CMD_SG_ENTRIES:
                        if (match_int(args, &token) || token < 1 || token > 255) {
                                pr_warn("bad max cmd_sg_entries parameter '%s'\n",
                                        p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->cmd_sg_cnt = token;
                        break;

                case SRP_OPT_ALLOW_EXT_SG:
                        if (match_int(args, &token)) {
                                pr_warn("bad allow_ext_sg parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->allow_ext_sg = !!token;
                        break;

                case SRP_OPT_SG_TABLESIZE:
                        if (match_int(args, &token) || token < 1 ||
                                        token > SG_MAX_SEGMENTS) {
                                pr_warn("bad max sg_tablesize parameter '%s'\n",
                                        p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->sg_tablesize = token;
                        break;

                case SRP_OPT_COMP_VECTOR:
                        if (match_int(args, &token) || token < 0) {
                                pr_warn("bad comp_vector parameter '%s'\n", p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->comp_vector = token;
                        break;

                case SRP_OPT_TL_RETRY_COUNT:
                        if (match_int(args, &token) || token < 2 || token > 7) {
                                pr_warn("bad tl_retry_count parameter '%s' (must be a number between 2 and 7)\n",
                                        p);
                                ret = -EINVAL;
                                goto out;
                        }
                        target->tl_retry_count = token;
                        break;

                default:
                        pr_warn("unknown parameter or missing value '%s' in target creation request\n",
                                p);
                        ret = -EINVAL;
                        goto out;
                }
        }

        ret = -EINVAL;
        for (i = 0; i < ARRAY_SIZE(srp_opt_mandatory); i++) {
                if ((opt_mask & srp_opt_mandatory[i]) == srp_opt_mandatory[i]) {
                        ret = 0;
                        break;
                }
        }
        if (ret)
                pr_warn("target creation request is missing one or more parameters\n");

        if (target->scsi_host->cmd_per_lun > target->scsi_host->can_queue &&
            (opt_mask & SRP_OPT_MAX_CMD_PER_LUN))
                pr_warn("cmd_per_lun = %d > queue_size = %d\n",
                        target->scsi_host->cmd_per_lun,
                        target->scsi_host->can_queue);

out:
        kfree(options);
        return ret;
}

static ssize_t srp_create_target(struct device *dev,
                                 struct device_attribute *attr,
                                 const char *buf, size_t count)
{
        struct srp_host *host =
                container_of(dev, struct srp_host, dev);
        struct Scsi_Host *target_host;
        struct srp_target_port *target;
        struct srp_rdma_ch *ch;
        struct srp_device *srp_dev = host->srp_dev;
        struct ib_device *ibdev = srp_dev->dev;
        int ret, node_idx, node, cpu, i;
        unsigned int max_sectors_per_mr, mr_per_cmd = 0;
        bool multich = false;

        target_host = scsi_host_alloc(&srp_template,
                                      sizeof (struct srp_target_port));
        if (!target_host)
                return -ENOMEM;

        target_host->transportt  = ib_srp_transport_template;
        target_host->max_channel = 0;
        target_host->max_id      = 1;
        target_host->max_lun     = -1LL;
        target_host->max_cmd_len = sizeof ((struct srp_cmd *) (void *) 0L)->cdb;

        target = host_to_target(target_host);

        target->net             = kobj_ns_grab_current(KOBJ_NS_TYPE_NET);
        target->io_class        = SRP_REV16A_IB_IO_CLASS;
        target->scsi_host       = target_host;
        target->srp_host        = host;
        target->lkey            = host->srp_dev->pd->local_dma_lkey;
        target->global_rkey     = host->srp_dev->global_rkey;
        target->cmd_sg_cnt      = cmd_sg_entries;
        target->sg_tablesize    = indirect_sg_entries ? : cmd_sg_entries;
        target->allow_ext_sg    = allow_ext_sg;
        target->tl_retry_count  = 7;
        target->queue_size      = SRP_DEFAULT_QUEUE_SIZE;

        /*
         * Prevent the SCSI host from being removed by srp_remove_target()
         * before this function returns.
         */
        scsi_host_get(target->scsi_host);

        ret = mutex_lock_interruptible(&host->add_target_mutex);
        if (ret < 0)
                goto put;

        ret = srp_parse_options(target->net, buf, target);
        if (ret)
                goto out;

        target->req_ring_size = target->queue_size - SRP_TSK_MGMT_SQ_SIZE;

        if (!srp_conn_unique(target->srp_host, target)) {
                if (target->using_rdma_cm) {
                        shost_printk(KERN_INFO, target->scsi_host,
                                     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;dest=%pIS\n",
                                     be64_to_cpu(target->id_ext),
                                     be64_to_cpu(target->ioc_guid),
                                     &target->rdma_cm.dst);
                } else {
                        shost_printk(KERN_INFO, target->scsi_host,
                                     PFX "Already connected to target port with id_ext=%016llx;ioc_guid=%016llx;initiator_ext=%016llx\n",
                                     be64_to_cpu(target->id_ext),
                                     be64_to_cpu(target->ioc_guid),
                                     be64_to_cpu(target->initiator_ext));
                }
                ret = -EEXIST;
                goto out;
        }

        if (!srp_dev->has_fmr && !srp_dev->has_fr && !target->allow_ext_sg &&
            target->cmd_sg_cnt < target->sg_tablesize) {
                pr_warn("No MR pool and no external indirect descriptors, limiting sg_tablesize to cmd_sg_cnt\n");
                target->sg_tablesize = target->cmd_sg_cnt;
        }

        if (srp_dev->use_fast_reg || srp_dev->use_fmr) {
                bool gaps_reg = (ibdev->attrs.device_cap_flags &
                                 IB_DEVICE_SG_GAPS_REG);

                max_sectors_per_mr = srp_dev->max_pages_per_mr <<
                                  (ilog2(srp_dev->mr_page_size) - 9);
                if (!gaps_reg) {
                        /*
                         * FR and FMR can only map one HCA page per entry. If
                         * the start address is not aligned on a HCA page
                         * boundary two entries will be used for the head and
                         * the tail although these two entries combined
                         * contain at most one HCA page of data. Hence the "+
                         * 1" in the calculation below.
                         *
                         * The indirect data buffer descriptor is contiguous
                         * so the memory for that buffer will only be
                         * registered if register_always is true. Hence add
                         * one to mr_per_cmd if register_always has been set.
                         */
                        mr_per_cmd = register_always +
                                (target->scsi_host->max_sectors + 1 +
                                 max_sectors_per_mr - 1) / max_sectors_per_mr;
                } else {
                        mr_per_cmd = register_always +
                                (target->sg_tablesize +
                                 srp_dev->max_pages_per_mr - 1) /
                                srp_dev->max_pages_per_mr;
                }
                pr_debug("max_sectors = %u; max_pages_per_mr = %u; mr_page_size = %u; max_sectors_per_mr = %u; mr_per_cmd = %u\n",
                         target->scsi_host->max_sectors, srp_dev->max_pages_per_mr, srp_dev->mr_page_size,
                         max_sectors_per_mr, mr_per_cmd);
        }
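        /*
         * Worked example with illustrative numbers: mr_page_size = 4096 and
         * max_pages_per_mr = 256 give max_sectors_per_mr =
         * 256 << (12 - 9) = 2048. With max_sectors = 4096, register_always
         * set and no SG_GAPS support, mr_per_cmd =
         * 1 + (4096 + 1 + 2047) / 2048 = 1 + 3 = 4 MRs per command.
         */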

        target_host->sg_tablesize = target->sg_tablesize;
        target->mr_pool_size = target->scsi_host->can_queue * mr_per_cmd;
        target->mr_per_cmd = mr_per_cmd;
        target->indirect_size = target->sg_tablesize *
                                sizeof (struct srp_direct_buf);
        target->max_iu_len = sizeof (struct srp_cmd) +
                             sizeof (struct srp_indirect_buf) +
                             target->cmd_sg_cnt * sizeof (struct srp_direct_buf);

        INIT_WORK(&target->tl_err_work, srp_tl_err_work);
        INIT_WORK(&target->remove_work, srp_remove_work);
        spin_lock_init(&target->lock);
        ret = rdma_query_gid(ibdev, host->port, 0, &target->sgid);
        if (ret)
                goto out;

        ret = -ENOMEM;
        target->ch_count = max_t(unsigned, num_online_nodes(),
                                 min(ch_count ? :
                                     min(4 * num_online_nodes(),
                                         ibdev->num_comp_vectors),
                                     num_online_cpus()));
        target->ch = kcalloc(target->ch_count, sizeof(*target->ch),
                             GFP_KERNEL);
        if (!target->ch)
                goto out;

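        /*
         * Spread the channels evenly over the online NUMA nodes and, within
         * each node, over that node's share of the completion vectors. With
         * an illustrative topology of 2 nodes, 8 channels and 4 completion
         * vectors, node 0 gets ch[0..3] on vectors 0-1 and node 1 gets
         * ch[4..7] on vectors 2-3.
         */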
        node_idx = 0;
        for_each_online_node(node) {
                const int ch_start = (node_idx * target->ch_count /
                                      num_online_nodes());
                const int ch_end = ((node_idx + 1) * target->ch_count /
                                    num_online_nodes());
                const int cv_start = node_idx * ibdev->num_comp_vectors /
                                     num_online_nodes();
                const int cv_end = (node_idx + 1) * ibdev->num_comp_vectors /
                                   num_online_nodes();
                int cpu_idx = 0;

                for_each_online_cpu(cpu) {
                        if (cpu_to_node(cpu) != node)
                                continue;
                        if (ch_start + cpu_idx >= ch_end)
                                continue;
                        ch = &target->ch[ch_start + cpu_idx];
                        ch->target = target;
                        ch->comp_vector = cv_start == cv_end ? cv_start :
                                cv_start + cpu_idx % (cv_end - cv_start);
                        spin_lock_init(&ch->lock);
                        INIT_LIST_HEAD(&ch->free_tx);
                        ret = srp_new_cm_id(ch);
                        if (ret)
                                goto err_disconnect;

                        ret = srp_create_ch_ib(ch);
                        if (ret)
                                goto err_disconnect;

                        ret = srp_alloc_req_data(ch);
                        if (ret)
                                goto err_disconnect;

                        ret = srp_connect_ch(ch, multich);
                        if (ret) {
                                char dst[64];

                                if (target->using_rdma_cm)
                                        snprintf(dst, sizeof(dst), "%pIS",
                                                 &target->rdma_cm.dst);
                                else
                                        snprintf(dst, sizeof(dst), "%pI6",
                                                 target->ib_cm.orig_dgid.raw);
                                shost_printk(KERN_ERR, target->scsi_host,
                                             PFX "Connection %d/%d to %s failed\n",
                                             ch_start + cpu_idx,
                                             target->ch_count, dst);
                                if (node_idx == 0 && cpu_idx == 0) {
                                        goto free_ch;
                                } else {
                                        srp_free_ch_ib(target, ch);
                                        srp_free_req_data(target, ch);
                                        target->ch_count = ch - target->ch;
                                        goto connected;
                                }
                        }

                        multich = true;
                        cpu_idx++;
                }
                node_idx++;
        }

connected:
        target->scsi_host->nr_hw_queues = target->ch_count;

        ret = srp_add_target(host, target);
        if (ret)
                goto err_disconnect;

        if (target->state != SRP_TARGET_REMOVED) {
                if (target->using_rdma_cm) {
                        shost_printk(KERN_DEBUG, target->scsi_host, PFX
                                     "new target: id_ext %016llx ioc_guid %016llx sgid %pI6 dest %pIS\n",
                                     be64_to_cpu(target->id_ext),
                                     be64_to_cpu(target->ioc_guid),
                                     target->sgid.raw, &target->rdma_cm.dst);
                } else {
                        shost_printk(KERN_DEBUG, target->scsi_host, PFX
                                     "new target: id_ext %016llx ioc_guid %016llx pkey %04x service_id %016llx sgid %pI6 dgid %pI6\n",
                                     be64_to_cpu(target->id_ext),
                                     be64_to_cpu(target->ioc_guid),
                                     be16_to_cpu(target->ib_cm.pkey),
                                     be64_to_cpu(target->ib_cm.service_id),
                                     target->sgid.raw,
                                     target->ib_cm.orig_dgid.raw);
                }
        }

        ret = count;

out:
        mutex_unlock(&host->add_target_mutex);

put:
        scsi_host_put(target->scsi_host);
        if (ret < 0) {
                /*
                 * If a call to srp_remove_target() has not been scheduled,
                 * drop the network namespace reference that was obtained
                 * earlier in this function.
                 */
                if (target->state != SRP_TARGET_REMOVED)
                        kobj_ns_drop(KOBJ_NS_TYPE_NET, target->net);
                scsi_host_put(target->scsi_host);
        }

        return ret;

err_disconnect:
        srp_disconnect_target(target);

free_ch:
        for (i = 0; i < target->ch_count; i++) {
                ch = &target->ch[i];
                srp_free_ch_ib(target, ch);
                srp_free_req_data(target, ch);
        }

        kfree(target->ch);
        goto out;
}

static DEVICE_ATTR(add_target, S_IWUSR, NULL, srp_create_target);

static ssize_t show_ibdev(struct device *dev, struct device_attribute *attr,
                          char *buf)
{
        struct srp_host *host = container_of(dev, struct srp_host, dev);

        return sprintf(buf, "%s\n", host->srp_dev->dev->name);
}

static DEVICE_ATTR(ibdev, S_IRUGO, show_ibdev, NULL);

static ssize_t show_port(struct device *dev, struct device_attribute *attr,
                         char *buf)
{
        struct srp_host *host = container_of(dev, struct srp_host, dev);

        return sprintf(buf, "%d\n", host->port);
}

static DEVICE_ATTR(port, S_IRUGO, show_port, NULL);

static struct srp_host *srp_add_port(struct srp_device *device, u8 port)
{
        struct srp_host *host;

        host = kzalloc(sizeof *host, GFP_KERNEL);
        if (!host)
                return NULL;

        INIT_LIST_HEAD(&host->target_list);
        spin_lock_init(&host->target_lock);
        init_completion(&host->released);
        mutex_init(&host->add_target_mutex);
        host->srp_dev = device;
        host->port = port;

        host->dev.class = &srp_class;
        host->dev.parent = device->dev->dev.parent;
        dev_set_name(&host->dev, "srp-%s-%d", device->dev->name, port);

        if (device_register(&host->dev))
                goto free_host;
        if (device_create_file(&host->dev, &dev_attr_add_target))
                goto err_class;
        if (device_create_file(&host->dev, &dev_attr_ibdev))
                goto err_class;
        if (device_create_file(&host->dev, &dev_attr_port))
                goto err_class;

        return host;

err_class:
        device_unregister(&host->dev);

free_host:
        kfree(host);

        return NULL;
}

static void srp_add_one(struct ib_device *device)
{
        struct srp_device *srp_dev;
        struct ib_device_attr *attr = &device->attrs;
        struct srp_host *host;
        int mr_page_shift, p;
        u64 max_pages_per_mr;
        unsigned int flags = 0;

        srp_dev = kzalloc(sizeof(*srp_dev), GFP_KERNEL);
        if (!srp_dev)
                return;

        /*
         * Use the smallest page size supported by the HCA, down to a
         * minimum of 4096 bytes. We're unlikely to build large sglists
         * out of smaller entries.
         */
        mr_page_shift           = max(12, ffs(attr->page_size_cap) - 1);
        srp_dev->mr_page_size   = 1 << mr_page_shift;
        srp_dev->mr_page_mask   = ~((u64) srp_dev->mr_page_size - 1);
        max_pages_per_mr        = attr->max_mr_size;
        do_div(max_pages_per_mr, srp_dev->mr_page_size);
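        /*
         * Illustrative numbers: page_size_cap = 0xfffff000 yields
         * mr_page_shift = max(12, ffs(0xfffff000) - 1) = 12, i.e. 4 KiB HCA
         * pages, and with max_mr_size = 4 GiB this gives max_pages_per_mr =
         * 2^32 / 2^12 = 1048576 before the SRP_MAX_PAGES_PER_MR clamp below.
         */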
        pr_debug("%s: %llu / %u = %llu <> %u\n", __func__,
                 attr->max_mr_size, srp_dev->mr_page_size,
                 max_pages_per_mr, SRP_MAX_PAGES_PER_MR);
        srp_dev->max_pages_per_mr = min_t(u64, SRP_MAX_PAGES_PER_MR,
                                          max_pages_per_mr);

        srp_dev->has_fmr = (device->alloc_fmr && device->dealloc_fmr &&
                            device->map_phys_fmr && device->unmap_fmr);
        srp_dev->has_fr = (attr->device_cap_flags &
                           IB_DEVICE_MEM_MGT_EXTENSIONS);
        if (!never_register && !srp_dev->has_fmr && !srp_dev->has_fr) {
                dev_warn(&device->dev, "neither FMR nor FR is supported\n");
        } else if (!never_register &&
                   attr->max_mr_size >= 2 * srp_dev->mr_page_size) {
                srp_dev->use_fast_reg = (srp_dev->has_fr &&
                                         (!srp_dev->has_fmr || prefer_fr));
                srp_dev->use_fmr = !srp_dev->use_fast_reg && srp_dev->has_fmr;
        }

        if (never_register || !register_always ||
            (!srp_dev->has_fmr && !srp_dev->has_fr))
                flags |= IB_PD_UNSAFE_GLOBAL_RKEY;

        if (srp_dev->use_fast_reg) {
                srp_dev->max_pages_per_mr =
                        min_t(u32, srp_dev->max_pages_per_mr,
                              attr->max_fast_reg_page_list_len);
        }
        srp_dev->mr_max_size    = srp_dev->mr_page_size *
                                   srp_dev->max_pages_per_mr;
        pr_debug("%s: mr_page_shift = %d, device->max_mr_size = %#llx, device->max_fast_reg_page_list_len = %u, max_pages_per_mr = %d, mr_max_size = %#x\n",
                 device->name, mr_page_shift, attr->max_mr_size,
                 attr->max_fast_reg_page_list_len,
                 srp_dev->max_pages_per_mr, srp_dev->mr_max_size);

        INIT_LIST_HEAD(&srp_dev->dev_list);

        srp_dev->dev = device;
        srp_dev->pd  = ib_alloc_pd(device, flags);
        if (IS_ERR(srp_dev->pd))
                goto free_dev;

        if (flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
                srp_dev->global_rkey = srp_dev->pd->unsafe_global_rkey;
                WARN_ON_ONCE(srp_dev->global_rkey == 0);
        }

        for (p = rdma_start_port(device); p <= rdma_end_port(device); ++p) {
                host = srp_add_port(srp_dev, p);
                if (host)
                        list_add_tail(&host->list, &srp_dev->dev_list);
        }

        ib_set_client_data(device, &srp_client, srp_dev);
        return;

free_dev:
        kfree(srp_dev);
}

static void srp_remove_one(struct ib_device *device, void *client_data)
{
        struct srp_device *srp_dev;
        struct srp_host *host, *tmp_host;
        struct srp_target_port *target;

        srp_dev = client_data;
        if (!srp_dev)
                return;

        list_for_each_entry_safe(host, tmp_host, &srp_dev->dev_list, list) {
                device_unregister(&host->dev);
                /*
                 * Wait for the sysfs entry to go away, so that no new
                 * target ports can be created.
                 */
                wait_for_completion(&host->released);

                /*
                 * Remove all target ports.
                 */
                spin_lock(&host->target_lock);
                list_for_each_entry(target, &host->target_list, list)
                        srp_queue_remove_work(target);
                spin_unlock(&host->target_lock);

                /*
                 * Wait for tl_err and target port removal tasks.
                 */
                flush_workqueue(system_long_wq);
                flush_workqueue(srp_remove_wq);

                kfree(host);
        }

        ib_dealloc_pd(srp_dev->pd);

        kfree(srp_dev);
}

static struct srp_function_template ib_srp_transport_functions = {
        .has_rport_state         = true,
        .reset_timer_if_blocked  = true,
        .reconnect_delay         = &srp_reconnect_delay,
        .fast_io_fail_tmo        = &srp_fast_io_fail_tmo,
        .dev_loss_tmo            = &srp_dev_loss_tmo,
        .reconnect               = srp_rport_reconnect,
        .rport_delete            = srp_rport_delete,
        .terminate_rport_io      = srp_terminate_io,
};

static int __init srp_init_module(void)
{
        int ret;

        if (srp_sg_tablesize) {
                pr_warn("srp_sg_tablesize is deprecated, please use cmd_sg_entries\n");
                if (!cmd_sg_entries)
                        cmd_sg_entries = srp_sg_tablesize;
        }

        if (!cmd_sg_entries)
                cmd_sg_entries = SRP_DEF_SG_TABLESIZE;

        if (cmd_sg_entries > 255) {
                pr_warn("Clamping cmd_sg_entries to 255\n");
                cmd_sg_entries = 255;
        }

        if (!indirect_sg_entries)
                indirect_sg_entries = cmd_sg_entries;
        else if (indirect_sg_entries < cmd_sg_entries) {
                pr_warn("Bumping up indirect_sg_entries to match cmd_sg_entries (%u)\n",
                        cmd_sg_entries);
                indirect_sg_entries = cmd_sg_entries;
        }

        if (indirect_sg_entries > SG_MAX_SEGMENTS) {
                pr_warn("Clamping indirect_sg_entries to %u\n",
                        SG_MAX_SEGMENTS);
                indirect_sg_entries = SG_MAX_SEGMENTS;
        }

        srp_remove_wq = create_workqueue("srp_remove");
        if (!srp_remove_wq) {
                ret = -ENOMEM;
                goto out;
        }

        ret = -ENOMEM;
        ib_srp_transport_template =
                srp_attach_transport(&ib_srp_transport_functions);
        if (!ib_srp_transport_template)
                goto destroy_wq;

        ret = class_register(&srp_class);
        if (ret) {
                pr_err("couldn't register class infiniband_srp\n");
                goto release_tr;
        }

        ib_sa_register_client(&srp_sa_client);

        ret = ib_register_client(&srp_client);
        if (ret) {
                pr_err("couldn't register IB client\n");
                goto unreg_sa;
        }

out:
        return ret;

unreg_sa:
        ib_sa_unregister_client(&srp_sa_client);
        class_unregister(&srp_class);

release_tr:
        srp_release_transport(ib_srp_transport_template);

destroy_wq:
        destroy_workqueue(srp_remove_wq);
        goto out;
}

static void __exit srp_cleanup_module(void)
{
        ib_unregister_client(&srp_client);
        ib_sa_unregister_client(&srp_sa_client);
        class_unregister(&srp_class);
        srp_release_transport(ib_srp_transport_template);
        destroy_workqueue(srp_remove_wq);
}

module_init(srp_init_module);
module_exit(srp_cleanup_module);