drivers/nvme/target/core.c

   1 /*
   2  * Common code for the NVMe target.
   3  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms and conditions of the GNU General Public License,
   7  * version 2, as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12  * more details.
  13  */
  14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  15 #include <linux/module.h>
  16 #include <linux/random.h>
  17 #include <linux/rculist.h>
  18
  19 #include "nvmet.h"
  20
  21 static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
  22 static DEFINE_IDA(cntlid_ida);
  23
  24 /*
  25  * This read/write semaphore is used to synchronize access to configuration
  26  * information on a target system that will result in discovery log page
  27  * information change for at least one host.
 * The full list of resources protected by this semaphore is:
  29  *
  30  *  - subsystems list
  31  *  - per-subsystem allowed hosts list
  32  *  - allow_any_host subsystem attribute
  33  *  - nvmet_genctr
  34  *  - the nvmet_transports array
  35  *
 * When updating any of those lists/structures the write lock should be
 * obtained, while when reading (populating the discovery log page or checking
 * the host-subsystem link) the read lock is obtained to allow concurrent reads.
  39  */
  40 DECLARE_RWSEM(nvmet_config_sem);
  41
  42 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
  43                 const char *subsysnqn);
  44
  45 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
  46                 size_t len)
  47 {
  48         if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  49                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  50         return 0;
  51 }
  52
  53 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
  54 {
  55         if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  56                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  57         return 0;
  58 }
  59
  60 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
  61 {
  62         return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
  63 }
  64
  65 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
  66 {
  67         struct nvmet_req *req;
  68
  69         while (1) {
  70                 mutex_lock(&ctrl->lock);
  71                 if (!ctrl->nr_async_event_cmds) {
  72                         mutex_unlock(&ctrl->lock);
  73                         return;
  74                 }
  75
  76                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
  77                 mutex_unlock(&ctrl->lock);
  78                 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
  79         }
  80 }
  81
  82 static void nvmet_async_event_work(struct work_struct *work)
  83 {
  84         struct nvmet_ctrl *ctrl =
  85                 container_of(work, struct nvmet_ctrl, async_event_work);
  86         struct nvmet_async_event *aen;
  87         struct nvmet_req *req;
  88
  89         while (1) {
  90                 mutex_lock(&ctrl->lock);
  91                 aen = list_first_entry_or_null(&ctrl->async_events,
  92                                 struct nvmet_async_event, entry);
  93                 if (!aen || !ctrl->nr_async_event_cmds) {
  94                         mutex_unlock(&ctrl->lock);
  95                         return;
  96                 }
  97
  98                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
  99                 nvmet_set_result(req, nvmet_async_event_result(aen));
 100
 101                 list_del(&aen->entry);
 102                 kfree(aen);
 103
 104                 mutex_unlock(&ctrl->lock);
 105                 nvmet_req_complete(req, 0);
 106         }
 107 }
 108
 109 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
 110                 u8 event_info, u8 log_page)
 111 {
 112         struct nvmet_async_event *aen;
 113
 114         aen = kmalloc(sizeof(*aen), GFP_KERNEL);
 115         if (!aen)
 116                 return;
 117
 118         aen->event_type = event_type;
 119         aen->event_info = event_info;
 120         aen->log_page = log_page;
 121
 122         mutex_lock(&ctrl->lock);
 123         list_add_tail(&aen->entry, &ctrl->async_events);
 124         mutex_unlock(&ctrl->lock);
 125
 126         schedule_work(&ctrl->async_event_work);
 127 }
 128
 129 int nvmet_register_transport(struct nvmet_fabrics_ops *ops)
 130 {
 131         int ret = 0;
 132
 133         down_write(&nvmet_config_sem);
 134         if (nvmet_transports[ops->type])
 135                 ret = -EINVAL;
 136         else
 137                 nvmet_transports[ops->type] = ops;
 138         up_write(&nvmet_config_sem);
 139
 140         return ret;
 141 }
 142 EXPORT_SYMBOL_GPL(nvmet_register_transport);
 143
 144 void nvmet_unregister_transport(struct nvmet_fabrics_ops *ops)
 145 {
 146         down_write(&nvmet_config_sem);
 147         nvmet_transports[ops->type] = NULL;
 148         up_write(&nvmet_config_sem);
 149 }
 150 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
 151
 152 int nvmet_enable_port(struct nvmet_port *port)
 153 {
 154         struct nvmet_fabrics_ops *ops;
 155         int ret;
 156
 157         lockdep_assert_held(&nvmet_config_sem);
 158
 159         ops = nvmet_transports[port->disc_addr.trtype];
 160         if (!ops) {
 161                 up_write(&nvmet_config_sem);
 162                 request_module("nvmet-transport-%d", port->disc_addr.trtype);
 163                 down_write(&nvmet_config_sem);
 164                 ops = nvmet_transports[port->disc_addr.trtype];
 165                 if (!ops) {
 166                         pr_err("transport type %d not supported\n",
 167                                 port->disc_addr.trtype);
 168                         return -EINVAL;
 169                 }
 170         }
 171
 172         if (!try_module_get(ops->owner))
 173                 return -EINVAL;
 174
 175         ret = ops->add_port(port);
 176         if (ret) {
 177                 module_put(ops->owner);
 178                 return ret;
 179         }
 180
 181         port->enabled = true;
 182         return 0;
 183 }
 184
 185 void nvmet_disable_port(struct nvmet_port *port)
 186 {
 187         struct nvmet_fabrics_ops *ops;
 188
 189         lockdep_assert_held(&nvmet_config_sem);
 190
 191         port->enabled = false;
 192
 193         ops = nvmet_transports[port->disc_addr.trtype];
 194         ops->remove_port(port);
 195         module_put(ops->owner);
 196 }
 197
 198 static void nvmet_keep_alive_timer(struct work_struct *work)
 199 {
 200         struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
 201                         struct nvmet_ctrl, ka_work);
 202
 203         pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
 204                 ctrl->cntlid, ctrl->kato);
 205
 206         nvmet_ctrl_fatal_error(ctrl);
 207 }
 208
 209 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 210 {
 211         pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 212                 ctrl->cntlid, ctrl->kato);
 213
 214         INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 215         schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 216 }
 217
 218 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 219 {
 220         pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
 221
 222         cancel_delayed_work_sync(&ctrl->ka_work);
 223 }
 224
 225 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
 226                 __le32 nsid)
 227 {
 228         struct nvmet_ns *ns;
 229
 230         list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
 231                 if (ns->nsid == le32_to_cpu(nsid))
 232                         return ns;
 233         }
 234
 235         return NULL;
 236 }
 237
 238 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
 239 {
 240         struct nvmet_ns *ns;
 241
 242         rcu_read_lock();
 243         ns = __nvmet_find_namespace(ctrl, nsid);
 244         if (ns)
 245                 percpu_ref_get(&ns->ref);
 246         rcu_read_unlock();
 247
 248         return ns;
 249 }
 250
 251 static void nvmet_destroy_namespace(struct percpu_ref *ref)
 252 {
 253         struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
 254
 255         complete(&ns->disable_done);
 256 }
 257
 258 void nvmet_put_namespace(struct nvmet_ns *ns)
 259 {
 260         percpu_ref_put(&ns->ref);
 261 }
 262
 263 int nvmet_ns_enable(struct nvmet_ns *ns)
 264 {
 265         struct nvmet_subsys *subsys = ns->subsys;
 266         struct nvmet_ctrl *ctrl;
 267         int ret = 0;
 268
 269         mutex_lock(&subsys->lock);
 270         if (ns->enabled)
 271                 goto out_unlock;
 272
 273         ns->bdev = blkdev_get_by_path(ns->device_path, FMODE_READ | FMODE_WRITE,
 274                         NULL);
 275         if (IS_ERR(ns->bdev)) {
 276                 pr_err("failed to open block device %s: (%ld)\n",
 277                        ns->device_path, PTR_ERR(ns->bdev));
 278                 ret = PTR_ERR(ns->bdev);
 279                 ns->bdev = NULL;
 280                 goto out_unlock;
 281         }
 282
 283         ns->size = i_size_read(ns->bdev->bd_inode);
 284         ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev));
 285
 286         ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
 287                                 0, GFP_KERNEL);
 288         if (ret)
 289                 goto out_blkdev_put;
 290
 291         if (ns->nsid > subsys->max_nsid)
 292                 subsys->max_nsid = ns->nsid;
 293
 294         /*
 295          * The namespaces list needs to be sorted to simplify the implementation
 296          * of the Identify Namepace List subcommand.
 297          */
 298         if (list_empty(&subsys->namespaces)) {
 299                 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
 300         } else {
 301                 struct nvmet_ns *old;
 302
 303                 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
 304                         BUG_ON(ns->nsid == old->nsid);
 305                         if (ns->nsid < old->nsid)
 306                                 break;
 307                 }
 308
 309                 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 310         }
 311
 312         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 313                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 314
 315         ns->enabled = true;
 316         ret = 0;
 317 out_unlock:
 318         mutex_unlock(&subsys->lock);
 319         return ret;
 320 out_blkdev_put:
 321         blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
 322         ns->bdev = NULL;
 323         goto out_unlock;
 324 }
 325
 326 void nvmet_ns_disable(struct nvmet_ns *ns)
 327 {
 328         struct nvmet_subsys *subsys = ns->subsys;
 329         struct nvmet_ctrl *ctrl;
 330
 331         mutex_lock(&subsys->lock);
 332         if (!ns->enabled)
 333                 goto out_unlock;
 334
 335         ns->enabled = false;
 336         list_del_rcu(&ns->dev_link);
 337         mutex_unlock(&subsys->lock);
 338
 339         /*
 340          * Now that we removed the namespaces from the lookup list, we
 341          * can kill the per_cpu ref and wait for any remaining references
 342          * to be dropped, as well as a RCU grace period for anyone only
 343          * using the namepace under rcu_read_lock().  Note that we can't
 344          * use call_rcu here as we need to ensure the namespaces have
 345          * been fully destroyed before unloading the module.
 346          */
 347         percpu_ref_kill(&ns->ref);
 348         synchronize_rcu();
 349         wait_for_completion(&ns->disable_done);
 350         percpu_ref_exit(&ns->ref);
 351
 352         mutex_lock(&subsys->lock);
 353         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 354                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE, 0, 0);
 355
 356         if (ns->bdev)
 357                 blkdev_put(ns->bdev, FMODE_WRITE|FMODE_READ);
 358 out_unlock:
 359         mutex_unlock(&subsys->lock);
 360 }
 361
 362 void nvmet_ns_free(struct nvmet_ns *ns)
 363 {
 364         nvmet_ns_disable(ns);
 365
 366         kfree(ns->device_path);
 367         kfree(ns);
 368 }
 369
 370 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 371 {
 372         struct nvmet_ns *ns;
 373
 374         ns = kzalloc(sizeof(*ns), GFP_KERNEL);
 375         if (!ns)
 376                 return NULL;
 377
 378         INIT_LIST_HEAD(&ns->dev_link);
 379         init_completion(&ns->disable_done);
 380
 381         ns->nsid = nsid;
 382         ns->subsys = subsys;
 383         uuid_gen(&ns->uuid);
 384
 385         return ns;
 386 }
 387
 388 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 389 {
 390         if (status)
 391                 nvmet_set_status(req, status);
 392
 393         if (req->sq->size)
 394                 req->sq->sqhd = (req->sq->sqhd + 1) % req->sq->size;
 395         req->rsp->sq_head = cpu_to_le16(req->sq->sqhd);
 396         req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 397         req->rsp->command_id = req->cmd->common.command_id;
 398
 399         if (req->ns)
 400                 nvmet_put_namespace(req->ns);
 401         req->ops->queue_response(req);
 402 }
 403
 404 void nvmet_req_complete(struct nvmet_req *req, u16 status)
 405 {
 406         __nvmet_req_complete(req, status);
 407         percpu_ref_put(&req->sq->ref);
 408 }
 409 EXPORT_SYMBOL_GPL(nvmet_req_complete);
 410
 411 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 412                 u16 qid, u16 size)
 413 {
 414         cq->qid = qid;
 415         cq->size = size;
 416
 417         ctrl->cqs[qid] = cq;
 418 }
 419
 420 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 421                 u16 qid, u16 size)
 422 {
 423         sq->sqhd = 0;
 424         sq->qid = qid;
 425         sq->size = size;
 426
 427         ctrl->sqs[qid] = sq;
 428 }
 429
 430 static void nvmet_confirm_sq(struct percpu_ref *ref)
 431 {
 432         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 433
 434         complete(&sq->confirm_done);
 435 }
 436
 437 void nvmet_sq_destroy(struct nvmet_sq *sq)
 438 {
 439         /*
 440          * If this is the admin queue, complete all AERs so that our
 441          * queue doesn't have outstanding requests on it.
 442          */
 443         if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
 444                 nvmet_async_events_free(sq->ctrl);
 445         percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 446         wait_for_completion(&sq->confirm_done);
 447         wait_for_completion(&sq->free_done);
 448         percpu_ref_exit(&sq->ref);
 449
 450         if (sq->ctrl) {
 451                 nvmet_ctrl_put(sq->ctrl);
 452                 sq->ctrl = NULL; /* allows reusing the queue later */
 453         }
 454 }
 455 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
 456
 457 static void nvmet_sq_free(struct percpu_ref *ref)
 458 {
 459         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 460
 461         complete(&sq->free_done);
 462 }
 463
 464 int nvmet_sq_init(struct nvmet_sq *sq)
 465 {
 466         int ret;
 467
 468         ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
 469         if (ret) {
 470                 pr_err("percpu_ref init failed!\n");
 471                 return ret;
 472         }
 473         init_completion(&sq->free_done);
 474         init_completion(&sq->confirm_done);
 475
 476         return 0;
 477 }
 478 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 479
 480 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 481                 struct nvmet_sq *sq, struct nvmet_fabrics_ops *ops)
 482 {
 483         u8 flags = req->cmd->common.flags;
 484         u16 status;
 485
 486         req->cq = cq;
 487         req->sq = sq;
 488         req->ops = ops;
 489         req->sg = NULL;
 490         req->sg_cnt = 0;
 491         req->rsp->status = 0;
 492
 493         /* no support for fused commands yet */
 494         if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
 495                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 496                 goto fail;
 497         }
 498
 499         /* either variant of SGLs is fine, as we don't support metadata */
 500         if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF &&
 501                      (flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METASEG)) {
 502                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 503                 goto fail;
 504         }
 505
 506         if (unlikely(!req->sq->ctrl))
 507                 /* will return an error for any Non-connect command: */
 508                 status = nvmet_parse_connect_cmd(req);
 509         else if (likely(req->sq->qid != 0))
 510                 status = nvmet_parse_io_cmd(req);
 511         else if (req->cmd->common.opcode == nvme_fabrics_command)
 512                 status = nvmet_parse_fabrics_cmd(req);
 513         else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
 514                 status = nvmet_parse_discovery_cmd(req);
 515         else
 516                 status = nvmet_parse_admin_cmd(req);
 517
 518         if (status)
 519                 goto fail;
 520
 521         if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
 522                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 523                 goto fail;
 524         }
 525
 526         return true;
 527
 528 fail:
 529         __nvmet_req_complete(req, status);
 530         return false;
 531 }
 532 EXPORT_SYMBOL_GPL(nvmet_req_init);
 533
 534 void nvmet_req_uninit(struct nvmet_req *req)
 535 {
 536         percpu_ref_put(&req->sq->ref);
 537 }
 538 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
 539
 540 static inline bool nvmet_cc_en(u32 cc)
 541 {
 542         return (cc >> NVME_CC_EN_SHIFT) & 0x1;
 543 }
 544
 545 static inline u8 nvmet_cc_css(u32 cc)
 546 {
 547         return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
 548 }
 549
 550 static inline u8 nvmet_cc_mps(u32 cc)
 551 {
 552         return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
 553 }
 554
 555 static inline u8 nvmet_cc_ams(u32 cc)
 556 {
 557         return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
 558 }
 559
 560 static inline u8 nvmet_cc_shn(u32 cc)
 561 {
 562         return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
 563 }
 564
 565 static inline u8 nvmet_cc_iosqes(u32 cc)
 566 {
 567         return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
 568 }
 569
 570 static inline u8 nvmet_cc_iocqes(u32 cc)
 571 {
 572         return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
 573 }
 574
 575 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 576 {
 577         lockdep_assert_held(&ctrl->lock);
 578
 579         if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
 580             nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
 581             nvmet_cc_mps(ctrl->cc) != 0 ||
 582             nvmet_cc_ams(ctrl->cc) != 0 ||
 583             nvmet_cc_css(ctrl->cc) != 0) {
 584                 ctrl->csts = NVME_CSTS_CFS;
 585                 return;
 586         }
 587
 588         ctrl->csts = NVME_CSTS_RDY;
 589 }
 590
 591 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
 592 {
 593         lockdep_assert_held(&ctrl->lock);
 594
 595         /* XXX: tear down queues? */
 596         ctrl->csts &= ~NVME_CSTS_RDY;
 597         ctrl->cc = 0;
 598 }
 599
 600 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
 601 {
 602         u32 old;
 603
 604         mutex_lock(&ctrl->lock);
 605         old = ctrl->cc;
 606         ctrl->cc = new;
 607
 608         if (nvmet_cc_en(new) && !nvmet_cc_en(old))
 609                 nvmet_start_ctrl(ctrl);
 610         if (!nvmet_cc_en(new) && nvmet_cc_en(old))
 611                 nvmet_clear_ctrl(ctrl);
 612         if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
 613                 nvmet_clear_ctrl(ctrl);
 614                 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
 615         }
 616         if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
 617                 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
 618         mutex_unlock(&ctrl->lock);
 619 }
 620
 621 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
 622 {
 623         /* command sets supported: NVMe command set: */
 624         ctrl->cap = (1ULL << 37);
 625         /* CC.EN timeout in 500msec units: */
 626         ctrl->cap |= (15ULL << 24);
 627         /* maximum queue entries supported: */
 628         ctrl->cap |= NVMET_QUEUE_SIZE - 1;
 629 }
 630
 631 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 632                 struct nvmet_req *req, struct nvmet_ctrl **ret)
 633 {
 634         struct nvmet_subsys *subsys;
 635         struct nvmet_ctrl *ctrl;
 636         u16 status = 0;
 637
 638         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
 639         if (!subsys) {
 640                 pr_warn("connect request for invalid subsystem %s!\n",
 641                         subsysnqn);
 642                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 643                 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 644         }
 645
 646         mutex_lock(&subsys->lock);
 647         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 648                 if (ctrl->cntlid == cntlid) {
 649                         if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
 650                                 pr_warn("hostnqn mismatch.\n");
 651                                 continue;
 652                         }
 653                         if (!kref_get_unless_zero(&ctrl->ref))
 654                                 continue;
 655
 656                         *ret = ctrl;
 657                         goto out;
 658                 }
 659         }
 660
 661         pr_warn("could not find controller %d for subsys %s / host %s\n",
 662                 cntlid, subsysnqn, hostnqn);
 663         req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
 664         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 665
 666 out:
 667         mutex_unlock(&subsys->lock);
 668         nvmet_subsys_put(subsys);
 669         return status;
 670 }
 671
 672 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
 673 {
 674         if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
 675                 pr_err("got io cmd %d while CC.EN == 0 on qid = %d\n",
 676                        cmd->common.opcode, req->sq->qid);
 677                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 678         }
 679
 680         if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
 681                 pr_err("got io cmd %d while CSTS.RDY == 0 on qid = %d\n",
 682                        cmd->common.opcode, req->sq->qid);
 683                 req->ns = NULL;
 684                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
 685         }
 686         return 0;
 687 }
 688
 689 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
 690                 const char *hostnqn)
 691 {
 692         struct nvmet_host_link *p;
 693
 694         if (subsys->allow_any_host)
 695                 return true;
 696
 697         list_for_each_entry(p, &subsys->hosts, entry) {
 698                 if (!strcmp(nvmet_host_name(p->host), hostnqn))
 699                         return true;
 700         }
 701
 702         return false;
 703 }
 704
 705 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
 706                 const char *hostnqn)
 707 {
 708         struct nvmet_subsys_link *s;
 709
 710         list_for_each_entry(s, &req->port->subsystems, entry) {
 711                 if (__nvmet_host_allowed(s->subsys, hostnqn))
 712                         return true;
 713         }
 714
 715         return false;
 716 }
 717
 718 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
 719                 const char *hostnqn)
 720 {
 721         lockdep_assert_held(&nvmet_config_sem);
 722
 723         if (subsys->type == NVME_NQN_DISC)
 724                 return nvmet_host_discovery_allowed(req, hostnqn);
 725         else
 726                 return __nvmet_host_allowed(subsys, hostnqn);
 727 }
 728
 729 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
 730                 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
 731 {
 732         struct nvmet_subsys *subsys;
 733         struct nvmet_ctrl *ctrl;
 734         int ret;
 735         u16 status;
 736
 737         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 738         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
 739         if (!subsys) {
 740                 pr_warn("connect request for invalid subsystem %s!\n",
 741                         subsysnqn);
 742                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 743                 goto out;
 744         }
 745
 746         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 747         down_read(&nvmet_config_sem);
 748         if (!nvmet_host_allowed(req, subsys, hostnqn)) {
 749                 pr_info("connect by host %s for subsystem %s not allowed\n",
 750                         hostnqn, subsysnqn);
 751                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
 752                 up_read(&nvmet_config_sem);
 753                 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
 754                 goto out_put_subsystem;
 755         }
 756         up_read(&nvmet_config_sem);
 757
 758         status = NVME_SC_INTERNAL;
 759         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
 760         if (!ctrl)
 761                 goto out_put_subsystem;
 762         mutex_init(&ctrl->lock);
 763
 764         nvmet_init_cap(ctrl);
 765
 766         INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
 767         INIT_LIST_HEAD(&ctrl->async_events);
 768
 769         memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
 770         memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
 771
 772         kref_init(&ctrl->ref);
 773         ctrl->subsys = subsys;
 774
 775         ctrl->cqs = kcalloc(subsys->max_qid + 1,
 776                         sizeof(struct nvmet_cq *),
 777                         GFP_KERNEL);
 778         if (!ctrl->cqs)
 779                 goto out_free_ctrl;
 780
 781         ctrl->sqs = kcalloc(subsys->max_qid + 1,
 782                         sizeof(struct nvmet_sq *),
 783                         GFP_KERNEL);
 784         if (!ctrl->sqs)
 785                 goto out_free_cqs;
 786
 787         ret = ida_simple_get(&cntlid_ida,
 788                              NVME_CNTLID_MIN, NVME_CNTLID_MAX,
 789                              GFP_KERNEL);
 790         if (ret < 0) {
 791                 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
 792                 goto out_free_sqs;
 793         }
 794         ctrl->cntlid = ret;
 795
 796         ctrl->ops = req->ops;
 797         if (ctrl->subsys->type == NVME_NQN_DISC) {
 798                 /* Don't accept keep-alive timeout for discovery controllers */
 799                 if (kato) {
 800                         status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 801                         goto out_free_sqs;
 802                 }
 803
 804                 /*
 805                  * Discovery controllers use some arbitrary high value in order
 806                  * to cleanup stale discovery sessions
 807                  *
 808                  * From the latest base diff RC:
 809                  * "The Keep Alive command is not supported by
 810                  * Discovery controllers. A transport may specify a
 811                  * fixed Discovery controller activity timeout value
 812                  * (e.g., 2 minutes).  If no commands are received
 813                  * by a Discovery controller within that time
 814                  * period, the controller may perform the
 815                  * actions for Keep Alive Timer expiration".
 816                  */
 817                 ctrl->kato = NVMET_DISC_KATO;
 818         } else {
 819                 /* keep-alive timeout in seconds */
 820                 ctrl->kato = DIV_ROUND_UP(kato, 1000);
 821         }
 822         nvmet_start_keep_alive_timer(ctrl);
 823
 824         mutex_lock(&subsys->lock);
 825         list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
 826         mutex_unlock(&subsys->lock);
 827
 828         *ctrlp = ctrl;
 829         return 0;
 830
 831 out_free_sqs:
 832         kfree(ctrl->sqs);
 833 out_free_cqs:
 834         kfree(ctrl->cqs);
 835 out_free_ctrl:
 836         kfree(ctrl);
 837 out_put_subsystem:
 838         nvmet_subsys_put(subsys);
 839 out:
 840         return status;
 841 }
 842
 843 static void nvmet_ctrl_free(struct kref *ref)
 844 {
 845         struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
 846         struct nvmet_subsys *subsys = ctrl->subsys;
 847
 848         nvmet_stop_keep_alive_timer(ctrl);
 849
 850         mutex_lock(&subsys->lock);
 851         list_del(&ctrl->subsys_entry);
 852         mutex_unlock(&subsys->lock);
 853
 854         flush_work(&ctrl->async_event_work);
 855         cancel_work_sync(&ctrl->fatal_err_work);
 856
 857         ida_simple_remove(&cntlid_ida, ctrl->cntlid);
 858         nvmet_subsys_put(subsys);
 859
 860         kfree(ctrl->sqs);
 861         kfree(ctrl->cqs);
 862         kfree(ctrl);
 863 }
 864
 865 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
 866 {
 867         kref_put(&ctrl->ref, nvmet_ctrl_free);
 868 }
 869
 870 static void nvmet_fatal_error_handler(struct work_struct *work)
 871 {
 872         struct nvmet_ctrl *ctrl =
 873                         container_of(work, struct nvmet_ctrl, fatal_err_work);
 874
 875         pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
 876         ctrl->ops->delete_ctrl(ctrl);
 877 }
 878
 879 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
 880 {
 881         mutex_lock(&ctrl->lock);
 882         if (!(ctrl->csts & NVME_CSTS_CFS)) {
 883                 ctrl->csts |= NVME_CSTS_CFS;
 884                 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
 885                 schedule_work(&ctrl->fatal_err_work);
 886         }
 887         mutex_unlock(&ctrl->lock);
 888 }
 889 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
 890
 891 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
 892                 const char *subsysnqn)
 893 {
 894         struct nvmet_subsys_link *p;
 895
 896         if (!port)
 897                 return NULL;
 898
 899         if (!strncmp(NVME_DISC_SUBSYS_NAME, subsysnqn,
 900                         NVMF_NQN_SIZE)) {
 901                 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
 902                         return NULL;
 903                 return nvmet_disc_subsys;
 904         }
 905
 906         down_read(&nvmet_config_sem);
 907         list_for_each_entry(p, &port->subsystems, entry) {
 908                 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
 909                                 NVMF_NQN_SIZE)) {
 910                         if (!kref_get_unless_zero(&p->subsys->ref))
 911                                 break;
 912                         up_read(&nvmet_config_sem);
 913                         return p->subsys;
 914                 }
 915         }
 916         up_read(&nvmet_config_sem);
 917         return NULL;
 918 }
 919
 920 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
 921                 enum nvme_subsys_type type)
 922 {
 923         struct nvmet_subsys *subsys;
 924
 925         subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
 926         if (!subsys)
 927                 return NULL;
 928
 929         subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
 930         /* generate a random serial number as our controllers are ephemeral: */
 931         get_random_bytes(&subsys->serial, sizeof(subsys->serial));
 932
 933         switch (type) {
 934         case NVME_NQN_NVME:
 935                 subsys->max_qid = NVMET_NR_QUEUES;
 936                 break;
 937         case NVME_NQN_DISC:
 938                 subsys->max_qid = 0;
 939                 break;
 940         default:
 941                 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
 942                 kfree(subsys);
 943                 return NULL;
 944         }
 945         subsys->type = type;
 946         subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
 947                         GFP_KERNEL);
 948         if (!subsys->subsysnqn) {
 949                 kfree(subsys);
 950                 return NULL;
 951         }
 952
 953         kref_init(&subsys->ref);
 954
 955         mutex_init(&subsys->lock);
 956         INIT_LIST_HEAD(&subsys->namespaces);
 957         INIT_LIST_HEAD(&subsys->ctrls);
 958         INIT_LIST_HEAD(&subsys->hosts);
 959
 960         return subsys;
 961 }
 962
 963 static void nvmet_subsys_free(struct kref *ref)
 964 {
 965         struct nvmet_subsys *subsys =
 966                 container_of(ref, struct nvmet_subsys, ref);
 967
 968         WARN_ON_ONCE(!list_empty(&subsys->namespaces));
 969
 970         kfree(subsys->subsysnqn);
 971         kfree(subsys);
 972 }
 973
 974 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
 975 {
 976         struct nvmet_ctrl *ctrl;
 977
 978         mutex_lock(&subsys->lock);
 979         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 980                 ctrl->ops->delete_ctrl(ctrl);
 981         mutex_unlock(&subsys->lock);
 982 }
 983
 984 void nvmet_subsys_put(struct nvmet_subsys *subsys)
 985 {
 986         kref_put(&subsys->ref, nvmet_subsys_free);
 987 }
 988
 989 static int __init nvmet_init(void)
 990 {
 991         int error;
 992
 993         error = nvmet_init_discovery();
 994         if (error)
 995                 goto out;
 996
 997         error = nvmet_init_configfs();
 998         if (error)
 999                 goto out_exit_discovery;
1000         return 0;
1001
1002 out_exit_discovery:
1003         nvmet_exit_discovery();
1004 out:
1005         return error;
1006 }
1007
1008 static void __exit nvmet_exit(void)
1009 {
1010         nvmet_exit_configfs();
1011         nvmet_exit_discovery();
1012         ida_destroy(&cntlid_ida);
1013
1014         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1015         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1016 }
1017
1018 module_init(nvmet_init);
1019 module_exit(nvmet_exit);
1020
1021 MODULE_LICENSE("GPL v2");