drivers/nvme/target/core.c

   1 /*
   2  * Common code for the NVMe target.
   3  * Copyright (c) 2015-2016 HGST, a Western Digital Company.
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms and conditions of the GNU General Public License,
   7  * version 2, as published by the Free Software Foundation.
   8  *
   9  * This program is distributed in the hope it will be useful, but WITHOUT
  10  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
  12  * more details.
  13  */
  14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  15 #include <linux/module.h>
  16 #include <linux/random.h>
  17 #include <linux/rculist.h>
  18 #include <linux/pci-p2pdma.h>
  19
  20 #include "nvmet.h"
  21
/*
 * Workqueue handle exported to the rest of the target code.
 * NOTE(review): allocation and users are outside this view - presumably
 * used for buffered (file-backed) I/O; confirm.
 */
struct workqueue_struct *buffered_io_wq;
/*
 * Registered fabrics transports, indexed by transport type
 * (ops->type on registration, port->disc_addr.trtype on lookup).
 */
static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX];
/*
 * ID allocator. NOTE(review): presumably hands out controller IDs
 * (cntlid); its users are outside this view - confirm.
 */
static DEFINE_IDA(cntlid_ida);
  25
  26 /*
  27  * This read/write semaphore is used to synchronize access to configuration
  28  * information on a target system that will result in discovery log page
  29  * information change for at least one host.
   30  * The full list of resources protected by this semaphore is:
  31  *
  32  *  - subsystems list
  33  *  - per-subsystem allowed hosts list
  34  *  - allow_any_host subsystem attribute
  35  *  - nvmet_genctr
  36  *  - the nvmet_transports array
  37  *
  38  * When updating any of those lists/structures write lock should be obtained,
   39  * while when reading (populating discovery log page or checking host-subsystem
  40  * link) read lock is obtained to allow concurrent reads.
  41  */
DECLARE_RWSEM(nvmet_config_sem);

/*
 * ANA (Asymmetric Namespace Access) bookkeeping.
 *
 * nvmet_ana_group_enabled[] is indexed by ANA group id; the entry for a
 * namespace's group is incremented in nvmet_ns_alloc() and decremented in
 * nvmet_ns_free(), both with nvmet_ana_sem held for writing.
 *
 * NOTE(review): nvmet_ana_chgcnt is not touched in this part of the file;
 * presumably an ANA change counter protected by the same semaphore - confirm.
 */
u32 nvmet_ana_group_enabled[NVMET_MAX_ANAGRPS + 1];
u64 nvmet_ana_chgcnt;
DECLARE_RWSEM(nvmet_ana_sem);

/* Forward declaration: used by nvmet_ctrl_find_get() before its definition. */
static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
                const char *subsysnqn);
  50
  51 u16 nvmet_copy_to_sgl(struct nvmet_req *req, off_t off, const void *buf,
  52                 size_t len)
  53 {
  54         if (sg_pcopy_from_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  55                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  56         return 0;
  57 }
  58
  59 u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf, size_t len)
  60 {
  61         if (sg_pcopy_to_buffer(req->sg, req->sg_cnt, buf, len, off) != len)
  62                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  63         return 0;
  64 }
  65
  66 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len)
  67 {
  68         if (sg_zero_buffer(req->sg, req->sg_cnt, len, off) != len)
  69                 return NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR;
  70         return 0;
  71 }
  72
  73 static unsigned int nvmet_max_nsid(struct nvmet_subsys *subsys)
  74 {
  75         struct nvmet_ns *ns;
  76
  77         if (list_empty(&subsys->namespaces))
  78                 return 0;
  79
  80         ns = list_last_entry(&subsys->namespaces, struct nvmet_ns, dev_link);
  81         return ns->nsid;
  82 }
  83
  84 static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
  85 {
  86         return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
  87 }
  88
  89 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl)
  90 {
  91         struct nvmet_req *req;
  92
  93         while (1) {
  94                 mutex_lock(&ctrl->lock);
  95                 if (!ctrl->nr_async_event_cmds) {
  96                         mutex_unlock(&ctrl->lock);
  97                         return;
  98                 }
  99
 100                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
 101                 mutex_unlock(&ctrl->lock);
 102                 nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR);
 103         }
 104 }
 105
 106 static void nvmet_async_event_work(struct work_struct *work)
 107 {
 108         struct nvmet_ctrl *ctrl =
 109                 container_of(work, struct nvmet_ctrl, async_event_work);
 110         struct nvmet_async_event *aen;
 111         struct nvmet_req *req;
 112
 113         while (1) {
 114                 mutex_lock(&ctrl->lock);
 115                 aen = list_first_entry_or_null(&ctrl->async_events,
 116                                 struct nvmet_async_event, entry);
 117                 if (!aen || !ctrl->nr_async_event_cmds) {
 118                         mutex_unlock(&ctrl->lock);
 119                         return;
 120                 }
 121
 122                 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
 123                 nvmet_set_result(req, nvmet_async_event_result(aen));
 124
 125                 list_del(&aen->entry);
 126                 kfree(aen);
 127
 128                 mutex_unlock(&ctrl->lock);
 129                 nvmet_req_complete(req, 0);
 130         }
 131 }
 132
 133 static void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
 134                 u8 event_info, u8 log_page)
 135 {
 136         struct nvmet_async_event *aen;
 137
 138         aen = kmalloc(sizeof(*aen), GFP_KERNEL);
 139         if (!aen)
 140                 return;
 141
 142         aen->event_type = event_type;
 143         aen->event_info = event_info;
 144         aen->log_page = log_page;
 145
 146         mutex_lock(&ctrl->lock);
 147         list_add_tail(&aen->entry, &ctrl->async_events);
 148         mutex_unlock(&ctrl->lock);
 149
 150         schedule_work(&ctrl->async_event_work);
 151 }
 152
 153 static bool nvmet_aen_disabled(struct nvmet_ctrl *ctrl, u32 aen)
 154 {
 155         if (!(READ_ONCE(ctrl->aen_enabled) & aen))
 156                 return true;
 157         return test_and_set_bit(aen, &ctrl->aen_masked);
 158 }
 159
 160 static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid)
 161 {
 162         u32 i;
 163
 164         mutex_lock(&ctrl->lock);
 165         if (ctrl->nr_changed_ns > NVME_MAX_CHANGED_NAMESPACES)
 166                 goto out_unlock;
 167
 168         for (i = 0; i < ctrl->nr_changed_ns; i++) {
 169                 if (ctrl->changed_ns_list[i] == nsid)
 170                         goto out_unlock;
 171         }
 172
 173         if (ctrl->nr_changed_ns == NVME_MAX_CHANGED_NAMESPACES) {
 174                 ctrl->changed_ns_list[0] = cpu_to_le32(0xffffffff);
 175                 ctrl->nr_changed_ns = U32_MAX;
 176                 goto out_unlock;
 177         }
 178
 179         ctrl->changed_ns_list[ctrl->nr_changed_ns++] = nsid;
 180 out_unlock:
 181         mutex_unlock(&ctrl->lock);
 182 }
 183
 184 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid)
 185 {
 186         struct nvmet_ctrl *ctrl;
 187
 188         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 189                 nvmet_add_to_changed_ns_log(ctrl, cpu_to_le32(nsid));
 190                 if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_NS_ATTR))
 191                         continue;
 192                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
 193                                 NVME_AER_NOTICE_NS_CHANGED,
 194                                 NVME_LOG_CHANGED_NS);
 195         }
 196 }
 197
 198 void nvmet_send_ana_event(struct nvmet_subsys *subsys,
 199                 struct nvmet_port *port)
 200 {
 201         struct nvmet_ctrl *ctrl;
 202
 203         mutex_lock(&subsys->lock);
 204         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 205                 if (port && ctrl->port != port)
 206                         continue;
 207                 if (nvmet_aen_disabled(ctrl, NVME_AEN_CFG_ANA_CHANGE))
 208                         continue;
 209                 nvmet_add_async_event(ctrl, NVME_AER_TYPE_NOTICE,
 210                                 NVME_AER_NOTICE_ANA, NVME_LOG_ANA);
 211         }
 212         mutex_unlock(&subsys->lock);
 213 }
 214
 215 void nvmet_port_send_ana_event(struct nvmet_port *port)
 216 {
 217         struct nvmet_subsys_link *p;
 218
 219         down_read(&nvmet_config_sem);
 220         list_for_each_entry(p, &port->subsystems, entry)
 221                 nvmet_send_ana_event(p->subsys, port);
 222         up_read(&nvmet_config_sem);
 223 }
 224
 225 int nvmet_register_transport(const struct nvmet_fabrics_ops *ops)
 226 {
 227         int ret = 0;
 228
 229         down_write(&nvmet_config_sem);
 230         if (nvmet_transports[ops->type])
 231                 ret = -EINVAL;
 232         else
 233                 nvmet_transports[ops->type] = ops;
 234         up_write(&nvmet_config_sem);
 235
 236         return ret;
 237 }
 238 EXPORT_SYMBOL_GPL(nvmet_register_transport);
 239
 240 void nvmet_unregister_transport(const struct nvmet_fabrics_ops *ops)
 241 {
 242         down_write(&nvmet_config_sem);
 243         nvmet_transports[ops->type] = NULL;
 244         up_write(&nvmet_config_sem);
 245 }
 246 EXPORT_SYMBOL_GPL(nvmet_unregister_transport);
 247
 248 int nvmet_enable_port(struct nvmet_port *port)
 249 {
 250         const struct nvmet_fabrics_ops *ops;
 251         int ret;
 252
 253         lockdep_assert_held(&nvmet_config_sem);
 254
 255         ops = nvmet_transports[port->disc_addr.trtype];
 256         if (!ops) {
 257                 up_write(&nvmet_config_sem);
 258                 request_module("nvmet-transport-%d", port->disc_addr.trtype);
 259                 down_write(&nvmet_config_sem);
 260                 ops = nvmet_transports[port->disc_addr.trtype];
 261                 if (!ops) {
 262                         pr_err("transport type %d not supported\n",
 263                                 port->disc_addr.trtype);
 264                         return -EINVAL;
 265                 }
 266         }
 267
 268         if (!try_module_get(ops->owner))
 269                 return -EINVAL;
 270
 271         ret = ops->add_port(port);
 272         if (ret) {
 273                 module_put(ops->owner);
 274                 return ret;
 275         }
 276
 277         /* If the transport didn't set inline_data_size, then disable it. */
 278         if (port->inline_data_size < 0)
 279                 port->inline_data_size = 0;
 280
 281         port->enabled = true;
 282         return 0;
 283 }
 284
 285 void nvmet_disable_port(struct nvmet_port *port)
 286 {
 287         const struct nvmet_fabrics_ops *ops;
 288
 289         lockdep_assert_held(&nvmet_config_sem);
 290
 291         port->enabled = false;
 292
 293         ops = nvmet_transports[port->disc_addr.trtype];
 294         ops->remove_port(port);
 295         module_put(ops->owner);
 296 }
 297
 298 static void nvmet_keep_alive_timer(struct work_struct *work)
 299 {
 300         struct nvmet_ctrl *ctrl = container_of(to_delayed_work(work),
 301                         struct nvmet_ctrl, ka_work);
 302
 303         pr_err("ctrl %d keep-alive timer (%d seconds) expired!\n",
 304                 ctrl->cntlid, ctrl->kato);
 305
 306         nvmet_ctrl_fatal_error(ctrl);
 307 }
 308
 309 static void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl)
 310 {
 311         pr_debug("ctrl %d start keep-alive timer for %d secs\n",
 312                 ctrl->cntlid, ctrl->kato);
 313
 314         INIT_DELAYED_WORK(&ctrl->ka_work, nvmet_keep_alive_timer);
 315         schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ);
 316 }
 317
 318 static void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl)
 319 {
 320         pr_debug("ctrl %d stop keep-alive\n", ctrl->cntlid);
 321
 322         cancel_delayed_work_sync(&ctrl->ka_work);
 323 }
 324
 325 static struct nvmet_ns *__nvmet_find_namespace(struct nvmet_ctrl *ctrl,
 326                 __le32 nsid)
 327 {
 328         struct nvmet_ns *ns;
 329
 330         list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link) {
 331                 if (ns->nsid == le32_to_cpu(nsid))
 332                         return ns;
 333         }
 334
 335         return NULL;
 336 }
 337
 338 struct nvmet_ns *nvmet_find_namespace(struct nvmet_ctrl *ctrl, __le32 nsid)
 339 {
 340         struct nvmet_ns *ns;
 341
 342         rcu_read_lock();
 343         ns = __nvmet_find_namespace(ctrl, nsid);
 344         if (ns)
 345                 percpu_ref_get(&ns->ref);
 346         rcu_read_unlock();
 347
 348         return ns;
 349 }
 350
 351 static void nvmet_destroy_namespace(struct percpu_ref *ref)
 352 {
 353         struct nvmet_ns *ns = container_of(ref, struct nvmet_ns, ref);
 354
 355         complete(&ns->disable_done);
 356 }
 357
 358 void nvmet_put_namespace(struct nvmet_ns *ns)
 359 {
 360         percpu_ref_put(&ns->ref);
 361 }
 362
 363 static void nvmet_ns_dev_disable(struct nvmet_ns *ns)
 364 {
 365         nvmet_bdev_ns_disable(ns);
 366         nvmet_file_ns_disable(ns);
 367 }
 368
 369 static int nvmet_p2pmem_ns_enable(struct nvmet_ns *ns)
 370 {
 371         int ret;
 372         struct pci_dev *p2p_dev;
 373
 374         if (!ns->use_p2pmem)
 375                 return 0;
 376
 377         if (!ns->bdev) {
 378                 pr_err("peer-to-peer DMA is not supported by non-block device namespaces\n");
 379                 return -EINVAL;
 380         }
 381
 382         if (!blk_queue_pci_p2pdma(ns->bdev->bd_queue)) {
 383                 pr_err("peer-to-peer DMA is not supported by the driver of %s\n",
 384                        ns->device_path);
 385                 return -EINVAL;
 386         }
 387
 388         if (ns->p2p_dev) {
 389                 ret = pci_p2pdma_distance(ns->p2p_dev, nvmet_ns_dev(ns), true);
 390                 if (ret < 0)
 391                         return -EINVAL;
 392         } else {
 393                 /*
 394                  * Right now we just check that there is p2pmem available so
 395                  * we can report an error to the user right away if there
 396                  * is not. We'll find the actual device to use once we
 397                  * setup the controller when the port's device is available.
 398                  */
 399
 400                 p2p_dev = pci_p2pmem_find(nvmet_ns_dev(ns));
 401                 if (!p2p_dev) {
 402                         pr_err("no peer-to-peer memory is available for %s\n",
 403                                ns->device_path);
 404                         return -EINVAL;
 405                 }
 406
 407                 pci_dev_put(p2p_dev);
 408         }
 409
 410         return 0;
 411 }
 412
 413 /*
 414  * Note: ctrl->subsys->lock should be held when calling this function
 415  */
 416 static void nvmet_p2pmem_ns_add_p2p(struct nvmet_ctrl *ctrl,
 417                                     struct nvmet_ns *ns)
 418 {
 419         struct device *clients[2];
 420         struct pci_dev *p2p_dev;
 421         int ret;
 422
 423         if (!ctrl->p2p_client)
 424                 return;
 425
 426         if (ns->p2p_dev) {
 427                 ret = pci_p2pdma_distance(ns->p2p_dev, ctrl->p2p_client, true);
 428                 if (ret < 0)
 429                         return;
 430
 431                 p2p_dev = pci_dev_get(ns->p2p_dev);
 432         } else {
 433                 clients[0] = ctrl->p2p_client;
 434                 clients[1] = nvmet_ns_dev(ns);
 435
 436                 p2p_dev = pci_p2pmem_find_many(clients, ARRAY_SIZE(clients));
 437                 if (!p2p_dev) {
 438                         pr_err("no peer-to-peer memory is available that's supported by %s and %s\n",
 439                                dev_name(ctrl->p2p_client), ns->device_path);
 440                         return;
 441                 }
 442         }
 443
 444         ret = radix_tree_insert(&ctrl->p2p_ns_map, ns->nsid, p2p_dev);
 445         if (ret < 0)
 446                 pci_dev_put(p2p_dev);
 447
 448         pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev),
 449                 ns->nsid);
 450 }
 451
 452 int nvmet_ns_enable(struct nvmet_ns *ns)
 453 {
 454         struct nvmet_subsys *subsys = ns->subsys;
 455         struct nvmet_ctrl *ctrl;
 456         int ret;
 457
 458         mutex_lock(&subsys->lock);
 459         ret = -EMFILE;
 460         if (subsys->nr_namespaces == NVMET_MAX_NAMESPACES)
 461                 goto out_unlock;
 462         ret = 0;
 463         if (ns->enabled)
 464                 goto out_unlock;
 465
 466         ret = nvmet_bdev_ns_enable(ns);
 467         if (ret == -ENOTBLK)
 468                 ret = nvmet_file_ns_enable(ns);
 469         if (ret)
 470                 goto out_unlock;
 471
 472         ret = nvmet_p2pmem_ns_enable(ns);
 473         if (ret)
 474                 goto out_unlock;
 475
 476         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 477                 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
 478
 479         ret = percpu_ref_init(&ns->ref, nvmet_destroy_namespace,
 480                                 0, GFP_KERNEL);
 481         if (ret)
 482                 goto out_dev_put;
 483
 484         if (ns->nsid > subsys->max_nsid)
 485                 subsys->max_nsid = ns->nsid;
 486
 487         /*
 488          * The namespaces list needs to be sorted to simplify the implementation
 489          * of the Identify Namepace List subcommand.
 490          */
 491         if (list_empty(&subsys->namespaces)) {
 492                 list_add_tail_rcu(&ns->dev_link, &subsys->namespaces);
 493         } else {
 494                 struct nvmet_ns *old;
 495
 496                 list_for_each_entry_rcu(old, &subsys->namespaces, dev_link) {
 497                         BUG_ON(ns->nsid == old->nsid);
 498                         if (ns->nsid < old->nsid)
 499                                 break;
 500                 }
 501
 502                 list_add_tail_rcu(&ns->dev_link, &old->dev_link);
 503         }
 504         subsys->nr_namespaces++;
 505
 506         nvmet_ns_changed(subsys, ns->nsid);
 507         ns->enabled = true;
 508         ret = 0;
 509 out_unlock:
 510         mutex_unlock(&subsys->lock);
 511         return ret;
 512 out_dev_put:
 513         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 514                 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
 515
 516         nvmet_ns_dev_disable(ns);
 517         goto out_unlock;
 518 }
 519
 520 void nvmet_ns_disable(struct nvmet_ns *ns)
 521 {
 522         struct nvmet_subsys *subsys = ns->subsys;
 523         struct nvmet_ctrl *ctrl;
 524
 525         mutex_lock(&subsys->lock);
 526         if (!ns->enabled)
 527                 goto out_unlock;
 528
 529         ns->enabled = false;
 530         list_del_rcu(&ns->dev_link);
 531         if (ns->nsid == subsys->max_nsid)
 532                 subsys->max_nsid = nvmet_max_nsid(subsys);
 533
 534         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
 535                 pci_dev_put(radix_tree_delete(&ctrl->p2p_ns_map, ns->nsid));
 536
 537         mutex_unlock(&subsys->lock);
 538
 539         /*
 540          * Now that we removed the namespaces from the lookup list, we
 541          * can kill the per_cpu ref and wait for any remaining references
 542          * to be dropped, as well as a RCU grace period for anyone only
 543          * using the namepace under rcu_read_lock().  Note that we can't
 544          * use call_rcu here as we need to ensure the namespaces have
 545          * been fully destroyed before unloading the module.
 546          */
 547         percpu_ref_kill(&ns->ref);
 548         synchronize_rcu();
 549         wait_for_completion(&ns->disable_done);
 550         percpu_ref_exit(&ns->ref);
 551
 552         mutex_lock(&subsys->lock);
 553
 554         subsys->nr_namespaces--;
 555         nvmet_ns_changed(subsys, ns->nsid);
 556         nvmet_ns_dev_disable(ns);
 557 out_unlock:
 558         mutex_unlock(&subsys->lock);
 559 }
 560
 561 void nvmet_ns_free(struct nvmet_ns *ns)
 562 {
 563         nvmet_ns_disable(ns);
 564
 565         down_write(&nvmet_ana_sem);
 566         nvmet_ana_group_enabled[ns->anagrpid]--;
 567         up_write(&nvmet_ana_sem);
 568
 569         kfree(ns->device_path);
 570         kfree(ns);
 571 }
 572
 573 struct nvmet_ns *nvmet_ns_alloc(struct nvmet_subsys *subsys, u32 nsid)
 574 {
 575         struct nvmet_ns *ns;
 576
 577         ns = kzalloc(sizeof(*ns), GFP_KERNEL);
 578         if (!ns)
 579                 return NULL;
 580
 581         INIT_LIST_HEAD(&ns->dev_link);
 582         init_completion(&ns->disable_done);
 583
 584         ns->nsid = nsid;
 585         ns->subsys = subsys;
 586
 587         down_write(&nvmet_ana_sem);
 588         ns->anagrpid = NVMET_DEFAULT_ANA_GRPID;
 589         nvmet_ana_group_enabled[ns->anagrpid]++;
 590         up_write(&nvmet_ana_sem);
 591
 592         uuid_gen(&ns->uuid);
 593         ns->buffered_io = false;
 594
 595         return ns;
 596 }
 597
 598 static void __nvmet_req_complete(struct nvmet_req *req, u16 status)
 599 {
 600         u32 old_sqhd, new_sqhd;
 601         u16 sqhd;
 602
 603         if (status)
 604                 nvmet_set_status(req, status);
 605
 606         if (req->sq->size) {
 607                 do {
 608                         old_sqhd = req->sq->sqhd;
 609                         new_sqhd = (old_sqhd + 1) % req->sq->size;
 610                 } while (cmpxchg(&req->sq->sqhd, old_sqhd, new_sqhd) !=
 611                                         old_sqhd);
 612         }
 613         sqhd = req->sq->sqhd & 0x0000FFFF;
 614         req->rsp->sq_head = cpu_to_le16(sqhd);
 615         req->rsp->sq_id = cpu_to_le16(req->sq->qid);
 616         req->rsp->command_id = req->cmd->common.command_id;
 617
 618         if (req->ns)
 619                 nvmet_put_namespace(req->ns);
 620         req->ops->queue_response(req);
 621 }
 622
 623 void nvmet_req_complete(struct nvmet_req *req, u16 status)
 624 {
 625         __nvmet_req_complete(req, status);
 626         percpu_ref_put(&req->sq->ref);
 627 }
 628 EXPORT_SYMBOL_GPL(nvmet_req_complete);
 629
 630 void nvmet_cq_setup(struct nvmet_ctrl *ctrl, struct nvmet_cq *cq,
 631                 u16 qid, u16 size)
 632 {
 633         cq->qid = qid;
 634         cq->size = size;
 635
 636         ctrl->cqs[qid] = cq;
 637 }
 638
 639 void nvmet_sq_setup(struct nvmet_ctrl *ctrl, struct nvmet_sq *sq,
 640                 u16 qid, u16 size)
 641 {
 642         sq->sqhd = 0;
 643         sq->qid = qid;
 644         sq->size = size;
 645
 646         ctrl->sqs[qid] = sq;
 647 }
 648
 649 static void nvmet_confirm_sq(struct percpu_ref *ref)
 650 {
 651         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 652
 653         complete(&sq->confirm_done);
 654 }
 655
 656 void nvmet_sq_destroy(struct nvmet_sq *sq)
 657 {
 658         /*
 659          * If this is the admin queue, complete all AERs so that our
 660          * queue doesn't have outstanding requests on it.
 661          */
 662         if (sq->ctrl && sq->ctrl->sqs && sq->ctrl->sqs[0] == sq)
 663                 nvmet_async_events_free(sq->ctrl);
 664         percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
 665         wait_for_completion(&sq->confirm_done);
 666         wait_for_completion(&sq->free_done);
 667         percpu_ref_exit(&sq->ref);
 668
 669         if (sq->ctrl) {
 670                 nvmet_ctrl_put(sq->ctrl);
 671                 sq->ctrl = NULL; /* allows reusing the queue later */
 672         }
 673 }
 674 EXPORT_SYMBOL_GPL(nvmet_sq_destroy);
 675
 676 static void nvmet_sq_free(struct percpu_ref *ref)
 677 {
 678         struct nvmet_sq *sq = container_of(ref, struct nvmet_sq, ref);
 679
 680         complete(&sq->free_done);
 681 }
 682
 683 int nvmet_sq_init(struct nvmet_sq *sq)
 684 {
 685         int ret;
 686
 687         ret = percpu_ref_init(&sq->ref, nvmet_sq_free, 0, GFP_KERNEL);
 688         if (ret) {
 689                 pr_err("percpu_ref init failed!\n");
 690                 return ret;
 691         }
 692         init_completion(&sq->free_done);
 693         init_completion(&sq->confirm_done);
 694
 695         return 0;
 696 }
 697 EXPORT_SYMBOL_GPL(nvmet_sq_init);
 698
 699 static inline u16 nvmet_check_ana_state(struct nvmet_port *port,
 700                 struct nvmet_ns *ns)
 701 {
 702         enum nvme_ana_state state = port->ana_state[ns->anagrpid];
 703
 704         if (unlikely(state == NVME_ANA_INACCESSIBLE))
 705                 return NVME_SC_ANA_INACCESSIBLE;
 706         if (unlikely(state == NVME_ANA_PERSISTENT_LOSS))
 707                 return NVME_SC_ANA_PERSISTENT_LOSS;
 708         if (unlikely(state == NVME_ANA_CHANGE))
 709                 return NVME_SC_ANA_TRANSITION;
 710         return 0;
 711 }
 712
 713 static inline u16 nvmet_io_cmd_check_access(struct nvmet_req *req)
 714 {
 715         if (unlikely(req->ns->readonly)) {
 716                 switch (req->cmd->common.opcode) {
 717                 case nvme_cmd_read:
 718                 case nvme_cmd_flush:
 719                         break;
 720                 default:
 721                         return NVME_SC_NS_WRITE_PROTECTED;
 722                 }
 723         }
 724
 725         return 0;
 726 }
 727
 728 static u16 nvmet_parse_io_cmd(struct nvmet_req *req)
 729 {
 730         struct nvme_command *cmd = req->cmd;
 731         u16 ret;
 732
 733         ret = nvmet_check_ctrl_status(req, cmd);
 734         if (unlikely(ret))
 735                 return ret;
 736
 737         req->ns = nvmet_find_namespace(req->sq->ctrl, cmd->rw.nsid);
 738         if (unlikely(!req->ns))
 739                 return NVME_SC_INVALID_NS | NVME_SC_DNR;
 740         ret = nvmet_check_ana_state(req->port, req->ns);
 741         if (unlikely(ret))
 742                 return ret;
 743         ret = nvmet_io_cmd_check_access(req);
 744         if (unlikely(ret))
 745                 return ret;
 746
 747         if (req->ns->file)
 748                 return nvmet_file_parse_io_cmd(req);
 749         else
 750                 return nvmet_bdev_parse_io_cmd(req);
 751 }
 752
 753 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq,
 754                 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops)
 755 {
 756         u8 flags = req->cmd->common.flags;
 757         u16 status;
 758
 759         req->cq = cq;
 760         req->sq = sq;
 761         req->ops = ops;
 762         req->sg = NULL;
 763         req->sg_cnt = 0;
 764         req->transfer_len = 0;
 765         req->rsp->status = 0;
 766         req->ns = NULL;
 767
 768         /* no support for fused commands yet */
 769         if (unlikely(flags & (NVME_CMD_FUSE_FIRST | NVME_CMD_FUSE_SECOND))) {
 770                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 771                 goto fail;
 772         }
 773
 774         /*
 775          * For fabrics, PSDT field shall describe metadata pointer (MPTR) that
 776          * contains an address of a single contiguous physical buffer that is
 777          * byte aligned.
 778          */
 779         if (unlikely((flags & NVME_CMD_SGL_ALL) != NVME_CMD_SGL_METABUF)) {
 780                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 781                 goto fail;
 782         }
 783
 784         if (unlikely(!req->sq->ctrl))
 785                 /* will return an error for any Non-connect command: */
 786                 status = nvmet_parse_connect_cmd(req);
 787         else if (likely(req->sq->qid != 0))
 788                 status = nvmet_parse_io_cmd(req);
 789         else if (req->cmd->common.opcode == nvme_fabrics_command)
 790                 status = nvmet_parse_fabrics_cmd(req);
 791         else if (req->sq->ctrl->subsys->type == NVME_NQN_DISC)
 792                 status = nvmet_parse_discovery_cmd(req);
 793         else
 794                 status = nvmet_parse_admin_cmd(req);
 795
 796         if (status)
 797                 goto fail;
 798
 799         if (unlikely(!percpu_ref_tryget_live(&sq->ref))) {
 800                 status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
 801                 goto fail;
 802         }
 803
 804         return true;
 805
 806 fail:
 807         __nvmet_req_complete(req, status);
 808         return false;
 809 }
 810 EXPORT_SYMBOL_GPL(nvmet_req_init);
 811
 812 void nvmet_req_uninit(struct nvmet_req *req)
 813 {
 814         percpu_ref_put(&req->sq->ref);
 815         if (req->ns)
 816                 nvmet_put_namespace(req->ns);
 817 }
 818 EXPORT_SYMBOL_GPL(nvmet_req_uninit);
 819
 820 void nvmet_req_execute(struct nvmet_req *req)
 821 {
 822         if (unlikely(req->data_len != req->transfer_len))
 823                 nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR);
 824         else
 825                 req->execute(req);
 826 }
 827 EXPORT_SYMBOL_GPL(nvmet_req_execute);
 828
 829 int nvmet_req_alloc_sgl(struct nvmet_req *req)
 830 {
 831         struct pci_dev *p2p_dev = NULL;
 832
 833         if (IS_ENABLED(CONFIG_PCI_P2PDMA)) {
 834                 if (req->sq->ctrl && req->ns)
 835                         p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map,
 836                                                     req->ns->nsid);
 837
 838                 req->p2p_dev = NULL;
 839                 if (req->sq->qid && p2p_dev) {
 840                         req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt,
 841                                                        req->transfer_len);
 842                         if (req->sg) {
 843                                 req->p2p_dev = p2p_dev;
 844                                 return 0;
 845                         }
 846                 }
 847
 848                 /*
 849                  * If no P2P memory was available we fallback to using
 850                  * regular memory
 851                  */
 852         }
 853
 854         req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt);
 855         if (!req->sg)
 856                 return -ENOMEM;
 857
 858         return 0;
 859 }
 860 EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl);
 861
 862 void nvmet_req_free_sgl(struct nvmet_req *req)
 863 {
 864         if (req->p2p_dev)
 865                 pci_p2pmem_free_sgl(req->p2p_dev, req->sg);
 866         else
 867                 sgl_free(req->sg);
 868
 869         req->sg = NULL;
 870         req->sg_cnt = 0;
 871 }
 872 EXPORT_SYMBOL_GPL(nvmet_req_free_sgl);
 873
 874 static inline bool nvmet_cc_en(u32 cc)
 875 {
 876         return (cc >> NVME_CC_EN_SHIFT) & 0x1;
 877 }
 878
 879 static inline u8 nvmet_cc_css(u32 cc)
 880 {
 881         return (cc >> NVME_CC_CSS_SHIFT) & 0x7;
 882 }
 883
 884 static inline u8 nvmet_cc_mps(u32 cc)
 885 {
 886         return (cc >> NVME_CC_MPS_SHIFT) & 0xf;
 887 }
 888
 889 static inline u8 nvmet_cc_ams(u32 cc)
 890 {
 891         return (cc >> NVME_CC_AMS_SHIFT) & 0x7;
 892 }
 893
 894 static inline u8 nvmet_cc_shn(u32 cc)
 895 {
 896         return (cc >> NVME_CC_SHN_SHIFT) & 0x3;
 897 }
 898
 899 static inline u8 nvmet_cc_iosqes(u32 cc)
 900 {
 901         return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf;
 902 }
 903
 904 static inline u8 nvmet_cc_iocqes(u32 cc)
 905 {
 906         return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf;
 907 }
 908
 909 static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 910 {
 911         lockdep_assert_held(&ctrl->lock);
 912
 913         if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
 914             nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
 915             nvmet_cc_mps(ctrl->cc) != 0 ||
 916             nvmet_cc_ams(ctrl->cc) != 0 ||
 917             nvmet_cc_css(ctrl->cc) != 0) {
 918                 ctrl->csts = NVME_CSTS_CFS;
 919                 return;
 920         }
 921
 922         ctrl->csts = NVME_CSTS_RDY;
 923
 924         /*
 925          * Controllers that are not yet enabled should not really enforce the
 926          * keep alive timeout, but we still want to track a timeout and cleanup
 927          * in case a host died before it enabled the controller.  Hence, simply
 928          * reset the keep alive timer when the controller is enabled.
 929          */
 930         mod_delayed_work(system_wq, &ctrl->ka_work, ctrl->kato * HZ);
 931 }
 932
 933 static void nvmet_clear_ctrl(struct nvmet_ctrl *ctrl)
 934 {
 935         lockdep_assert_held(&ctrl->lock);
 936
 937         /* XXX: tear down queues? */
 938         ctrl->csts &= ~NVME_CSTS_RDY;
 939         ctrl->cc = 0;
 940 }
 941
 942 void nvmet_update_cc(struct nvmet_ctrl *ctrl, u32 new)
 943 {
 944         u32 old;
 945
 946         mutex_lock(&ctrl->lock);
 947         old = ctrl->cc;
 948         ctrl->cc = new;
 949
 950         if (nvmet_cc_en(new) && !nvmet_cc_en(old))
 951                 nvmet_start_ctrl(ctrl);
 952         if (!nvmet_cc_en(new) && nvmet_cc_en(old))
 953                 nvmet_clear_ctrl(ctrl);
 954         if (nvmet_cc_shn(new) && !nvmet_cc_shn(old)) {
 955                 nvmet_clear_ctrl(ctrl);
 956                 ctrl->csts |= NVME_CSTS_SHST_CMPLT;
 957         }
 958         if (!nvmet_cc_shn(new) && nvmet_cc_shn(old))
 959                 ctrl->csts &= ~NVME_CSTS_SHST_CMPLT;
 960         mutex_unlock(&ctrl->lock);
 961 }
 962
 963 static void nvmet_init_cap(struct nvmet_ctrl *ctrl)
 964 {
 965         /* command sets supported: NVMe command set: */
 966         ctrl->cap = (1ULL << 37);
 967         /* CC.EN timeout in 500msec units: */
 968         ctrl->cap |= (15ULL << 24);
 969         /* maximum queue entries supported: */
 970         ctrl->cap |= NVMET_QUEUE_SIZE - 1;
 971 }
 972
 973 u16 nvmet_ctrl_find_get(const char *subsysnqn, const char *hostnqn, u16 cntlid,
 974                 struct nvmet_req *req, struct nvmet_ctrl **ret)
 975 {
 976         struct nvmet_subsys *subsys;
 977         struct nvmet_ctrl *ctrl;
 978         u16 status = 0;
 979
 980         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
 981         if (!subsys) {
 982                 pr_warn("connect request for invalid subsystem %s!\n",
 983                         subsysnqn);
 984                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
 985                 return NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
 986         }
 987
 988         mutex_lock(&subsys->lock);
 989         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
 990                 if (ctrl->cntlid == cntlid) {
 991                         if (strncmp(hostnqn, ctrl->hostnqn, NVMF_NQN_SIZE)) {
 992                                 pr_warn("hostnqn mismatch.\n");
 993                                 continue;
 994                         }
 995                         if (!kref_get_unless_zero(&ctrl->ref))
 996                                 continue;
 997
 998                         *ret = ctrl;
 999                         goto out;
1000                 }
1001         }
1002
1003         pr_warn("could not find controller %d for subsys %s / host %s\n",
1004                 cntlid, subsysnqn, hostnqn);
1005         req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(cntlid);
1006         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1007
1008 out:
1009         mutex_unlock(&subsys->lock);
1010         nvmet_subsys_put(subsys);
1011         return status;
1012 }
1013
1014 u16 nvmet_check_ctrl_status(struct nvmet_req *req, struct nvme_command *cmd)
1015 {
1016         if (unlikely(!(req->sq->ctrl->cc & NVME_CC_ENABLE))) {
1017                 pr_err("got cmd %d while CC.EN == 0 on qid = %d\n",
1018                        cmd->common.opcode, req->sq->qid);
1019                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1020         }
1021
1022         if (unlikely(!(req->sq->ctrl->csts & NVME_CSTS_RDY))) {
1023                 pr_err("got cmd %d while CSTS.RDY == 0 on qid = %d\n",
1024                        cmd->common.opcode, req->sq->qid);
1025                 return NVME_SC_CMD_SEQ_ERROR | NVME_SC_DNR;
1026         }
1027         return 0;
1028 }
1029
1030 static bool __nvmet_host_allowed(struct nvmet_subsys *subsys,
1031                 const char *hostnqn)
1032 {
1033         struct nvmet_host_link *p;
1034
1035         if (subsys->allow_any_host)
1036                 return true;
1037
1038         list_for_each_entry(p, &subsys->hosts, entry) {
1039                 if (!strcmp(nvmet_host_name(p->host), hostnqn))
1040                         return true;
1041         }
1042
1043         return false;
1044 }
1045
1046 static bool nvmet_host_discovery_allowed(struct nvmet_req *req,
1047                 const char *hostnqn)
1048 {
1049         struct nvmet_subsys_link *s;
1050
1051         list_for_each_entry(s, &req->port->subsystems, entry) {
1052                 if (__nvmet_host_allowed(s->subsys, hostnqn))
1053                         return true;
1054         }
1055
1056         return false;
1057 }
1058
1059 bool nvmet_host_allowed(struct nvmet_req *req, struct nvmet_subsys *subsys,
1060                 const char *hostnqn)
1061 {
1062         lockdep_assert_held(&nvmet_config_sem);
1063
1064         if (subsys->type == NVME_NQN_DISC)
1065                 return nvmet_host_discovery_allowed(req, hostnqn);
1066         else
1067                 return __nvmet_host_allowed(subsys, hostnqn);
1068 }
1069
1070 /*
1071  * Note: ctrl->subsys->lock should be held when calling this function
1072  */
1073 static void nvmet_setup_p2p_ns_map(struct nvmet_ctrl *ctrl,
1074                 struct nvmet_req *req)
1075 {
1076         struct nvmet_ns *ns;
1077
1078         if (!req->p2p_client)
1079                 return;
1080
1081         ctrl->p2p_client = get_device(req->p2p_client);
1082
1083         list_for_each_entry_rcu(ns, &ctrl->subsys->namespaces, dev_link)
1084                 nvmet_p2pmem_ns_add_p2p(ctrl, ns);
1085 }
1086
1087 /*
1088  * Note: ctrl->subsys->lock should be held when calling this function
1089  */
1090 static void nvmet_release_p2p_ns_map(struct nvmet_ctrl *ctrl)
1091 {
1092         struct radix_tree_iter iter;
1093         void __rcu **slot;
1094
1095         radix_tree_for_each_slot(slot, &ctrl->p2p_ns_map, &iter, 0)
1096                 pci_dev_put(radix_tree_deref_slot(slot));
1097
1098         put_device(ctrl->p2p_client);
1099 }
1100
1101 u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn,
1102                 struct nvmet_req *req, u32 kato, struct nvmet_ctrl **ctrlp)
1103 {
1104         struct nvmet_subsys *subsys;
1105         struct nvmet_ctrl *ctrl;
1106         int ret;
1107         u16 status;
1108
1109         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1110         subsys = nvmet_find_get_subsys(req->port, subsysnqn);
1111         if (!subsys) {
1112                 pr_warn("connect request for invalid subsystem %s!\n",
1113                         subsysnqn);
1114                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(subsysnqn);
1115                 goto out;
1116         }
1117
1118         status = NVME_SC_CONNECT_INVALID_PARAM | NVME_SC_DNR;
1119         down_read(&nvmet_config_sem);
1120         if (!nvmet_host_allowed(req, subsys, hostnqn)) {
1121                 pr_info("connect by host %s for subsystem %s not allowed\n",
1122                         hostnqn, subsysnqn);
1123                 req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn);
1124                 up_read(&nvmet_config_sem);
1125                 status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR;
1126                 goto out_put_subsystem;
1127         }
1128         up_read(&nvmet_config_sem);
1129
1130         status = NVME_SC_INTERNAL;
1131         ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
1132         if (!ctrl)
1133                 goto out_put_subsystem;
1134         mutex_init(&ctrl->lock);
1135
1136         nvmet_init_cap(ctrl);
1137
1138         ctrl->port = req->port;
1139
1140         INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
1141         INIT_LIST_HEAD(&ctrl->async_events);
1142         INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
1143
1144         memcpy(ctrl->subsysnqn, subsysnqn, NVMF_NQN_SIZE);
1145         memcpy(ctrl->hostnqn, hostnqn, NVMF_NQN_SIZE);
1146
1147         kref_init(&ctrl->ref);
1148         ctrl->subsys = subsys;
1149         WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
1150
1151         ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
1152                         sizeof(__le32), GFP_KERNEL);
1153         if (!ctrl->changed_ns_list)
1154                 goto out_free_ctrl;
1155
1156         ctrl->cqs = kcalloc(subsys->max_qid + 1,
1157                         sizeof(struct nvmet_cq *),
1158                         GFP_KERNEL);
1159         if (!ctrl->cqs)
1160                 goto out_free_changed_ns_list;
1161
1162         ctrl->sqs = kcalloc(subsys->max_qid + 1,
1163                         sizeof(struct nvmet_sq *),
1164                         GFP_KERNEL);
1165         if (!ctrl->sqs)
1166                 goto out_free_cqs;
1167
1168         ret = ida_simple_get(&cntlid_ida,
1169                              NVME_CNTLID_MIN, NVME_CNTLID_MAX,
1170                              GFP_KERNEL);
1171         if (ret < 0) {
1172                 status = NVME_SC_CONNECT_CTRL_BUSY | NVME_SC_DNR;
1173                 goto out_free_sqs;
1174         }
1175         ctrl->cntlid = ret;
1176
1177         ctrl->ops = req->ops;
1178         if (ctrl->subsys->type == NVME_NQN_DISC) {
1179                 /* Don't accept keep-alive timeout for discovery controllers */
1180                 if (kato) {
1181                         status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
1182                         goto out_remove_ida;
1183                 }
1184
1185                 /*
1186                  * Discovery controllers use some arbitrary high value in order
1187                  * to cleanup stale discovery sessions
1188                  *
1189                  * From the latest base diff RC:
1190                  * "The Keep Alive command is not supported by
1191                  * Discovery controllers. A transport may specify a
1192                  * fixed Discovery controller activity timeout value
1193                  * (e.g., 2 minutes).  If no commands are received
1194                  * by a Discovery controller within that time
1195                  * period, the controller may perform the
1196                  * actions for Keep Alive Timer expiration".
1197                  */
1198                 ctrl->kato = NVMET_DISC_KATO;
1199         } else {
1200                 /* keep-alive timeout in seconds */
1201                 ctrl->kato = DIV_ROUND_UP(kato, 1000);
1202         }
1203         nvmet_start_keep_alive_timer(ctrl);
1204
1205         mutex_lock(&subsys->lock);
1206         list_add_tail(&ctrl->subsys_entry, &subsys->ctrls);
1207         nvmet_setup_p2p_ns_map(ctrl, req);
1208         mutex_unlock(&subsys->lock);
1209
1210         *ctrlp = ctrl;
1211         return 0;
1212
1213 out_remove_ida:
1214         ida_simple_remove(&cntlid_ida, ctrl->cntlid);
1215 out_free_sqs:
1216         kfree(ctrl->sqs);
1217 out_free_cqs:
1218         kfree(ctrl->cqs);
1219 out_free_changed_ns_list:
1220         kfree(ctrl->changed_ns_list);
1221 out_free_ctrl:
1222         kfree(ctrl);
1223 out_put_subsystem:
1224         nvmet_subsys_put(subsys);
1225 out:
1226         return status;
1227 }
1228
1229 static void nvmet_ctrl_free(struct kref *ref)
1230 {
1231         struct nvmet_ctrl *ctrl = container_of(ref, struct nvmet_ctrl, ref);
1232         struct nvmet_subsys *subsys = ctrl->subsys;
1233
1234         mutex_lock(&subsys->lock);
1235         nvmet_release_p2p_ns_map(ctrl);
1236         list_del(&ctrl->subsys_entry);
1237         mutex_unlock(&subsys->lock);
1238
1239         nvmet_stop_keep_alive_timer(ctrl);
1240
1241         flush_work(&ctrl->async_event_work);
1242         cancel_work_sync(&ctrl->fatal_err_work);
1243
1244         ida_simple_remove(&cntlid_ida, ctrl->cntlid);
1245
1246         kfree(ctrl->sqs);
1247         kfree(ctrl->cqs);
1248         kfree(ctrl->changed_ns_list);
1249         kfree(ctrl);
1250
1251         nvmet_subsys_put(subsys);
1252 }
1253
1254 void nvmet_ctrl_put(struct nvmet_ctrl *ctrl)
1255 {
1256         kref_put(&ctrl->ref, nvmet_ctrl_free);
1257 }
1258
1259 static void nvmet_fatal_error_handler(struct work_struct *work)
1260 {
1261         struct nvmet_ctrl *ctrl =
1262                         container_of(work, struct nvmet_ctrl, fatal_err_work);
1263
1264         pr_err("ctrl %d fatal error occurred!\n", ctrl->cntlid);
1265         ctrl->ops->delete_ctrl(ctrl);
1266 }
1267
1268 void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl)
1269 {
1270         mutex_lock(&ctrl->lock);
1271         if (!(ctrl->csts & NVME_CSTS_CFS)) {
1272                 ctrl->csts |= NVME_CSTS_CFS;
1273                 INIT_WORK(&ctrl->fatal_err_work, nvmet_fatal_error_handler);
1274                 schedule_work(&ctrl->fatal_err_work);
1275         }
1276         mutex_unlock(&ctrl->lock);
1277 }
1278 EXPORT_SYMBOL_GPL(nvmet_ctrl_fatal_error);
1279
1280 static struct nvmet_subsys *nvmet_find_get_subsys(struct nvmet_port *port,
1281                 const char *subsysnqn)
1282 {
1283         struct nvmet_subsys_link *p;
1284
1285         if (!port)
1286                 return NULL;
1287
1288         if (!strcmp(NVME_DISC_SUBSYS_NAME, subsysnqn)) {
1289                 if (!kref_get_unless_zero(&nvmet_disc_subsys->ref))
1290                         return NULL;
1291                 return nvmet_disc_subsys;
1292         }
1293
1294         down_read(&nvmet_config_sem);
1295         list_for_each_entry(p, &port->subsystems, entry) {
1296                 if (!strncmp(p->subsys->subsysnqn, subsysnqn,
1297                                 NVMF_NQN_SIZE)) {
1298                         if (!kref_get_unless_zero(&p->subsys->ref))
1299                                 break;
1300                         up_read(&nvmet_config_sem);
1301                         return p->subsys;
1302                 }
1303         }
1304         up_read(&nvmet_config_sem);
1305         return NULL;
1306 }
1307
1308 struct nvmet_subsys *nvmet_subsys_alloc(const char *subsysnqn,
1309                 enum nvme_subsys_type type)
1310 {
1311         struct nvmet_subsys *subsys;
1312
1313         subsys = kzalloc(sizeof(*subsys), GFP_KERNEL);
1314         if (!subsys)
1315                 return NULL;
1316
1317         subsys->ver = NVME_VS(1, 3, 0); /* NVMe 1.3.0 */
1318         /* generate a random serial number as our controllers are ephemeral: */
1319         get_random_bytes(&subsys->serial, sizeof(subsys->serial));
1320
1321         switch (type) {
1322         case NVME_NQN_NVME:
1323                 subsys->max_qid = NVMET_NR_QUEUES;
1324                 break;
1325         case NVME_NQN_DISC:
1326                 subsys->max_qid = 0;
1327                 break;
1328         default:
1329                 pr_err("%s: Unknown Subsystem type - %d\n", __func__, type);
1330                 kfree(subsys);
1331                 return NULL;
1332         }
1333         subsys->type = type;
1334         subsys->subsysnqn = kstrndup(subsysnqn, NVMF_NQN_SIZE,
1335                         GFP_KERNEL);
1336         if (!subsys->subsysnqn) {
1337                 kfree(subsys);
1338                 return NULL;
1339         }
1340
1341         kref_init(&subsys->ref);
1342
1343         mutex_init(&subsys->lock);
1344         INIT_LIST_HEAD(&subsys->namespaces);
1345         INIT_LIST_HEAD(&subsys->ctrls);
1346         INIT_LIST_HEAD(&subsys->hosts);
1347
1348         return subsys;
1349 }
1350
1351 static void nvmet_subsys_free(struct kref *ref)
1352 {
1353         struct nvmet_subsys *subsys =
1354                 container_of(ref, struct nvmet_subsys, ref);
1355
1356         WARN_ON_ONCE(!list_empty(&subsys->namespaces));
1357
1358         kfree(subsys->subsysnqn);
1359         kfree(subsys);
1360 }
1361
1362 void nvmet_subsys_del_ctrls(struct nvmet_subsys *subsys)
1363 {
1364         struct nvmet_ctrl *ctrl;
1365
1366         mutex_lock(&subsys->lock);
1367         list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry)
1368                 ctrl->ops->delete_ctrl(ctrl);
1369         mutex_unlock(&subsys->lock);
1370 }
1371
1372 void nvmet_subsys_put(struct nvmet_subsys *subsys)
1373 {
1374         kref_put(&subsys->ref, nvmet_subsys_free);
1375 }
1376
1377 static int __init nvmet_init(void)
1378 {
1379         int error;
1380
1381         nvmet_ana_group_enabled[NVMET_DEFAULT_ANA_GRPID] = 1;
1382
1383         buffered_io_wq = alloc_workqueue("nvmet-buffered-io-wq",
1384                         WQ_MEM_RECLAIM, 0);
1385         if (!buffered_io_wq) {
1386                 error = -ENOMEM;
1387                 goto out;
1388         }
1389
1390         error = nvmet_init_discovery();
1391         if (error)
1392                 goto out_free_work_queue;
1393
1394         error = nvmet_init_configfs();
1395         if (error)
1396                 goto out_exit_discovery;
1397         return 0;
1398
1399 out_exit_discovery:
1400         nvmet_exit_discovery();
1401 out_free_work_queue:
1402         destroy_workqueue(buffered_io_wq);
1403 out:
1404         return error;
1405 }
1406
1407 static void __exit nvmet_exit(void)
1408 {
1409         nvmet_exit_configfs();
1410         nvmet_exit_discovery();
1411         ida_destroy(&cntlid_ida);
1412         destroy_workqueue(buffered_io_wq);
1413
1414         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_entry) != 1024);
1415         BUILD_BUG_ON(sizeof(struct nvmf_disc_rsp_page_hdr) != 1024);
1416 }
1417
1418 module_init(nvmet_init);
1419 module_exit(nvmet_exit);
1420
1421 MODULE_LICENSE("GPL v2");