Merge branch 'for-linus' of git://git.kernel.dk/linux-block
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index b7a84c523475739ece377c884bb8da3c22b91e4d..8569ee7712693a880d8ece842f3b2ad9f1108fc9 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -35,7 +35,6 @@
 
 #include "nvme.h"
 
-#define NVME_Q_DEPTH           1024
 #define SQ_SIZE(depth)         (depth * sizeof(struct nvme_command))
 #define CQ_SIZE(depth)         (depth * sizeof(struct nvme_completion))
 
@@ -57,6 +56,16 @@ module_param(max_host_mem_size_mb, uint, 0444);
 MODULE_PARM_DESC(max_host_mem_size_mb,
        "Maximum Host Memory Buffer (HMB) size per controller (in MiB)");
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp);
+static const struct kernel_param_ops io_queue_depth_ops = {
+       .set = io_queue_depth_set,
+       .get = param_get_int,
+};
+
+static int io_queue_depth = 1024;
+module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644);
+MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2");
+
 struct nvme_dev;
 struct nvme_queue;
 
@@ -74,7 +83,6 @@ struct nvme_dev {
        struct device *dev;
        struct dma_pool *prp_page_pool;
        struct dma_pool *prp_small_pool;
-       unsigned queue_count;
        unsigned online_queues;
        unsigned max_qid;
        int q_depth;
@@ -105,6 +113,17 @@ struct nvme_dev {
        void **host_mem_desc_bufs;
 };
 
+static int io_queue_depth_set(const char *val, const struct kernel_param *kp)
+{
+       int n = 0, ret;
+
+       ret = kstrtoint(val, 10, &n);
+       if (ret != 0 || n < 2)
+               return -EINVAL;
+
+       return param_set_int(val, kp);
+}
+
 static inline unsigned int sq_idx(unsigned int qid, u32 stride)
 {
        return qid * 2 * stride;
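
Taken together, the two hunks above add a validated module parameter: module_param_cb() routes both the boot-line option and runtime writes (mode 0644 exposes /sys/module/nvme/parameters/io_queue_depth) through the .set hook, which rejects anything below 2, the smallest queue size the NVMe specification permits, before delegating the actual store to the stock param_set_int(). The forward declaration exists only because the ops table must be defined before the setter's body appears. A minimal self-contained sketch of the same pattern, with hypothetical demo_* names:

        #include <linux/kernel.h>
        #include <linux/module.h>
        #include <linux/moduleparam.h>

        static int demo_depth = 1024;

        static int demo_depth_set(const char *val, const struct kernel_param *kp)
        {
                int n = 0, ret;

                /* Parse once purely to validate the range... */
                ret = kstrtoint(val, 10, &n);
                if (ret != 0 || n < 2)
                        return -EINVAL;

                /* ...then let the stock helper parse again and store it. */
                return param_set_int(val, kp);
        }

        static const struct kernel_param_ops demo_depth_ops = {
                .set = demo_depth_set,
                .get = param_get_int,
        };

        module_param_cb(demo_depth, &demo_depth_ops, &demo_depth, 0644);
        MODULE_PARM_DESC(demo_depth, "demo queue depth, must be >= 2");
        MODULE_LICENSE("GPL");
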
@@ -520,7 +539,7 @@ static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
 }
 #endif
 
-static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
+static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 {
        struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
        struct dma_pool *pool;
@@ -537,7 +556,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 
        length -= (page_size - offset);
        if (length <= 0)
-               return true;
+               return BLK_STS_OK;
 
        dma_len -= (page_size - offset);
        if (dma_len) {
@@ -550,7 +569,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
 
        if (length <= page_size) {
                iod->first_dma = dma_addr;
-               return true;
+               return BLK_STS_OK;
        }
 
        nprps = DIV_ROUND_UP(length, page_size);
@@ -566,7 +585,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
        if (!prp_list) {
                iod->first_dma = dma_addr;
                iod->npages = -1;
-               return false;
+               return BLK_STS_RESOURCE;
        }
        list[0] = prp_list;
        iod->first_dma = prp_dma;
@@ -576,7 +595,7 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
                        __le64 *old_prp_list = prp_list;
                        prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
                        if (!prp_list)
-                               return false;
+                               return BLK_STS_RESOURCE;
                        list[iod->npages++] = prp_list;
                        prp_list[0] = old_prp_list[i - 1];
                        old_prp_list[i - 1] = cpu_to_le64(prp_dma);
@@ -590,13 +609,29 @@ static bool nvme_setup_prps(struct nvme_dev *dev, struct request *req)
                        break;
                if (dma_len > 0)
                        continue;
-               BUG_ON(dma_len < 0);
+               if (unlikely(dma_len < 0))
+                       goto bad_sgl;
                sg = sg_next(sg);
                dma_addr = sg_dma_address(sg);
                dma_len = sg_dma_len(sg);
        }
 
-       return true;
+       return BLK_STS_OK;
+
+ bad_sgl:
+       if (WARN_ONCE(1, "Invalid SGL for payload:%d nents:%d\n",
+                               blk_rq_payload_bytes(req), iod->nents)) {
+               for_each_sg(iod->sg, sg, iod->nents, i) {
+                       dma_addr_t phys = sg_phys(sg);
+                       pr_warn("sg[%d] phys_addr:%pad offset:%d length:%d "
+                              "dma_address:%pad dma_length:%d\n", i, &phys,
+                                       sg->offset, sg->length,
+                                       &sg_dma_address(sg),
+                                       sg_dma_len(sg));
+               }
+       }
+       return BLK_STS_IOERR;
+
 }
 
 static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
@@ -618,7 +653,8 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
                                DMA_ATTR_NO_WARN))
                goto out;
 
-       if (!nvme_setup_prps(dev, req))
+       ret = nvme_setup_prps(dev, req);
+       if (ret != BLK_STS_OK)
                goto out_unmap;
 
        ret = BLK_STS_IOERR;
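
A bool return from nvme_setup_prps() could only say pass/fail; returning blk_status_t keeps the two failure modes distinct all the way up to blk-mq, which retries a request whose ->queue_rq() returns BLK_STS_RESOURCE (transient PRP-pool exhaustion) but completes it with an error on BLK_STS_IOERR (a malformed scatterlist). A hedged sketch of a caller preserving that distinction; demo_map() is illustrative, not the driver's actual code:

        #include <linux/blkdev.h>
        #include <linux/blk_types.h>

        static blk_status_t demo_map(struct nvme_dev *dev, struct request *req)
        {
                blk_status_t ret;

                ret = nvme_setup_prps(dev, req);
                if (ret != BLK_STS_OK)  /* BLK_STS_OK is 0 */
                        return ret;     /* RESOURCE => retried, IOERR => failed */

                return BLK_STS_OK;
        }
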
@@ -1099,9 +1135,9 @@ static void nvme_free_queues(struct nvme_dev *dev, int lowest)
 {
        int i;
 
-       for (i = dev->queue_count - 1; i >= lowest; i--) {
+       for (i = dev->ctrl.queue_count - 1; i >= lowest; i--) {
                struct nvme_queue *nvmeq = dev->queues[i];
-               dev->queue_count--;
+               dev->ctrl.queue_count--;
                dev->queues[i] = NULL;
                nvme_free_queue(nvmeq);
        }
@@ -1126,7 +1162,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
        spin_unlock_irq(&nvmeq->q_lock);
 
        if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
-               blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
+               blk_mq_quiesce_queue(nvmeq->dev->ctrl.admin_q);
 
        pci_free_irq(to_pci_dev(nvmeq->dev->dev), vector, nvmeq);
 
@@ -1145,8 +1181,7 @@ static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
        if (shutdown)
                nvme_shutdown_ctrl(&dev->ctrl);
        else
-               nvme_disable_ctrl(&dev->ctrl, lo_hi_readq(
-                                               dev->bar + NVME_REG_CAP));
+               nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
 
        spin_lock_irq(&nvmeq->q_lock);
        nvme_process_cq(nvmeq);
@@ -1221,7 +1256,7 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
        nvmeq->qid = qid;
        nvmeq->cq_vector = -1;
        dev->queues[qid] = nvmeq;
-       dev->queue_count++;
+       dev->ctrl.queue_count++;
 
        return nvmeq;
 
@@ -1317,7 +1352,7 @@ static void nvme_dev_remove_admin(struct nvme_dev *dev)
                 * user requests may be waiting on a stopped queue. Start the
                 * queue to flush these to completion.
                 */
-               blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+               blk_mq_unquiesce_queue(dev->ctrl.admin_q);
                blk_cleanup_queue(dev->ctrl.admin_q);
                blk_mq_free_tag_set(&dev->admin_tagset);
        }
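
The quiesce conversions here and in nvme_suspend_queue() above swap in a stronger primitive: blk_mq_quiesce_queue() not only holds off new dispatches but also waits, via an RCU/SRCU grace period, until every in-flight ->queue_rq() invocation has returned, and blk_mq_unquiesce_queue() resumes dispatch directly. The old stop/start_stopped helpers gave neither guarantee. The bracket in sketch form, with a hypothetical hardware-teardown step:

        blk_mq_quiesce_queue(q);        /* no ->queue_rq() runs past this point */
        disable_hw_side(q);             /* hypothetical: safe, dispatch is drained */
        blk_mq_unquiesce_queue(q);      /* dispatch resumes */
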
@@ -1354,7 +1389,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev)
                        return -ENODEV;
                }
        } else
-               blk_mq_start_stopped_hw_queues(dev->ctrl.admin_q, true);
+               blk_mq_unquiesce_queue(dev->ctrl.admin_q);
 
        return 0;
 }
@@ -1385,11 +1420,10 @@ static int nvme_remap_bar(struct nvme_dev *dev, unsigned long size)
        return 0;
 }
 
-static int nvme_configure_admin_queue(struct nvme_dev *dev)
+static int nvme_pci_configure_admin_queue(struct nvme_dev *dev)
 {
        int result;
        u32 aqa;
-       u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
        struct nvme_queue *nvmeq;
 
        result = nvme_remap_bar(dev, db_bar_size(dev, 0));
@@ -1397,13 +1431,13 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
                return result;
 
        dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
-                                               NVME_CAP_NSSRC(cap) : 0;
+                               NVME_CAP_NSSRC(dev->ctrl.cap) : 0;
 
        if (dev->subsystem &&
            (readl(dev->bar + NVME_REG_CSTS) & NVME_CSTS_NSSRO))
                writel(NVME_CSTS_NSSRO, dev->bar + NVME_REG_CSTS);
 
-       result = nvme_disable_ctrl(&dev->ctrl, cap);
+       result = nvme_disable_ctrl(&dev->ctrl, dev->ctrl.cap);
        if (result < 0)
                return result;
 
@@ -1422,7 +1456,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
        lo_hi_writeq(nvmeq->sq_dma_addr, dev->bar + NVME_REG_ASQ);
        lo_hi_writeq(nvmeq->cq_dma_addr, dev->bar + NVME_REG_ACQ);
 
-       result = nvme_enable_ctrl(&dev->ctrl, cap);
+       result = nvme_enable_ctrl(&dev->ctrl, dev->ctrl.cap);
        if (result)
                return result;
 
@@ -1441,7 +1475,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
        unsigned i, max;
        int ret = 0;
 
-       for (i = dev->queue_count; i <= dev->max_qid; i++) {
+       for (i = dev->ctrl.queue_count; i <= dev->max_qid; i++) {
                /* vector == qid - 1, match nvme_create_queue */
                if (!nvme_alloc_queue(dev, i, dev->q_depth,
                     pci_irq_get_node(to_pci_dev(dev->dev), i - 1))) {
@@ -1450,7 +1484,7 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
                }
        }
 
-       max = min(dev->max_qid, dev->queue_count - 1);
+       max = min(dev->max_qid, dev->ctrl.queue_count - 1);
        for (i = dev->online_queues; i <= max; i++) {
                ret = nvme_create_queue(dev->queues[i], i);
                if (ret)
@@ -1585,9 +1619,10 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 static int nvme_alloc_host_mem(struct nvme_dev *dev, u64 min, u64 preferred)
 {
        struct nvme_host_mem_buf_desc *descs;
-       u32 chunk_size, max_entries, i = 0;
+       u32 chunk_size, max_entries;
+       int i = 0;
        void **bufs;
-       u64 size, tmp;
+       u64 size = 0, tmp;
 
        /* start big and work our way down */
        chunk_size = min(preferred, (u64)PAGE_SIZE << MAX_ORDER);
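
The counter's type change is not cosmetic: this function's error path frees already-allocated chunks in a descending loop, and a --i >= 0 test can never become false for an unsigned type, because once a u32 reaches 0 the next decrement wraps to UINT_MAX. The size = 0 initialization likewise keeps the variable defined even if the allocation loop makes no pass. The signedness pitfall reduced to its essence, with free_chunk() as a hypothetical stand-in:

        u32 i = nr_allocated;   /* buggy: the test below is always true */
        while (--i >= 0)        /* unsigned wrap-around; never terminates */
                free_chunk(i);

        int j = nr_allocated;   /* fixed: exits once j would go negative */
        while (--j >= 0)
                free_chunk(j);
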
@@ -1866,7 +1901,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
 
 static int nvme_pci_enable(struct nvme_dev *dev)
 {
-       u64 cap;
        int result = -ENOMEM;
        struct pci_dev *pdev = to_pci_dev(dev->dev);
 
@@ -1893,10 +1927,11 @@ static int nvme_pci_enable(struct nvme_dev *dev)
        if (result < 0)
                return result;
 
-       cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
+       dev->ctrl.cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
 
-       dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH);
-       dev->db_stride = 1 << NVME_CAP_STRIDE(cap);
+       dev->q_depth = min_t(int, NVME_CAP_MQES(dev->ctrl.cap) + 1,
+                               io_queue_depth);
+       dev->db_stride = 1 << NVME_CAP_STRIDE(dev->ctrl.cap);
        dev->dbs = dev->bar + 4096;
 
        /*
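
CAP is now read from the BAR exactly once and cached in dev->ctrl.cap; every later consumer (controller disable/enable, admin queue setup, the quirk in the next hunk) uses the cached copy. Two derived fields deserve annotation, since the NVMe specification encodes both compactly: CAP.MQES is zero-based, and the doorbell stride is 4 << CAP.DSTRD bytes. The same computation as above, as an annotated fragment:

        u64 cap = dev->ctrl.cap;

        /* MQES is zero-based: a raw value of 0 advertises a maximum
         * queue depth of 1, effectively an unusable controller. */
        dev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, io_queue_depth);

        /* Stride between consecutive doorbells, in 32-bit register
         * units (the spec defines it as 4 << CAP.DSTRD bytes). */
        dev->db_stride = 1 << NVME_CAP_STRIDE(cap);

That zero-based encoding is exactly what the PM1725 quirk below keys on: a controller reporting MQES == 0 would otherwise be driven with a queue depth of 1, so the driver overrides it to 64.
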
@@ -1908,6 +1943,12 @@ static int nvme_pci_enable(struct nvme_dev *dev)
                dev_warn(dev->ctrl.device, "detected Apple NVMe controller, "
                        "set queue depth=%u to work around controller resets\n",
                        dev->q_depth);
+       } else if (pdev->vendor == PCI_VENDOR_ID_SAMSUNG &&
+                  (pdev->device == 0xa821 || pdev->device == 0xa822) &&
+                  NVME_CAP_MQES(dev->ctrl.cap) == 0) {
+               dev->q_depth = 64;
+               dev_err(dev->ctrl.device, "detected PM1725 NVMe controller, "
+                        "set queue depth=%u\n", dev->q_depth);
        }
 
        /*
@@ -1996,7 +2037,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
        nvme_stop_queues(&dev->ctrl);
 
        queues = dev->online_queues - 1;
-       for (i = dev->queue_count - 1; i > 0; i--)
+       for (i = dev->ctrl.queue_count - 1; i > 0; i--)
                nvme_suspend_queue(dev->queues[i]);
 
        if (dead) {
@@ -2004,7 +2045,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
                 * probe, before the admin queue is configured. Thus,
                 * queue_count can be 0 here.
                 */
-               if (dev->queue_count)
+               if (dev->ctrl.queue_count)
                        nvme_suspend_queue(dev->queues[0]);
        } else {
                nvme_disable_io_queues(dev, queues);
@@ -2094,7 +2135,7 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       result = nvme_configure_admin_queue(dev);
+       result = nvme_pci_configure_admin_queue(dev);
        if (result)
                goto out;
 
@@ -2132,15 +2173,6 @@ static void nvme_reset_work(struct work_struct *work)
        if (result)
                goto out;
 
-       /*
-        * A controller that can not execute IO typically requires user
-        * intervention to correct. For such degraded controllers, the driver
-        * should not submit commands the user did not request, so skip
-        * registering for asynchronous event notification on this condition.
-        */
-       if (dev->online_queues > 1)
-               nvme_queue_async_events(&dev->ctrl);
-
        /*
         * Keep the controller around but remove all namespaces if we don't have
         * any working I/O queue.
@@ -2161,8 +2193,7 @@ static void nvme_reset_work(struct work_struct *work)
                goto out;
        }
 
-       if (dev->online_queues > 1)
-               nvme_queue_scan(&dev->ctrl);
+       nvme_start_ctrl(&dev->ctrl);
        return;
 
  out:
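
The deleted checks and the queue_count migration throughout this patch converge here: with queue_count now living in struct nvme_ctrl, the core's nvme_start_ctrl() can apply the degraded-controller policy itself, so the PCI driver stops scheduling scans and async-event registration by hand. A paraphrase of the core-side helper for context (not code from this patch):

        void nvme_start_ctrl(struct nvme_ctrl *ctrl)
        {
                if (ctrl->kato)
                        nvme_start_keep_alive(ctrl);

                /* Same guard the PCI driver used to apply locally: a
                 * controller that cannot execute I/O gets no scan and
                 * no AEN registration. */
                if (ctrl->queue_count > 1) {
                        nvme_queue_scan(ctrl);
                        nvme_queue_async_events(ctrl);
                        nvme_start_queues(ctrl);
                }
        }
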
@@ -2268,7 +2299,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        result = nvme_dev_map(dev);
        if (result)
-               goto free;
+               goto put_pci;
 
        INIT_WORK(&dev->ctrl.reset_work, nvme_reset_work);
        INIT_WORK(&dev->remove_work, nvme_remove_dead_ctrl_work);
@@ -2277,7 +2308,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        result = nvme_setup_prp_pools(dev);
        if (result)
-               goto put_pci;
+               goto unmap;
 
        quirks |= check_dell_samsung_bug(pdev);
 
@@ -2294,9 +2325,10 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
  release_pools:
        nvme_release_prp_pools(dev);
+ unmap:
+       nvme_dev_unmap(dev);
  put_pci:
        put_device(dev->dev);
-       nvme_dev_unmap(dev);
  free:
        kfree(dev->queues);
        kfree(dev);
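
The relabelled error path restores the probe invariant that unwind labels release resources in exact reverse order of acquisition: previously a failing nvme_dev_map() jumped straight to free and leaked the device reference, while nvme_dev_unmap() ran only after put_device() had already dropped it. The general shape of the pattern, with hypothetical acquire_*/release_* helpers:

        static int demo_probe(void)
        {
                int err;

                err = acquire_a();
                if (err)
                        return err;
                err = acquire_b();
                if (err)
                        goto release_a;
                err = acquire_c();
                if (err)
                        goto release_b;
                return 0;

        release_b:                      /* last acquired, first released */
                release_b_res();
        release_a:
                release_a_res();
                return err;
        }
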
@@ -2341,11 +2373,13 @@ static void nvme_remove(struct pci_dev *pdev)
        }
 
        flush_work(&dev->ctrl.reset_work);
-       nvme_uninit_ctrl(&dev->ctrl);
+       nvme_stop_ctrl(&dev->ctrl);
+       nvme_remove_namespaces(&dev->ctrl);
        nvme_dev_disable(dev, true);
        nvme_free_host_mem(dev);
        nvme_dev_remove_admin(dev);
        nvme_free_queues(dev, 0);
+       nvme_uninit_ctrl(&dev->ctrl);
        nvme_release_prp_pools(dev);
        nvme_dev_unmap(dev);
        nvme_put_ctrl(&dev->ctrl);
@@ -2450,6 +2484,9 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_VDEVICE(INTEL, 0x0a54),
                .driver_data = NVME_QUIRK_STRIPE_SIZE |
                                NVME_QUIRK_DEALLOCATE_ZEROES, },
+       { PCI_VDEVICE(INTEL, 0x0a55),
+               .driver_data = NVME_QUIRK_STRIPE_SIZE |
+                               NVME_QUIRK_DEALLOCATE_ZEROES, },
        { PCI_VDEVICE(INTEL, 0xf1a5),   /* Intel 600P/P3100 */
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS },
        { PCI_VDEVICE(INTEL, 0x5845),   /* Qemu emulated controller */
@@ -2458,6 +2495,10 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c5f, 0x0540),   /* Memblaze Pblaze4 adapter */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x144d, 0xa821),   /* Samsung PM1725 */
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+       { PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
        { PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },