Merge tag 'for-6.8/block-2024-01-18' of git://git.kernel.dk/linux
[sfrench/cifs-2.6.git] / drivers / nvme / host / core.c
index 50818dbcfa1ae11e5e0171b88cb7d46f17366436..85ab0fcf9e886451fb070b75dcd53be4a4f88f62 100644 (file)
@@ -132,7 +132,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
        /*
         * Only queue new scan work when admin and IO queues are both alive
         */
-       if (ctrl->state == NVME_CTRL_LIVE && ctrl->tagset)
+       if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE && ctrl->tagset)
                queue_work(nvme_wq, &ctrl->scan_work);
 }
 
@@ -144,7 +144,7 @@ void nvme_queue_scan(struct nvme_ctrl *ctrl)
  */
 int nvme_try_sched_reset(struct nvme_ctrl *ctrl)
 {
-       if (ctrl->state != NVME_CTRL_RESETTING)
+       if (nvme_ctrl_state(ctrl) != NVME_CTRL_RESETTING)
                return -EBUSY;
        if (!queue_work(nvme_reset_wq, &ctrl->reset_work))
                return -EBUSY;
@@ -157,7 +157,7 @@ static void nvme_failfast_work(struct work_struct *work)
        struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvme_ctrl, failfast_work);
 
-       if (ctrl->state != NVME_CTRL_CONNECTING)
+       if (nvme_ctrl_state(ctrl) != NVME_CTRL_CONNECTING)
                return;
 
        set_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
@@ -201,7 +201,7 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl)
        ret = nvme_reset_ctrl(ctrl);
        if (!ret) {
                flush_work(&ctrl->reset_work);
-               if (ctrl->state != NVME_CTRL_LIVE)
+               if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE)
                        ret = -ENETRESET;
        }
 
@@ -503,7 +503,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
 
        spin_lock_irqsave(&ctrl->lock, flags);
 
-       old_state = ctrl->state;
+       old_state = nvme_ctrl_state(ctrl);
        switch (new_state) {
        case NVME_CTRL_LIVE:
                switch (old_state) {
@@ -571,7 +571,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        }
 
        if (changed) {
-               ctrl->state = new_state;
+               WRITE_ONCE(ctrl->state, new_state);
                wake_up_all(&ctrl->state_wq);
        }
 
@@ -579,11 +579,11 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
        if (!changed)
                return false;
 
-       if (ctrl->state == NVME_CTRL_LIVE) {
+       if (new_state == NVME_CTRL_LIVE) {
                if (old_state == NVME_CTRL_CONNECTING)
                        nvme_stop_failfast_work(ctrl);
                nvme_kick_requeue_lists(ctrl);
-       } else if (ctrl->state == NVME_CTRL_CONNECTING &&
+       } else if (new_state == NVME_CTRL_CONNECTING &&
                old_state == NVME_CTRL_RESETTING) {
                nvme_start_failfast_work(ctrl);
        }
@@ -596,7 +596,7 @@ EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
  */
 static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
 {
-       switch (ctrl->state) {
+       switch (nvme_ctrl_state(ctrl)) {
        case NVME_CTRL_NEW:
        case NVME_CTRL_LIVE:
        case NVME_CTRL_RESETTING:
@@ -621,7 +621,7 @@ bool nvme_wait_reset(struct nvme_ctrl *ctrl)
        wait_event(ctrl->state_wq,
                   nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING) ||
                   nvme_state_terminal(ctrl));
-       return ctrl->state == NVME_CTRL_RESETTING;
+       return nvme_ctrl_state(ctrl) == NVME_CTRL_RESETTING;
 }
 EXPORT_SYMBOL_GPL(nvme_wait_reset);
 
@@ -708,9 +708,11 @@ EXPORT_SYMBOL_GPL(nvme_init_request);
 blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
                struct request *rq)
 {
-       if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
-           ctrl->state != NVME_CTRL_DELETING &&
-           ctrl->state != NVME_CTRL_DEAD &&
+       enum nvme_ctrl_state state = nvme_ctrl_state(ctrl);
+
+       if (state != NVME_CTRL_DELETING_NOIO &&
+           state != NVME_CTRL_DELETING &&
+           state != NVME_CTRL_DEAD &&
            !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
            !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
                return BLK_STS_RESOURCE;
@@ -740,7 +742,7 @@ bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
                 * command, which is required to set the queue live in the
                 * appropriate states.
                 */
-               switch (ctrl->state) {
+               switch (nvme_ctrl_state(ctrl)) {
                case NVME_CTRL_CONNECTING:
                        if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
                            (req->cmd->fabrics.fctype == nvme_fabrics_type_connect ||
@@ -1200,8 +1202,16 @@ static unsigned long nvme_keep_alive_work_period(struct nvme_ctrl *ctrl)
 
 static void nvme_queue_keep_alive_work(struct nvme_ctrl *ctrl)
 {
-       queue_delayed_work(nvme_wq, &ctrl->ka_work,
-                          nvme_keep_alive_work_period(ctrl));
+       unsigned long now = jiffies;
+       unsigned long delay = nvme_keep_alive_work_period(ctrl);
+       unsigned long ka_next_check_tm = ctrl->ka_last_check_time + delay;
+
+       if (time_after(now, ka_next_check_tm))
+               delay = 0;
+       else
+               delay = ka_next_check_tm - now;
+
+       queue_delayed_work(nvme_wq, &ctrl->ka_work, delay);
 }
 
 static enum rq_end_io_ret nvme_keep_alive_end_io(struct request *rq,
@@ -1487,7 +1497,8 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
        if (id->ncap == 0) {
                /* namespace not allocated or attached */
                info->is_removed = true;
-               return -ENODEV;
+               ret = -ENODEV;
+               goto error;
        }
 
        info->anagrpid = id->anagrpid;
@@ -1505,8 +1516,10 @@ static int nvme_ns_info_from_identify(struct nvme_ctrl *ctrl,
                    !memchr_inv(ids->nguid, 0, sizeof(ids->nguid)))
                        memcpy(ids->nguid, id->nguid, sizeof(ids->nguid));
        }
+
+error:
        kfree(id);
-       return 0;
+       return ret;
 }
 
 static int nvme_ns_info_from_id_cs_indep(struct nvme_ctrl *ctrl,
@@ -1907,9 +1920,10 @@ static void nvme_update_disk_info(struct nvme_ctrl *ctrl, struct gendisk *disk,
 
        /*
         * The block layer can't support LBA sizes larger than the page size
-        * yet, so catch this early and don't allow block I/O.
+        * or smaller than a sector size yet, so catch this early and don't
+        * allow block I/O.
         */
-       if (head->lba_shift > PAGE_SHIFT) {
+       if (head->lba_shift > PAGE_SHIFT || head->lba_shift < SECTOR_SHIFT) {
                capacity = 0;
                bs = (1 << 9);
        }
@@ -2046,6 +2060,13 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
        if (ret)
                return ret;
 
+       if (id->ncap == 0) {
+               /* namespace not allocated or attached */
+               info->is_removed = true;
+               ret = -ENODEV;
+               goto error;
+       }
+
        blk_mq_freeze_queue(ns->disk->queue);
        lbaf = nvme_lbaf_index(id->flbas);
        ns->head->lba_shift = id->lbaf[lbaf].ds;
@@ -2108,6 +2129,8 @@ out:
                set_bit(NVME_NS_READY, &ns->flags);
                ret = 0;
        }
+
+error:
        kfree(id);
        return ret;
 }
@@ -2547,7 +2570,7 @@ static void nvme_set_latency_tolerance(struct device *dev, s32 val)
 
        if (ctrl->ps_max_latency_us != latency) {
                ctrl->ps_max_latency_us = latency;
-               if (ctrl->state == NVME_CTRL_LIVE)
+               if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
                        nvme_configure_apst(ctrl);
        }
 }
@@ -3246,7 +3269,7 @@ static int nvme_dev_open(struct inode *inode, struct file *file)
        struct nvme_ctrl *ctrl =
                container_of(inode->i_cdev, struct nvme_ctrl, cdev);
 
-       switch (ctrl->state) {
+       switch (nvme_ctrl_state(ctrl)) {
        case NVME_CTRL_LIVE:
                break;
        default:
@@ -3670,6 +3693,14 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, struct nvme_ns_info *info)
                goto out_unlink_ns;
 
        down_write(&ctrl->namespaces_rwsem);
+       /*
+        * Ensure that no namespaces are added to the ctrl list after the queues
+        * are frozen, thereby avoiding a deadlock between scan and reset.
+        */
+       if (test_bit(NVME_CTRL_FROZEN, &ctrl->flags)) {
+               up_write(&ctrl->namespaces_rwsem);
+               goto out_unlink_ns;
+       }
        nvme_ns_add_to_ctrl_list(ns);
        up_write(&ctrl->namespaces_rwsem);
        nvme_get_ctrl(ctrl);
@@ -3934,7 +3965,7 @@ static void nvme_scan_work(struct work_struct *work)
        int ret;
 
        /* No tagset on a live ctrl means IO queues could not be created */
-       if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset)
+       if (nvme_ctrl_state(ctrl) != NVME_CTRL_LIVE || !ctrl->tagset)
                return;
 
        /*
@@ -4004,7 +4035,7 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
         * removing the namespaces' disks; fail all the queues now to avoid
         * potentially having to clean up the failed sync later.
         */
-       if (ctrl->state == NVME_CTRL_DEAD)
+       if (nvme_ctrl_state(ctrl) == NVME_CTRL_DEAD)
                nvme_mark_namespaces_dead(ctrl);
 
        /* this is a no-op when called from the controller reset handler */
@@ -4086,7 +4117,7 @@ static void nvme_async_event_work(struct work_struct *work)
         * flushing ctrl async_event_work after changing the controller state
         * from LIVE and before freeing the admin queue.
         */
-       if (ctrl->state == NVME_CTRL_LIVE)
+       if (nvme_ctrl_state(ctrl) == NVME_CTRL_LIVE)
                ctrl->ops->submit_async_event(ctrl);
 }
 
@@ -4137,6 +4168,8 @@ static void nvme_fw_act_work(struct work_struct *work)
                                struct nvme_ctrl, fw_act_work);
        unsigned long fw_act_timeout;
 
+       nvme_auth_stop(ctrl);
+
        if (ctrl->mtfa)
                fw_act_timeout = jiffies +
                                msecs_to_jiffies(ctrl->mtfa * 100);
@@ -4192,7 +4225,6 @@ static bool nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result)
                 * firmware activation.
                 */
                if (nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
-                       nvme_auth_stop(ctrl);
                        requeue = false;
                        queue_work(nvme_wq, &ctrl->fw_act_work);
                }
@@ -4481,7 +4513,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
 {
        int ret;
 
-       ctrl->state = NVME_CTRL_NEW;
+       WRITE_ONCE(ctrl->state, NVME_CTRL_NEW);
        clear_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags);
        spin_lock_init(&ctrl->lock);
        mutex_init(&ctrl->scan_lock);
@@ -4502,6 +4534,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev,
        INIT_DELAYED_WORK(&ctrl->failfast_work, nvme_failfast_work);
        memset(&ctrl->ka_cmd, 0, sizeof(ctrl->ka_cmd));
        ctrl->ka_cmd.common.opcode = nvme_admin_keep_alive;
+       ctrl->ka_last_check_time = jiffies;
 
        BUILD_BUG_ON(NVME_DSM_MAX_RANGES * sizeof(struct nvme_dsm_range) >
                        PAGE_SIZE);
@@ -4590,6 +4623,7 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl)
        list_for_each_entry(ns, &ctrl->namespaces, list)
                blk_mq_unfreeze_queue(ns->queue);
        up_read(&ctrl->namespaces_rwsem);
+       clear_bit(NVME_CTRL_FROZEN, &ctrl->flags);
 }
 EXPORT_SYMBOL_GPL(nvme_unfreeze);
 
@@ -4623,6 +4657,7 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl)
 {
        struct nvme_ns *ns;
 
+       set_bit(NVME_CTRL_FROZEN, &ctrl->flags);
        down_read(&ctrl->namespaces_rwsem);
        list_for_each_entry(ns, &ctrl->namespaces, list)
                blk_freeze_queue_start(ns->queue);