From 629ab28bf2d1dbb58c63e8e5557235c2a6e33017 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Wed, 29 Nov 2023 15:22:53 +0100 Subject: [PATCH] nvme-fc: fix races and scheduling while atomic bugs Signed-off-by: Maurizio Lombardi --- drivers/nvme/host/fc.c | 60 +++++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 9f9a3b35dc64..9cd22ab03e10 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -148,7 +148,8 @@ struct nvme_fc_rport { /* fc_ctrl flags values - specified as bit positions */ #define ASSOC_ACTIVE 0 #define ASSOC_FAILED 1 -#define FCCTRL_TERMIO 2 +#define ASSOC_CREATING 2 +#define FCCTRL_TERMIO 3 struct nvme_fc_ctrl { spinlock_t lock; @@ -2551,22 +2552,47 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) enum nvme_ctrl_state state; unsigned long flags; - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&ctrl->ctrl.lock, flags); state = ctrl->ctrl.state; if (state == NVME_CTRL_CONNECTING) { set_bit(ASSOC_FAILED, &ctrl->flags); - spin_unlock_irqrestore(&ctrl->lock, flags); + if (!test_bit(ASSOC_CREATING, &ctrl->flags)) { + /* create_association is completing but the + * controller has not been moved to LIVE state yet. + * The create_association error path will reset + * the controller. + */ + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); + return; + } + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); __nvme_fc_abort_outstanding_ios(ctrl, true); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport error during (re)connect\n", ctrl->cnum); return; } - spin_unlock_irqrestore(&ctrl->lock, flags); - /* Otherwise, only proceed if in LIVE state - e.g. on first error */ - if (state != NVME_CTRL_LIVE) + /* Otherwise, only proceed if in LIVE state - e.g. on first error + * or if the ASSOC_FAILED bit is not set, + */ + if (state != NVME_CTRL_LIVE || test_bit(ASSOC_FAILED, &ctrl->flags)) { + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); return; + } + + if (!test_bit(NVME_CTRL_STARTED_ONCE, &ctrl->flags)) { + /* ctrl is in LIVE state but not yet started. + * it means that create_association has not fully + * completed its job yet, set the ASSOC_FAILED bit + * and let create_association error path reset the controller. + */ + set_bit(ASSOC_FAILED, &ctrl->flags); + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); + return; + } + + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); dev_warn(ctrl->ctrl.device, "NVME-FC{%d}: transport association event: %s\n", @@ -3073,7 +3099,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; struct nvmefc_ls_rcv_op *disls = NULL; unsigned long flags; - int ret; + int ret, need_reset; bool changed; ++ctrl->ctrl.nr_reconnects; @@ -3090,6 +3116,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ctrl->cnum, ctrl->lport->localport.port_name, ctrl->rport->remoteport.port_name, ctrl->ctrl.opts->subsysnqn); + set_bit(ASSOC_CREATING, &ctrl->flags); clear_bit(ASSOC_FAILED, &ctrl->flags); /* @@ -3181,20 +3208,28 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) ret = nvme_fc_recreate_io_queues(ctrl); } - spin_lock_irqsave(&ctrl->lock, flags); + spin_lock_irqsave(&ctrl->ctrl.lock, flags); + clear_bit(ASSOC_CREATING, &ctrl->flags); + if (!ret && test_bit(ASSOC_FAILED, &ctrl->flags)) ret = -EIO; if (ret) { - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); goto out_term_aen_ops; } - changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); - spin_unlock_irqrestore(&ctrl->lock, flags); + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); ctrl->ctrl.nr_reconnects = 0; - if (changed) + changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); + if (changed) { nvme_start_ctrl(&ctrl->ctrl); + spin_lock_irqsave(&ctrl->ctrl.lock, flags); + need_reset = test_bit(ASSOC_FAILED, &ctrl->flags); + spin_unlock_irqrestore(&ctrl->ctrl.lock, flags); + if (need_reset) + nvme_reset_ctrl(&ctrl->ctrl); + } return 0; /* Success */ @@ -3220,6 +3255,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) out_free_queue: nvme_fc_free_queue(&ctrl->queues[0]); clear_bit(ASSOC_ACTIVE, &ctrl->flags); + clear_bit(ASSOC_CREATING, &ctrl->flags); nvme_fc_ctlr_inactive_on_rport(ctrl); return ret; -- 2.39.3