From e8fd41bb3cf149fb6df4be714f94fc04871ccbce Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 2 May 2019 14:31:33 +0300 Subject: [PATCH 01/11] nvme-pci: init shadow doorbell after each reset The spec states: "The settings are not retained across a Controller Level Reset" Therefore the driver must enable the shadow doorbell, after each reset. This was caught while testing the nvme driver over upcoming nvme-mdev device. Signed-off-by: Maxim Levitsky Reviewed-by: Keith Busch Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 3e4fb891a95a..8ef3c67e05d6 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -2280,8 +2280,6 @@ static int nvme_dev_add(struct nvme_dev *dev) return ret; } dev->ctrl.tagset = &dev->tagset; - - nvme_dbbuf_set(dev); } else { blk_mq_update_nr_hw_queues(&dev->tagset, dev->online_queues - 1); @@ -2289,6 +2287,7 @@ static int nvme_dev_add(struct nvme_dev *dev) nvme_free_queues(dev, dev->online_queues); } + nvme_dbbuf_set(dev); return 0; } From f4524cc45626e16264aabb930d0635eff19c7f73 Mon Sep 17 00:00:00 2001 From: Maxim Levitsky Date: Thu, 2 May 2019 14:31:34 +0300 Subject: [PATCH 02/11] nvme-pci: add known admin effects to augument admin effects log page Add known admin effects even if hardware has known admin effects page, since hardware can't be ever trusted to report sane values. (on my Intel DC P3700, it reports no side effects for namespace format) Signed-off-by: Maxim Levitsky Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index cd16d98d1f1a..eebaeadaa800 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1257,10 +1257,9 @@ static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, return 0; } + effects |= nvme_known_admin_effects(opcode); if (ctrl->effects) effects = le32_to_cpu(ctrl->effects->acs[opcode]); - else - effects = nvme_known_admin_effects(opcode); /* * For simplicity, IO to all namespaces is quiesced even if the command From 3b7dffb971dc2998004cec1efc883191c736d6b3 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Tue, 7 May 2019 09:23:00 -0500 Subject: [PATCH 03/11] nvme-pci: mark expected switch fall-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In preparation to enabling -Wimplicit-fallthrough, mark switch cases where we are expecting to fall through. This patch fixes the following warning: drivers/nvme/host/pci.c: In function ‘nvme_timeout’: drivers/nvme/host/pci.c:1298:12: warning: this statement may fall through [-Wimplicit-fallthrough=] shutdown = true; ~~~~~~~~~^~~~~~ drivers/nvme/host/pci.c:1299:2: note: here case NVME_CTRL_CONNECTING: ^~~~ Warning level 3 was used: -Wimplicit-fallthrough=3 This patch is part of the ongoing efforts to enable -Wimplicit-fallthrough. Signed-off-by: Gustavo A. R. Silva Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 8ef3c67e05d6..2a8708c9ac18 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1296,6 +1296,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) switch (dev->ctrl.state) { case NVME_CTRL_DELETING: shutdown = true; + /* fall through */ case NVME_CTRL_CONNECTING: case NVME_CTRL_RESETTING: dev_warn_ratelimited(dev->ctrl.device, From 87fd125344d68adf7699ec7396aa9f905ce79a80 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 6 May 2019 13:47:55 +0300 Subject: [PATCH 04/11] nvme-rdma: remove redundant reference between ib_device and tagset In the past, before adding f41725bb ("nvme-rdma: Use mr pool") commit, we needed a reference on the ib_device as long as the tagset was alive, as the MRs in the request structures needed a valid ib_device. Now, we allocate/deallocate MR pool per QP and consume on demand. Also remove nvme_rdma_free_tagset function and use blk_mq_free_tag_set instead, as it unneeded anymore. This commit also fixes a memory leakage and possible segmentation fault. When configuring the system with NIC teaming (aka bonding), we use 1 network interface to create an HA connection to the target side. In case one connection breaks down, nvme-rdma driver will get notification from rdma-cm layer that underlying address was change and will start error recovery process. During this process, we'll reconnect to the target via the second interface in the bond without destroying the tagset. This will cause a leakage of the initial rdma device (ndev) and miscount in the reference count of the new created rdma device (new ndev). In the final destruction (or in another error flow), we'll get a warning dump from the ib_dealloc_pd that we still have inflight MR's related to that pd. This happens becasue of the miscount of the reference tag of the rdma device and causing access violation to it's elements (some queues are not destroyed yet). Signed-off-by: Max Gurtovoy Signed-off-by: Israel Rukshin Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 34 +++++----------------------------- 1 file changed, 5 insertions(+), 29 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index e1824c2e0a1c..f383146e7d0f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -697,15 +697,6 @@ out_free_queues: return ret; } -static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, - struct blk_mq_tag_set *set) -{ - struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); - - blk_mq_free_tag_set(set); - nvme_rdma_dev_put(ctrl->device); -} - static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, bool admin) { @@ -744,24 +735,9 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, ret = blk_mq_alloc_tag_set(set); if (ret) - goto out; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - ret = nvme_rdma_dev_get(ctrl->device); - if (!ret) { - ret = -EINVAL; - goto out_free_tagset; - } + return ERR_PTR(ret); return set; - -out_free_tagset: - blk_mq_free_tag_set(set); -out: - return ERR_PTR(ret); } static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, @@ -769,7 +745,7 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, { if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); } if (ctrl->async_event_sqe.data) { nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, @@ -847,7 +823,7 @@ out_cleanup_queue: blk_cleanup_queue(ctrl->ctrl.admin_q); out_free_tagset: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); + blk_mq_free_tag_set(ctrl->ctrl.admin_tagset); out_free_async_qe: nvme_rdma_free_qe(ctrl->device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); @@ -862,7 +838,7 @@ static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, { if (remove) { blk_cleanup_queue(ctrl->ctrl.connect_q); - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); + blk_mq_free_tag_set(ctrl->ctrl.tagset); } nvme_rdma_free_io_queues(ctrl); } @@ -903,7 +879,7 @@ out_cleanup_connect_q: blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: if (new) - nvme_rdma_free_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); + blk_mq_free_tag_set(ctrl->ctrl.tagset); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; From 8730c1ddb69bdeeb10c1f613a4e15e95862b1981 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 3 May 2019 11:43:52 +0200 Subject: [PATCH 05/11] nvme-fc: use separate work queue to avoid warning When tearing down a controller the following warning is issued: WARNING: CPU: 0 PID: 30681 at ../kernel/workqueue.c:2418 check_flush_dependency This happens as the err_work workqueue item is scheduled on the system workqueue (which has WQ_MEM_RECLAIM not set), but is flushed from a workqueue which has WQ_MEM_RECLAIM set. Fix this by providing an FC-NVMe specific workqueue. Fixes: 4cff280a5fcc ("nvme-fc: resolve io failures during connect") Signed-off-by: Hannes Reinecke Reviewed-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 6d8451356eac..c17c887f2148 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -202,7 +202,7 @@ static LIST_HEAD(nvme_fc_lport_list); static DEFINE_IDA(nvme_fc_local_port_cnt); static DEFINE_IDA(nvme_fc_ctrl_cnt); - +static struct workqueue_struct *nvme_fc_wq; /* * These items are short-term. They will eventually be moved into @@ -2054,7 +2054,7 @@ nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg) */ if (ctrl->ctrl.state == NVME_CTRL_CONNECTING) { active = atomic_xchg(&ctrl->err_work_active, 1); - if (!active && !schedule_work(&ctrl->err_work)) { + if (!active && !queue_work(nvme_fc_wq, &ctrl->err_work)) { atomic_set(&ctrl->err_work_active, 0); WARN_ON(1); } @@ -3399,6 +3399,10 @@ static int __init nvme_fc_init_module(void) { int ret; + nvme_fc_wq = alloc_workqueue("nvme_fc_wq", WQ_MEM_RECLAIM, 0); + if (!nvme_fc_wq) + return -ENOMEM; + /* * NOTE: * It is expected that in the future the kernel will combine @@ -3416,7 +3420,7 @@ static int __init nvme_fc_init_module(void) ret = class_register(&fc_class); if (ret) { pr_err("couldn't register class fc\n"); - return ret; + goto out_destroy_wq; } /* @@ -3440,6 +3444,9 @@ out_destroy_device: device_destroy(&fc_class, MKDEV(0, 0)); out_destroy_class: class_unregister(&fc_class); +out_destroy_wq: + destroy_workqueue(nvme_fc_wq); + return ret; } @@ -3456,6 +3463,7 @@ static void __exit nvme_fc_exit_module(void) device_destroy(&fc_class, MKDEV(0, 0)); class_unregister(&fc_class); + destroy_workqueue(nvme_fc_wq); } module_init(nvme_fc_init_module); From 8a03b27ea61c2ab9de16a8a195822ef05e799748 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 3 May 2019 15:37:35 +0200 Subject: [PATCH 06/11] nvme-multipath: avoid crash on invalid subsystem cntlid enumeration A process holding an open reference to a removed disk prevents it from completing deletion, so its name continues to exist. A subsequent gendisk creation may have the same cntlid which risks collision when using that for the name. Use the unique ctrl->instance instead. Signed-off-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Christoph Hellwig --- drivers/nvme/host/multipath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index 5c9429d41120..499acf07d61a 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -31,7 +31,7 @@ void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns, sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->head->instance); } else if (ns->head->disk) { sprintf(disk_name, "nvme%dc%dn%d", ctrl->subsys->instance, - ctrl->cntlid, ns->head->instance); + ctrl->instance, ns->head->instance); *flags = GENHD_FL_HIDDEN; } else { sprintf(disk_name, "nvme%dn%d", ctrl->subsys->instance, From 94e970b6740b11209848b05364dadbc12f3937f5 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 11 May 2019 22:42:55 +0900 Subject: [PATCH 07/11] nvme-fabrics: remove unused argument The variable 'count' is not currently used by nvmf_create_ctrl(), so remove it. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 592d1e61ef7e..5838f7cd53ac 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -978,7 +978,7 @@ EXPORT_SYMBOL_GPL(nvmf_free_options); NVMF_OPT_DISABLE_SQFLOW) static struct nvme_ctrl * -nvmf_create_ctrl(struct device *dev, const char *buf, size_t count) +nvmf_create_ctrl(struct device *dev, const char *buf) { struct nvmf_ctrl_options *opts; struct nvmf_transport_ops *ops; @@ -1073,7 +1073,7 @@ static ssize_t nvmf_dev_write(struct file *file, const char __user *ubuf, goto out_unlock; } - ctrl = nvmf_create_ctrl(nvmf_device, buf, count); + ctrl = nvmf_create_ctrl(nvmf_device, buf); if (IS_ERR(ctrl)) { ret = PTR_ERR(ctrl); goto out_unlock; From 9581ae4f0facf122a732841959812dea9ed2f422 Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sat, 11 May 2019 22:42:54 +0900 Subject: [PATCH 08/11] nvme: fix typos in nvme status code values Fix typos in enumeration names for nvme status: s/ACIVATE/ACTIVATE/ s/INSUFFICENT/INSUFFICIENT/ Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c40720cb59ac..8028adacaff3 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1246,9 +1246,9 @@ enum { NVME_SC_FW_NEEDS_SUBSYS_RESET = 0x110, NVME_SC_FW_NEEDS_RESET = 0x111, NVME_SC_FW_NEEDS_MAX_TIME = 0x112, - NVME_SC_FW_ACIVATE_PROHIBITED = 0x113, + NVME_SC_FW_ACTIVATE_PROHIBITED = 0x113, NVME_SC_OVERLAPPING_RANGE = 0x114, - NVME_SC_NS_INSUFFICENT_CAP = 0x115, + NVME_SC_NS_INSUFFICIENT_CAP = 0x115, NVME_SC_NS_ID_UNAVAILABLE = 0x116, NVME_SC_NS_ALREADY_ATTACHED = 0x118, NVME_SC_NS_IS_PRIVATE = 0x119, From 521cfb8e5a5dd6f2f528d675688f07b962fe4545 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Mon, 13 May 2019 10:46:05 -0700 Subject: [PATCH 09/11] nvme: trace all async notice events This patch removes the tracing of the NVMe Async events out of the switch so that it can trace all the events including the ones which are not handled in the nvme_handle_aen_notice(). The events which are not handled in the nvme_handle_aen_notice() such as NVME_AER_NOTICE_DISC_CHANGED corresponding event identifier needs to be added in the drivers/nvme/host/trace.h so that it can stringify the AER . Signed-off-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 5 ++--- drivers/nvme/host/trace.h | 1 + 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index eebaeadaa800..f22925b5eeca 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3604,19 +3604,18 @@ static void nvme_handle_aen_notice(struct nvme_ctrl *ctrl, u32 result) { u32 aer_notice_type = (result & 0xff00) >> 8; + trace_nvme_async_event(ctrl, aer_notice_type); + switch (aer_notice_type) { case NVME_AER_NOTICE_NS_CHANGED: - trace_nvme_async_event(ctrl, aer_notice_type); set_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events); nvme_queue_scan(ctrl); break; case NVME_AER_NOTICE_FW_ACT_STARTING: - trace_nvme_async_event(ctrl, aer_notice_type); queue_work(nvme_wq, &ctrl->fw_act_work); break; #ifdef CONFIG_NVME_MULTIPATH case NVME_AER_NOTICE_ANA: - trace_nvme_async_event(ctrl, aer_notice_type); if (!ctrl->ana_log_buf) break; queue_work(nvme_wq, &ctrl->ana_work); diff --git a/drivers/nvme/host/trace.h b/drivers/nvme/host/trace.h index 97d3c77365b8..e71502d141ed 100644 --- a/drivers/nvme/host/trace.h +++ b/drivers/nvme/host/trace.h @@ -167,6 +167,7 @@ TRACE_EVENT(nvme_async_event, aer_name(NVME_AER_NOTICE_NS_CHANGED), aer_name(NVME_AER_NOTICE_ANA), aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), + aer_name(NVME_AER_NOTICE_DISC_CHANGED), aer_name(NVME_AER_ERROR), aer_name(NVME_AER_SMART), aer_name(NVME_AER_CSS), From 32fd90c407680935f84fb3ffc60fb3e020257d38 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 May 2019 09:48:27 +0200 Subject: [PATCH 10/11] nvme: change locking for the per-subsystem controller list Life becomes a lot simpler if we just use the global nvme_subsystems_lock to protect this list. Given that it is only accessed during controller probing and removal that isn't a scalability problem either. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index f22925b5eeca..d2d8e6088c46 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2346,13 +2346,13 @@ static int nvme_active_ctrls(struct nvme_subsystem *subsys) int count = 0; struct nvme_ctrl *ctrl; - mutex_lock(&subsys->lock); + lockdep_assert_held(&nvme_subsystems_lock); + list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { if (ctrl->state != NVME_CTRL_DELETING && ctrl->state != NVME_CTRL_DEAD) count++; } - mutex_unlock(&subsys->lock); return count; } @@ -2394,6 +2394,9 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) mutex_lock(&nvme_subsystems_lock); found = __nvme_find_get_subsystem(subsys->subnqn); if (found) { + __nvme_release_subsystem(subsys); + subsys = found; + /* * Verify that the subsystem actually supports multiple * controllers, else bail out. @@ -2402,14 +2405,10 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { dev_err(ctrl->device, "ignoring ctrl due to duplicate subnqn (%s).\n", - found->subnqn); - nvme_put_subsystem(found); + subsys->subnqn); ret = -EINVAL; - goto out_unlock; + goto out_put_subsystem; } - - __nvme_release_subsystem(subsys); - subsys = found; } else { ret = device_add(&subsys->dev); if (ret) { @@ -2421,23 +2420,20 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) list_add_tail(&subsys->entry, &nvme_subsystems); } - ctrl->subsys = subsys; - mutex_unlock(&nvme_subsystems_lock); - if (sysfs_create_link(&subsys->dev.kobj, &ctrl->device->kobj, dev_name(ctrl->device))) { dev_err(ctrl->device, "failed to create sysfs link from subsystem.\n"); - /* the transport driver will eventually put the subsystem */ - return -EINVAL; + goto out_put_subsystem; } - mutex_lock(&subsys->lock); + ctrl->subsys = subsys; list_add_tail(&ctrl->subsys_entry, &subsys->ctrls); - mutex_unlock(&subsys->lock); - + mutex_unlock(&nvme_subsystems_lock); return 0; +out_put_subsystem: + nvme_put_subsystem(subsys); out_unlock: mutex_unlock(&nvme_subsystems_lock); put_device(&subsys->dev); @@ -3694,10 +3690,10 @@ static void nvme_free_ctrl(struct device *dev) __free_page(ctrl->discard_page); if (subsys) { - mutex_lock(&subsys->lock); + mutex_lock(&nvme_subsystems_lock); list_del(&ctrl->subsys_entry); - mutex_unlock(&subsys->lock); sysfs_remove_link(&subsys->dev.kobj, dev_name(ctrl->device)); + mutex_unlock(&nvme_subsystems_lock); } ctrl->ops->free_ctrl(ctrl); From 1b1031ca63b2ce1d3b664b35b77ec94e458693e9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 May 2019 09:01:26 +0200 Subject: [PATCH 11/11] nvme: validate cntlid during controller initialisation The CNTLID value is required to be unique, and we do rely on this for correct operation. So reject any controller for which a non-unique CNTLID has been detected. Based on a patch from Hannes Reinecke. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke --- drivers/nvme/host/core.c | 41 +++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d2d8e6088c46..d7de0642c832 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -2341,20 +2341,35 @@ static const struct attribute_group *nvme_subsys_attrs_groups[] = { NULL, }; -static int nvme_active_ctrls(struct nvme_subsystem *subsys) +static bool nvme_validate_cntlid(struct nvme_subsystem *subsys, + struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) { - int count = 0; - struct nvme_ctrl *ctrl; + struct nvme_ctrl *tmp; lockdep_assert_held(&nvme_subsystems_lock); - list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) { - if (ctrl->state != NVME_CTRL_DELETING && - ctrl->state != NVME_CTRL_DEAD) - count++; + list_for_each_entry(tmp, &subsys->ctrls, subsys_entry) { + if (ctrl->state == NVME_CTRL_DELETING || + ctrl->state == NVME_CTRL_DEAD) + continue; + + if (tmp->cntlid == ctrl->cntlid) { + dev_err(ctrl->device, + "Duplicate cntlid %u with %s, rejecting\n", + ctrl->cntlid, dev_name(tmp->device)); + return false; + } + + if ((id->cmic & (1 << 1)) || + (ctrl->opts && ctrl->opts->discovery_nqn)) + continue; + + dev_err(ctrl->device, + "Subsystem does not support multiple controllers\n"); + return false; } - return count; + return true; } static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) @@ -2397,15 +2412,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) __nvme_release_subsystem(subsys); subsys = found; - /* - * Verify that the subsystem actually supports multiple - * controllers, else bail out. - */ - if (!(ctrl->opts && ctrl->opts->discovery_nqn) && - nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { - dev_err(ctrl->device, - "ignoring ctrl due to duplicate subnqn (%s).\n", - subsys->subnqn); + if (!nvme_validate_cntlid(subsys, ctrl, id)) { ret = -EINVAL; goto out_put_subsystem; }