diff --git a/block/blk-mq.c b/block/blk-mq.c
index ae8494d88897..c2697db59109 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
 
 static struct request *blk_mq_get_new_requests(struct request_queue *q,
					       struct blk_plug *plug,
-					       struct bio *bio,
-					       unsigned int nsegs)
+					       struct bio *bio)
 {
 	struct blk_mq_alloc_data data = {
 		.q		= q,
@@ -3125,7 +3124,7 @@ new_request:
 	if (rq) {
 		blk_mq_use_cached_rq(rq, plug, bio);
 	} else {
-		rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
+		rq = blk_mq_get_new_requests(q, plug, bio);
 		if (unlikely(!rq)) {
 			if (bio->bi_opf & REQ_NOWAIT)
 				bio_wouldblock_error(bio);
@@ -4465,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
 	return NULL;
 }
 
-static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
-				   struct request_queue *q)
+static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				     struct request_queue *q)
 {
 	struct blk_mq_hw_ctx *hctx;
 	unsigned long i, j;
 
-	/* protect against switching io scheduler */
-	mutex_lock(&q->elevator_lock);
 	for (i = 0; i < set->nr_hw_queues; i++) {
 		int old_node;
 		int node = blk_mq_get_hctx_node(set, i);
@@ -4505,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
-	mutex_unlock(&q->elevator_lock);
+}
+
+static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
+				   struct request_queue *q, bool lock)
+{
+	if (lock) {
+		/* protect against switching io scheduler */
+		mutex_lock(&q->elevator_lock);
+		__blk_mq_realloc_hw_ctxs(set, q);
+		mutex_unlock(&q->elevator_lock);
+	} else {
+		__blk_mq_realloc_hw_ctxs(set, q);
+	}
 
 	/* unregister cpuhp callbacks for exited hctxs */
 	blk_mq_remove_hw_queues_cpuhp(q);
@@ -4537,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	xa_init(&q->hctx_table);
 
-	blk_mq_realloc_hw_ctxs(set, q);
+	blk_mq_realloc_hw_ctxs(set, q, false);
 	if (!q->nr_hw_queues)
 		goto err_hctxs;
 
@@ -5033,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 fallback:
 	blk_mq_update_queue_map(set);
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
-		blk_mq_realloc_hw_ctxs(set, q);
+		blk_mq_realloc_hw_ctxs(set, q, true);
 
 		if (q->nr_hw_queues != set->nr_hw_queues) {
 			int i = prev_nr_hw_queues;
diff --git a/drivers/nvme/host/Kconfig b/drivers/nvme/host/Kconfig
index 10e453b2436e..d47dfa80fb95 100644
--- a/drivers/nvme/host/Kconfig
+++ b/drivers/nvme/host/Kconfig
@@ -18,10 +18,15 @@ config NVME_MULTIPATH
 	bool "NVMe multipath support"
 	depends on NVME_CORE
 	help
-	  This option enables support for multipath access to NVMe
-	  subsystems. If this option is enabled only a single
-	  /dev/nvmeXnY device will show up for each NVMe namespace,
-	  even if it is accessible through multiple controllers.
+	  This option controls support for multipath access to NVMe
+	  subsystems. If this option is enabled support for NVMe multipath
+	  access is included in the kernel. If this option is disabled support
+	  for NVMe multipath access is excluded from the kernel. When this
+	  option is disabled each controller/namespace receives its
+	  own /dev/nvmeXnY device entry and NVMe multipath access is
+	  not supported.
+
+	  If unsure, say Y.
 
 config NVME_VERBOSE_ERRORS
 	bool "NVMe verbose error reporting"
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c2d89fac86c5..cc23035148b4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
 				"Found shared namespace %d, but multipathing not supported.\n",
 				info->nsid);
 			dev_warn_once(ctrl->device,
-				"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
+				"Shared namespace support requires core_nvme.multipath=Y.\n");
 		}
 	}
 
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index ecf136489044..ca86d3bf7ea4 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
 
 static int nvme_map_user_request(struct request *req, u64 ubuffer,
 		unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
-		struct io_uring_cmd *ioucmd, unsigned int flags,
-		unsigned int iou_issue_flags)
+		struct iov_iter *iter, unsigned int flags)
 {
 	struct request_queue *q = req->q;
 	struct nvme_ns *ns = q->queuedata;
@@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 	if (!nvme_ctrl_sgl_supported(ctrl))
 		dev_warn_once(ctrl->device, "using unchecked data buffer\n");
 	if (has_metadata) {
-		if (!supports_metadata) {
-			ret = -EINVAL;
-			goto out;
-		}
+		if (!supports_metadata)
+			return -EINVAL;
+
 		if (!nvme_ctrl_meta_sgl_supported(ctrl))
 			dev_warn_once(ctrl->device,
 				      "using unchecked metadata buffer\n");
 	}
 
-	if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
-		struct iov_iter iter;
-
-		/* fixedbufs is only for non-vectored io */
-		if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
-			ret = -EINVAL;
-			goto out;
-		}
-		ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
-				rq_data_dir(req), &iter, ioucmd,
-				iou_issue_flags);
-		if (ret < 0)
-			goto out;
-		ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
-	} else {
+	if (iter)
+		ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
+	else
 		ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
 				bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
 				0, rq_data_dir(req));
-	}
 
 	if (ret)
-		goto out;
+		return ret;
 
 	bio = req->bio;
 	if (bdev)
@@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
 out_unmap:
 	if (bio)
 		blk_rq_unmap_user(bio);
-out:
-	blk_mq_free_request(req);
 	return ret;
 }
 
@@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 	req->timeout = timeout;
 	if (ubuffer && bufflen) {
 		ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
-				meta_len, NULL, flags, 0);
+				meta_len, NULL, flags);
 		if (ret)
-			return ret;
+			goto out_free_req;
 	}
 
 	bio = req->bio;
@@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
 
 	if (effects)
 		nvme_passthru_end(ctrl, ns, effects, cmd, ret);
 
+	return ret;
+
+out_free_req:
+	blk_mq_free_request(req);
 	return ret;
 }
 
@@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
 	struct nvme_uring_data d;
 	struct nvme_command c;
+	struct iov_iter iter;
+	struct iov_iter *map_iter = NULL;
 	struct request *req;
 	blk_opf_t rq_flags = REQ_ALLOC_CACHE;
 	blk_mq_req_flags_t blk_flags = 0;
@@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	d.metadata_len = READ_ONCE(cmd->metadata_len);
 	d.timeout_ms = READ_ONCE(cmd->timeout_ms);
 
+	if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
+		/* fixedbufs is only for non-vectored io */
+		if (vec)
+			return -EINVAL;
+
+		ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
+			nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
+			issue_flags);
+		if (ret < 0)
+			return ret;
+
+		map_iter = &iter;
+	}
+
 	if (issue_flags & IO_URING_F_NONBLOCK) {
 		rq_flags |= REQ_NOWAIT;
 		blk_flags = BLK_MQ_REQ_NOWAIT;
@@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
 
 	if (d.data_len) {
-		ret = nvme_map_user_request(req, d.addr,
-			d.data_len, nvme_to_user_ptr(d.metadata),
-			d.metadata_len, ioucmd, vec, issue_flags);
+		ret = nvme_map_user_request(req, d.addr, d.data_len,
+			nvme_to_user_ptr(d.metadata), d.metadata_len,
+			map_iter, vec);
 		if (ret)
-			return ret;
+			goto out_free_req;
 	}
 
 	/* to free bio on completion, as req->bio will be null at that time */
@@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
 	req->end_io = nvme_uring_cmd_end_io;
 	blk_execute_rq_nowait(req, false);
 	return -EIOCBQUEUED;
+
+out_free_req:
+	blk_mq_free_request(req);
+	return ret;
 }
 
 static bool is_ctrl_ioctl(unsigned int cmd)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 2883d17ee1eb..b178d52eac1b 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
 {
 	struct request *req;
 
+	if (rq_list_empty(rqlist))
+		return;
+
 	spin_lock(&nvmeq->sq_lock);
 	while ((req = rq_list_pop(rqlist))) {
 		struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
diff --git a/drivers/nvme/target/pci-epf.c b/drivers/nvme/target/pci-epf.c
index b54b3fdbe389..51c27b32248d 100644
--- a/drivers/nvme/target/pci-epf.c
+++ b/drivers/nvme/target/pci-epf.c
@@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
 	struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
 	u16 status;
+	int ret;
 
 	if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
 		return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 	if (status != NVME_SC_SUCCESS)
 		goto err;
 
+	/*
+	 * Map the CQ PCI address space and since PCI endpoint controllers may
+	 * return a partial mapping, check that the mapping is large enough.
+	 */
+	ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
+				    &cq->pci_map);
+	if (ret) {
+		dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
+			cq->qid, ret);
+		goto err_internal;
+	}
+
+	if (cq->pci_map.pci_size < cq->pci_size) {
+		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
+			cq->qid);
+		goto err_unmap_queue;
+	}
+
 	set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
 
 	dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
 
 	return NVME_SC_SUCCESS;
 
+err_unmap_queue:
+	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
+err_internal:
+	status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
 err:
 	if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
 		nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
 	cancel_delayed_work_sync(&cq->work);
 	nvmet_pci_epf_drain_queue(cq);
 	nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
+	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
 
 	return NVME_SC_SUCCESS;
 }
@@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
 	ctrl->cq = NULL;
 }
 
-static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
-				   struct nvmet_pci_epf_queue *queue)
-{
-	struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
-	int ret;
-
-	ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
-				    queue->pci_size, &queue->pci_map);
-	if (ret) {
-		dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
-			queue->qid, ret);
-		return ret;
-	}
-
-	if (queue->pci_map.pci_size < queue->pci_size) {
-		dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
-			queue->qid);
-		nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
-					     struct nvmet_pci_epf_queue *queue)
-{
-	nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
-}
-
 static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
 {
 	struct nvmet_pci_epf_iod *iod =
@@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	struct nvme_completion *cqe;
 	struct nvmet_pci_epf_iod *iod;
 	unsigned long flags;
-	int ret, n = 0;
-
-	ret = nvmet_pci_epf_map_queue(ctrl, cq);
-	if (ret)
-		goto again;
+	int ret = 0, n = 0;
 
 	while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) &&
 	       ctrl->link_up) {
@@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 		n++;
 	}
 
-	nvmet_pci_epf_unmap_queue(ctrl, cq);
-
 	/*
 	 * We do not support precise IRQ coalescing time (100ns units as per
 	 * NVMe specifications). So if we have posted completion entries without
@@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
 	if (n)
 		nvmet_pci_epf_raise_irq(ctrl, cq, true);
 
-again:
 	if (ret < 0)
 		queue_delayed_work(system_highpri_wq, &cq->work,
 				   NVMET_PCI_EPF_CQ_RETRY_INTERVAL);