block-6.15-20250403

-----BEGIN PGP SIGNATURE-----
 
 iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmfvB6MQHGF4Ym9lQGtl
 cm5lbC5kawAKCRD301j7KXHgpgzvEACNZtjY74icNtsW/tI52Ht84Y6ygSsf9qXb
 t0niYgVMp2KUlNeHVB7QYgXyXbGi0447GAza1oD1ucqzNNgDxkOBO7ESWdEVDi3k
 aajDy/pWmzwqoEIfXeyp86QQYuXwa1OE6NgfHYmdxAnxN0oBXQCD3Qy5nBXGLp+K
 GkQ2JVb1ftWsqfT7u7xuEY6eCtDTz9gN4JMen7FZhjPy2Pivs6ZqvH3NrQMFAGi2
 LmGdknkR7Xa0pys3oImQFATXcIM8qjpill3WMNZYlRiXaWFvWyvMvkU4Ec2I/qP8
 jZ/aiQ7yFKJWqHSUAGhKCWoEches2HA2naYUgc6BCDeaucOhEzp1hUiaIcJfNM+i
 JvWS8hbJj0N7gGGGdmydIAbyNQXvN5DM3CUBmhgYkRM/c1SW/S8QTz1GeE/b85/V
 KRGZNUCT31x1Zmk45CQxePqpRv5yqTcs8VGYAw8lPhsfWTd8tJnJO2FtwMbW55j3
 a/b8N5M8VWu9E7wWnUJGndh9RYVP8PumMZkR7dRD47JGoZKjC4t/nwNczoXE0Noy
 UehnNrtXvHQiuRbMAkhZF8GFjnFD7Q76eE33TQbye2cFx8Cyw7fyC6hx7EWOvVlF
 khZE48L435ICKRPwI4djnHWrf8i2KYC9Wa1184Pkir82X5gByb7vaPpSxuKa6zwe
 nq15w9+K2Q==
 =ZQTE
 -----END PGP SIGNATURE-----

Merge tag 'block-6.15-20250403' of git://git.kernel.dk/linux

Pull more block updates from Jens Axboe:

 - NVMe pull request via Keith:
      - PCI endpoint target cleanup (Damien)
      - Early import for uring_cmd fixed buffer (Caleb)
      - Multipath documentation and notification improvements (John)
      - Invalid pci sq doorbell write fix (Maurizio)

 - Queue init locking fix

 - Remove dead nsegs parameter from blk_mq_get_new_requests()

* tag 'block-6.15-20250403' of git://git.kernel.dk/linux:
  block: don't grab elevator lock during queue initialization
  nvme-pci: skip nvme_write_sq_db on empty rqlist
  nvme-multipath: change the NVME_MULTIPATH config option
  nvme: update the multipath warning in nvme_init_ns_head
  nvme/ioctl: move fixed buffer lookup to nvme_uring_cmd_io()
  nvme/ioctl: move blk_mq_free_request() out of nvme_map_user_request()
  nvme/ioctl: don't warn on vectorized uring_cmd with fixed buffer
  nvmet: pci-epf: Keep completion queues mapped
  block: remove unused nseg parameter
This commit is contained in:
Linus Torvalds 2025-04-03 16:04:38 -07:00
commit 949dd321de
6 changed files with 94 additions and 84 deletions

View File

@ -2965,8 +2965,7 @@ static bool blk_mq_attempt_bio_merge(struct request_queue *q,
static struct request *blk_mq_get_new_requests(struct request_queue *q,
struct blk_plug *plug,
struct bio *bio,
unsigned int nsegs)
struct bio *bio)
{
struct blk_mq_alloc_data data = {
.q = q,
@ -3125,7 +3124,7 @@ new_request:
if (rq) {
blk_mq_use_cached_rq(rq, plug, bio);
} else {
rq = blk_mq_get_new_requests(q, plug, bio, nr_segs);
rq = blk_mq_get_new_requests(q, plug, bio);
if (unlikely(!rq)) {
if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
@ -4465,14 +4464,12 @@ static struct blk_mq_hw_ctx *blk_mq_alloc_and_init_hctx(
return NULL;
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
static void __blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned long i, j;
/* protect against switching io scheduler */
mutex_lock(&q->elevator_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@ -4505,7 +4502,19 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
mutex_unlock(&q->elevator_lock);
}
static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct request_queue *q, bool lock)
{
if (lock) {
/* protect against switching io scheduler */
mutex_lock(&q->elevator_lock);
__blk_mq_realloc_hw_ctxs(set, q);
mutex_unlock(&q->elevator_lock);
} else {
__blk_mq_realloc_hw_ctxs(set, q);
}
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@ -4537,7 +4546,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
xa_init(&q->hctx_table);
blk_mq_realloc_hw_ctxs(set, q);
blk_mq_realloc_hw_ctxs(set, q, false);
if (!q->nr_hw_queues)
goto err_hctxs;
@ -5033,7 +5042,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
fallback:
blk_mq_update_queue_map(set);
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
blk_mq_realloc_hw_ctxs(set, q, true);
if (q->nr_hw_queues != set->nr_hw_queues) {
int i = prev_nr_hw_queues;

View File

@ -18,10 +18,15 @@ config NVME_MULTIPATH
bool "NVMe multipath support"
depends on NVME_CORE
help
This option enables support for multipath access to NVMe
subsystems. If this option is enabled only a single
/dev/nvmeXnY device will show up for each NVMe namespace,
even if it is accessible through multiple controllers.
This option controls support for multipath access to NVMe
subsystems. If this option is enabled support for NVMe multipath
access is included in the kernel. If this option is disabled support
for NVMe multipath access is excluded from the kernel. When this
option is disabled each controller/namespace receives its
own /dev/nvmeXnY device entry and NVMe multipath access is
not supported.
If unsure, say Y.
config NVME_VERBOSE_ERRORS
bool "NVMe verbose error reporting"

View File

@ -3822,7 +3822,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
"Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
"Shared namespace support requires core_nvme.multipath=Y.\n");
}
}

View File

@ -114,8 +114,7 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
static int nvme_map_user_request(struct request *req, u64 ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
struct io_uring_cmd *ioucmd, unsigned int flags,
unsigned int iou_issue_flags)
struct iov_iter *iter, unsigned int flags)
{
struct request_queue *q = req->q;
struct nvme_ns *ns = q->queuedata;
@ -129,37 +128,23 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
if (!nvme_ctrl_sgl_supported(ctrl))
dev_warn_once(ctrl->device, "using unchecked data buffer\n");
if (has_metadata) {
if (!supports_metadata) {
ret = -EINVAL;
goto out;
}
if (!supports_metadata)
return -EINVAL;
if (!nvme_ctrl_meta_sgl_supported(ctrl))
dev_warn_once(ctrl->device,
"using unchecked metadata buffer\n");
}
if (ioucmd && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
struct iov_iter iter;
/* fixedbufs is only for non-vectored io */
if (WARN_ON_ONCE(flags & NVME_IOCTL_VEC)) {
ret = -EINVAL;
goto out;
}
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
rq_data_dir(req), &iter, ioucmd,
iou_issue_flags);
if (ret < 0)
goto out;
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
} else {
if (iter)
ret = blk_rq_map_user_iov(q, req, NULL, iter, GFP_KERNEL);
else
ret = blk_rq_map_user_io(req, NULL, nvme_to_user_ptr(ubuffer),
bufflen, GFP_KERNEL, flags & NVME_IOCTL_VEC, 0,
0, rq_data_dir(req));
}
if (ret)
goto out;
return ret;
bio = req->bio;
if (bdev)
@ -176,8 +161,6 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
out_unmap:
if (bio)
blk_rq_unmap_user(bio);
out:
blk_mq_free_request(req);
return ret;
}
@ -200,9 +183,9 @@ static int nvme_submit_user_cmd(struct request_queue *q,
req->timeout = timeout;
if (ubuffer && bufflen) {
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
meta_len, NULL, flags, 0);
meta_len, NULL, flags);
if (ret)
return ret;
goto out_free_req;
}
bio = req->bio;
@ -218,7 +201,10 @@ static int nvme_submit_user_cmd(struct request_queue *q,
if (effects)
nvme_passthru_end(ctrl, ns, effects, cmd, ret);
return ret;
out_free_req:
blk_mq_free_request(req);
return ret;
}
@ -469,6 +455,8 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct request_queue *q = ns ? ns->queue : ctrl->admin_q;
struct nvme_uring_data d;
struct nvme_command c;
struct iov_iter iter;
struct iov_iter *map_iter = NULL;
struct request *req;
blk_opf_t rq_flags = REQ_ALLOC_CACHE;
blk_mq_req_flags_t blk_flags = 0;
@ -504,6 +492,20 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
d.metadata_len = READ_ONCE(cmd->metadata_len);
d.timeout_ms = READ_ONCE(cmd->timeout_ms);
if (d.data_len && (ioucmd->flags & IORING_URING_CMD_FIXED)) {
/* fixedbufs is only for non-vectored io */
if (vec)
return -EINVAL;
ret = io_uring_cmd_import_fixed(d.addr, d.data_len,
nvme_is_write(&c) ? WRITE : READ, &iter, ioucmd,
issue_flags);
if (ret < 0)
return ret;
map_iter = &iter;
}
if (issue_flags & IO_URING_F_NONBLOCK) {
rq_flags |= REQ_NOWAIT;
blk_flags = BLK_MQ_REQ_NOWAIT;
@ -517,11 +519,11 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
if (d.data_len) {
ret = nvme_map_user_request(req, d.addr,
d.data_len, nvme_to_user_ptr(d.metadata),
d.metadata_len, ioucmd, vec, issue_flags);
ret = nvme_map_user_request(req, d.addr, d.data_len,
nvme_to_user_ptr(d.metadata), d.metadata_len,
map_iter, vec);
if (ret)
return ret;
goto out_free_req;
}
/* to free bio on completion, as req->bio will be null at that time */
@ -531,6 +533,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
req->end_io = nvme_uring_cmd_end_io;
blk_execute_rq_nowait(req, false);
return -EIOCBQUEUED;
out_free_req:
blk_mq_free_request(req);
return ret;
}
static bool is_ctrl_ioctl(unsigned int cmd)

View File

@ -986,6 +986,9 @@ static void nvme_submit_cmds(struct nvme_queue *nvmeq, struct rq_list *rqlist)
{
struct request *req;
if (rq_list_empty(rqlist))
return;
spin_lock(&nvmeq->sq_lock);
while ((req = rq_list_pop(rqlist))) {
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);

View File

@ -1264,6 +1264,7 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
struct nvmet_pci_epf_ctrl *ctrl = tctrl->drvdata;
struct nvmet_pci_epf_queue *cq = &ctrl->cq[cqid];
u16 status;
int ret;
if (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags))
return NVME_SC_QID_INVALID | NVME_STATUS_DNR;
@ -1298,6 +1299,24 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
if (status != NVME_SC_SUCCESS)
goto err;
/*
* Map the CQ PCI address space and since PCI endpoint controllers may
* return a partial mapping, check that the mapping is large enough.
*/
ret = nvmet_pci_epf_mem_map(ctrl->nvme_epf, cq->pci_addr, cq->pci_size,
&cq->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map CQ %u (err=%d)\n",
cq->qid, ret);
goto err_internal;
}
if (cq->pci_map.pci_size < cq->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
cq->qid);
goto err_unmap_queue;
}
set_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags);
dev_dbg(ctrl->dev, "CQ[%u]: %u entries of %zu B, IRQ vector %u\n",
@ -1305,6 +1324,10 @@ static u16 nvmet_pci_epf_create_cq(struct nvmet_ctrl *tctrl,
return NVME_SC_SUCCESS;
err_unmap_queue:
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
err_internal:
status = NVME_SC_INTERNAL | NVME_STATUS_DNR;
err:
if (test_and_clear_bit(NVMET_PCI_EPF_Q_IRQ_ENABLED, &cq->flags))
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
@ -1322,6 +1345,7 @@ static u16 nvmet_pci_epf_delete_cq(struct nvmet_ctrl *tctrl, u16 cqid)
cancel_delayed_work_sync(&cq->work);
nvmet_pci_epf_drain_queue(cq);
nvmet_pci_epf_remove_irq_vector(ctrl, cq->vector);
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &cq->pci_map);
return NVME_SC_SUCCESS;
}
@ -1553,36 +1577,6 @@ static void nvmet_pci_epf_free_queues(struct nvmet_pci_epf_ctrl *ctrl)
ctrl->cq = NULL;
}
static int nvmet_pci_epf_map_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
struct nvmet_pci_epf *nvme_epf = ctrl->nvme_epf;
int ret;
ret = nvmet_pci_epf_mem_map(nvme_epf, queue->pci_addr,
queue->pci_size, &queue->pci_map);
if (ret) {
dev_err(ctrl->dev, "Failed to map queue %u (err=%d)\n",
queue->qid, ret);
return ret;
}
if (queue->pci_map.pci_size < queue->pci_size) {
dev_err(ctrl->dev, "Invalid partial mapping of queue %u\n",
queue->qid);
nvmet_pci_epf_mem_unmap(nvme_epf, &queue->pci_map);
return -ENOMEM;
}
return 0;
}
static inline void nvmet_pci_epf_unmap_queue(struct nvmet_pci_epf_ctrl *ctrl,
struct nvmet_pci_epf_queue *queue)
{
nvmet_pci_epf_mem_unmap(ctrl->nvme_epf, &queue->pci_map);
}
static void nvmet_pci_epf_exec_iod_work(struct work_struct *work)
{
struct nvmet_pci_epf_iod *iod =
@ -1746,11 +1740,7 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
struct nvme_completion *cqe;
struct nvmet_pci_epf_iod *iod;
unsigned long flags;
int ret, n = 0;
ret = nvmet_pci_epf_map_queue(ctrl, cq);
if (ret)
goto again;
int ret = 0, n = 0;
while (test_bit(NVMET_PCI_EPF_Q_LIVE, &cq->flags) && ctrl->link_up) {
@ -1797,8 +1787,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
n++;
}
nvmet_pci_epf_unmap_queue(ctrl, cq);
/*
* We do not support precise IRQ coalescing time (100ns units as per
* NVMe specifications). So if we have posted completion entries without
@ -1807,7 +1795,6 @@ static void nvmet_pci_epf_cq_work(struct work_struct *work)
if (n)
nvmet_pci_epf_raise_irq(ctrl, cq, true);
again:
if (ret < 0)
queue_delayed_work(system_highpri_wq, &cq->work,
NVMET_PCI_EPF_CQ_RETRY_INTERVAL);