mirror of
https://github.com/torvalds/linux.git
synced 2025-04-06 00:16:18 +00:00
for-6.15/io_uring-20250322
-----BEGIN PGP SIGNATURE----- iQJEBAABCAAuFiEEwPw5LcreJtl1+l5K99NY+ylx4KYFAmfe8DcQHGF4Ym9lQGtl cm5lbC5kawAKCRD301j7KXHgprzNEADPOOfi05F0f4KOMLYOiRlB+D9yGtnbcu8k foPATU++L5fFDUpjAGobi38AXqHtF/Bx0CjCkPCvy4GjygcszYDY81FQmRlSFdeH CnYV/bm5myyupc8xANcWGJYpxf/ZAw7ugOAgcR3Xxk/qXdD5hYJ1jG4477o8v4IJ jyzE4vQCnWBogIoqpWTQyY3FSP5fdjeQlYdFGIFCx68rFTO9zEpuHhFr2OMfJtSu y1dLkeqbu1BZ0fhgRog9k9ZWk0uRQJI7d7EUXf9iwXOfkUyzCaV+nHvNGRdhJ/XO zj/qePw4lFyCgc+kuIejjIaPf/g84paCzshz/PIxOZTsvo6X78zj72RPM0Makhga amgtEqegWHtKSya1zew57oDwqwaIe6b+56PtNSP7K8IrwcJLheszoPKJIwiiynIL ZqvWseqYQXs58S9qZzSZlyEpwLVFcvyNt7Z/q7b56vhvXHabxOccPkUHt+UscVoQ qpG0R9+zmIiDfzY6Vfu7JyH2Uspdq3WjGYiaoVUko1rJXY+HFPd3be9pIbckyaA8 ZQHIxB+h8IlxhpQ8HJKQpZhtYiluxEu6wDdvtpwC0KiGp5g4Q5eg9SkVyvETIEN8 yW2tHqMAJJtJjDAPwJOV+GIyWQMIexzLmMP7Fq8R5VIoAox+Xn3flgNNEnMZdl/r kTc/hRjHvQ== =VEQr -----END PGP SIGNATURE----- Merge tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux Pull io_uring updates from Jens Axboe: "This is the first of the io_uring pull requests for the 6.15 merge window, there will be others once the net tree has gone in. This contains: - Cleanup and unification of cancelation handling across various request types. - Improvement for bundles, supporting them both for incrementally consumed buffers, and for non-multishot requests. - Enable toggling of using iowait while waiting on io_uring events or not. Unfortunately this is still tied with CPU frequency boosting on short waits, as the scheduler side has not been very receptive to splitting the (useless) iowait stat from the cpufreq implied boost. - Add support for kbuf nodes, enabling zero-copy support for the ublk block driver. - Various cleanups for resource node handling. - Series greatly cleaning up the legacy provided (non-ring based) buffers. For years, we've been pushing the ring provided buffers as the way to go, and that is what people have been using. Reduce the complexity and code associated with legacy provided buffers. 
- Series cleaning up the compat handling. - Series improving and cleaning up the recvmsg/sendmsg iovec and msg handling. - Series of cleanups for io-wq. - Start adding a bunch of selftests. The liburing repository generally carries feature and regression tests for everything, but at least for ublk initially, we'll try and go the route of having it in selftests as well. We'll see how this goes, might decide to migrate more tests this way in the future. - Various little cleanups and fixes" * tag 'for-6.15/io_uring-20250322' of git://git.kernel.dk/linux: (108 commits) selftests: ublk: add stripe target selftests: ublk: simplify loop io completion selftests: ublk: enable zero copy for null target selftests: ublk: prepare for supporting stripe target selftests: ublk: move common code into common.c selftests: ublk: increase max buffer size to 1MB selftests: ublk: add single sqe allocator helper selftests: ublk: add generic_01 for verifying sequential IO order selftests: ublk: fix starting ublk device io_uring: enable toggle of iowait usage when waiting on CQEs selftests: ublk: fix write cache implementation selftests: ublk: add variable for user to not show test result selftests: ublk: don't show `modprobe` failure selftests: ublk: add one dependency header io_uring/kbuf: enable bundles for incrementally consumed buffers Revert "io_uring/rsrc: simplify the bvec iter count calculation" selftests: ublk: improve test usability selftests: ublk: add stress test for covering IO vs. killing ublk server selftests: ublk: add one stress test for covering IO vs. removing device selftests: ublk: load/unload ublk_drv when preparing & cleaning up tests ...
This commit is contained in:
commit
91928e0d3c
@ -24397,6 +24397,7 @@ S: Maintained
|
||||
F: Documentation/block/ublk.rst
|
||||
F: drivers/block/ublk_drv.c
|
||||
F: include/uapi/linux/ublk_cmd.h
|
||||
F: tools/testing/selftests/ublk/
|
||||
|
||||
UBSAN
|
||||
M: Kees Cook <kees@kernel.org>
|
||||
|
@ -51,6 +51,9 @@
|
||||
/* private ioctl command mirror */
|
||||
#define UBLK_CMD_DEL_DEV_ASYNC _IOC_NR(UBLK_U_CMD_DEL_DEV_ASYNC)
|
||||
|
||||
#define UBLK_IO_REGISTER_IO_BUF _IOC_NR(UBLK_U_IO_REGISTER_IO_BUF)
|
||||
#define UBLK_IO_UNREGISTER_IO_BUF _IOC_NR(UBLK_U_IO_UNREGISTER_IO_BUF)
|
||||
|
||||
/* All UBLK_F_* have to be included into UBLK_F_ALL */
|
||||
#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
|
||||
| UBLK_F_URING_CMD_COMP_IN_TASK \
|
||||
@ -196,12 +199,14 @@ struct ublk_params_header {
|
||||
|
||||
static bool ublk_abort_requests(struct ublk_device *ub, struct ublk_queue *ubq);
|
||||
|
||||
static inline struct request *__ublk_check_and_get_req(struct ublk_device *ub,
|
||||
struct ublk_queue *ubq, int tag, size_t offset);
|
||||
static inline unsigned int ublk_req_build_flags(struct request *req);
|
||||
static inline struct ublksrv_io_desc *ublk_get_iod(struct ublk_queue *ubq,
|
||||
int tag);
|
||||
static inline bool ublk_dev_is_user_copy(const struct ublk_device *ub)
|
||||
{
|
||||
return ub->dev_info.flags & UBLK_F_USER_COPY;
|
||||
return ub->dev_info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
|
||||
}
|
||||
|
||||
static inline bool ublk_dev_is_zoned(const struct ublk_device *ub)
|
||||
@ -581,7 +586,7 @@ static void ublk_apply_params(struct ublk_device *ub)
|
||||
|
||||
static inline bool ublk_support_user_copy(const struct ublk_queue *ubq)
|
||||
{
|
||||
return ubq->flags & UBLK_F_USER_COPY;
|
||||
return ubq->flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY);
|
||||
}
|
||||
|
||||
static inline bool ublk_need_req_ref(const struct ublk_queue *ubq)
|
||||
@ -1747,6 +1752,42 @@ static inline void ublk_prep_cancel(struct io_uring_cmd *cmd,
|
||||
io_uring_cmd_mark_cancelable(cmd, issue_flags);
|
||||
}
|
||||
|
||||
static void ublk_io_release(void *priv)
|
||||
{
|
||||
struct request *rq = priv;
|
||||
struct ublk_queue *ubq = rq->mq_hctx->driver_data;
|
||||
|
||||
ublk_put_req_ref(ubq, rq);
|
||||
}
|
||||
|
||||
static int ublk_register_io_buf(struct io_uring_cmd *cmd,
|
||||
struct ublk_queue *ubq, unsigned int tag,
|
||||
unsigned int index, unsigned int issue_flags)
|
||||
{
|
||||
struct ublk_device *ub = cmd->file->private_data;
|
||||
struct request *req;
|
||||
int ret;
|
||||
|
||||
req = __ublk_check_and_get_req(ub, ubq, tag, 0);
|
||||
if (!req)
|
||||
return -EINVAL;
|
||||
|
||||
ret = io_buffer_register_bvec(cmd, req, ublk_io_release, index,
|
||||
issue_flags);
|
||||
if (ret) {
|
||||
ublk_put_req_ref(ubq, req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int ublk_unregister_io_buf(struct io_uring_cmd *cmd,
|
||||
unsigned int index, unsigned int issue_flags)
|
||||
{
|
||||
return io_buffer_unregister_bvec(cmd, index, issue_flags);
|
||||
}
|
||||
|
||||
static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
unsigned int issue_flags,
|
||||
const struct ublksrv_io_cmd *ub_cmd)
|
||||
@ -1798,6 +1839,10 @@ static int __ublk_ch_uring_cmd(struct io_uring_cmd *cmd,
|
||||
|
||||
ret = -EINVAL;
|
||||
switch (_IOC_NR(cmd_op)) {
|
||||
case UBLK_IO_REGISTER_IO_BUF:
|
||||
return ublk_register_io_buf(cmd, ubq, tag, ub_cmd->addr, issue_flags);
|
||||
case UBLK_IO_UNREGISTER_IO_BUF:
|
||||
return ublk_unregister_io_buf(cmd, ub_cmd->addr, issue_flags);
|
||||
case UBLK_IO_FETCH_REQ:
|
||||
/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
|
||||
if (ublk_queue_ready(ubq)) {
|
||||
@ -2459,7 +2504,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
* buffer by pwrite() to ublk char device, which can't be
|
||||
* used for unprivileged device
|
||||
*/
|
||||
if (info.flags & UBLK_F_USER_COPY)
|
||||
if (info.flags & (UBLK_F_USER_COPY | UBLK_F_SUPPORT_ZERO_COPY))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
@ -2527,9 +2572,6 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
|
||||
goto out_free_dev_number;
|
||||
}
|
||||
|
||||
/* We are not ready to support zero copy */
|
||||
ub->dev_info.flags &= ~UBLK_F_SUPPORT_ZERO_COPY;
|
||||
|
||||
ub->dev_info.nr_hw_queues = min_t(unsigned int,
|
||||
ub->dev_info.nr_hw_queues, nr_cpu_ids);
|
||||
ublk_align_max_io_size(ub);
|
||||
@ -2863,7 +2905,7 @@ static int ublk_ctrl_get_features(struct io_uring_cmd *cmd)
|
||||
{
|
||||
const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
|
||||
void __user *argp = (void __user *)(unsigned long)header->addr;
|
||||
u64 features = UBLK_F_ALL & ~UBLK_F_SUPPORT_ZERO_COPY;
|
||||
u64 features = UBLK_F_ALL;
|
||||
|
||||
if (header->len != UBLK_FEATURES_LEN || !header->addr)
|
||||
return -EINVAL;
|
||||
|
@ -114,7 +114,8 @@ static struct request *nvme_alloc_user_request(struct request_queue *q,
|
||||
|
||||
static int nvme_map_user_request(struct request *req, u64 ubuffer,
|
||||
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
|
||||
struct io_uring_cmd *ioucmd, unsigned int flags)
|
||||
struct io_uring_cmd *ioucmd, unsigned int flags,
|
||||
unsigned int iou_issue_flags)
|
||||
{
|
||||
struct request_queue *q = req->q;
|
||||
struct nvme_ns *ns = q->queuedata;
|
||||
@ -146,7 +147,8 @@ static int nvme_map_user_request(struct request *req, u64 ubuffer,
|
||||
goto out;
|
||||
}
|
||||
ret = io_uring_cmd_import_fixed(ubuffer, bufflen,
|
||||
rq_data_dir(req), &iter, ioucmd);
|
||||
rq_data_dir(req), &iter, ioucmd,
|
||||
iou_issue_flags);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = blk_rq_map_user_iov(q, req, NULL, &iter, GFP_KERNEL);
|
||||
@ -198,7 +200,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
|
||||
req->timeout = timeout;
|
||||
if (ubuffer && bufflen) {
|
||||
ret = nvme_map_user_request(req, ubuffer, bufflen, meta_buffer,
|
||||
meta_len, NULL, flags);
|
||||
meta_len, NULL, flags, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
@ -514,10 +516,10 @@ static int nvme_uring_cmd_io(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
|
||||
return PTR_ERR(req);
|
||||
req->timeout = d.timeout_ms ? msecs_to_jiffies(d.timeout_ms) : 0;
|
||||
|
||||
if (d.addr && d.data_len) {
|
||||
if (d.data_len) {
|
||||
ret = nvme_map_user_request(req, d.addr,
|
||||
d.data_len, nvme_to_user_ptr(d.metadata),
|
||||
d.metadata_len, ioucmd, vec);
|
||||
d.metadata_len, ioucmd, vec, issue_flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
@ -4,6 +4,7 @@
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <linux/blk-mq.h>
|
||||
|
||||
/* only top 8 bits of sqe->uring_cmd_flags for kernel internal use */
|
||||
#define IORING_URING_CMD_CANCELABLE (1U << 30)
|
||||
@ -39,7 +40,9 @@ static inline void io_uring_cmd_private_sz_check(size_t cmd_sz)
|
||||
|
||||
#if defined(CONFIG_IO_URING)
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd);
|
||||
struct iov_iter *iter,
|
||||
struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags);
|
||||
|
||||
/*
|
||||
* Completes the request, i.e. posts an io_uring CQE and deallocates @ioucmd
|
||||
@ -66,8 +69,10 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
void io_uring_cmd_issue_blocking(struct io_uring_cmd *ioucmd);
|
||||
|
||||
#else
|
||||
static inline int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
static inline int
|
||||
io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
return -EOPNOTSUPP;
|
||||
}
|
||||
@ -123,4 +128,10 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur
|
||||
return cmd_to_io_kiocb(cmd)->async_data;
|
||||
}
|
||||
|
||||
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
|
||||
void (*release)(void *), unsigned int index,
|
||||
unsigned int issue_flags);
|
||||
int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
|
||||
unsigned int issue_flags);
|
||||
|
||||
#endif /* _LINUX_IO_URING_CMD_H */
|
||||
|
@ -292,6 +292,8 @@ struct io_ring_ctx {
|
||||
|
||||
struct io_file_table file_table;
|
||||
struct io_rsrc_data buf_table;
|
||||
struct io_alloc_cache node_cache;
|
||||
struct io_alloc_cache imu_cache;
|
||||
|
||||
struct io_submit_state submit_state;
|
||||
|
||||
@ -360,7 +362,6 @@ struct io_ring_ctx {
|
||||
|
||||
spinlock_t completion_lock;
|
||||
|
||||
struct list_head io_buffers_comp;
|
||||
struct list_head cq_overflow_list;
|
||||
|
||||
struct hlist_head waitid_list;
|
||||
@ -379,8 +380,6 @@ struct io_ring_ctx {
|
||||
unsigned int file_alloc_start;
|
||||
unsigned int file_alloc_end;
|
||||
|
||||
struct list_head io_buffers_cache;
|
||||
|
||||
/* Keep this last, we don't need it for the fast path */
|
||||
struct wait_queue_head poll_wq;
|
||||
struct io_restriction restrictions;
|
||||
@ -439,8 +438,15 @@ struct io_ring_ctx {
|
||||
struct io_mapped_region param_region;
|
||||
};
|
||||
|
||||
/*
|
||||
* Token indicating function is called in task work context:
|
||||
* ctx->uring_lock is held and any completions generated will be flushed.
|
||||
* ONLY core io_uring.c should instantiate this struct.
|
||||
*/
|
||||
struct io_tw_state {
|
||||
};
|
||||
/* Alias to use in code that doesn't instantiate struct io_tw_state */
|
||||
typedef struct io_tw_state io_tw_token_t;
|
||||
|
||||
enum {
|
||||
REQ_F_FIXED_FILE_BIT = IOSQE_FIXED_FILE_BIT,
|
||||
@ -566,7 +572,7 @@ enum {
|
||||
REQ_F_HAS_METADATA = IO_REQ_FLAG(REQ_F_HAS_METADATA_BIT),
|
||||
};
|
||||
|
||||
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
typedef void (*io_req_tw_func_t)(struct io_kiocb *req, io_tw_token_t tw);
|
||||
|
||||
struct io_task_work {
|
||||
struct llist_node node;
|
||||
@ -601,7 +607,11 @@ static inline void io_kiocb_cmd_sz_check(size_t cmd_sz)
|
||||
io_kiocb_cmd_sz_check(sizeof(cmd_type)) , \
|
||||
((cmd_type *)&(req)->cmd) \
|
||||
)
|
||||
#define cmd_to_io_kiocb(ptr) ((struct io_kiocb *) ptr)
|
||||
|
||||
static inline struct io_kiocb *cmd_to_io_kiocb(void *ptr)
|
||||
{
|
||||
return ptr;
|
||||
}
|
||||
|
||||
struct io_kiocb {
|
||||
union {
|
||||
|
@ -541,6 +541,7 @@ struct io_cqring_offsets {
|
||||
#define IORING_ENTER_REGISTERED_RING (1U << 4)
|
||||
#define IORING_ENTER_ABS_TIMER (1U << 5)
|
||||
#define IORING_ENTER_EXT_ARG_REG (1U << 6)
|
||||
#define IORING_ENTER_NO_IOWAIT (1U << 7)
|
||||
|
||||
/*
|
||||
* Passed in for io_uring_setup(2). Copied back with updated info on success
|
||||
@ -578,6 +579,7 @@ struct io_uring_params {
|
||||
#define IORING_FEAT_RECVSEND_BUNDLE (1U << 14)
|
||||
#define IORING_FEAT_MIN_TIMEOUT (1U << 15)
|
||||
#define IORING_FEAT_RW_ATTR (1U << 16)
|
||||
#define IORING_FEAT_NO_IOWAIT (1U << 17)
|
||||
|
||||
/*
|
||||
* io_uring_register(2) opcodes and arguments
|
||||
|
@ -94,6 +94,10 @@
|
||||
_IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd)
|
||||
#define UBLK_U_IO_NEED_GET_DATA \
|
||||
_IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd)
|
||||
#define UBLK_U_IO_REGISTER_IO_BUF \
|
||||
_IOWR('u', 0x23, struct ublksrv_io_cmd)
|
||||
#define UBLK_U_IO_UNREGISTER_IO_BUF \
|
||||
_IOWR('u', 0x24, struct ublksrv_io_cmd)
|
||||
|
||||
/* only ABORT means that no re-fetch */
|
||||
#define UBLK_IO_RES_OK 0
|
||||
|
@ -68,4 +68,10 @@ static inline void *io_cache_alloc(struct io_alloc_cache *cache, gfp_t gfp)
|
||||
return io_cache_alloc_new(cache, gfp);
|
||||
}
|
||||
|
||||
static inline void io_cache_free(struct io_alloc_cache *cache, void *obj)
|
||||
{
|
||||
if (!io_alloc_cache_put(cache, obj))
|
||||
kfree(obj);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -341,3 +341,45 @@ out:
|
||||
fput(file);
|
||||
return ret;
|
||||
}
|
||||
|
||||
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
|
||||
struct hlist_head *list, bool cancel_all,
|
||||
bool (*cancel)(struct io_kiocb *))
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
bool found = false;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
|
||||
if (!io_match_task_safe(req, tctx, cancel_all))
|
||||
continue;
|
||||
hlist_del_init(&req->hash_node);
|
||||
if (cancel(req))
|
||||
found = true;
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
||||
unsigned int issue_flags, struct hlist_head *list,
|
||||
bool (*cancel)(struct io_kiocb *))
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
int nr = 0;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
hlist_for_each_entry_safe(req, tmp, list, hash_node) {
|
||||
if (!io_cancel_req_match(req, cd))
|
||||
continue;
|
||||
if (cancel(req))
|
||||
nr++;
|
||||
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
|
||||
break;
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return nr ?: -ENOENT;
|
||||
}
|
||||
|
@ -24,6 +24,14 @@ int io_try_cancel(struct io_uring_task *tctx, struct io_cancel_data *cd,
|
||||
int io_sync_cancel(struct io_ring_ctx *ctx, void __user *arg);
|
||||
bool io_cancel_req_match(struct io_kiocb *req, struct io_cancel_data *cd);
|
||||
|
||||
bool io_cancel_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
|
||||
struct hlist_head *list, bool cancel_all,
|
||||
bool (*cancel)(struct io_kiocb *));
|
||||
|
||||
int io_cancel_remove(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
||||
unsigned int issue_flags, struct hlist_head *list,
|
||||
bool (*cancel)(struct io_kiocb *));
|
||||
|
||||
static inline bool io_cancel_match_sequence(struct io_kiocb *req, int sequence)
|
||||
{
|
||||
if (req->cancel_seq_set && sequence == req->work.cancel_seq)
|
||||
|
@ -68,7 +68,7 @@ static int io_install_fixed_file(struct io_ring_ctx *ctx, struct file *file,
|
||||
if (slot_index >= ctx->file_table.data.nr)
|
||||
return -EINVAL;
|
||||
|
||||
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
|
||||
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
|
||||
if (!node)
|
||||
return -ENOMEM;
|
||||
|
||||
|
@ -44,30 +44,28 @@ void io_futex_cache_free(struct io_ring_ctx *ctx)
|
||||
io_alloc_cache_free(&ctx->futex_cache, kfree);
|
||||
}
|
||||
|
||||
static void __io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void __io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
req->async_data = NULL;
|
||||
hlist_del_init(&req->hash_node);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
}
|
||||
|
||||
static void io_futex_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_futex_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_futex_data *ifd = req->async_data;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
io_tw_lock(ctx, ts);
|
||||
if (!io_alloc_cache_put(&ctx->futex_cache, ifd))
|
||||
kfree(ifd);
|
||||
__io_futex_complete(req, ts);
|
||||
io_tw_lock(ctx, tw);
|
||||
io_cache_free(&ctx->futex_cache, req->async_data);
|
||||
__io_futex_complete(req, tw);
|
||||
}
|
||||
|
||||
static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_futexv_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_futex *iof = io_kiocb_to_cmd(req, struct io_futex);
|
||||
struct futex_vector *futexv = req->async_data;
|
||||
|
||||
io_tw_lock(req->ctx, ts);
|
||||
io_tw_lock(req->ctx, tw);
|
||||
|
||||
if (!iof->futexv_unqueued) {
|
||||
int res;
|
||||
@ -79,7 +77,7 @@ static void io_futexv_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
|
||||
kfree(req->async_data);
|
||||
req->flags &= ~REQ_F_ASYNC_DATA;
|
||||
__io_futex_complete(req, ts);
|
||||
__io_futex_complete(req, tw);
|
||||
}
|
||||
|
||||
static bool io_futexv_claim(struct io_futex *iof)
|
||||
@ -90,7 +88,7 @@ static bool io_futexv_claim(struct io_futex *iof)
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
|
||||
static bool __io_futex_cancel(struct io_kiocb *req)
|
||||
{
|
||||
/* futex wake already done or in progress */
|
||||
if (req->opcode == IORING_OP_FUTEX_WAIT) {
|
||||
@ -116,49 +114,13 @@ static bool __io_futex_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
|
||||
int io_futex_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
int nr = 0;
|
||||
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
|
||||
return -ENOENT;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
|
||||
if (req->cqe.user_data != cd->data &&
|
||||
!(cd->flags & IORING_ASYNC_CANCEL_ANY))
|
||||
continue;
|
||||
if (__io_futex_cancel(ctx, req))
|
||||
nr++;
|
||||
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
|
||||
break;
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
||||
if (nr)
|
||||
return nr;
|
||||
|
||||
return -ENOENT;
|
||||
return io_cancel_remove(ctx, cd, issue_flags, &ctx->futex_list, __io_futex_cancel);
|
||||
}
|
||||
|
||||
bool io_futex_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
|
||||
bool cancel_all)
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
bool found = false;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
hlist_for_each_entry_safe(req, tmp, &ctx->futex_list, hash_node) {
|
||||
if (!io_match_task_safe(req, tctx, cancel_all))
|
||||
continue;
|
||||
hlist_del_init(&req->hash_node);
|
||||
__io_futex_cancel(ctx, req);
|
||||
found = true;
|
||||
}
|
||||
|
||||
return found;
|
||||
return io_cancel_remove_all(ctx, tctx, &ctx->futex_list, cancel_all, __io_futex_cancel);
|
||||
}
|
||||
|
||||
int io_futex_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
230
io_uring/io-wq.c
230
io_uring/io-wq.c
@ -30,7 +30,6 @@ enum {
|
||||
IO_WORKER_F_UP = 0, /* up and active */
|
||||
IO_WORKER_F_RUNNING = 1, /* account as running */
|
||||
IO_WORKER_F_FREE = 2, /* worker on free list */
|
||||
IO_WORKER_F_BOUND = 3, /* is doing bounded work */
|
||||
};
|
||||
|
||||
enum {
|
||||
@ -46,12 +45,12 @@ enum {
|
||||
*/
|
||||
struct io_worker {
|
||||
refcount_t ref;
|
||||
int create_index;
|
||||
unsigned long flags;
|
||||
struct hlist_nulls_node nulls_node;
|
||||
struct list_head all_list;
|
||||
struct task_struct *task;
|
||||
struct io_wq *wq;
|
||||
struct io_wq_acct *acct;
|
||||
|
||||
struct io_wq_work *cur_work;
|
||||
raw_spinlock_t lock;
|
||||
@ -77,10 +76,27 @@ struct io_worker {
|
||||
#define IO_WQ_NR_HASH_BUCKETS (1u << IO_WQ_HASH_ORDER)
|
||||
|
||||
struct io_wq_acct {
|
||||
/**
|
||||
* Protects access to the worker lists.
|
||||
*/
|
||||
raw_spinlock_t workers_lock;
|
||||
|
||||
unsigned nr_workers;
|
||||
unsigned max_workers;
|
||||
int index;
|
||||
atomic_t nr_running;
|
||||
|
||||
/**
|
||||
* The list of free workers. Protected by #workers_lock
|
||||
* (write) and RCU (read).
|
||||
*/
|
||||
struct hlist_nulls_head free_list;
|
||||
|
||||
/**
|
||||
* The list of all workers. Protected by #workers_lock
|
||||
* (write) and RCU (read).
|
||||
*/
|
||||
struct list_head all_list;
|
||||
|
||||
raw_spinlock_t lock;
|
||||
struct io_wq_work_list work_list;
|
||||
unsigned long flags;
|
||||
@ -112,12 +128,6 @@ struct io_wq {
|
||||
|
||||
struct io_wq_acct acct[IO_WQ_ACCT_NR];
|
||||
|
||||
/* lock protects access to elements below */
|
||||
raw_spinlock_t lock;
|
||||
|
||||
struct hlist_nulls_head free_list;
|
||||
struct list_head all_list;
|
||||
|
||||
struct wait_queue_entry wait;
|
||||
|
||||
struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
|
||||
@ -135,7 +145,7 @@ struct io_cb_cancel_data {
|
||||
bool cancel_all;
|
||||
};
|
||||
|
||||
static bool create_io_worker(struct io_wq *wq, int index);
|
||||
static bool create_io_worker(struct io_wq *wq, struct io_wq_acct *acct);
|
||||
static void io_wq_dec_running(struct io_worker *worker);
|
||||
static bool io_acct_cancel_pending_work(struct io_wq *wq,
|
||||
struct io_wq_acct *acct,
|
||||
@ -160,14 +170,14 @@ static inline struct io_wq_acct *io_get_acct(struct io_wq *wq, bool bound)
|
||||
}
|
||||
|
||||
static inline struct io_wq_acct *io_work_get_acct(struct io_wq *wq,
|
||||
struct io_wq_work *work)
|
||||
unsigned int work_flags)
|
||||
{
|
||||
return io_get_acct(wq, !(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND));
|
||||
return io_get_acct(wq, !(work_flags & IO_WQ_WORK_UNBOUND));
|
||||
}
|
||||
|
||||
static inline struct io_wq_acct *io_wq_get_acct(struct io_worker *worker)
|
||||
{
|
||||
return io_get_acct(worker->wq, test_bit(IO_WORKER_F_BOUND, &worker->flags));
|
||||
return worker->acct;
|
||||
}
|
||||
|
||||
static void io_worker_ref_put(struct io_wq *wq)
|
||||
@ -192,9 +202,9 @@ static void io_worker_cancel_cb(struct io_worker *worker)
|
||||
struct io_wq *wq = worker->wq;
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
io_worker_ref_put(wq);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
io_worker_release(worker);
|
||||
@ -213,6 +223,7 @@ static bool io_task_worker_match(struct callback_head *cb, void *data)
|
||||
static void io_worker_exit(struct io_worker *worker)
|
||||
{
|
||||
struct io_wq *wq = worker->wq;
|
||||
struct io_wq_acct *acct = io_wq_get_acct(worker);
|
||||
|
||||
while (1) {
|
||||
struct callback_head *cb = task_work_cancel_match(wq->task,
|
||||
@ -226,11 +237,11 @@ static void io_worker_exit(struct io_worker *worker)
|
||||
io_worker_release(worker);
|
||||
wait_for_completion(&worker->ref_done);
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
if (test_bit(IO_WORKER_F_FREE, &worker->flags))
|
||||
hlist_nulls_del_rcu(&worker->nulls_node);
|
||||
list_del_rcu(&worker->all_list);
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
io_wq_dec_running(worker);
|
||||
/*
|
||||
* this worker is a goner, clear ->worker_private to avoid any
|
||||
@ -269,8 +280,7 @@ static inline bool io_acct_run_queue(struct io_wq_acct *acct)
|
||||
* Check head of free list for an available worker. If one isn't available,
|
||||
* caller must create one.
|
||||
*/
|
||||
static bool io_wq_activate_free_worker(struct io_wq *wq,
|
||||
struct io_wq_acct *acct)
|
||||
static bool io_acct_activate_free_worker(struct io_wq_acct *acct)
|
||||
__must_hold(RCU)
|
||||
{
|
||||
struct hlist_nulls_node *n;
|
||||
@ -281,13 +291,9 @@ static bool io_wq_activate_free_worker(struct io_wq *wq,
|
||||
* activate. If a given worker is on the free_list but in the process
|
||||
* of exiting, keep trying.
|
||||
*/
|
||||
hlist_nulls_for_each_entry_rcu(worker, n, &wq->free_list, nulls_node) {
|
||||
hlist_nulls_for_each_entry_rcu(worker, n, &acct->free_list, nulls_node) {
|
||||
if (!io_worker_get(worker))
|
||||
continue;
|
||||
if (io_wq_get_acct(worker) != acct) {
|
||||
io_worker_release(worker);
|
||||
continue;
|
||||
}
|
||||
/*
|
||||
* If the worker is already running, it's either already
|
||||
* starting work or finishing work. In either case, if it does
|
||||
@ -314,16 +320,16 @@ static bool io_wq_create_worker(struct io_wq *wq, struct io_wq_acct *acct)
|
||||
if (unlikely(!acct->max_workers))
|
||||
pr_warn_once("io-wq is not configured for unbound workers");
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
if (acct->nr_workers >= acct->max_workers) {
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
return true;
|
||||
}
|
||||
acct->nr_workers++;
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
atomic_inc(&acct->nr_running);
|
||||
atomic_inc(&wq->worker_refs);
|
||||
return create_io_worker(wq, acct->index);
|
||||
return create_io_worker(wq, acct);
|
||||
}
|
||||
|
||||
static void io_wq_inc_running(struct io_worker *worker)
|
||||
@ -343,16 +349,16 @@ static void create_worker_cb(struct callback_head *cb)
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
wq = worker->wq;
|
||||
acct = &wq->acct[worker->create_index];
|
||||
raw_spin_lock(&wq->lock);
|
||||
acct = worker->acct;
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
|
||||
if (acct->nr_workers < acct->max_workers) {
|
||||
acct->nr_workers++;
|
||||
do_create = true;
|
||||
}
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
if (do_create) {
|
||||
create_io_worker(wq, worker->create_index);
|
||||
create_io_worker(wq, acct);
|
||||
} else {
|
||||
atomic_dec(&acct->nr_running);
|
||||
io_worker_ref_put(wq);
|
||||
@ -384,7 +390,6 @@ static bool io_queue_worker_create(struct io_worker *worker,
|
||||
|
||||
atomic_inc(&wq->worker_refs);
|
||||
init_task_work(&worker->create_work, func);
|
||||
worker->create_index = acct->index;
|
||||
if (!task_work_add(wq->task, &worker->create_work, TWA_SIGNAL)) {
|
||||
/*
|
||||
* EXIT may have been set after checking it above, check after
|
||||
@ -430,31 +435,36 @@ static void io_wq_dec_running(struct io_worker *worker)
|
||||
* Worker will start processing some work. Move it to the busy list, if
|
||||
* it's currently on the freelist
|
||||
*/
|
||||
static void __io_worker_busy(struct io_wq *wq, struct io_worker *worker)
|
||||
static void __io_worker_busy(struct io_wq_acct *acct, struct io_worker *worker)
|
||||
{
|
||||
if (test_bit(IO_WORKER_F_FREE, &worker->flags)) {
|
||||
clear_bit(IO_WORKER_F_FREE, &worker->flags);
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
hlist_nulls_del_init_rcu(&worker->nulls_node);
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* No work, worker going to sleep. Move to freelist.
|
||||
*/
|
||||
static void __io_worker_idle(struct io_wq *wq, struct io_worker *worker)
|
||||
__must_hold(wq->lock)
|
||||
static void __io_worker_idle(struct io_wq_acct *acct, struct io_worker *worker)
|
||||
__must_hold(acct->workers_lock)
|
||||
{
|
||||
if (!test_bit(IO_WORKER_F_FREE, &worker->flags)) {
|
||||
set_bit(IO_WORKER_F_FREE, &worker->flags);
|
||||
hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
|
||||
hlist_nulls_add_head_rcu(&worker->nulls_node, &acct->free_list);
|
||||
}
|
||||
}
|
||||
|
||||
static inline unsigned int __io_get_work_hash(unsigned int work_flags)
|
||||
{
|
||||
return work_flags >> IO_WQ_HASH_SHIFT;
|
||||
}
|
||||
|
||||
static inline unsigned int io_get_work_hash(struct io_wq_work *work)
|
||||
{
|
||||
return atomic_read(&work->flags) >> IO_WQ_HASH_SHIFT;
|
||||
return __io_get_work_hash(atomic_read(&work->flags));
|
||||
}
|
||||
|
||||
static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
|
||||
@ -475,26 +485,27 @@ static bool io_wait_on_hash(struct io_wq *wq, unsigned int hash)
|
||||
}
|
||||
|
||||
static struct io_wq_work *io_get_next_work(struct io_wq_acct *acct,
|
||||
struct io_worker *worker)
|
||||
struct io_wq *wq)
|
||||
__must_hold(acct->lock)
|
||||
{
|
||||
struct io_wq_work_node *node, *prev;
|
||||
struct io_wq_work *work, *tail;
|
||||
unsigned int stall_hash = -1U;
|
||||
struct io_wq *wq = worker->wq;
|
||||
|
||||
wq_list_for_each(node, prev, &acct->work_list) {
|
||||
unsigned int work_flags;
|
||||
unsigned int hash;
|
||||
|
||||
work = container_of(node, struct io_wq_work, list);
|
||||
|
||||
/* not hashed, can run anytime */
|
||||
if (!io_wq_is_hashed(work)) {
|
||||
work_flags = atomic_read(&work->flags);
|
||||
if (!__io_wq_is_hashed(work_flags)) {
|
||||
wq_list_del(&acct->work_list, node, prev);
|
||||
return work;
|
||||
}
|
||||
|
||||
hash = io_get_work_hash(work);
|
||||
hash = __io_get_work_hash(work_flags);
|
||||
/* all items with this hash lie in [work, tail] */
|
||||
tail = wq->hash_tail[hash];
|
||||
|
||||
@ -564,7 +575,7 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
|
||||
* can't make progress, any work completion or insertion will
|
||||
* clear the stalled flag.
|
||||
*/
|
||||
work = io_get_next_work(acct, worker);
|
||||
work = io_get_next_work(acct, wq);
|
||||
if (work) {
|
||||
/*
|
||||
* Make sure cancelation can find this, even before
|
||||
@ -583,7 +594,7 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
|
||||
if (!work)
|
||||
break;
|
||||
|
||||
__io_worker_busy(wq, worker);
|
||||
__io_worker_busy(acct, worker);
|
||||
|
||||
io_assign_current_work(worker, work);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
@ -591,12 +602,15 @@ static void io_worker_handle_work(struct io_wq_acct *acct,
|
||||
/* handle a whole dependent link */
|
||||
do {
|
||||
struct io_wq_work *next_hashed, *linked;
|
||||
unsigned int hash = io_get_work_hash(work);
|
||||
unsigned int work_flags = atomic_read(&work->flags);
|
||||
unsigned int hash = __io_wq_is_hashed(work_flags)
|
||||
? __io_get_work_hash(work_flags)
|
||||
: -1U;
|
||||
|
||||
next_hashed = wq_next_work(work);
|
||||
|
||||
if (do_kill &&
|
||||
(atomic_read(&work->flags) & IO_WQ_WORK_UNBOUND))
|
||||
(work_flags & IO_WQ_WORK_UNBOUND))
|
||||
atomic_or(IO_WQ_WORK_CANCEL, &work->flags);
|
||||
wq->do_work(work);
|
||||
io_assign_current_work(worker, NULL);
|
||||
@ -654,20 +668,20 @@ static int io_wq_worker(void *data)
|
||||
while (io_acct_run_queue(acct))
|
||||
io_worker_handle_work(acct, worker);
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
/*
|
||||
* Last sleep timed out. Exit if we're not the last worker,
|
||||
* or if someone modified our affinity.
|
||||
*/
|
||||
if (last_timeout && (exit_mask || acct->nr_workers > 1)) {
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
break;
|
||||
}
|
||||
last_timeout = false;
|
||||
__io_worker_idle(wq, worker);
|
||||
raw_spin_unlock(&wq->lock);
|
||||
__io_worker_idle(acct, worker);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
if (io_run_task_work())
|
||||
continue;
|
||||
ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
|
||||
@ -728,18 +742,18 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
|
||||
io_wq_dec_running(worker);
|
||||
}
|
||||
|
||||
static void io_init_new_worker(struct io_wq *wq, struct io_worker *worker,
|
||||
static void io_init_new_worker(struct io_wq *wq, struct io_wq_acct *acct, struct io_worker *worker,
|
||||
struct task_struct *tsk)
|
||||
{
|
||||
tsk->worker_private = worker;
|
||||
worker->task = tsk;
|
||||
set_cpus_allowed_ptr(tsk, wq->cpu_mask);
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
hlist_nulls_add_head_rcu(&worker->nulls_node, &wq->free_list);
|
||||
list_add_tail_rcu(&worker->all_list, &wq->all_list);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
hlist_nulls_add_head_rcu(&worker->nulls_node, &acct->free_list);
|
||||
list_add_tail_rcu(&worker->all_list, &acct->all_list);
|
||||
set_bit(IO_WORKER_F_FREE, &worker->flags);
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
wake_up_new_task(tsk);
|
||||
}
|
||||
|
||||
@ -787,20 +801,20 @@ static void create_worker_cont(struct callback_head *cb)
|
||||
struct io_worker *worker;
|
||||
struct task_struct *tsk;
|
||||
struct io_wq *wq;
|
||||
struct io_wq_acct *acct;
|
||||
|
||||
worker = container_of(cb, struct io_worker, create_work);
|
||||
clear_bit_unlock(0, &worker->create_state);
|
||||
wq = worker->wq;
|
||||
acct = io_wq_get_acct(worker);
|
||||
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
|
||||
if (!IS_ERR(tsk)) {
|
||||
io_init_new_worker(wq, worker, tsk);
|
||||
io_init_new_worker(wq, acct, worker, tsk);
|
||||
io_worker_release(worker);
|
||||
return;
|
||||
} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
|
||||
struct io_wq_acct *acct = io_wq_get_acct(worker);
|
||||
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
acct->nr_workers--;
|
||||
if (!acct->nr_workers) {
|
||||
struct io_cb_cancel_data match = {
|
||||
@ -808,11 +822,11 @@ static void create_worker_cont(struct callback_head *cb)
|
||||
.cancel_all = true,
|
||||
};
|
||||
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
while (io_acct_cancel_pending_work(wq, acct, &match))
|
||||
;
|
||||
} else {
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
}
|
||||
io_worker_ref_put(wq);
|
||||
kfree(worker);
|
||||
@ -834,9 +848,8 @@ static void io_workqueue_create(struct work_struct *work)
|
||||
kfree(worker);
|
||||
}
|
||||
|
||||
static bool create_io_worker(struct io_wq *wq, int index)
|
||||
static bool create_io_worker(struct io_wq *wq, struct io_wq_acct *acct)
|
||||
{
|
||||
struct io_wq_acct *acct = &wq->acct[index];
|
||||
struct io_worker *worker;
|
||||
struct task_struct *tsk;
|
||||
|
||||
@ -846,24 +859,22 @@ static bool create_io_worker(struct io_wq *wq, int index)
|
||||
if (!worker) {
|
||||
fail:
|
||||
atomic_dec(&acct->nr_running);
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
acct->nr_workers--;
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
io_worker_ref_put(wq);
|
||||
return false;
|
||||
}
|
||||
|
||||
refcount_set(&worker->ref, 1);
|
||||
worker->wq = wq;
|
||||
worker->acct = acct;
|
||||
raw_spin_lock_init(&worker->lock);
|
||||
init_completion(&worker->ref_done);
|
||||
|
||||
if (index == IO_WQ_ACCT_BOUND)
|
||||
set_bit(IO_WORKER_F_BOUND, &worker->flags);
|
||||
|
||||
tsk = create_io_thread(io_wq_worker, worker, NUMA_NO_NODE);
|
||||
if (!IS_ERR(tsk)) {
|
||||
io_init_new_worker(wq, worker, tsk);
|
||||
io_init_new_worker(wq, acct, worker, tsk);
|
||||
} else if (!io_should_retry_thread(worker, PTR_ERR(tsk))) {
|
||||
kfree(worker);
|
||||
goto fail;
|
||||
@ -879,14 +890,14 @@ fail:
|
||||
* Iterate the passed in list and call the specific function for each
|
||||
* worker that isn't exiting
|
||||
*/
|
||||
static bool io_wq_for_each_worker(struct io_wq *wq,
|
||||
bool (*func)(struct io_worker *, void *),
|
||||
void *data)
|
||||
static bool io_acct_for_each_worker(struct io_wq_acct *acct,
|
||||
bool (*func)(struct io_worker *, void *),
|
||||
void *data)
|
||||
{
|
||||
struct io_worker *worker;
|
||||
bool ret = false;
|
||||
|
||||
list_for_each_entry_rcu(worker, &wq->all_list, all_list) {
|
||||
list_for_each_entry_rcu(worker, &acct->all_list, all_list) {
|
||||
if (io_worker_get(worker)) {
|
||||
/* no task if node is/was offline */
|
||||
if (worker->task)
|
||||
@ -900,6 +911,18 @@ static bool io_wq_for_each_worker(struct io_wq *wq,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool io_wq_for_each_worker(struct io_wq *wq,
|
||||
bool (*func)(struct io_worker *, void *),
|
||||
void *data)
|
||||
{
|
||||
for (int i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
if (!io_acct_for_each_worker(&wq->acct[i], func, data))
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool io_wq_worker_wake(struct io_worker *worker, void *data)
|
||||
{
|
||||
__set_notify_signal(worker->task);
|
||||
@ -916,19 +939,19 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wq *wq)
|
||||
} while (work);
|
||||
}
|
||||
|
||||
static void io_wq_insert_work(struct io_wq *wq, struct io_wq_work *work)
|
||||
static void io_wq_insert_work(struct io_wq *wq, struct io_wq_acct *acct,
|
||||
struct io_wq_work *work, unsigned int work_flags)
|
||||
{
|
||||
struct io_wq_acct *acct = io_work_get_acct(wq, work);
|
||||
unsigned int hash;
|
||||
struct io_wq_work *tail;
|
||||
|
||||
if (!io_wq_is_hashed(work)) {
|
||||
if (!__io_wq_is_hashed(work_flags)) {
|
||||
append:
|
||||
wq_list_add_tail(&work->list, &acct->work_list);
|
||||
return;
|
||||
}
|
||||
|
||||
hash = io_get_work_hash(work);
|
||||
hash = __io_get_work_hash(work_flags);
|
||||
tail = wq->hash_tail[hash];
|
||||
wq->hash_tail[hash] = work;
|
||||
if (!tail)
|
||||
@ -944,8 +967,8 @@ static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
|
||||
|
||||
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
{
|
||||
struct io_wq_acct *acct = io_work_get_acct(wq, work);
|
||||
unsigned int work_flags = atomic_read(&work->flags);
|
||||
struct io_wq_acct *acct = io_work_get_acct(wq, work_flags);
|
||||
struct io_cb_cancel_data match = {
|
||||
.fn = io_wq_work_match_item,
|
||||
.data = work,
|
||||
@ -964,12 +987,12 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
}
|
||||
|
||||
raw_spin_lock(&acct->lock);
|
||||
io_wq_insert_work(wq, work);
|
||||
io_wq_insert_work(wq, acct, work, work_flags);
|
||||
clear_bit(IO_ACCT_STALLED_BIT, &acct->flags);
|
||||
raw_spin_unlock(&acct->lock);
|
||||
|
||||
rcu_read_lock();
|
||||
do_create = !io_wq_activate_free_worker(wq, acct);
|
||||
do_create = !io_acct_activate_free_worker(acct);
|
||||
rcu_read_unlock();
|
||||
|
||||
if (do_create && ((work_flags & IO_WQ_WORK_CONCURRENT) ||
|
||||
@ -980,12 +1003,12 @@ void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work)
|
||||
if (likely(did_create))
|
||||
return;
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
if (acct->nr_workers) {
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
return;
|
||||
}
|
||||
raw_spin_unlock(&wq->lock);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
|
||||
/* fatal condition, failed to create the first worker */
|
||||
io_acct_cancel_pending_work(wq, acct, &match);
|
||||
@ -1034,10 +1057,10 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
|
||||
}
|
||||
|
||||
static inline void io_wq_remove_pending(struct io_wq *wq,
|
||||
struct io_wq_acct *acct,
|
||||
struct io_wq_work *work,
|
||||
struct io_wq_work_node *prev)
|
||||
{
|
||||
struct io_wq_acct *acct = io_work_get_acct(wq, work);
|
||||
unsigned int hash = io_get_work_hash(work);
|
||||
struct io_wq_work *prev_work = NULL;
|
||||
|
||||
@ -1064,7 +1087,7 @@ static bool io_acct_cancel_pending_work(struct io_wq *wq,
|
||||
work = container_of(node, struct io_wq_work, list);
|
||||
if (!match->fn(work, match->data))
|
||||
continue;
|
||||
io_wq_remove_pending(wq, work, prev);
|
||||
io_wq_remove_pending(wq, acct, work, prev);
|
||||
raw_spin_unlock(&acct->lock);
|
||||
io_run_cancel(work, wq);
|
||||
match->nr_pending++;
|
||||
@ -1092,11 +1115,22 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
static void io_acct_cancel_running_work(struct io_wq_acct *acct,
|
||||
struct io_cb_cancel_data *match)
|
||||
{
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
io_acct_for_each_worker(acct, io_wq_worker_cancel, match);
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
}
|
||||
|
||||
static void io_wq_cancel_running_work(struct io_wq *wq,
|
||||
struct io_cb_cancel_data *match)
|
||||
{
|
||||
rcu_read_lock();
|
||||
io_wq_for_each_worker(wq, io_wq_worker_cancel, match);
|
||||
|
||||
for (int i = 0; i < IO_WQ_ACCT_NR; i++)
|
||||
io_acct_cancel_running_work(&wq->acct[i], match);
|
||||
|
||||
rcu_read_unlock();
|
||||
}
|
||||
|
||||
@ -1119,16 +1153,14 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
|
||||
* as an indication that we attempt to signal cancellation. The
|
||||
* completion will run normally in this case.
|
||||
*
|
||||
* Do both of these while holding the wq->lock, to ensure that
|
||||
* Do both of these while holding the acct->workers_lock, to ensure that
|
||||
* we'll find a work item regardless of state.
|
||||
*/
|
||||
io_wq_cancel_pending_work(wq, &match);
|
||||
if (match.nr_pending && !match.cancel_all)
|
||||
return IO_WQ_CANCEL_OK;
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
io_wq_cancel_running_work(wq, &match);
|
||||
raw_spin_unlock(&wq->lock);
|
||||
if (match.nr_running && !match.cancel_all)
|
||||
return IO_WQ_CANCEL_RUNNING;
|
||||
|
||||
@ -1152,7 +1184,7 @@ static int io_wq_hash_wake(struct wait_queue_entry *wait, unsigned mode,
|
||||
struct io_wq_acct *acct = &wq->acct[i];
|
||||
|
||||
if (test_and_clear_bit(IO_ACCT_STALLED_BIT, &acct->flags))
|
||||
io_wq_activate_free_worker(wq, acct);
|
||||
io_acct_activate_free_worker(acct);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
return 1;
|
||||
@ -1190,16 +1222,16 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
struct io_wq_acct *acct = &wq->acct[i];
|
||||
|
||||
acct->index = i;
|
||||
atomic_set(&acct->nr_running, 0);
|
||||
|
||||
raw_spin_lock_init(&acct->workers_lock);
|
||||
INIT_HLIST_NULLS_HEAD(&acct->free_list, 0);
|
||||
INIT_LIST_HEAD(&acct->all_list);
|
||||
|
||||
INIT_WQ_LIST(&acct->work_list);
|
||||
raw_spin_lock_init(&acct->lock);
|
||||
}
|
||||
|
||||
raw_spin_lock_init(&wq->lock);
|
||||
INIT_HLIST_NULLS_HEAD(&wq->free_list, 0);
|
||||
INIT_LIST_HEAD(&wq->all_list);
|
||||
|
||||
wq->task = get_task_struct(data->task);
|
||||
atomic_set(&wq->worker_refs, 1);
|
||||
init_completion(&wq->worker_done);
|
||||
@ -1385,14 +1417,14 @@ int io_wq_max_workers(struct io_wq *wq, int *new_count)
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
raw_spin_lock(&wq->lock);
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++) {
|
||||
acct = &wq->acct[i];
|
||||
raw_spin_lock(&acct->workers_lock);
|
||||
prev[i] = max_t(int, acct->max_workers, prev[i]);
|
||||
if (new_count[i])
|
||||
acct->max_workers = new_count[i];
|
||||
raw_spin_unlock(&acct->workers_lock);
|
||||
}
|
||||
raw_spin_unlock(&wq->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
for (i = 0; i < IO_WQ_ACCT_NR; i++)
|
||||
|
@ -54,9 +54,14 @@ int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
|
||||
int io_wq_max_workers(struct io_wq *wq, int *new_count);
|
||||
bool io_wq_worker_stopped(void);
|
||||
|
||||
static inline bool __io_wq_is_hashed(unsigned int work_flags)
|
||||
{
|
||||
return work_flags & IO_WQ_WORK_HASHED;
|
||||
}
|
||||
|
||||
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
||||
{
|
||||
return atomic_read(&work->flags) & IO_WQ_WORK_HASHED;
|
||||
return __io_wq_is_hashed(atomic_read(&work->flags));
|
||||
}
|
||||
|
||||
typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
|
||||
|
@ -110,11 +110,13 @@
|
||||
#define SQE_VALID_FLAGS (SQE_COMMON_FLAGS | IOSQE_BUFFER_SELECT | \
|
||||
IOSQE_IO_DRAIN | IOSQE_CQE_SKIP_SUCCESS)
|
||||
|
||||
#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
|
||||
|
||||
#define IO_REQ_CLEAN_FLAGS (REQ_F_BUFFER_SELECTED | REQ_F_NEED_CLEANUP | \
|
||||
REQ_F_POLLED | REQ_F_INFLIGHT | REQ_F_CREDS | \
|
||||
REQ_F_ASYNC_DATA)
|
||||
|
||||
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | REQ_F_LINK | REQ_F_HARDLINK |\
|
||||
#define IO_REQ_CLEAN_SLOW_FLAGS (REQ_F_REFCOUNT | IO_REQ_LINK_FLAGS | \
|
||||
REQ_F_REISSUE | IO_REQ_CLEAN_FLAGS)
|
||||
|
||||
#define IO_TCTX_REFS_CACHE_NR (1U << 10)
|
||||
@ -131,7 +133,6 @@ struct io_defer_entry {
|
||||
|
||||
/* requests with any of those set should undergo io_disarm_next() */
|
||||
#define IO_DISARM_MASK (REQ_F_ARM_LTIMEOUT | REQ_F_LINK_TIMEOUT | REQ_F_FAIL)
|
||||
#define IO_REQ_LINK_FLAGS (REQ_F_LINK | REQ_F_HARDLINK)
|
||||
|
||||
/*
|
||||
* No waiters. It's larger than any valid value of the tw counter
|
||||
@ -254,7 +255,7 @@ static __cold void io_fallback_req_func(struct work_struct *work)
|
||||
percpu_ref_get(&ctx->refs);
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
llist_for_each_entry_safe(req, tmp, node, io_task_work.node)
|
||||
req->io_task_work.func(req, &ts);
|
||||
req->io_task_work.func(req, ts);
|
||||
io_submit_flush_completions(ctx);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
percpu_ref_put(&ctx->refs);
|
||||
@ -282,6 +283,17 @@ static int io_alloc_hash_table(struct io_hash_table *table, unsigned bits)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void io_free_alloc_caches(struct io_ring_ctx *ctx)
|
||||
{
|
||||
io_alloc_cache_free(&ctx->apoll_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
|
||||
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
|
||||
io_alloc_cache_free(&ctx->uring_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->msg_cache, kfree);
|
||||
io_futex_cache_free(ctx);
|
||||
io_rsrc_cache_free(ctx);
|
||||
}
|
||||
|
||||
static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
{
|
||||
struct io_ring_ctx *ctx;
|
||||
@ -313,7 +325,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
init_waitqueue_head(&ctx->sqo_sq_wait);
|
||||
INIT_LIST_HEAD(&ctx->sqd_list);
|
||||
INIT_LIST_HEAD(&ctx->cq_overflow_list);
|
||||
INIT_LIST_HEAD(&ctx->io_buffers_cache);
|
||||
ret = io_alloc_cache_init(&ctx->apoll_cache, IO_POLL_ALLOC_CACHE_MAX,
|
||||
sizeof(struct async_poll), 0);
|
||||
ret |= io_alloc_cache_init(&ctx->netmsg_cache, IO_ALLOC_CACHE_MAX,
|
||||
@ -328,6 +339,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
ret |= io_alloc_cache_init(&ctx->msg_cache, IO_ALLOC_CACHE_MAX,
|
||||
sizeof(struct io_kiocb), 0);
|
||||
ret |= io_futex_cache_init(ctx);
|
||||
ret |= io_rsrc_cache_init(ctx);
|
||||
if (ret)
|
||||
goto free_ref;
|
||||
init_completion(&ctx->ref_comp);
|
||||
@ -338,7 +350,6 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
spin_lock_init(&ctx->completion_lock);
|
||||
raw_spin_lock_init(&ctx->timeout_lock);
|
||||
INIT_WQ_LIST(&ctx->iopoll_list);
|
||||
INIT_LIST_HEAD(&ctx->io_buffers_comp);
|
||||
INIT_LIST_HEAD(&ctx->defer_list);
|
||||
INIT_LIST_HEAD(&ctx->timeout_list);
|
||||
INIT_LIST_HEAD(&ctx->ltimeout_list);
|
||||
@ -360,12 +371,7 @@ static __cold struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
|
||||
free_ref:
|
||||
percpu_ref_exit(&ctx->refs);
|
||||
err:
|
||||
io_alloc_cache_free(&ctx->apoll_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
|
||||
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
|
||||
io_alloc_cache_free(&ctx->uring_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->msg_cache, kfree);
|
||||
io_futex_cache_free(ctx);
|
||||
io_free_alloc_caches(ctx);
|
||||
kvfree(ctx->cancel_table.hbs);
|
||||
xa_destroy(&ctx->io_bl_xa);
|
||||
kfree(ctx);
|
||||
@ -393,11 +399,8 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
|
||||
|
||||
static void io_clean_op(struct io_kiocb *req)
|
||||
{
|
||||
if (req->flags & REQ_F_BUFFER_SELECTED) {
|
||||
spin_lock(&req->ctx->completion_lock);
|
||||
io_kbuf_drop(req);
|
||||
spin_unlock(&req->ctx->completion_lock);
|
||||
}
|
||||
if (unlikely(req->flags & REQ_F_BUFFER_SELECTED))
|
||||
io_kbuf_drop_legacy(req);
|
||||
|
||||
if (req->flags & REQ_F_NEED_CLEANUP) {
|
||||
const struct io_cold_def *def = &io_cold_defs[req->opcode];
|
||||
@ -542,7 +545,7 @@ static void io_queue_iowq(struct io_kiocb *req)
|
||||
io_queue_linked_timeout(link);
|
||||
}
|
||||
|
||||
static void io_req_queue_iowq_tw(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_req_queue_iowq_tw(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
io_queue_iowq(req);
|
||||
}
|
||||
@ -899,7 +902,7 @@ static void io_req_complete_post(struct io_kiocb *req, unsigned issue_flags)
|
||||
* Handle special CQ sync cases via task_work. DEFER_TASKRUN requires
|
||||
* the submitter task context, IOPOLL protects with uring_lock.
|
||||
*/
|
||||
if (ctx->task_complete || (ctx->flags & IORING_SETUP_IOPOLL)) {
|
||||
if (ctx->lockless_cq) {
|
||||
req->io_task_work.func = io_req_task_complete;
|
||||
io_req_task_work_add(req);
|
||||
return;
|
||||
@ -1021,7 +1024,7 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
|
||||
return nxt;
|
||||
}
|
||||
|
||||
static void ctx_flush_and_put(struct io_ring_ctx *ctx, struct io_tw_state *ts)
|
||||
static void ctx_flush_and_put(struct io_ring_ctx *ctx, io_tw_token_t tw)
|
||||
{
|
||||
if (!ctx)
|
||||
return;
|
||||
@ -1051,24 +1054,24 @@ struct llist_node *io_handle_tw_list(struct llist_node *node,
|
||||
io_task_work.node);
|
||||
|
||||
if (req->ctx != ctx) {
|
||||
ctx_flush_and_put(ctx, &ts);
|
||||
ctx_flush_and_put(ctx, ts);
|
||||
ctx = req->ctx;
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
percpu_ref_get(&ctx->refs);
|
||||
}
|
||||
INDIRECT_CALL_2(req->io_task_work.func,
|
||||
io_poll_task_func, io_req_rw_complete,
|
||||
req, &ts);
|
||||
req, ts);
|
||||
node = next;
|
||||
(*count)++;
|
||||
if (unlikely(need_resched())) {
|
||||
ctx_flush_and_put(ctx, &ts);
|
||||
ctx_flush_and_put(ctx, ts);
|
||||
ctx = NULL;
|
||||
cond_resched();
|
||||
}
|
||||
} while (node && *count < max_entries);
|
||||
|
||||
ctx_flush_and_put(ctx, &ts);
|
||||
ctx_flush_and_put(ctx, ts);
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -1157,7 +1160,7 @@ static inline void io_req_local_work_add(struct io_kiocb *req,
|
||||
* We don't know how many reuqests is there in the link and whether
|
||||
* they can even be queued lazily, fall back to non-lazy.
|
||||
*/
|
||||
if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK))
|
||||
if (req->flags & IO_REQ_LINK_FLAGS)
|
||||
flags &= ~IOU_F_TWQ_LAZY_WAKE;
|
||||
|
||||
guard(rcu)();
|
||||
@ -1276,7 +1279,7 @@ static bool io_run_local_work_continue(struct io_ring_ctx *ctx, int events,
|
||||
}
|
||||
|
||||
static int __io_run_local_work_loop(struct llist_node **node,
|
||||
struct io_tw_state *ts,
|
||||
io_tw_token_t tw,
|
||||
int events)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -1287,7 +1290,7 @@ static int __io_run_local_work_loop(struct llist_node **node,
|
||||
io_task_work.node);
|
||||
INDIRECT_CALL_2(req->io_task_work.func,
|
||||
io_poll_task_func, io_req_rw_complete,
|
||||
req, ts);
|
||||
req, tw);
|
||||
*node = next;
|
||||
if (++ret >= events)
|
||||
break;
|
||||
@ -1296,7 +1299,7 @@ static int __io_run_local_work_loop(struct llist_node **node,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
|
||||
static int __io_run_local_work(struct io_ring_ctx *ctx, io_tw_token_t tw,
|
||||
int min_events, int max_events)
|
||||
{
|
||||
struct llist_node *node;
|
||||
@ -1309,7 +1312,7 @@ static int __io_run_local_work(struct io_ring_ctx *ctx, struct io_tw_state *ts,
|
||||
atomic_andnot(IORING_SQ_TASKRUN, &ctx->rings->sq_flags);
|
||||
again:
|
||||
min_events -= ret;
|
||||
ret = __io_run_local_work_loop(&ctx->retry_llist.first, ts, max_events);
|
||||
ret = __io_run_local_work_loop(&ctx->retry_llist.first, tw, max_events);
|
||||
if (ctx->retry_llist.first)
|
||||
goto retry_done;
|
||||
|
||||
@ -1318,7 +1321,7 @@ again:
|
||||
* running the pending items.
|
||||
*/
|
||||
node = llist_reverse_order(llist_del_all(&ctx->work_llist));
|
||||
ret += __io_run_local_work_loop(&node, ts, max_events - ret);
|
||||
ret += __io_run_local_work_loop(&node, tw, max_events - ret);
|
||||
ctx->retry_llist.first = node;
|
||||
loops++;
|
||||
|
||||
@ -1340,7 +1343,7 @@ static inline int io_run_local_work_locked(struct io_ring_ctx *ctx,
|
||||
|
||||
if (!io_local_work_pending(ctx))
|
||||
return 0;
|
||||
return __io_run_local_work(ctx, &ts, min_events,
|
||||
return __io_run_local_work(ctx, ts, min_events,
|
||||
max(IO_LOCAL_TW_DEFAULT_MAX, min_events));
|
||||
}
|
||||
|
||||
@ -1351,20 +1354,20 @@ static int io_run_local_work(struct io_ring_ctx *ctx, int min_events,
|
||||
int ret;
|
||||
|
||||
mutex_lock(&ctx->uring_lock);
|
||||
ret = __io_run_local_work(ctx, &ts, min_events, max_events);
|
||||
ret = __io_run_local_work(ctx, ts, min_events, max_events);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void io_req_task_cancel(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_req_task_cancel(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
io_tw_lock(req->ctx, ts);
|
||||
io_tw_lock(req->ctx, tw);
|
||||
io_req_defer_failed(req, req->cqe.res);
|
||||
}
|
||||
|
||||
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
io_tw_lock(req->ctx, ts);
|
||||
io_tw_lock(req->ctx, tw);
|
||||
if (unlikely(io_should_terminate_tw()))
|
||||
io_req_defer_failed(req, -EFAULT);
|
||||
else if (req->flags & REQ_F_FORCE_ASYNC)
|
||||
@ -1419,8 +1422,7 @@ static void io_free_batch_list(struct io_ring_ctx *ctx,
|
||||
|
||||
if (apoll->double_poll)
|
||||
kfree(apoll->double_poll);
|
||||
if (!io_alloc_cache_put(&ctx->apoll_cache, apoll))
|
||||
kfree(apoll);
|
||||
io_cache_free(&ctx->apoll_cache, apoll);
|
||||
req->flags &= ~REQ_F_POLLED;
|
||||
}
|
||||
if (req->flags & IO_REQ_LINK_FLAGS)
|
||||
@ -1582,7 +1584,7 @@ static int io_iopoll_check(struct io_ring_ctx *ctx, long min)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
io_req_complete_defer(req);
|
||||
}
|
||||
@ -1719,15 +1721,13 @@ static bool io_assign_file(struct io_kiocb *req, const struct io_issue_def *def,
|
||||
return !!req->file;
|
||||
}
|
||||
|
||||
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
static inline int __io_issue_sqe(struct io_kiocb *req,
|
||||
unsigned int issue_flags,
|
||||
const struct io_issue_def *def)
|
||||
{
|
||||
const struct io_issue_def *def = &io_issue_defs[req->opcode];
|
||||
const struct cred *creds = NULL;
|
||||
int ret;
|
||||
|
||||
if (unlikely(!io_assign_file(req, def, issue_flags)))
|
||||
return -EBADF;
|
||||
|
||||
if (unlikely((req->flags & REQ_F_CREDS) && req->creds != current_cred()))
|
||||
creds = override_creds(req->creds);
|
||||
|
||||
@ -1742,6 +1742,19 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
if (creds)
|
||||
revert_creds(creds);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
const struct io_issue_def *def = &io_issue_defs[req->opcode];
|
||||
int ret;
|
||||
|
||||
if (unlikely(!io_assign_file(req, def, issue_flags)))
|
||||
return -EBADF;
|
||||
|
||||
ret = __io_issue_sqe(req, issue_flags, def);
|
||||
|
||||
if (ret == IOU_OK) {
|
||||
if (issue_flags & IO_URING_F_COMPLETE_DEFER)
|
||||
io_req_complete_defer(req);
|
||||
@ -1762,11 +1775,26 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
io_tw_lock(req->ctx, ts);
|
||||
return io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_MULTISHOT|
|
||||
IO_URING_F_COMPLETE_DEFER);
|
||||
const unsigned int issue_flags = IO_URING_F_NONBLOCK |
|
||||
IO_URING_F_MULTISHOT |
|
||||
IO_URING_F_COMPLETE_DEFER;
|
||||
int ret;
|
||||
|
||||
io_tw_lock(req->ctx, tw);
|
||||
|
||||
WARN_ON_ONCE(!req->file);
|
||||
if (WARN_ON_ONCE(req->ctx->flags & IORING_SETUP_IOPOLL))
|
||||
return -EFAULT;
|
||||
|
||||
ret = __io_issue_sqe(req, issue_flags, &io_issue_defs[req->opcode]);
|
||||
|
||||
WARN_ON_ONCE(ret == IOU_OK);
|
||||
|
||||
if (ret == IOU_ISSUE_SKIP_COMPLETE)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct io_wq_work *io_wq_free_work(struct io_wq_work *work)
|
||||
@ -1996,9 +2024,8 @@ static inline bool io_check_restriction(struct io_ring_ctx *ctx,
|
||||
return true;
|
||||
}
|
||||
|
||||
static void io_init_req_drain(struct io_kiocb *req)
|
||||
static void io_init_drain(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_kiocb *head = ctx->submit_state.link.head;
|
||||
|
||||
ctx->drain_active = true;
|
||||
@ -2062,7 +2089,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
|
||||
if (sqe_flags & IOSQE_IO_DRAIN) {
|
||||
if (ctx->drain_disabled)
|
||||
return io_init_fail_req(req, -EOPNOTSUPP);
|
||||
io_init_req_drain(req);
|
||||
io_init_drain(ctx);
|
||||
}
|
||||
}
|
||||
if (unlikely(ctx->restricted || ctx->drain_active || ctx->drain_next)) {
|
||||
@ -2458,8 +2485,18 @@ static int io_cqring_schedule_timeout(struct io_wait_queue *iowq,
|
||||
return READ_ONCE(iowq->hit_timeout) ? -ETIME : 0;
|
||||
}
|
||||
|
||||
struct ext_arg {
|
||||
size_t argsz;
|
||||
struct timespec64 ts;
|
||||
const sigset_t __user *sig;
|
||||
ktime_t min_time;
|
||||
bool ts_set;
|
||||
bool iowait;
|
||||
};
|
||||
|
||||
static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||
struct io_wait_queue *iowq,
|
||||
struct ext_arg *ext_arg,
|
||||
ktime_t start_time)
|
||||
{
|
||||
int ret = 0;
|
||||
@ -2469,7 +2506,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||
* can take into account that the task is waiting for IO - turns out
|
||||
* to be important for low QD IO.
|
||||
*/
|
||||
if (current_pending_io())
|
||||
if (ext_arg->iowait && current_pending_io())
|
||||
current->in_iowait = 1;
|
||||
if (iowq->timeout != KTIME_MAX || iowq->min_timeout)
|
||||
ret = io_cqring_schedule_timeout(iowq, ctx->clockid, start_time);
|
||||
@ -2482,6 +2519,7 @@ static int __io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||
/* If this returns > 0, the caller should retry */
|
||||
static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||
struct io_wait_queue *iowq,
|
||||
struct ext_arg *ext_arg,
|
||||
ktime_t start_time)
|
||||
{
|
||||
if (unlikely(READ_ONCE(ctx->check_cq)))
|
||||
@ -2495,17 +2533,9 @@ static inline int io_cqring_wait_schedule(struct io_ring_ctx *ctx,
|
||||
if (unlikely(io_should_wake(iowq)))
|
||||
return 0;
|
||||
|
||||
return __io_cqring_wait_schedule(ctx, iowq, start_time);
|
||||
return __io_cqring_wait_schedule(ctx, iowq, ext_arg, start_time);
|
||||
}
|
||||
|
||||
struct ext_arg {
|
||||
size_t argsz;
|
||||
struct timespec64 ts;
|
||||
const sigset_t __user *sig;
|
||||
ktime_t min_time;
|
||||
bool ts_set;
|
||||
};
|
||||
|
||||
/*
|
||||
* Wait until events become available, if we don't already have some. The
|
||||
* application must reap them itself, as they reside on the shared cq ring.
|
||||
@ -2583,7 +2613,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events, u32 flags,
|
||||
TASK_INTERRUPTIBLE);
|
||||
}
|
||||
|
||||
ret = io_cqring_wait_schedule(ctx, &iowq, start_time);
|
||||
ret = io_cqring_wait_schedule(ctx, &iowq, ext_arg, start_time);
|
||||
__set_current_state(TASK_RUNNING);
|
||||
atomic_set(&ctx->cq_wait_nr, IO_CQ_WAKE_INIT);
|
||||
|
||||
@ -2704,12 +2734,7 @@ static __cold void io_ring_ctx_free(struct io_ring_ctx *ctx)
|
||||
io_sqe_files_unregister(ctx);
|
||||
io_cqring_overflow_kill(ctx);
|
||||
io_eventfd_unregister(ctx);
|
||||
io_alloc_cache_free(&ctx->apoll_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->netmsg_cache, io_netmsg_cache_free);
|
||||
io_alloc_cache_free(&ctx->rw_cache, io_rw_cache_free);
|
||||
io_alloc_cache_free(&ctx->uring_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->msg_cache, kfree);
|
||||
io_futex_cache_free(ctx);
|
||||
io_free_alloc_caches(ctx);
|
||||
io_destroy_buffers(ctx);
|
||||
io_free_region(ctx, &ctx->param_region);
|
||||
mutex_unlock(&ctx->uring_lock);
|
||||
@ -3239,6 +3264,8 @@ static int io_get_ext_arg(struct io_ring_ctx *ctx, unsigned flags,
|
||||
const struct io_uring_getevents_arg __user *uarg = argp;
|
||||
struct io_uring_getevents_arg arg;
|
||||
|
||||
ext_arg->iowait = !(flags & IORING_ENTER_NO_IOWAIT);
|
||||
|
||||
/*
|
||||
* If EXT_ARG isn't set, then we have no timespec and the argp pointer
|
||||
* is just a pointer to the sigset_t.
|
||||
@ -3316,7 +3343,8 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
|
||||
IORING_ENTER_SQ_WAIT | IORING_ENTER_EXT_ARG |
|
||||
IORING_ENTER_REGISTERED_RING |
|
||||
IORING_ENTER_ABS_TIMER |
|
||||
IORING_ENTER_EXT_ARG_REG)))
|
||||
IORING_ENTER_EXT_ARG_REG |
|
||||
IORING_ENTER_NO_IOWAIT)))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
@ -3537,6 +3565,44 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
|
||||
O_RDWR | O_CLOEXEC, NULL);
|
||||
}
|
||||
|
||||
static int io_uring_sanitise_params(struct io_uring_params *p)
|
||||
{
|
||||
unsigned flags = p->flags;
|
||||
|
||||
/* There is no way to mmap rings without a real fd */
|
||||
if ((flags & IORING_SETUP_REGISTERED_FD_ONLY) &&
|
||||
!(flags & IORING_SETUP_NO_MMAP))
|
||||
return -EINVAL;
|
||||
|
||||
if (flags & IORING_SETUP_SQPOLL) {
|
||||
/* IPI related flags don't make sense with SQPOLL */
|
||||
if (flags & (IORING_SETUP_COOP_TASKRUN |
|
||||
IORING_SETUP_TASKRUN_FLAG |
|
||||
IORING_SETUP_DEFER_TASKRUN))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (flags & IORING_SETUP_TASKRUN_FLAG) {
|
||||
if (!(flags & (IORING_SETUP_COOP_TASKRUN |
|
||||
IORING_SETUP_DEFER_TASKRUN)))
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
/* HYBRID_IOPOLL only valid with IOPOLL */
|
||||
if ((flags & IORING_SETUP_HYBRID_IOPOLL) && !(flags & IORING_SETUP_IOPOLL))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* For DEFER_TASKRUN we require the completion task to be the same as
|
||||
* the submission task. This implies that there is only one submitter.
|
||||
*/
|
||||
if ((flags & IORING_SETUP_DEFER_TASKRUN) &&
|
||||
!(flags & IORING_SETUP_SINGLE_ISSUER))
|
||||
return -EINVAL;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int io_uring_fill_params(unsigned entries, struct io_uring_params *p)
|
||||
{
|
||||
if (!entries)
|
||||
@ -3547,10 +3613,6 @@ int io_uring_fill_params(unsigned entries, struct io_uring_params *p)
|
||||
entries = IORING_MAX_ENTRIES;
|
||||
}
|
||||
|
||||
if ((p->flags & IORING_SETUP_REGISTERED_FD_ONLY)
|
||||
&& !(p->flags & IORING_SETUP_NO_MMAP))
|
||||
return -EINVAL;
|
||||
|
||||
/*
|
||||
* Use twice as many entries for the CQ ring. It's possible for the
|
||||
* application to drive a higher depth than the size of the SQ ring,
|
||||
@ -3612,6 +3674,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
struct file *file;
|
||||
int ret;
|
||||
|
||||
ret = io_uring_sanitise_params(p);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = io_uring_fill_params(entries, p);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
@ -3659,37 +3725,10 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
* For SQPOLL, we just need a wakeup, always. For !SQPOLL, if
|
||||
* COOP_TASKRUN is set, then IPIs are never needed by the app.
|
||||
*/
|
||||
ret = -EINVAL;
|
||||
if (ctx->flags & IORING_SETUP_SQPOLL) {
|
||||
/* IPI related flags don't make sense with SQPOLL */
|
||||
if (ctx->flags & (IORING_SETUP_COOP_TASKRUN |
|
||||
IORING_SETUP_TASKRUN_FLAG |
|
||||
IORING_SETUP_DEFER_TASKRUN))
|
||||
goto err;
|
||||
if (ctx->flags & (IORING_SETUP_SQPOLL|IORING_SETUP_COOP_TASKRUN))
|
||||
ctx->notify_method = TWA_SIGNAL_NO_IPI;
|
||||
} else if (ctx->flags & IORING_SETUP_COOP_TASKRUN) {
|
||||
ctx->notify_method = TWA_SIGNAL_NO_IPI;
|
||||
} else {
|
||||
if (ctx->flags & IORING_SETUP_TASKRUN_FLAG &&
|
||||
!(ctx->flags & IORING_SETUP_DEFER_TASKRUN))
|
||||
goto err;
|
||||
else
|
||||
ctx->notify_method = TWA_SIGNAL;
|
||||
}
|
||||
|
||||
/* HYBRID_IOPOLL only valid with IOPOLL */
|
||||
if ((ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_HYBRID_IOPOLL)) ==
|
||||
IORING_SETUP_HYBRID_IOPOLL)
|
||||
goto err;
|
||||
|
||||
/*
|
||||
* For DEFER_TASKRUN we require the completion task to be the same as the
|
||||
* submission task. This implies that there is only one submitter, so enforce
|
||||
* that.
|
||||
*/
|
||||
if (ctx->flags & IORING_SETUP_DEFER_TASKRUN &&
|
||||
!(ctx->flags & IORING_SETUP_SINGLE_ISSUER)) {
|
||||
goto err;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is just grabbed for accounting purposes. When a process exits,
|
||||
@ -3719,7 +3758,7 @@ static __cold int io_uring_create(unsigned entries, struct io_uring_params *p,
|
||||
IORING_FEAT_RSRC_TAGS | IORING_FEAT_CQE_SKIP |
|
||||
IORING_FEAT_LINKED_FILE | IORING_FEAT_REG_REG_RING |
|
||||
IORING_FEAT_RECVSEND_BUNDLE | IORING_FEAT_MIN_TIMEOUT |
|
||||
IORING_FEAT_RW_ATTR;
|
||||
IORING_FEAT_RW_ATTR | IORING_FEAT_NO_IOWAIT;
|
||||
|
||||
if (copy_to_user(params, p, sizeof(*p))) {
|
||||
ret = -EFAULT;
|
||||
@ -3915,6 +3954,9 @@ static int __init io_uring_init(void)
|
||||
|
||||
io_uring_optable_init();
|
||||
|
||||
/* imu->dir is u8 */
|
||||
BUILD_BUG_ON((IO_IMU_DEST | IO_IMU_SOURCE) > U8_MAX);
|
||||
|
||||
/*
|
||||
* Allow user copy in the per-command field, which starts after the
|
||||
* file in io_kiocb and until the opcode field. The openat2 handling
|
||||
@ -3925,10 +3967,9 @@ static int __init io_uring_init(void)
|
||||
req_cachep = kmem_cache_create("io_kiocb", sizeof(struct io_kiocb), &kmem_args,
|
||||
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT |
|
||||
SLAB_TYPESAFE_BY_RCU);
|
||||
io_buf_cachep = KMEM_CACHE(io_buffer,
|
||||
SLAB_HWCACHE_ALIGN | SLAB_PANIC | SLAB_ACCOUNT);
|
||||
|
||||
iou_wq = alloc_workqueue("iou_exit", WQ_UNBOUND, 64);
|
||||
BUG_ON(!iou_wq);
|
||||
|
||||
#ifdef CONFIG_SYSCTL
|
||||
register_sysctl_init("kernel", kernel_io_uring_disabled_table);
|
||||
|
@ -88,11 +88,10 @@ struct file *io_file_get_fixed(struct io_kiocb *req, int fd,
|
||||
void __io_req_task_work_add(struct io_kiocb *req, unsigned flags);
|
||||
void io_req_task_work_add_remote(struct io_kiocb *req, struct io_ring_ctx *ctx,
|
||||
unsigned flags);
|
||||
bool io_alloc_async_data(struct io_kiocb *req);
|
||||
void io_req_task_queue(struct io_kiocb *req);
|
||||
void io_req_task_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void io_req_task_complete(struct io_kiocb *req, io_tw_token_t tw);
|
||||
void io_req_task_queue_fail(struct io_kiocb *req, int ret);
|
||||
void io_req_task_submit(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void io_req_task_submit(struct io_kiocb *req, io_tw_token_t tw);
|
||||
struct llist_node *io_handle_tw_list(struct llist_node *node, unsigned int *count, unsigned int max_entries);
|
||||
struct llist_node *tctx_task_work_run(struct io_uring_task *tctx, unsigned int max_entries, unsigned int *count);
|
||||
void tctx_task_work(struct callback_head *cb);
|
||||
@ -104,7 +103,7 @@ int io_ring_add_registered_file(struct io_uring_task *tctx, struct file *file,
|
||||
int start, int end);
|
||||
void io_req_queue_iowq(struct io_kiocb *req);
|
||||
|
||||
int io_poll_issue(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
int io_poll_issue(struct io_kiocb *req, io_tw_token_t tw);
|
||||
int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr);
|
||||
int io_do_iopoll(struct io_ring_ctx *ctx, bool force_nonspin);
|
||||
void __io_submit_flush_completions(struct io_ring_ctx *ctx);
|
||||
@ -147,6 +146,11 @@ static inline void io_lockdep_assert_cq_locked(struct io_ring_ctx *ctx)
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline bool io_is_compat(struct io_ring_ctx *ctx)
|
||||
{
|
||||
return IS_ENABLED(CONFIG_COMPAT) && unlikely(ctx->compat);
|
||||
}
|
||||
|
||||
static inline void io_req_task_work_add(struct io_kiocb *req)
|
||||
{
|
||||
__io_req_task_work_add(req, 0);
|
||||
@ -376,7 +380,7 @@ static inline bool io_task_work_pending(struct io_ring_ctx *ctx)
|
||||
return task_work_pending(current) || io_local_work_pending(ctx);
|
||||
}
|
||||
|
||||
static inline void io_tw_lock(struct io_ring_ctx *ctx, struct io_tw_state *ts)
|
||||
static inline void io_tw_lock(struct io_ring_ctx *ctx, io_tw_token_t tw)
|
||||
{
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
}
|
||||
@ -418,7 +422,6 @@ static inline bool io_req_cache_empty(struct io_ring_ctx *ctx)
|
||||
}
|
||||
|
||||
extern struct kmem_cache *req_cachep;
|
||||
extern struct kmem_cache *io_buf_cachep;
|
||||
|
||||
static inline struct io_kiocb *io_extract_req(struct io_ring_ctx *ctx)
|
||||
{
|
||||
|
200
io_uring/kbuf.c
200
io_uring/kbuf.c
@ -20,7 +20,8 @@
|
||||
/* BIDs are addressed by a 16-bit field in a CQE */
|
||||
#define MAX_BIDS_PER_BGID (1 << 16)
|
||||
|
||||
struct kmem_cache *io_buf_cachep;
|
||||
/* Mapped buffer ring, return io_uring_buf from head */
|
||||
#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
|
||||
|
||||
struct io_provide_buf {
|
||||
struct file *file;
|
||||
@ -31,6 +32,41 @@ struct io_provide_buf {
|
||||
__u16 bid;
|
||||
};
|
||||
|
||||
static bool io_kbuf_inc_commit(struct io_buffer_list *bl, int len)
|
||||
{
|
||||
while (len) {
|
||||
struct io_uring_buf *buf;
|
||||
u32 this_len;
|
||||
|
||||
buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
|
||||
this_len = min_t(int, len, buf->len);
|
||||
buf->len -= this_len;
|
||||
if (buf->len) {
|
||||
buf->addr += this_len;
|
||||
return false;
|
||||
}
|
||||
bl->head++;
|
||||
len -= this_len;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool io_kbuf_commit(struct io_kiocb *req,
|
||||
struct io_buffer_list *bl, int len, int nr)
|
||||
{
|
||||
if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
|
||||
return true;
|
||||
|
||||
req->flags &= ~REQ_F_BUFFERS_COMMIT;
|
||||
|
||||
if (unlikely(len < 0))
|
||||
return true;
|
||||
if (bl->flags & IOBL_INC)
|
||||
return io_kbuf_inc_commit(bl, len);
|
||||
bl->head += nr;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline struct io_buffer_list *io_buffer_get_list(struct io_ring_ctx *ctx,
|
||||
unsigned int bgid)
|
||||
{
|
||||
@ -52,6 +88,16 @@ static int io_buffer_add_list(struct io_ring_ctx *ctx,
|
||||
return xa_err(xa_store(&ctx->io_bl_xa, bgid, bl, GFP_KERNEL));
|
||||
}
|
||||
|
||||
void io_kbuf_drop_legacy(struct io_kiocb *req)
|
||||
{
|
||||
if (WARN_ON_ONCE(!(req->flags & REQ_F_BUFFER_SELECTED)))
|
||||
return;
|
||||
req->buf_index = req->kbuf->bgid;
|
||||
req->flags &= ~REQ_F_BUFFER_SELECTED;
|
||||
kfree(req->kbuf);
|
||||
req->kbuf = NULL;
|
||||
}
|
||||
|
||||
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
@ -70,33 +116,6 @@ bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags)
|
||||
return true;
|
||||
}
|
||||
|
||||
void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags)
|
||||
{
|
||||
/*
|
||||
* We can add this buffer back to two lists:
|
||||
*
|
||||
* 1) The io_buffers_cache list. This one is protected by the
|
||||
* ctx->uring_lock. If we already hold this lock, add back to this
|
||||
* list as we can grab it from issue as well.
|
||||
* 2) The io_buffers_comp list. This one is protected by the
|
||||
* ctx->completion_lock.
|
||||
*
|
||||
* We migrate buffers from the comp_list to the issue cache list
|
||||
* when we need one.
|
||||
*/
|
||||
if (issue_flags & IO_URING_F_UNLOCKED) {
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
spin_lock(&ctx->completion_lock);
|
||||
__io_put_kbuf_list(req, len, &ctx->io_buffers_comp);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
} else {
|
||||
lockdep_assert_held(&req->ctx->uring_lock);
|
||||
|
||||
__io_put_kbuf_list(req, len, &req->ctx->io_buffers_cache);
|
||||
}
|
||||
}
|
||||
|
||||
static void __user *io_provided_buffer_select(struct io_kiocb *req, size_t *len,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
@ -214,25 +233,14 @@ static int io_ring_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg,
|
||||
buf = io_ring_head_to_buf(br, head, bl->mask);
|
||||
if (arg->max_len) {
|
||||
u32 len = READ_ONCE(buf->len);
|
||||
size_t needed;
|
||||
|
||||
if (unlikely(!len))
|
||||
return -ENOBUFS;
|
||||
/*
|
||||
* Limit incremental buffers to 1 segment. No point trying
|
||||
* to peek ahead and map more than we need, when the buffers
|
||||
* themselves should be large when setup with
|
||||
* IOU_PBUF_RING_INC.
|
||||
*/
|
||||
if (bl->flags & IOBL_INC) {
|
||||
nr_avail = 1;
|
||||
} else {
|
||||
size_t needed;
|
||||
|
||||
needed = (arg->max_len + len - 1) / len;
|
||||
needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
|
||||
if (nr_avail > needed)
|
||||
nr_avail = needed;
|
||||
}
|
||||
needed = (arg->max_len + len - 1) / len;
|
||||
needed = min_not_zero(needed, (size_t) PEEK_MAX_IMPORT);
|
||||
if (nr_avail > needed)
|
||||
nr_avail = needed;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -342,6 +350,35 @@ int io_buffers_peek(struct io_kiocb *req, struct buf_sel_arg *arg)
|
||||
return io_provided_buffers_select(req, &arg->max_len, bl, arg->iovs);
|
||||
}
|
||||
|
||||
static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
|
||||
{
|
||||
struct io_buffer_list *bl = req->buf_list;
|
||||
bool ret = true;
|
||||
|
||||
if (bl) {
|
||||
ret = io_kbuf_commit(req, bl, len, nr);
|
||||
req->buf_index = bl->bgid;
|
||||
}
|
||||
req->flags &= ~REQ_F_BUFFER_RING;
|
||||
return ret;
|
||||
}
|
||||
|
||||
unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
|
||||
|
||||
if (unlikely(!(req->flags & REQ_F_BUFFER_RING))) {
|
||||
io_kbuf_drop_legacy(req);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (!__io_put_kbuf_ring(req, len, nbufs))
|
||||
ret |= IORING_CQE_F_BUF_MORE;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int __io_remove_buffers(struct io_ring_ctx *ctx,
|
||||
struct io_buffer_list *bl, unsigned nbufs)
|
||||
{
|
||||
@ -367,7 +404,9 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx,
|
||||
struct io_buffer *nxt;
|
||||
|
||||
nxt = list_first_entry(&bl->buf_list, struct io_buffer, list);
|
||||
list_move(&nxt->list, &ctx->io_buffers_cache);
|
||||
list_del(&nxt->list);
|
||||
kfree(nxt);
|
||||
|
||||
if (++i == nbufs)
|
||||
return i;
|
||||
cond_resched();
|
||||
@ -385,8 +424,6 @@ static void io_put_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
|
||||
void io_destroy_buffers(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_buffer_list *bl;
|
||||
struct list_head *item, *tmp;
|
||||
struct io_buffer *buf;
|
||||
|
||||
while (1) {
|
||||
unsigned long index = 0;
|
||||
@ -400,19 +437,6 @@ void io_destroy_buffers(struct io_ring_ctx *ctx)
|
||||
break;
|
||||
io_put_bl(ctx, bl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Move deferred locked entries to cache before pruning
|
||||
*/
|
||||
spin_lock(&ctx->completion_lock);
|
||||
if (!list_empty(&ctx->io_buffers_comp))
|
||||
list_splice_init(&ctx->io_buffers_comp, &ctx->io_buffers_cache);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
|
||||
list_for_each_safe(item, tmp, &ctx->io_buffers_cache) {
|
||||
buf = list_entry(item, struct io_buffer, list);
|
||||
kmem_cache_free(io_buf_cachep, buf);
|
||||
}
|
||||
}
|
||||
|
||||
static void io_destroy_bl(struct io_ring_ctx *ctx, struct io_buffer_list *bl)
|
||||
@ -501,53 +525,6 @@ int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define IO_BUFFER_ALLOC_BATCH 64
|
||||
|
||||
static int io_refill_buffer_cache(struct io_ring_ctx *ctx)
|
||||
{
|
||||
struct io_buffer *bufs[IO_BUFFER_ALLOC_BATCH];
|
||||
int allocated;
|
||||
|
||||
/*
|
||||
* Completions that don't happen inline (eg not under uring_lock) will
|
||||
* add to ->io_buffers_comp. If we don't have any free buffers, check
|
||||
* the completion list and splice those entries first.
|
||||
*/
|
||||
if (!list_empty_careful(&ctx->io_buffers_comp)) {
|
||||
spin_lock(&ctx->completion_lock);
|
||||
if (!list_empty(&ctx->io_buffers_comp)) {
|
||||
list_splice_init(&ctx->io_buffers_comp,
|
||||
&ctx->io_buffers_cache);
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
return 0;
|
||||
}
|
||||
spin_unlock(&ctx->completion_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* No free buffers and no completion entries either. Allocate a new
|
||||
* batch of buffer entries and add those to our freelist.
|
||||
*/
|
||||
|
||||
allocated = kmem_cache_alloc_bulk(io_buf_cachep, GFP_KERNEL_ACCOUNT,
|
||||
ARRAY_SIZE(bufs), (void **) bufs);
|
||||
if (unlikely(!allocated)) {
|
||||
/*
|
||||
* Bulk alloc is all-or-nothing. If we fail to get a batch,
|
||||
* retry single alloc to be on the safe side.
|
||||
*/
|
||||
bufs[0] = kmem_cache_alloc(io_buf_cachep, GFP_KERNEL);
|
||||
if (!bufs[0])
|
||||
return -ENOMEM;
|
||||
allocated = 1;
|
||||
}
|
||||
|
||||
while (allocated)
|
||||
list_add_tail(&bufs[--allocated]->list, &ctx->io_buffers_cache);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
|
||||
struct io_buffer_list *bl)
|
||||
{
|
||||
@ -556,12 +533,11 @@ static int io_add_buffers(struct io_ring_ctx *ctx, struct io_provide_buf *pbuf,
|
||||
int i, bid = pbuf->bid;
|
||||
|
||||
for (i = 0; i < pbuf->nbufs; i++) {
|
||||
if (list_empty(&ctx->io_buffers_cache) &&
|
||||
io_refill_buffer_cache(ctx))
|
||||
buf = kmalloc(sizeof(*buf), GFP_KERNEL_ACCOUNT);
|
||||
if (!buf)
|
||||
break;
|
||||
buf = list_first_entry(&ctx->io_buffers_cache, struct io_buffer,
|
||||
list);
|
||||
list_move_tail(&buf->list, &bl->buf_list);
|
||||
|
||||
list_add_tail(&buf->list, &bl->buf_list);
|
||||
buf->addr = addr;
|
||||
buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
|
||||
buf->bid = bid;
|
||||
|
100
io_uring/kbuf.h
100
io_uring/kbuf.h
@ -74,9 +74,12 @@ int io_register_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_unregister_pbuf_ring(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_register_pbuf_status(struct io_ring_ctx *ctx, void __user *arg);
|
||||
|
||||
void __io_put_kbuf(struct io_kiocb *req, int len, unsigned issue_flags);
|
||||
|
||||
bool io_kbuf_recycle_legacy(struct io_kiocb *req, unsigned issue_flags);
|
||||
void io_kbuf_drop_legacy(struct io_kiocb *req);
|
||||
|
||||
unsigned int __io_put_kbufs(struct io_kiocb *req, int len, int nbufs);
|
||||
bool io_kbuf_commit(struct io_kiocb *req,
|
||||
struct io_buffer_list *bl, int len, int nr);
|
||||
|
||||
struct io_mapped_region *io_pbuf_get_region(struct io_ring_ctx *ctx,
|
||||
unsigned int bgid);
|
||||
@ -116,100 +119,19 @@ static inline bool io_kbuf_recycle(struct io_kiocb *req, unsigned issue_flags)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Mapped buffer ring, return io_uring_buf from head */
|
||||
#define io_ring_head_to_buf(br, head, mask) &(br)->bufs[(head) & (mask)]
|
||||
|
||||
static inline bool io_kbuf_commit(struct io_kiocb *req,
|
||||
struct io_buffer_list *bl, int len, int nr)
|
||||
{
|
||||
if (unlikely(!(req->flags & REQ_F_BUFFERS_COMMIT)))
|
||||
return true;
|
||||
|
||||
req->flags &= ~REQ_F_BUFFERS_COMMIT;
|
||||
|
||||
if (unlikely(len < 0))
|
||||
return true;
|
||||
|
||||
if (bl->flags & IOBL_INC) {
|
||||
struct io_uring_buf *buf;
|
||||
|
||||
buf = io_ring_head_to_buf(bl->buf_ring, bl->head, bl->mask);
|
||||
if (WARN_ON_ONCE(len > buf->len))
|
||||
len = buf->len;
|
||||
buf->len -= len;
|
||||
if (buf->len) {
|
||||
buf->addr += len;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bl->head += nr;
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool __io_put_kbuf_ring(struct io_kiocb *req, int len, int nr)
|
||||
{
|
||||
struct io_buffer_list *bl = req->buf_list;
|
||||
bool ret = true;
|
||||
|
||||
if (bl) {
|
||||
ret = io_kbuf_commit(req, bl, len, nr);
|
||||
req->buf_index = bl->bgid;
|
||||
}
|
||||
req->flags &= ~REQ_F_BUFFER_RING;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline void __io_put_kbuf_list(struct io_kiocb *req, int len,
|
||||
struct list_head *list)
|
||||
{
|
||||
if (req->flags & REQ_F_BUFFER_RING) {
|
||||
__io_put_kbuf_ring(req, len, 1);
|
||||
} else {
|
||||
req->buf_index = req->kbuf->bgid;
|
||||
list_add(&req->kbuf->list, list);
|
||||
req->flags &= ~REQ_F_BUFFER_SELECTED;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void io_kbuf_drop(struct io_kiocb *req)
|
||||
{
|
||||
lockdep_assert_held(&req->ctx->completion_lock);
|
||||
|
||||
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
|
||||
return;
|
||||
|
||||
/* len == 0 is fine here, non-ring will always drop all of it */
|
||||
__io_put_kbuf_list(req, 0, &req->ctx->io_buffers_comp);
|
||||
}
|
||||
|
||||
static inline unsigned int __io_put_kbufs(struct io_kiocb *req, int len,
|
||||
int nbufs, unsigned issue_flags)
|
||||
{
|
||||
unsigned int ret;
|
||||
|
||||
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
|
||||
return 0;
|
||||
|
||||
ret = IORING_CQE_F_BUFFER | (req->buf_index << IORING_CQE_BUFFER_SHIFT);
|
||||
if (req->flags & REQ_F_BUFFER_RING) {
|
||||
if (!__io_put_kbuf_ring(req, len, nbufs))
|
||||
ret |= IORING_CQE_F_BUF_MORE;
|
||||
} else {
|
||||
__io_put_kbuf(req, len, issue_flags);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static inline unsigned int io_put_kbuf(struct io_kiocb *req, int len,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
return __io_put_kbufs(req, len, 1, issue_flags);
|
||||
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
|
||||
return 0;
|
||||
return __io_put_kbufs(req, len, 1);
|
||||
}
|
||||
|
||||
static inline unsigned int io_put_kbufs(struct io_kiocb *req, int len,
|
||||
int nbufs, unsigned issue_flags)
|
||||
{
|
||||
return __io_put_kbufs(req, len, nbufs, issue_flags);
|
||||
if (!(req->flags & (REQ_F_BUFFER_RING | REQ_F_BUFFER_SELECTED)))
|
||||
return 0;
|
||||
return __io_put_kbufs(req, len, nbufs);
|
||||
}
|
||||
#endif
|
||||
|
@ -71,7 +71,7 @@ static inline bool io_msg_need_remote(struct io_ring_ctx *target_ctx)
|
||||
return target_ctx->task_complete;
|
||||
}
|
||||
|
||||
static void io_msg_tw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_msg_tw_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
|
||||
|
257
io_uring/net.c
257
io_uring/net.c
@ -75,7 +75,7 @@ struct io_sr_msg {
|
||||
u16 flags;
|
||||
/* initialised and used only by !msg send variants */
|
||||
u16 buf_group;
|
||||
u16 buf_index;
|
||||
bool retry;
|
||||
void __user *msg_control;
|
||||
/* used only for send zerocopy */
|
||||
struct io_kiocb *notif;
|
||||
@ -187,17 +187,15 @@ static inline void io_mshot_prep_retry(struct io_kiocb *req,
|
||||
|
||||
req->flags &= ~REQ_F_BL_EMPTY;
|
||||
sr->done_io = 0;
|
||||
sr->retry = false;
|
||||
sr->len = 0; /* get from the provided buffer */
|
||||
req->buf_index = sr->buf_group;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
|
||||
struct io_async_msghdr *iomsg,
|
||||
struct compat_msghdr *msg, int ddir)
|
||||
static int io_net_import_vec(struct io_kiocb *req, struct io_async_msghdr *iomsg,
|
||||
const struct iovec __user *uiov, unsigned uvec_seg,
|
||||
int ddir)
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct compat_iovec __user *uiov;
|
||||
struct iovec *iov;
|
||||
int ret, nr_segs;
|
||||
|
||||
@ -205,103 +203,108 @@ static int io_compat_msg_copy_hdr(struct io_kiocb *req,
|
||||
nr_segs = iomsg->free_iov_nr;
|
||||
iov = iomsg->free_iov;
|
||||
} else {
|
||||
iov = &iomsg->fast_iov;
|
||||
nr_segs = 1;
|
||||
iov = &iomsg->fast_iov;
|
||||
}
|
||||
|
||||
ret = __import_iovec(ddir, uiov, uvec_seg, nr_segs, &iov,
|
||||
&iomsg->msg.msg_iter, io_is_compat(req->ctx));
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
io_net_vec_assign(req, iomsg, iov);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_compat_msg_copy_hdr(struct io_kiocb *req,
|
||||
struct io_async_msghdr *iomsg,
|
||||
struct compat_msghdr *msg, int ddir,
|
||||
struct sockaddr __user **save_addr)
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct compat_iovec __user *uiov;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(msg, sr->umsg_compat, sizeof(*msg)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = __get_compat_msghdr(&iomsg->msg, msg, save_addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
uiov = compat_ptr(msg->msg_iov);
|
||||
if (req->flags & REQ_F_BUFFER_SELECT) {
|
||||
compat_ssize_t clen;
|
||||
|
||||
if (msg->msg_iovlen == 0) {
|
||||
sr->len = iov->iov_len = 0;
|
||||
iov->iov_base = NULL;
|
||||
sr->len = 0;
|
||||
} else if (msg->msg_iovlen > 1) {
|
||||
return -EINVAL;
|
||||
} else {
|
||||
if (!access_ok(uiov, sizeof(*uiov)))
|
||||
struct compat_iovec tmp_iov;
|
||||
|
||||
if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
|
||||
return -EFAULT;
|
||||
if (__get_user(clen, &uiov->iov_len))
|
||||
return -EFAULT;
|
||||
if (clen < 0)
|
||||
return -EINVAL;
|
||||
sr->len = clen;
|
||||
sr->len = tmp_iov.iov_len;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = __import_iovec(ddir, (struct iovec __user *)uiov, msg->msg_iovlen,
|
||||
nr_segs, &iov, &iomsg->msg.msg_iter, true);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
io_net_vec_assign(req, iomsg, iov);
|
||||
return 0;
|
||||
return io_net_import_vec(req, iomsg, (struct iovec __user *)uiov,
|
||||
msg->msg_iovlen, ddir);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
|
||||
struct user_msghdr *msg, int ddir)
|
||||
static int io_copy_msghdr_from_user(struct user_msghdr *msg,
|
||||
struct user_msghdr __user *umsg)
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct user_msghdr __user *umsg = sr->umsg;
|
||||
struct iovec *iov;
|
||||
int ret, nr_segs;
|
||||
|
||||
if (iomsg->free_iov) {
|
||||
nr_segs = iomsg->free_iov_nr;
|
||||
iov = iomsg->free_iov;
|
||||
} else {
|
||||
iov = &iomsg->fast_iov;
|
||||
nr_segs = 1;
|
||||
}
|
||||
|
||||
if (!user_access_begin(umsg, sizeof(*umsg)))
|
||||
return -EFAULT;
|
||||
|
||||
ret = -EFAULT;
|
||||
unsafe_get_user(msg->msg_name, &umsg->msg_name, ua_end);
|
||||
unsafe_get_user(msg->msg_namelen, &umsg->msg_namelen, ua_end);
|
||||
unsafe_get_user(msg->msg_iov, &umsg->msg_iov, ua_end);
|
||||
unsafe_get_user(msg->msg_iovlen, &umsg->msg_iovlen, ua_end);
|
||||
unsafe_get_user(msg->msg_control, &umsg->msg_control, ua_end);
|
||||
unsafe_get_user(msg->msg_controllen, &umsg->msg_controllen, ua_end);
|
||||
user_access_end();
|
||||
return 0;
|
||||
ua_end:
|
||||
user_access_end();
|
||||
return -EFAULT;
|
||||
}
|
||||
|
||||
static int io_msg_copy_hdr(struct io_kiocb *req, struct io_async_msghdr *iomsg,
|
||||
struct user_msghdr *msg, int ddir,
|
||||
struct sockaddr __user **save_addr)
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct user_msghdr __user *umsg = sr->umsg;
|
||||
int ret;
|
||||
|
||||
ret = io_copy_msghdr_from_user(msg, umsg);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
msg->msg_flags = 0;
|
||||
|
||||
ret = __copy_msghdr(&iomsg->msg, msg, save_addr);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (req->flags & REQ_F_BUFFER_SELECT) {
|
||||
if (msg->msg_iovlen == 0) {
|
||||
sr->len = iov->iov_len = 0;
|
||||
iov->iov_base = NULL;
|
||||
sr->len = 0;
|
||||
} else if (msg->msg_iovlen > 1) {
|
||||
ret = -EINVAL;
|
||||
goto ua_end;
|
||||
return -EINVAL;
|
||||
} else {
|
||||
struct iovec __user *uiov = msg->msg_iov;
|
||||
struct iovec tmp_iov;
|
||||
|
||||
/* we only need the length for provided buffers */
|
||||
if (!access_ok(&uiov->iov_len, sizeof(uiov->iov_len)))
|
||||
goto ua_end;
|
||||
unsafe_get_user(iov->iov_len, &uiov->iov_len, ua_end);
|
||||
sr->len = iov->iov_len;
|
||||
if (copy_from_user(&tmp_iov, uiov, sizeof(tmp_iov)))
|
||||
return -EFAULT;
|
||||
sr->len = tmp_iov.iov_len;
|
||||
}
|
||||
ret = 0;
|
||||
ua_end:
|
||||
user_access_end();
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
user_access_end();
|
||||
ret = __import_iovec(ddir, msg->msg_iov, msg->msg_iovlen, nr_segs,
|
||||
&iov, &iomsg->msg.msg_iter, false);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
io_net_vec_assign(req, iomsg, iov);
|
||||
return 0;
|
||||
return io_net_import_vec(req, iomsg, msg->msg_iov, msg->msg_iovlen, ddir);
|
||||
}
|
||||
|
||||
static int io_sendmsg_copy_hdr(struct io_kiocb *req,
|
||||
@ -314,26 +317,16 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
|
||||
iomsg->msg.msg_name = &iomsg->addr;
|
||||
iomsg->msg.msg_iter.nr_segs = 0;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (unlikely(req->ctx->compat)) {
|
||||
if (io_is_compat(req->ctx)) {
|
||||
struct compat_msghdr cmsg;
|
||||
|
||||
ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = __get_compat_msghdr(&iomsg->msg, &cmsg, NULL);
|
||||
ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_SOURCE,
|
||||
NULL);
|
||||
sr->msg_control = iomsg->msg.msg_control_user;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = __copy_msghdr(&iomsg->msg, &msg, NULL);
|
||||
|
||||
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_SOURCE, NULL);
|
||||
/* save msg_control as sys_sendmsg() overwrites it */
|
||||
sr->msg_control = iomsg->msg.msg_control_user;
|
||||
return ret;
|
||||
@ -387,14 +380,10 @@ static int io_sendmsg_setup(struct io_kiocb *req, const struct io_uring_sqe *sqe
|
||||
{
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
struct io_async_msghdr *kmsg = req->async_data;
|
||||
int ret;
|
||||
|
||||
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
|
||||
|
||||
ret = io_sendmsg_copy_hdr(req, kmsg);
|
||||
if (!ret)
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
return ret;
|
||||
return io_sendmsg_copy_hdr(req, kmsg);
|
||||
}
|
||||
|
||||
#define SENDMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECVSEND_BUNDLE)
|
||||
@ -404,6 +393,7 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
|
||||
sr->done_io = 0;
|
||||
sr->retry = false;
|
||||
|
||||
if (req->opcode != IORING_OP_SEND) {
|
||||
if (sqe->addr2 || sqe->file_index)
|
||||
@ -427,10 +417,9 @@ int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
req->buf_list = NULL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
if (io_is_compat(req->ctx))
|
||||
sr->msg_flags |= MSG_CMSG_COMPAT;
|
||||
#endif
|
||||
|
||||
if (unlikely(!io_msg_alloc_async(req)))
|
||||
return -ENOMEM;
|
||||
if (req->opcode != IORING_OP_SENDMSG)
|
||||
@ -714,31 +703,20 @@ static int io_recvmsg_copy_hdr(struct io_kiocb *req,
|
||||
iomsg->msg.msg_name = &iomsg->addr;
|
||||
iomsg->msg.msg_iter.nr_segs = 0;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (unlikely(req->ctx->compat)) {
|
||||
if (io_is_compat(req->ctx)) {
|
||||
struct compat_msghdr cmsg;
|
||||
|
||||
ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = __get_compat_msghdr(&iomsg->msg, &cmsg, &iomsg->uaddr);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
return io_recvmsg_mshot_prep(req, iomsg, cmsg.msg_namelen,
|
||||
cmsg.msg_controllen);
|
||||
ret = io_compat_msg_copy_hdr(req, iomsg, &cmsg, ITER_DEST,
|
||||
&iomsg->uaddr);
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
msg.msg_namelen = cmsg.msg_namelen;
|
||||
msg.msg_controllen = cmsg.msg_controllen;
|
||||
} else {
|
||||
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST, &iomsg->uaddr);
|
||||
}
|
||||
#endif
|
||||
|
||||
ret = io_msg_copy_hdr(req, iomsg, &msg, ITER_DEST);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = __copy_msghdr(&iomsg->msg, &msg, &iomsg->uaddr);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
return io_recvmsg_mshot_prep(req, iomsg, msg.msg_namelen,
|
||||
msg.msg_controllen);
|
||||
}
|
||||
@ -772,10 +750,7 @@ static int io_recvmsg_prep_setup(struct io_kiocb *req)
|
||||
return 0;
|
||||
}
|
||||
|
||||
ret = io_recvmsg_copy_hdr(req, kmsg);
|
||||
if (!ret)
|
||||
req->flags |= REQ_F_NEED_CLEANUP;
|
||||
return ret;
|
||||
return io_recvmsg_copy_hdr(req, kmsg);
|
||||
}
|
||||
|
||||
#define RECVMSG_FLAGS (IORING_RECVSEND_POLL_FIRST | IORING_RECV_MULTISHOT | \
|
||||
@ -786,6 +761,7 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
struct io_sr_msg *sr = io_kiocb_to_cmd(req, struct io_sr_msg);
|
||||
|
||||
sr->done_io = 0;
|
||||
sr->retry = false;
|
||||
|
||||
if (unlikely(sqe->file_index || sqe->addr2))
|
||||
return -EINVAL;
|
||||
@ -826,14 +802,16 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
if (io_is_compat(req->ctx))
|
||||
sr->msg_flags |= MSG_CMSG_COMPAT;
|
||||
#endif
|
||||
|
||||
sr->nr_multishot_loops = 0;
|
||||
return io_recvmsg_prep_setup(req);
|
||||
}
|
||||
|
||||
/* bits to clear in old and inherit in new cflags on bundle retry */
|
||||
#define CQE_F_MASK (IORING_CQE_F_SOCK_NONEMPTY|IORING_CQE_F_MORE)
|
||||
|
||||
/*
|
||||
* Finishes io_recv and io_recvmsg.
|
||||
*
|
||||
@ -853,9 +831,19 @@ static inline bool io_recv_finish(struct io_kiocb *req, int *ret,
|
||||
if (sr->flags & IORING_RECVSEND_BUNDLE) {
|
||||
cflags |= io_put_kbufs(req, *ret, io_bundle_nbufs(kmsg, *ret),
|
||||
issue_flags);
|
||||
if (sr->retry)
|
||||
cflags = req->cqe.flags | (cflags & CQE_F_MASK);
|
||||
/* bundle with no more immediate buffers, we're done */
|
||||
if (req->flags & REQ_F_BL_EMPTY)
|
||||
goto finish;
|
||||
/* if more is available, retry and append to this one */
|
||||
if (!sr->retry && kmsg->msg.msg_inq > 0 && *ret > 0) {
|
||||
req->cqe.flags = cflags & ~CQE_F_MASK;
|
||||
sr->len = kmsg->msg.msg_inq;
|
||||
sr->done_io += *ret;
|
||||
sr->retry = true;
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
cflags |= io_put_kbuf(req, *ret, issue_flags);
|
||||
}
|
||||
@ -1234,6 +1222,7 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
struct io_kiocb *notif;
|
||||
|
||||
zc->done_io = 0;
|
||||
zc->retry = false;
|
||||
req->flags |= REQ_F_POLL_NO_LAZY;
|
||||
|
||||
if (unlikely(READ_ONCE(sqe->__pad2[0]) || READ_ONCE(sqe->addr3)))
|
||||
@ -1272,14 +1261,13 @@ int io_send_zc_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
|
||||
zc->len = READ_ONCE(sqe->len);
|
||||
zc->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL | MSG_ZEROCOPY;
|
||||
zc->buf_index = READ_ONCE(sqe->buf_index);
|
||||
req->buf_index = READ_ONCE(sqe->buf_index);
|
||||
if (zc->msg_flags & MSG_DONTWAIT)
|
||||
req->flags |= REQ_F_NOWAIT;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
if (io_is_compat(req->ctx))
|
||||
zc->msg_flags |= MSG_CMSG_COMPAT;
|
||||
#endif
|
||||
|
||||
if (unlikely(!io_msg_alloc_async(req)))
|
||||
return -ENOMEM;
|
||||
if (req->opcode != IORING_OP_SENDMSG_ZC)
|
||||
@ -1344,24 +1332,10 @@ static int io_send_zc_import(struct io_kiocb *req, unsigned int issue_flags)
|
||||
int ret;
|
||||
|
||||
if (sr->flags & IORING_RECVSEND_FIXED_BUF) {
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_rsrc_node *node;
|
||||
|
||||
ret = -EFAULT;
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, sr->buf_index);
|
||||
if (node) {
|
||||
io_req_assign_buf_node(sr->notif, node);
|
||||
ret = 0;
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
ret = io_import_fixed(ITER_SOURCE, &kmsg->msg.msg_iter,
|
||||
node->buf, (u64)(uintptr_t)sr->buf,
|
||||
sr->len);
|
||||
sr->notif->buf_index = req->buf_index;
|
||||
ret = io_import_reg_buf(sr->notif, &kmsg->msg.msg_iter,
|
||||
(u64)(uintptr_t)sr->buf, sr->len,
|
||||
ITER_SOURCE, issue_flags);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
kmsg->msg.sg_from_iter = io_sg_from_iter;
|
||||
@ -1600,7 +1574,6 @@ retry:
|
||||
}
|
||||
if (ret == -ERESTARTSYS)
|
||||
ret = -EINTR;
|
||||
req_set_fail(req);
|
||||
} else if (!fixed) {
|
||||
fd_install(fd, file);
|
||||
ret = fd;
|
||||
@ -1613,14 +1586,8 @@ retry:
|
||||
if (!arg.is_empty)
|
||||
cflags |= IORING_CQE_F_SOCK_NONEMPTY;
|
||||
|
||||
if (!(req->flags & REQ_F_APOLL_MULTISHOT)) {
|
||||
io_req_set_res(req, ret, cflags);
|
||||
return IOU_OK;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
|
||||
if (ret >= 0 && (req->flags & REQ_F_APOLL_MULTISHOT) &&
|
||||
io_req_post_cqe(req, ret, cflags | IORING_CQE_F_MORE)) {
|
||||
if (cflags & IORING_CQE_F_SOCK_NONEMPTY || arg.is_empty == -1)
|
||||
goto retry;
|
||||
if (issue_flags & IO_URING_F_MULTISHOT)
|
||||
@ -1629,6 +1596,10 @@ retry:
|
||||
}
|
||||
|
||||
io_req_set_res(req, ret, cflags);
|
||||
if (ret < 0)
|
||||
req_set_fail(req);
|
||||
if (!(issue_flags & IO_URING_F_MULTISHOT))
|
||||
return IOU_OK;
|
||||
return IOU_STOP_MULTISHOT;
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,6 @@ struct io_nop {
|
||||
struct file *file;
|
||||
int result;
|
||||
int fd;
|
||||
int buffer;
|
||||
unsigned int flags;
|
||||
};
|
||||
|
||||
@ -40,9 +39,7 @@ int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
else
|
||||
nop->fd = -1;
|
||||
if (nop->flags & IORING_NOP_FIXED_BUFFER)
|
||||
nop->buffer = READ_ONCE(sqe->buf_index);
|
||||
else
|
||||
nop->buffer = -1;
|
||||
req->buf_index = READ_ONCE(sqe->buf_index);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -64,17 +61,8 @@ int io_nop(struct io_kiocb *req, unsigned int issue_flags)
|
||||
}
|
||||
}
|
||||
if (nop->flags & IORING_NOP_FIXED_BUFFER) {
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_rsrc_node *node;
|
||||
|
||||
ret = -EFAULT;
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, nop->buffer);
|
||||
if (node) {
|
||||
io_req_assign_buf_node(req, node);
|
||||
ret = 0;
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
if (!io_find_buf_node(req, issue_flags))
|
||||
ret = -EFAULT;
|
||||
}
|
||||
done:
|
||||
if (ret < 0)
|
||||
|
@ -11,7 +11,7 @@
|
||||
|
||||
static const struct ubuf_info_ops io_ubuf_ops;
|
||||
|
||||
static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
|
||||
static void io_notif_tw_complete(struct io_kiocb *notif, io_tw_token_t tw)
|
||||
{
|
||||
struct io_notif_data *nd = io_notif_to_data(notif);
|
||||
|
||||
@ -29,7 +29,7 @@ static void io_notif_tw_complete(struct io_kiocb *notif, struct io_tw_state *ts)
|
||||
}
|
||||
|
||||
nd = nd->next;
|
||||
io_req_task_complete(notif, ts);
|
||||
io_req_task_complete(notif, tw);
|
||||
} while (nd);
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ const struct io_issue_def io_issue_defs[] = {
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.prep = io_prep_read_fixed,
|
||||
.issue = io_read,
|
||||
.issue = io_read_fixed,
|
||||
},
|
||||
[IORING_OP_WRITE_FIXED] = {
|
||||
.needs_file = 1,
|
||||
@ -118,7 +118,7 @@ const struct io_issue_def io_issue_defs[] = {
|
||||
.iopoll_queue = 1,
|
||||
.async_size = sizeof(struct io_async_rw),
|
||||
.prep = io_prep_write_fixed,
|
||||
.issue = io_write,
|
||||
.issue = io_write_fixed,
|
||||
},
|
||||
[IORING_OP_POLL_ADD] = {
|
||||
.needs_file = 1,
|
||||
|
@ -7,6 +7,12 @@ struct io_issue_def {
|
||||
unsigned needs_file : 1;
|
||||
/* should block plug */
|
||||
unsigned plug : 1;
|
||||
/* supports ioprio */
|
||||
unsigned ioprio : 1;
|
||||
/* supports iopoll */
|
||||
unsigned iopoll : 1;
|
||||
/* op supports buffer selection */
|
||||
unsigned buffer_select : 1;
|
||||
/* hash wq insertion if file is a regular file */
|
||||
unsigned hash_reg_file : 1;
|
||||
/* unbound wq insertion if file is a non-regular file */
|
||||
@ -15,14 +21,8 @@ struct io_issue_def {
|
||||
unsigned pollin : 1;
|
||||
unsigned pollout : 1;
|
||||
unsigned poll_exclusive : 1;
|
||||
/* op supports buffer selection */
|
||||
unsigned buffer_select : 1;
|
||||
/* skip auditing */
|
||||
unsigned audit_skip : 1;
|
||||
/* supports ioprio */
|
||||
unsigned ioprio : 1;
|
||||
/* supports iopoll */
|
||||
unsigned iopoll : 1;
|
||||
/* have to be put into the iopoll list */
|
||||
unsigned iopoll_queue : 1;
|
||||
/* vectored opcode, set if 1) vectored, and 2) handler needs to know */
|
||||
|
@ -220,7 +220,7 @@ static inline void io_poll_execute(struct io_kiocb *req, int res)
|
||||
* req->cqe.res. IOU_POLL_REMOVE_POLL_USE_RES indicates to remove multishot
|
||||
* poll and that the result is stored in req->cqe.
|
||||
*/
|
||||
static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static int io_poll_check_events(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
int v;
|
||||
|
||||
@ -288,7 +288,7 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
return IOU_POLL_REMOVE_POLL_USE_RES;
|
||||
}
|
||||
} else {
|
||||
int ret = io_poll_issue(req, ts);
|
||||
int ret = io_poll_issue(req, tw);
|
||||
if (ret == IOU_STOP_MULTISHOT)
|
||||
return IOU_POLL_REMOVE_POLL_USE_RES;
|
||||
else if (ret == IOU_REQUEUE)
|
||||
@ -311,11 +311,11 @@ static int io_poll_check_events(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
return IOU_POLL_NO_ACTION;
|
||||
}
|
||||
|
||||
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_poll_task_func(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = io_poll_check_events(req, ts);
|
||||
ret = io_poll_check_events(req, tw);
|
||||
if (ret == IOU_POLL_NO_ACTION) {
|
||||
io_kbuf_recycle(req, 0);
|
||||
return;
|
||||
@ -335,7 +335,7 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
poll = io_kiocb_to_cmd(req, struct io_poll);
|
||||
req->cqe.res = mangle_poll(req->cqe.res & poll->events);
|
||||
} else if (ret == IOU_POLL_REISSUE) {
|
||||
io_req_task_submit(req, ts);
|
||||
io_req_task_submit(req, tw);
|
||||
return;
|
||||
} else if (ret != IOU_POLL_REMOVE_POLL_USE_RES) {
|
||||
req->cqe.res = ret;
|
||||
@ -343,14 +343,14 @@ void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
}
|
||||
|
||||
io_req_set_res(req, req->cqe.res, 0);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
} else {
|
||||
io_tw_lock(req->ctx, ts);
|
||||
io_tw_lock(req->ctx, tw);
|
||||
|
||||
if (ret == IOU_POLL_REMOVE_POLL_USE_RES)
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
else if (ret == IOU_POLL_DONE || ret == IOU_POLL_REISSUE)
|
||||
io_req_task_submit(req, ts);
|
||||
io_req_task_submit(req, tw);
|
||||
else
|
||||
io_req_defer_failed(req, ret);
|
||||
}
|
||||
|
@ -1,5 +1,7 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/io_uring_types.h>
|
||||
|
||||
#define IO_POLL_ALLOC_CACHE_MAX 32
|
||||
|
||||
enum {
|
||||
@ -43,4 +45,4 @@ int io_arm_poll_handler(struct io_kiocb *req, unsigned issue_flags);
|
||||
bool io_poll_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
|
||||
bool cancel_all);
|
||||
|
||||
void io_poll_task_func(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void io_poll_task_func(struct io_kiocb *req, io_tw_token_t tw);
|
||||
|
250
io_uring/rsrc.c
250
io_uring/rsrc.c
@ -9,6 +9,7 @@
|
||||
#include <linux/hugetlb.h>
|
||||
#include <linux/compat.h>
|
||||
#include <linux/io_uring.h>
|
||||
#include <linux/io_uring/cmd.h>
|
||||
|
||||
#include <uapi/linux/io_uring.h>
|
||||
|
||||
@ -32,6 +33,8 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
|
||||
#define IORING_MAX_FIXED_FILES (1U << 20)
|
||||
#define IORING_MAX_REG_BUFFERS (1U << 14)
|
||||
|
||||
#define IO_CACHED_BVECS_SEGS 32
|
||||
|
||||
int __io_account_mem(struct user_struct *user, unsigned long nr_pages)
|
||||
{
|
||||
unsigned long page_limit, cur_pages, new_pages;
|
||||
@ -101,36 +104,79 @@ static int io_buffer_validate(struct iovec *iov)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
|
||||
static void io_release_ubuf(void *priv)
|
||||
{
|
||||
struct io_mapped_ubuf *imu = priv;
|
||||
unsigned int i;
|
||||
|
||||
if (node->buf) {
|
||||
struct io_mapped_ubuf *imu = node->buf;
|
||||
|
||||
if (!refcount_dec_and_test(&imu->refs))
|
||||
return;
|
||||
for (i = 0; i < imu->nr_bvecs; i++)
|
||||
unpin_user_page(imu->bvec[i].bv_page);
|
||||
if (imu->acct_pages)
|
||||
io_unaccount_mem(ctx, imu->acct_pages);
|
||||
kvfree(imu);
|
||||
}
|
||||
for (i = 0; i < imu->nr_bvecs; i++)
|
||||
unpin_user_page(imu->bvec[i].bv_page);
|
||||
}
|
||||
|
||||
struct io_rsrc_node *io_rsrc_node_alloc(int type)
|
||||
static struct io_mapped_ubuf *io_alloc_imu(struct io_ring_ctx *ctx,
|
||||
int nr_bvecs)
|
||||
{
|
||||
if (nr_bvecs <= IO_CACHED_BVECS_SEGS)
|
||||
return io_cache_alloc(&ctx->imu_cache, GFP_KERNEL);
|
||||
return kvmalloc(struct_size_t(struct io_mapped_ubuf, bvec, nr_bvecs),
|
||||
GFP_KERNEL);
|
||||
}
|
||||
|
||||
static void io_free_imu(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
|
||||
{
|
||||
if (imu->nr_bvecs <= IO_CACHED_BVECS_SEGS)
|
||||
io_cache_free(&ctx->imu_cache, imu);
|
||||
else
|
||||
kvfree(imu);
|
||||
}
|
||||
|
||||
static void io_buffer_unmap(struct io_ring_ctx *ctx, struct io_mapped_ubuf *imu)
|
||||
{
|
||||
if (!refcount_dec_and_test(&imu->refs))
|
||||
return;
|
||||
|
||||
if (imu->acct_pages)
|
||||
io_unaccount_mem(ctx, imu->acct_pages);
|
||||
imu->release(imu->priv);
|
||||
io_free_imu(ctx, imu);
|
||||
}
|
||||
|
||||
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type)
|
||||
{
|
||||
struct io_rsrc_node *node;
|
||||
|
||||
node = kzalloc(sizeof(*node), GFP_KERNEL);
|
||||
node = io_cache_alloc(&ctx->node_cache, GFP_KERNEL);
|
||||
if (node) {
|
||||
node->type = type;
|
||||
node->refs = 1;
|
||||
node->tag = 0;
|
||||
node->file_ptr = 0;
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
__cold void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data)
|
||||
bool io_rsrc_cache_init(struct io_ring_ctx *ctx)
|
||||
{
|
||||
const int imu_cache_size = struct_size_t(struct io_mapped_ubuf, bvec,
|
||||
IO_CACHED_BVECS_SEGS);
|
||||
const int node_size = sizeof(struct io_rsrc_node);
|
||||
bool ret;
|
||||
|
||||
ret = io_alloc_cache_init(&ctx->node_cache, IO_ALLOC_CACHE_MAX,
|
||||
node_size, 0);
|
||||
ret |= io_alloc_cache_init(&ctx->imu_cache, IO_ALLOC_CACHE_MAX,
|
||||
imu_cache_size, 0);
|
||||
return ret;
|
||||
}
|
||||
|
||||
void io_rsrc_cache_free(struct io_ring_ctx *ctx)
|
||||
{
|
||||
io_alloc_cache_free(&ctx->node_cache, kfree);
|
||||
io_alloc_cache_free(&ctx->imu_cache, kfree);
|
||||
}
|
||||
|
||||
__cold void io_rsrc_data_free(struct io_ring_ctx *ctx,
|
||||
struct io_rsrc_data *data)
|
||||
{
|
||||
if (!data->nr)
|
||||
return;
|
||||
@ -203,7 +249,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
|
||||
err = -EBADF;
|
||||
break;
|
||||
}
|
||||
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
|
||||
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
|
||||
if (!node) {
|
||||
err = -ENOMEM;
|
||||
fput(file);
|
||||
@ -449,19 +495,17 @@ void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
|
||||
|
||||
switch (node->type) {
|
||||
case IORING_RSRC_FILE:
|
||||
if (io_slot_file(node))
|
||||
fput(io_slot_file(node));
|
||||
fput(io_slot_file(node));
|
||||
break;
|
||||
case IORING_RSRC_BUFFER:
|
||||
if (node->buf)
|
||||
io_buffer_unmap(ctx, node);
|
||||
io_buffer_unmap(ctx, node->buf);
|
||||
break;
|
||||
default:
|
||||
WARN_ON_ONCE(1);
|
||||
break;
|
||||
}
|
||||
|
||||
kfree(node);
|
||||
io_cache_free(&ctx->node_cache, node);
|
||||
}
|
||||
|
||||
int io_sqe_files_unregister(struct io_ring_ctx *ctx)
|
||||
@ -523,7 +567,7 @@ int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
goto fail;
|
||||
}
|
||||
ret = -ENOMEM;
|
||||
node = io_rsrc_node_alloc(IORING_RSRC_FILE);
|
||||
node = io_rsrc_node_alloc(ctx, IORING_RSRC_FILE);
|
||||
if (!node) {
|
||||
fput(file);
|
||||
goto fail;
|
||||
@ -728,10 +772,9 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
|
||||
if (!iov->iov_base)
|
||||
return NULL;
|
||||
|
||||
node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
|
||||
node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
|
||||
if (!node)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
node->buf = NULL;
|
||||
|
||||
ret = -ENOMEM;
|
||||
pages = io_pin_pages((unsigned long) iov->iov_base, iov->iov_len,
|
||||
@ -748,10 +791,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
|
||||
coalesced = io_coalesce_buffer(&pages, &nr_pages, &data);
|
||||
}
|
||||
|
||||
imu = kvmalloc(struct_size(imu, bvec, nr_pages), GFP_KERNEL);
|
||||
imu = io_alloc_imu(ctx, nr_pages);
|
||||
if (!imu)
|
||||
goto done;
|
||||
|
||||
imu->nr_bvecs = nr_pages;
|
||||
ret = io_buffer_account_pin(ctx, pages, nr_pages, imu, last_hpage);
|
||||
if (ret) {
|
||||
unpin_user_pages(pages, nr_pages);
|
||||
@ -762,8 +806,11 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
|
||||
/* store original address for later verification */
|
||||
imu->ubuf = (unsigned long) iov->iov_base;
|
||||
imu->len = iov->iov_len;
|
||||
imu->nr_bvecs = nr_pages;
|
||||
imu->folio_shift = PAGE_SHIFT;
|
||||
imu->release = io_release_ubuf;
|
||||
imu->priv = imu;
|
||||
imu->is_kbuf = false;
|
||||
imu->dir = IO_IMU_DEST | IO_IMU_SOURCE;
|
||||
if (coalesced)
|
||||
imu->folio_shift = data.folio_shift;
|
||||
refcount_set(&imu->refs, 1);
|
||||
@ -781,9 +828,9 @@ static struct io_rsrc_node *io_sqe_buffer_register(struct io_ring_ctx *ctx,
|
||||
}
|
||||
done:
|
||||
if (ret) {
|
||||
kvfree(imu);
|
||||
if (node)
|
||||
io_put_rsrc_node(ctx, node);
|
||||
if (imu)
|
||||
io_free_imu(ctx, imu);
|
||||
io_cache_free(&ctx->node_cache, node);
|
||||
node = ERR_PTR(ret);
|
||||
}
|
||||
kvfree(pages);
|
||||
@ -860,7 +907,102 @@ int io_sqe_buffers_register(struct io_ring_ctx *ctx, void __user *arg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq,
|
||||
void (*release)(void *), unsigned int index,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
|
||||
struct io_rsrc_data *data = &ctx->buf_table;
|
||||
struct req_iterator rq_iter;
|
||||
struct io_mapped_ubuf *imu;
|
||||
struct io_rsrc_node *node;
|
||||
struct bio_vec bv, *bvec;
|
||||
u16 nr_bvecs;
|
||||
int ret = 0;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
if (index >= data->nr) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
index = array_index_nospec(index, data->nr);
|
||||
|
||||
if (data->nodes[index]) {
|
||||
ret = -EBUSY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
|
||||
if (!node) {
|
||||
ret = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
nr_bvecs = blk_rq_nr_phys_segments(rq);
|
||||
imu = io_alloc_imu(ctx, nr_bvecs);
|
||||
if (!imu) {
|
||||
kfree(node);
|
||||
ret = -ENOMEM;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
imu->ubuf = 0;
|
||||
imu->len = blk_rq_bytes(rq);
|
||||
imu->acct_pages = 0;
|
||||
imu->folio_shift = PAGE_SHIFT;
|
||||
imu->nr_bvecs = nr_bvecs;
|
||||
refcount_set(&imu->refs, 1);
|
||||
imu->release = release;
|
||||
imu->priv = rq;
|
||||
imu->is_kbuf = true;
|
||||
imu->dir = 1 << rq_data_dir(rq);
|
||||
|
||||
bvec = imu->bvec;
|
||||
rq_for_each_bvec(bv, rq, rq_iter)
|
||||
*bvec++ = bv;
|
||||
|
||||
node->buf = imu;
|
||||
data->nodes[index] = node;
|
||||
unlock:
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_buffer_register_bvec);
|
||||
|
||||
int io_buffer_unregister_bvec(struct io_uring_cmd *cmd, unsigned int index,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = cmd_to_io_kiocb(cmd)->ctx;
|
||||
struct io_rsrc_data *data = &ctx->buf_table;
|
||||
struct io_rsrc_node *node;
|
||||
int ret = 0;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
if (index >= data->nr) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
index = array_index_nospec(index, data->nr);
|
||||
|
||||
node = data->nodes[index];
|
||||
if (!node) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
if (!node->buf->is_kbuf) {
|
||||
ret = -EBUSY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
io_put_rsrc_node(ctx, node);
|
||||
data->nodes[index] = NULL;
|
||||
unlock:
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return ret;
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_buffer_unregister_bvec);
|
||||
|
||||
static int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
struct io_mapped_ubuf *imu,
|
||||
u64 buf_addr, size_t len)
|
||||
{
|
||||
@ -874,20 +1016,22 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
/* not inside the mapped region */
|
||||
if (unlikely(buf_addr < imu->ubuf || buf_end > (imu->ubuf + imu->len)))
|
||||
return -EFAULT;
|
||||
if (!(imu->dir & (1 << ddir)))
|
||||
return -EFAULT;
|
||||
|
||||
/*
|
||||
* Might not be a start of buffer, set size appropriately
|
||||
* and advance us to the beginning.
|
||||
*/
|
||||
offset = buf_addr - imu->ubuf;
|
||||
iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, len);
|
||||
iov_iter_bvec(iter, ddir, imu->bvec, imu->nr_bvecs, offset + len);
|
||||
|
||||
if (offset) {
|
||||
/*
|
||||
* Don't use iov_iter_advance() here, as it's really slow for
|
||||
* using the latter parts of a big fixed buffer - it iterates
|
||||
* over each segment manually. We can cheat a bit here, because
|
||||
* we know that:
|
||||
* over each segment manually. We can cheat a bit here for user
|
||||
* registered nodes, because we know that:
|
||||
*
|
||||
* 1) it's a BVEC iter, we set it up
|
||||
* 2) all bvecs are the same in size, except potentially the
|
||||
@ -901,8 +1045,16 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
*/
|
||||
const struct bio_vec *bvec = imu->bvec;
|
||||
|
||||
/*
|
||||
* Kernel buffer bvecs, on the other hand, don't necessarily
|
||||
* have the size property of user registered ones, so we have
|
||||
* to use the slow iter advance.
|
||||
*/
|
||||
if (offset < bvec->bv_len) {
|
||||
iter->count -= offset;
|
||||
iter->iov_offset = offset;
|
||||
} else if (imu->is_kbuf) {
|
||||
iov_iter_advance(iter, offset);
|
||||
} else {
|
||||
unsigned long seg_skip;
|
||||
|
||||
@ -912,6 +1064,7 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
|
||||
iter->bvec += seg_skip;
|
||||
iter->nr_segs -= seg_skip;
|
||||
iter->count -= bvec->bv_len + offset;
|
||||
iter->iov_offset = offset & ((1UL << imu->folio_shift) - 1);
|
||||
}
|
||||
}
|
||||
@ -919,6 +1072,35 @@ int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
return 0;
|
||||
}
|
||||
|
||||
inline struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_rsrc_node *node;
|
||||
|
||||
if (req->flags & REQ_F_BUF_NODE)
|
||||
return req->buf_node;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
|
||||
if (node)
|
||||
io_req_assign_buf_node(req, node);
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
return node;
|
||||
}
|
||||
|
||||
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
|
||||
u64 buf_addr, size_t len, int ddir,
|
||||
unsigned issue_flags)
|
||||
{
|
||||
struct io_rsrc_node *node;
|
||||
|
||||
node = io_find_buf_node(req, issue_flags);
|
||||
if (!node)
|
||||
return -EFAULT;
|
||||
return io_import_fixed(ddir, iter, node->buf, buf_addr, len);
|
||||
}
|
||||
|
||||
/* Lock two rings at once. The rings must be different! */
|
||||
static void lock_two_rings(struct io_ring_ctx *ctx1, struct io_ring_ctx *ctx2)
|
||||
{
|
||||
@ -1002,7 +1184,7 @@ static int io_clone_buffers(struct io_ring_ctx *ctx, struct io_ring_ctx *src_ctx
|
||||
if (!src_node) {
|
||||
dst_node = NULL;
|
||||
} else {
|
||||
dst_node = io_rsrc_node_alloc(IORING_RSRC_BUFFER);
|
||||
dst_node = io_rsrc_node_alloc(ctx, IORING_RSRC_BUFFER);
|
||||
if (!dst_node) {
|
||||
ret = -ENOMEM;
|
||||
goto out_free;
|
||||
|
@ -2,6 +2,7 @@
|
||||
#ifndef IOU_RSRC_H
|
||||
#define IOU_RSRC_H
|
||||
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <linux/lockdep.h>
|
||||
|
||||
enum {
|
||||
@ -20,6 +21,11 @@ struct io_rsrc_node {
|
||||
};
|
||||
};
|
||||
|
||||
enum {
|
||||
IO_IMU_DEST = 1 << ITER_DEST,
|
||||
IO_IMU_SOURCE = 1 << ITER_SOURCE,
|
||||
};
|
||||
|
||||
struct io_mapped_ubuf {
|
||||
u64 ubuf;
|
||||
unsigned int len;
|
||||
@ -27,6 +33,10 @@ struct io_mapped_ubuf {
|
||||
unsigned int folio_shift;
|
||||
refcount_t refs;
|
||||
unsigned long acct_pages;
|
||||
void (*release)(void *);
|
||||
void *priv;
|
||||
bool is_kbuf;
|
||||
u8 dir;
|
||||
struct bio_vec bvec[] __counted_by(nr_bvecs);
|
||||
};
|
||||
|
||||
@ -39,14 +49,18 @@ struct io_imu_folio_data {
|
||||
unsigned int nr_folios;
|
||||
};
|
||||
|
||||
struct io_rsrc_node *io_rsrc_node_alloc(int type);
|
||||
bool io_rsrc_cache_init(struct io_ring_ctx *ctx);
|
||||
void io_rsrc_cache_free(struct io_ring_ctx *ctx);
|
||||
struct io_rsrc_node *io_rsrc_node_alloc(struct io_ring_ctx *ctx, int type);
|
||||
void io_free_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node);
|
||||
void io_rsrc_data_free(struct io_ring_ctx *ctx, struct io_rsrc_data *data);
|
||||
int io_rsrc_data_alloc(struct io_rsrc_data *data, unsigned nr);
|
||||
|
||||
int io_import_fixed(int ddir, struct iov_iter *iter,
|
||||
struct io_mapped_ubuf *imu,
|
||||
u64 buf_addr, size_t len);
|
||||
struct io_rsrc_node *io_find_buf_node(struct io_kiocb *req,
|
||||
unsigned issue_flags);
|
||||
int io_import_reg_buf(struct io_kiocb *req, struct iov_iter *iter,
|
||||
u64 buf_addr, size_t len, int ddir,
|
||||
unsigned issue_flags);
|
||||
|
||||
int io_register_clone_buffers(struct io_ring_ctx *ctx, void __user *arg);
|
||||
int io_sqe_buffers_unregister(struct io_ring_ctx *ctx);
|
||||
@ -77,7 +91,7 @@ static inline struct io_rsrc_node *io_rsrc_node_lookup(struct io_rsrc_data *data
|
||||
static inline void io_put_rsrc_node(struct io_ring_ctx *ctx, struct io_rsrc_node *node)
|
||||
{
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
if (node && !--node->refs)
|
||||
if (!--node->refs)
|
||||
io_free_rsrc_node(ctx, node);
|
||||
}
|
||||
|
||||
|
200
io_uring/rw.c
200
io_uring/rw.c
@ -49,24 +49,16 @@ static bool io_file_supports_nowait(struct io_kiocb *req, __poll_t mask)
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static int io_iov_compat_buffer_select_prep(struct io_rw *rw)
|
||||
{
|
||||
struct compat_iovec __user *uiov;
|
||||
compat_ssize_t clen;
|
||||
struct compat_iovec __user *uiov = u64_to_user_ptr(rw->addr);
|
||||
struct compat_iovec iov;
|
||||
|
||||
uiov = u64_to_user_ptr(rw->addr);
|
||||
if (!access_ok(uiov, sizeof(*uiov)))
|
||||
if (copy_from_user(&iov, uiov, sizeof(iov)))
|
||||
return -EFAULT;
|
||||
if (__get_user(clen, &uiov->iov_len))
|
||||
return -EFAULT;
|
||||
if (clen < 0)
|
||||
return -EINVAL;
|
||||
|
||||
rw->len = clen;
|
||||
rw->len = iov.iov_len;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int io_iov_buffer_select_prep(struct io_kiocb *req)
|
||||
{
|
||||
@ -77,10 +69,8 @@ static int io_iov_buffer_select_prep(struct io_kiocb *req)
|
||||
if (rw->len != 1)
|
||||
return -EINVAL;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
if (io_is_compat(req->ctx))
|
||||
return io_iov_compat_buffer_select_prep(rw);
|
||||
#endif
|
||||
|
||||
uiov = u64_to_user_ptr(rw->addr);
|
||||
if (copy_from_user(&iov, uiov, sizeof(*uiov)))
|
||||
@ -89,41 +79,24 @@ static int io_iov_buffer_select_prep(struct io_kiocb *req)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int __io_import_iovec(int ddir, struct io_kiocb *req,
|
||||
struct io_async_rw *io,
|
||||
unsigned int issue_flags)
|
||||
static int io_import_vec(int ddir, struct io_kiocb *req,
|
||||
struct io_async_rw *io,
|
||||
const struct iovec __user *uvec,
|
||||
size_t uvec_segs)
|
||||
{
|
||||
const struct io_issue_def *def = &io_issue_defs[req->opcode];
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
int ret, nr_segs;
|
||||
struct iovec *iov;
|
||||
void __user *buf;
|
||||
int nr_segs, ret;
|
||||
size_t sqe_len;
|
||||
|
||||
buf = u64_to_user_ptr(rw->addr);
|
||||
sqe_len = rw->len;
|
||||
|
||||
if (!def->vectored || req->flags & REQ_F_BUFFER_SELECT) {
|
||||
if (io_do_buffer_select(req)) {
|
||||
buf = io_buffer_select(req, &sqe_len, issue_flags);
|
||||
if (!buf)
|
||||
return -ENOBUFS;
|
||||
rw->addr = (unsigned long) buf;
|
||||
rw->len = sqe_len;
|
||||
}
|
||||
|
||||
return import_ubuf(ddir, buf, sqe_len, &io->iter);
|
||||
}
|
||||
|
||||
if (io->free_iovec) {
|
||||
nr_segs = io->free_iov_nr;
|
||||
iov = io->free_iovec;
|
||||
} else {
|
||||
iov = &io->fast_iov;
|
||||
nr_segs = 1;
|
||||
iov = &io->fast_iov;
|
||||
}
|
||||
ret = __import_iovec(ddir, buf, sqe_len, nr_segs, &iov, &io->iter,
|
||||
req->ctx->compat);
|
||||
|
||||
ret = __import_iovec(ddir, uvec, uvec_segs, nr_segs, &iov, &io->iter,
|
||||
io_is_compat(req->ctx));
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
if (iov) {
|
||||
@ -135,13 +108,35 @@ static int __io_import_iovec(int ddir, struct io_kiocb *req,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static inline int io_import_iovec(int rw, struct io_kiocb *req,
|
||||
struct io_async_rw *io,
|
||||
unsigned int issue_flags)
|
||||
static int __io_import_rw_buffer(int ddir, struct io_kiocb *req,
|
||||
struct io_async_rw *io,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
const struct io_issue_def *def = &io_issue_defs[req->opcode];
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
void __user *buf = u64_to_user_ptr(rw->addr);
|
||||
size_t sqe_len = rw->len;
|
||||
|
||||
if (def->vectored && !(req->flags & REQ_F_BUFFER_SELECT))
|
||||
return io_import_vec(ddir, req, io, buf, sqe_len);
|
||||
|
||||
if (io_do_buffer_select(req)) {
|
||||
buf = io_buffer_select(req, &sqe_len, issue_flags);
|
||||
if (!buf)
|
||||
return -ENOBUFS;
|
||||
rw->addr = (unsigned long) buf;
|
||||
rw->len = sqe_len;
|
||||
}
|
||||
return import_ubuf(ddir, buf, sqe_len, &io->iter);
|
||||
}
|
||||
|
||||
static inline int io_import_rw_buffer(int rw, struct io_kiocb *req,
|
||||
struct io_async_rw *io,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = __io_import_iovec(rw, req, io, issue_flags);
|
||||
ret = __io_import_rw_buffer(rw, req, io, issue_flags);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
|
||||
@ -212,20 +207,6 @@ static int io_rw_alloc_async(struct io_kiocb *req)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_prep_rw_setup(struct io_kiocb *req, int ddir, bool do_import)
|
||||
{
|
||||
struct io_async_rw *rw;
|
||||
|
||||
if (io_rw_alloc_async(req))
|
||||
return -ENOMEM;
|
||||
|
||||
if (!do_import || io_do_buffer_select(req))
|
||||
return 0;
|
||||
|
||||
rw = req->async_data;
|
||||
return io_import_iovec(ddir, req, rw, 0);
|
||||
}
|
||||
|
||||
static inline void io_meta_save_state(struct io_async_rw *io)
|
||||
{
|
||||
io->meta_state.seed = io->meta.seed;
|
||||
@ -267,14 +248,17 @@ static int io_prep_rw_pi(struct io_kiocb *req, struct io_rw *rw, int ddir,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
int ddir, bool do_import)
|
||||
static int __io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
int ddir)
|
||||
{
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
unsigned ioprio;
|
||||
u64 attr_type_mask;
|
||||
int ret;
|
||||
|
||||
if (io_rw_alloc_async(req))
|
||||
return -ENOMEM;
|
||||
|
||||
rw->kiocb.ki_pos = READ_ONCE(sqe->off);
|
||||
/* used for fixed read/write too - just read unconditionally */
|
||||
req->buf_index = READ_ONCE(sqe->buf_index);
|
||||
@ -300,10 +284,6 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
rw->addr = READ_ONCE(sqe->addr);
|
||||
rw->len = READ_ONCE(sqe->len);
|
||||
rw->flags = READ_ONCE(sqe->rw_flags);
|
||||
ret = io_prep_rw_setup(req, ddir, do_import);
|
||||
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
attr_type_mask = READ_ONCE(sqe->attr_type_mask);
|
||||
if (attr_type_mask) {
|
||||
@ -314,31 +294,50 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
return -EINVAL;
|
||||
|
||||
attr_ptr = READ_ONCE(sqe->attr_ptr);
|
||||
ret = io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
|
||||
return io_prep_rw_pi(req, rw, ddir, attr_ptr, attr_type_mask);
|
||||
}
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int io_rw_do_import(struct io_kiocb *req, int ddir)
|
||||
{
|
||||
if (io_do_buffer_select(req))
|
||||
return 0;
|
||||
|
||||
return io_import_rw_buffer(ddir, req, req->async_data, 0);
|
||||
}
|
||||
|
||||
static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
int ddir)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = __io_prep_rw(req, sqe, ddir);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
return io_rw_do_import(req, ddir);
|
||||
}
|
||||
|
||||
int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return io_prep_rw(req, sqe, ITER_DEST, true);
|
||||
return io_prep_rw(req, sqe, ITER_DEST);
|
||||
}
|
||||
|
||||
int io_prep_write(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return io_prep_rw(req, sqe, ITER_SOURCE, true);
|
||||
return io_prep_rw(req, sqe, ITER_SOURCE);
|
||||
}
|
||||
|
||||
static int io_prep_rwv(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
int ddir)
|
||||
{
|
||||
const bool do_import = !(req->flags & REQ_F_BUFFER_SELECT);
|
||||
int ret;
|
||||
|
||||
ret = io_prep_rw(req, sqe, ddir, do_import);
|
||||
ret = io_prep_rw(req, sqe, ddir);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
if (do_import)
|
||||
if (!(req->flags & REQ_F_BUFFER_SELECT))
|
||||
return 0;
|
||||
|
||||
/*
|
||||
@ -358,38 +357,30 @@ int io_prep_writev(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
return io_prep_rwv(req, sqe, ITER_SOURCE);
|
||||
}
|
||||
|
||||
static int io_prep_rw_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe,
|
||||
static int io_init_rw_fixed(struct io_kiocb *req, unsigned int issue_flags,
|
||||
int ddir)
|
||||
{
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_rsrc_node *node;
|
||||
struct io_async_rw *io;
|
||||
struct io_async_rw *io = req->async_data;
|
||||
int ret;
|
||||
|
||||
ret = io_prep_rw(req, sqe, ddir, false);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
if (io->bytes_done)
|
||||
return 0;
|
||||
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, req->buf_index);
|
||||
if (!node)
|
||||
return -EFAULT;
|
||||
io_req_assign_buf_node(req, node);
|
||||
|
||||
io = req->async_data;
|
||||
ret = io_import_fixed(ddir, &io->iter, node->buf, rw->addr, rw->len);
|
||||
ret = io_import_reg_buf(req, &io->iter, rw->addr, rw->len, ddir,
|
||||
issue_flags);
|
||||
iov_iter_save_state(&io->iter, &io->iter_state);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int io_prep_read_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return io_prep_rw_fixed(req, sqe, ITER_DEST);
|
||||
return __io_prep_rw(req, sqe, ITER_DEST);
|
||||
}
|
||||
|
||||
int io_prep_write_fixed(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
{
|
||||
return io_prep_rw_fixed(req, sqe, ITER_SOURCE);
|
||||
return __io_prep_rw(req, sqe, ITER_SOURCE);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -405,7 +396,7 @@ int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
if (!(req->flags & REQ_F_BUFFER_SELECT))
|
||||
return -EINVAL;
|
||||
|
||||
ret = io_prep_rw(req, sqe, ITER_DEST, false);
|
||||
ret = __io_prep_rw(req, sqe, ITER_DEST);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
@ -519,7 +510,7 @@ static inline int io_fixup_rw_res(struct io_kiocb *req, long res)
|
||||
return res;
|
||||
}
|
||||
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_rw *rw = io_kiocb_to_cmd(req, struct io_rw);
|
||||
struct kiocb *kiocb = &rw->kiocb;
|
||||
@ -536,7 +527,7 @@ void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
req->cqe.flags |= io_put_kbuf(req, req->cqe.res, 0);
|
||||
|
||||
io_req_rw_cleanup(req, 0);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
}
|
||||
|
||||
static void io_complete_rw(struct kiocb *kiocb, long res)
|
||||
@ -637,6 +628,7 @@ static inline loff_t *io_kiocb_ppos(struct kiocb *kiocb)
|
||||
*/
|
||||
static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
|
||||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(rw);
|
||||
struct kiocb *kiocb = &rw->kiocb;
|
||||
struct file *file = kiocb->ki_filp;
|
||||
ssize_t ret = 0;
|
||||
@ -652,6 +644,8 @@ static ssize_t loop_rw_iter(int ddir, struct io_rw *rw, struct iov_iter *iter)
|
||||
if ((kiocb->ki_flags & IOCB_NOWAIT) &&
|
||||
!(kiocb->ki_filp->f_flags & O_NONBLOCK))
|
||||
return -EAGAIN;
|
||||
if ((req->flags & REQ_F_BUF_NODE) && req->buf_node->buf->is_kbuf)
|
||||
return -EFAULT;
|
||||
|
||||
ppos = io_kiocb_ppos(kiocb);
|
||||
|
||||
@ -863,7 +857,7 @@ static int __io_read(struct io_kiocb *req, unsigned int issue_flags)
|
||||
loff_t *ppos;
|
||||
|
||||
if (io_do_buffer_select(req)) {
|
||||
ret = io_import_iovec(ITER_DEST, req, io, issue_flags);
|
||||
ret = io_import_rw_buffer(ITER_DEST, req, io, issue_flags);
|
||||
if (unlikely(ret < 0))
|
||||
return ret;
|
||||
}
|
||||
@ -1154,6 +1148,28 @@ ret_eagain:
|
||||
}
|
||||
}
|
||||
|
||||
int io_read_fixed(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = io_init_rw_fixed(req, issue_flags, ITER_DEST);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
return io_read(req, issue_flags);
|
||||
}
|
||||
|
||||
int io_write_fixed(struct io_kiocb *req, unsigned int issue_flags)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = io_init_rw_fixed(req, issue_flags, ITER_SOURCE);
|
||||
if (unlikely(ret))
|
||||
return ret;
|
||||
|
||||
return io_write(req, issue_flags);
|
||||
}
|
||||
|
||||
void io_rw_fail(struct io_kiocb *req)
|
||||
{
|
||||
int res;
|
||||
|
@ -1,5 +1,6 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include <linux/io_uring_types.h>
|
||||
#include <linux/pagemap.h>
|
||||
|
||||
struct io_meta_state {
|
||||
@ -37,9 +38,11 @@ int io_prep_read(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_prep_write(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_read(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_write(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_read_fixed(struct io_kiocb *req, unsigned int issue_flags);
|
||||
int io_write_fixed(struct io_kiocb *req, unsigned int issue_flags);
|
||||
void io_readv_writev_cleanup(struct io_kiocb *req);
|
||||
void io_rw_fail(struct io_kiocb *req);
|
||||
void io_req_rw_complete(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
void io_req_rw_complete(struct io_kiocb *req, io_tw_token_t tw);
|
||||
int io_read_mshot_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe);
|
||||
int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags);
|
||||
void io_rw_cache_free(const void *entry);
|
||||
|
@ -51,7 +51,8 @@ void io_splice_cleanup(struct io_kiocb *req)
|
||||
{
|
||||
struct io_splice *sp = io_kiocb_to_cmd(req, struct io_splice);
|
||||
|
||||
io_put_rsrc_node(req->ctx, sp->rsrc_node);
|
||||
if (sp->rsrc_node)
|
||||
io_put_rsrc_node(req->ctx, sp->rsrc_node);
|
||||
}
|
||||
|
||||
static struct file *io_splice_get_file(struct io_kiocb *req,
|
||||
|
@ -65,7 +65,7 @@ static inline bool io_timeout_finish(struct io_timeout *timeout,
|
||||
|
||||
static enum hrtimer_restart io_timeout_fn(struct hrtimer *timer);
|
||||
|
||||
static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_timeout_complete(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
struct io_timeout_data *data = req->async_data;
|
||||
@ -82,7 +82,7 @@ static void io_timeout_complete(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
}
|
||||
}
|
||||
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
}
|
||||
|
||||
static __cold bool io_flush_killed_timeouts(struct list_head *list, int err)
|
||||
@ -154,9 +154,9 @@ __cold void io_flush_timeouts(struct io_ring_ctx *ctx)
|
||||
io_flush_killed_timeouts(&list, 0);
|
||||
}
|
||||
|
||||
static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
|
||||
static void io_req_tw_fail_links(struct io_kiocb *link, io_tw_token_t tw)
|
||||
{
|
||||
io_tw_lock(link->ctx, ts);
|
||||
io_tw_lock(link->ctx, tw);
|
||||
while (link) {
|
||||
struct io_kiocb *nxt = link->link;
|
||||
long res = -ECANCELED;
|
||||
@ -165,7 +165,7 @@ static void io_req_tw_fail_links(struct io_kiocb *link, struct io_tw_state *ts)
|
||||
res = link->cqe.res;
|
||||
link->link = NULL;
|
||||
io_req_set_res(link, res, 0);
|
||||
io_req_task_complete(link, ts);
|
||||
io_req_task_complete(link, tw);
|
||||
link = nxt;
|
||||
}
|
||||
}
|
||||
@ -312,7 +312,7 @@ int io_timeout_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_req_task_link_timeout(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_timeout *timeout = io_kiocb_to_cmd(req, struct io_timeout);
|
||||
struct io_kiocb *prev = timeout->prev;
|
||||
@ -330,11 +330,11 @@ static void io_req_task_link_timeout(struct io_kiocb *req, struct io_tw_state *t
|
||||
ret = -ECANCELED;
|
||||
}
|
||||
io_req_set_res(req, ret ?: -ETIME, 0);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
io_put_req(prev);
|
||||
} else {
|
||||
io_req_set_res(req, -ETIME, 0);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -102,7 +102,7 @@ void io_uring_cmd_mark_cancelable(struct io_uring_cmd *cmd,
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_mark_cancelable);
|
||||
|
||||
static void io_uring_cmd_work(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_uring_cmd_work(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_uring_cmd *ioucmd = io_kiocb_to_cmd(req, struct io_uring_cmd);
|
||||
unsigned int flags = IO_URING_F_COMPLETE_DEFER;
|
||||
@ -199,21 +199,9 @@ int io_uring_cmd_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
|
||||
if (ioucmd->flags & ~IORING_URING_CMD_MASK)
|
||||
return -EINVAL;
|
||||
|
||||
if (ioucmd->flags & IORING_URING_CMD_FIXED) {
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
struct io_rsrc_node *node;
|
||||
u16 index = READ_ONCE(sqe->buf_index);
|
||||
if (ioucmd->flags & IORING_URING_CMD_FIXED)
|
||||
req->buf_index = READ_ONCE(sqe->buf_index);
|
||||
|
||||
node = io_rsrc_node_lookup(&ctx->buf_table, index);
|
||||
if (unlikely(!node))
|
||||
return -EFAULT;
|
||||
/*
|
||||
* Pi node upfront, prior to io_uring_cmd_import_fixed()
|
||||
* being called. This prevents destruction of the mapped buffer
|
||||
* we'll need at actual import time.
|
||||
*/
|
||||
io_req_assign_buf_node(req, node);
|
||||
}
|
||||
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
|
||||
|
||||
return io_uring_cmd_prep_setup(req, sqe);
|
||||
@ -237,7 +225,7 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
issue_flags |= IO_URING_F_SQE128;
|
||||
if (ctx->flags & IORING_SETUP_CQE32)
|
||||
issue_flags |= IO_URING_F_CQE32;
|
||||
if (ctx->compat)
|
||||
if (io_is_compat(ctx))
|
||||
issue_flags |= IO_URING_F_COMPAT;
|
||||
if (ctx->flags & IORING_SETUP_IOPOLL) {
|
||||
if (!file->f_op->uring_cmd_iopoll)
|
||||
@ -257,16 +245,13 @@ int io_uring_cmd(struct io_kiocb *req, unsigned int issue_flags)
|
||||
}
|
||||
|
||||
int io_uring_cmd_import_fixed(u64 ubuf, unsigned long len, int rw,
|
||||
struct iov_iter *iter, void *ioucmd)
|
||||
struct iov_iter *iter,
|
||||
struct io_uring_cmd *ioucmd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct io_kiocb *req = cmd_to_io_kiocb(ioucmd);
|
||||
struct io_rsrc_node *node = req->buf_node;
|
||||
|
||||
/* Must have had rsrc_node assigned at prep time */
|
||||
if (node)
|
||||
return io_import_fixed(rw, iter, node->buf, ubuf, len);
|
||||
|
||||
return -EFAULT;
|
||||
return io_import_reg_buf(req, iter, ubuf, len, rw, issue_flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(io_uring_cmd_import_fixed);
|
||||
|
||||
|
@ -16,7 +16,7 @@
|
||||
#include "waitid.h"
|
||||
#include "../kernel/exit.h"
|
||||
|
||||
static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts);
|
||||
static void io_waitid_cb(struct io_kiocb *req, io_tw_token_t tw);
|
||||
|
||||
#define IO_WAITID_CANCEL_FLAG BIT(31)
|
||||
#define IO_WAITID_REF_MASK GENMASK(30, 0)
|
||||
@ -42,7 +42,6 @@ static void io_waitid_free(struct io_kiocb *req)
|
||||
req->flags &= ~REQ_F_ASYNC_DATA;
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
static bool io_waitid_compat_copy_si(struct io_waitid *iw, int signo)
|
||||
{
|
||||
struct compat_siginfo __user *infop;
|
||||
@ -67,7 +66,6 @@ Efault:
|
||||
ret = false;
|
||||
goto done;
|
||||
}
|
||||
#endif
|
||||
|
||||
static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
|
||||
{
|
||||
@ -77,10 +75,8 @@ static bool io_waitid_copy_si(struct io_kiocb *req, int signo)
|
||||
if (!iw->infop)
|
||||
return true;
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
if (req->ctx->compat)
|
||||
if (io_is_compat(req->ctx))
|
||||
return io_waitid_compat_copy_si(iw, signo);
|
||||
#endif
|
||||
|
||||
if (!user_write_access_begin(iw->infop, sizeof(*iw->infop)))
|
||||
return false;
|
||||
@ -132,7 +128,7 @@ static void io_waitid_complete(struct io_kiocb *req, int ret)
|
||||
io_req_set_res(req, ret, 0);
|
||||
}
|
||||
|
||||
static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
|
||||
static bool __io_waitid_cancel(struct io_kiocb *req)
|
||||
{
|
||||
struct io_waitid *iw = io_kiocb_to_cmd(req, struct io_waitid);
|
||||
struct io_waitid_async *iwa = req->async_data;
|
||||
@ -158,49 +154,13 @@ static bool __io_waitid_cancel(struct io_ring_ctx *ctx, struct io_kiocb *req)
|
||||
int io_waitid_cancel(struct io_ring_ctx *ctx, struct io_cancel_data *cd,
|
||||
unsigned int issue_flags)
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
int nr = 0;
|
||||
|
||||
if (cd->flags & (IORING_ASYNC_CANCEL_FD|IORING_ASYNC_CANCEL_FD_FIXED))
|
||||
return -ENOENT;
|
||||
|
||||
io_ring_submit_lock(ctx, issue_flags);
|
||||
hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
|
||||
if (req->cqe.user_data != cd->data &&
|
||||
!(cd->flags & IORING_ASYNC_CANCEL_ANY))
|
||||
continue;
|
||||
if (__io_waitid_cancel(ctx, req))
|
||||
nr++;
|
||||
if (!(cd->flags & IORING_ASYNC_CANCEL_ALL))
|
||||
break;
|
||||
}
|
||||
io_ring_submit_unlock(ctx, issue_flags);
|
||||
|
||||
if (nr)
|
||||
return nr;
|
||||
|
||||
return -ENOENT;
|
||||
return io_cancel_remove(ctx, cd, issue_flags, &ctx->waitid_list, __io_waitid_cancel);
|
||||
}
|
||||
|
||||
bool io_waitid_remove_all(struct io_ring_ctx *ctx, struct io_uring_task *tctx,
|
||||
bool cancel_all)
|
||||
{
|
||||
struct hlist_node *tmp;
|
||||
struct io_kiocb *req;
|
||||
bool found = false;
|
||||
|
||||
lockdep_assert_held(&ctx->uring_lock);
|
||||
|
||||
hlist_for_each_entry_safe(req, tmp, &ctx->waitid_list, hash_node) {
|
||||
if (!io_match_task_safe(req, tctx, cancel_all))
|
||||
continue;
|
||||
hlist_del_init(&req->hash_node);
|
||||
__io_waitid_cancel(ctx, req);
|
||||
found = true;
|
||||
}
|
||||
|
||||
return found;
|
||||
return io_cancel_remove_all(ctx, tctx, &ctx->waitid_list, cancel_all, __io_waitid_cancel);
|
||||
}
|
||||
|
||||
static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req)
|
||||
@ -221,13 +181,13 @@ static inline bool io_waitid_drop_issue_ref(struct io_kiocb *req)
|
||||
return true;
|
||||
}
|
||||
|
||||
static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
static void io_waitid_cb(struct io_kiocb *req, io_tw_token_t tw)
|
||||
{
|
||||
struct io_waitid_async *iwa = req->async_data;
|
||||
struct io_ring_ctx *ctx = req->ctx;
|
||||
int ret;
|
||||
|
||||
io_tw_lock(ctx, ts);
|
||||
io_tw_lock(ctx, tw);
|
||||
|
||||
ret = __do_wait(&iwa->wo);
|
||||
|
||||
@ -257,7 +217,7 @@ static void io_waitid_cb(struct io_kiocb *req, struct io_tw_state *ts)
|
||||
}
|
||||
|
||||
io_waitid_complete(req, ret);
|
||||
io_req_task_complete(req, ts);
|
||||
io_req_task_complete(req, tw);
|
||||
}
|
||||
|
||||
static int io_waitid_wait(struct wait_queue_entry *wait, unsigned mode,
|
||||
|
@ -114,6 +114,7 @@ endif
|
||||
TARGETS += tmpfs
|
||||
TARGETS += tpm2
|
||||
TARGETS += tty
|
||||
TARGETS += ublk
|
||||
TARGETS += uevent
|
||||
TARGETS += user_events
|
||||
TARGETS += vDSO
|
||||
|
3
tools/testing/selftests/ublk/.gitignore
vendored
Normal file
3
tools/testing/selftests/ublk/.gitignore
vendored
Normal file
@ -0,0 +1,3 @@
|
||||
kublk
|
||||
/tools
|
||||
*-verify.state
|
27
tools/testing/selftests/ublk/Makefile
Normal file
27
tools/testing/selftests/ublk/Makefile
Normal file
@ -0,0 +1,27 @@
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
CFLAGS += -O3 -Wl,-no-as-needed -Wall -I $(top_srcdir)
|
||||
LDLIBS += -lpthread -lm -luring
|
||||
|
||||
TEST_PROGS := test_generic_01.sh
|
||||
|
||||
TEST_PROGS += test_null_01.sh
|
||||
TEST_PROGS += test_null_02.sh
|
||||
TEST_PROGS += test_loop_01.sh
|
||||
TEST_PROGS += test_loop_02.sh
|
||||
TEST_PROGS += test_loop_03.sh
|
||||
TEST_PROGS += test_loop_04.sh
|
||||
TEST_PROGS += test_stripe_01.sh
|
||||
TEST_PROGS += test_stripe_02.sh
|
||||
|
||||
TEST_PROGS += test_stress_01.sh
|
||||
TEST_PROGS += test_stress_02.sh
|
||||
|
||||
TEST_GEN_PROGS_EXTENDED = kublk
|
||||
|
||||
include ../lib.mk
|
||||
|
||||
$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
|
||||
|
||||
check:
|
||||
shellcheck -x -f gcc *.sh
|
55
tools/testing/selftests/ublk/common.c
Normal file
55
tools/testing/selftests/ublk/common.c
Normal file
@ -0,0 +1,55 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
void backing_file_tgt_deinit(struct ublk_dev *dev)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 1; i < dev->nr_fds; i++) {
|
||||
fsync(dev->fds[i]);
|
||||
close(dev->fds[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int backing_file_tgt_init(struct ublk_dev *dev)
|
||||
{
|
||||
int fd, i;
|
||||
|
||||
assert(dev->nr_fds == 1);
|
||||
|
||||
for (i = 0; i < dev->tgt.nr_backing_files; i++) {
|
||||
char *file = dev->tgt.backing_file[i];
|
||||
unsigned long bytes;
|
||||
struct stat st;
|
||||
|
||||
ublk_dbg(UBLK_DBG_DEV, "%s: file %d: %s\n", __func__, i, file);
|
||||
|
||||
fd = open(file, O_RDWR | O_DIRECT);
|
||||
if (fd < 0) {
|
||||
ublk_err("%s: backing file %s can't be opened: %s\n",
|
||||
__func__, file, strerror(errno));
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
if (fstat(fd, &st) < 0) {
|
||||
close(fd);
|
||||
return -EBADF;
|
||||
}
|
||||
|
||||
if (S_ISREG(st.st_mode))
|
||||
bytes = st.st_size;
|
||||
else if (S_ISBLK(st.st_mode)) {
|
||||
if (ioctl(fd, BLKGETSIZE64, &bytes) != 0)
|
||||
return -1;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
dev->tgt.backing_file_size[i] = bytes;
|
||||
dev->fds[dev->nr_fds] = fd;
|
||||
dev->nr_fds += 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
1
tools/testing/selftests/ublk/config
Normal file
1
tools/testing/selftests/ublk/config
Normal file
@ -0,0 +1 @@
|
||||
CONFIG_BLK_DEV_UBLK=m
|
169
tools/testing/selftests/ublk/file_backed.c
Normal file
169
tools/testing/selftests/ublk/file_backed.c
Normal file
@ -0,0 +1,169 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
static enum io_uring_op ublk_to_uring_op(const struct ublksrv_io_desc *iod, int zc)
|
||||
{
|
||||
unsigned ublk_op = ublksrv_get_op(iod);
|
||||
|
||||
if (ublk_op == UBLK_IO_OP_READ)
|
||||
return zc ? IORING_OP_READ_FIXED : IORING_OP_READ;
|
||||
else if (ublk_op == UBLK_IO_OP_WRITE)
|
||||
return zc ? IORING_OP_WRITE_FIXED : IORING_OP_WRITE;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
static int loop_queue_flush_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
|
||||
{
|
||||
unsigned ublk_op = ublksrv_get_op(iod);
|
||||
struct io_uring_sqe *sqe[1];
|
||||
|
||||
ublk_queue_alloc_sqes(q, sqe, 1);
|
||||
io_uring_prep_fsync(sqe[0], 1 /*fds[1]*/, IORING_FSYNC_DATASYNC);
|
||||
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
|
||||
/* bit63 marks us as tgt io */
|
||||
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
static int loop_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
|
||||
{
|
||||
unsigned ublk_op = ublksrv_get_op(iod);
|
||||
int zc = ublk_queue_use_zc(q);
|
||||
enum io_uring_op op = ublk_to_uring_op(iod, zc);
|
||||
struct io_uring_sqe *sqe[3];
|
||||
|
||||
if (!zc) {
|
||||
ublk_queue_alloc_sqes(q, sqe, 1);
|
||||
if (!sqe[0])
|
||||
return -ENOMEM;
|
||||
|
||||
io_uring_prep_rw(op, sqe[0], 1 /*fds[1]*/,
|
||||
(void *)iod->addr,
|
||||
iod->nr_sectors << 9,
|
||||
iod->start_sector << 9);
|
||||
io_uring_sqe_set_flags(sqe[0], IOSQE_FIXED_FILE);
|
||||
/* bit63 marks us as tgt io */
|
||||
sqe[0]->user_data = build_user_data(tag, ublk_op, 0, 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
ublk_queue_alloc_sqes(q, sqe, 3);
|
||||
|
||||
io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
|
||||
sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;
|
||||
sqe[0]->user_data = build_user_data(tag,
|
||||
ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
|
||||
|
||||
io_uring_prep_rw(op, sqe[1], 1 /*fds[1]*/, 0,
|
||||
iod->nr_sectors << 9,
|
||||
iod->start_sector << 9);
|
||||
sqe[1]->buf_index = tag;
|
||||
sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
|
||||
sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);
|
||||
|
||||
io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
|
||||
sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);
|
||||
|
||||
return 2;
|
||||
}
|
||||
|
||||
static int loop_queue_tgt_io(struct ublk_queue *q, int tag)
|
||||
{
|
||||
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
|
||||
unsigned ublk_op = ublksrv_get_op(iod);
|
||||
int ret;
|
||||
|
||||
switch (ublk_op) {
|
||||
case UBLK_IO_OP_FLUSH:
|
||||
ret = loop_queue_flush_io(q, iod, tag);
|
||||
break;
|
||||
case UBLK_IO_OP_WRITE_ZEROES:
|
||||
case UBLK_IO_OP_DISCARD:
|
||||
ret = -ENOTSUP;
|
||||
break;
|
||||
case UBLK_IO_OP_READ:
|
||||
case UBLK_IO_OP_WRITE:
|
||||
ret = loop_queue_tgt_rw_io(q, iod, tag);
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u\n", __func__, tag,
|
||||
iod->op_flags, iod->start_sector, iod->nr_sectors << 9);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int ublk_loop_queue_io(struct ublk_queue *q, int tag)
|
||||
{
|
||||
int queued = loop_queue_tgt_io(q, tag);
|
||||
|
||||
ublk_queued_tgt_io(q, tag, queued);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ublk_loop_io_done(struct ublk_queue *q, int tag,
|
||||
const struct io_uring_cqe *cqe)
|
||||
{
|
||||
unsigned op = user_data_to_op(cqe->user_data);
|
||||
struct ublk_io *io = ublk_get_io(q, tag);
|
||||
|
||||
if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
|
||||
if (!io->result)
|
||||
io->result = cqe->res;
|
||||
if (cqe->res < 0)
|
||||
ublk_err("%s: io failed op %x user_data %lx\n",
|
||||
__func__, op, cqe->user_data);
|
||||
}
|
||||
|
||||
/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
|
||||
if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
|
||||
io->tgt_ios += 1;
|
||||
|
||||
if (ublk_completed_tgt_io(q, tag))
|
||||
ublk_complete_io(q, tag, io->result);
|
||||
}
|
||||
|
||||
static int ublk_loop_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
|
||||
{
|
||||
unsigned long long bytes;
|
||||
int ret;
|
||||
struct ublk_params p = {
|
||||
.types = UBLK_PARAM_TYPE_BASIC | UBLK_PARAM_TYPE_DMA_ALIGN,
|
||||
.basic = {
|
||||
.attrs = UBLK_ATTR_VOLATILE_CACHE,
|
||||
.logical_bs_shift = 9,
|
||||
.physical_bs_shift = 12,
|
||||
.io_opt_shift = 12,
|
||||
.io_min_shift = 9,
|
||||
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
|
||||
},
|
||||
.dma = {
|
||||
.alignment = 511,
|
||||
},
|
||||
};
|
||||
|
||||
ret = backing_file_tgt_init(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (dev->tgt.nr_backing_files != 1)
|
||||
return -EINVAL;
|
||||
|
||||
bytes = dev->tgt.backing_file_size[0];
|
||||
dev->tgt.dev_size = bytes;
|
||||
p.basic.dev_sectors = bytes >> 9;
|
||||
dev->tgt.params = p;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
const struct ublk_tgt_ops loop_tgt_ops = {
|
||||
.name = "loop",
|
||||
.init_tgt = ublk_loop_tgt_init,
|
||||
.deinit_tgt = backing_file_tgt_deinit,
|
||||
.queue_io = ublk_loop_queue_io,
|
||||
.tgt_io_done = ublk_loop_io_done,
|
||||
};
|
1138
tools/testing/selftests/ublk/kublk.c
Normal file
1138
tools/testing/selftests/ublk/kublk.c
Normal file
File diff suppressed because it is too large
Load Diff
370
tools/testing/selftests/ublk/kublk.h
Normal file
370
tools/testing/selftests/ublk/kublk.h
Normal file
@ -0,0 +1,370 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
#ifndef KUBLK_INTERNAL_H
|
||||
#define KUBLK_INTERNAL_H
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <string.h>
|
||||
#include <pthread.h>
|
||||
#include <getopt.h>
|
||||
#include <limits.h>
|
||||
#include <poll.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <sys/mman.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <sys/inotify.h>
|
||||
#include <sys/wait.h>
|
||||
#include <sys/eventfd.h>
|
||||
#include <sys/uio.h>
|
||||
#include <liburing.h>
|
||||
#include <linux/ublk_cmd.h>
|
||||
#include "ublk_dep.h"
|
||||
|
||||
#define __maybe_unused __attribute__((unused))
|
||||
#define MAX_BACK_FILES 4
|
||||
#ifndef min
|
||||
#define min(a, b) ((a) < (b) ? (a) : (b))
|
||||
#endif
|
||||
|
||||
/****************** part 1: libublk ********************/
|
||||
|
||||
#define CTRL_DEV "/dev/ublk-control"
|
||||
#define UBLKC_DEV "/dev/ublkc"
|
||||
#define UBLKB_DEV "/dev/ublkb"
|
||||
#define UBLK_CTRL_RING_DEPTH 32
|
||||
#define ERROR_EVTFD_DEVID -2
|
||||
|
||||
/* queue idle timeout */
|
||||
#define UBLKSRV_IO_IDLE_SECS 20
|
||||
|
||||
#define UBLK_IO_MAX_BYTES (1 << 20)
|
||||
#define UBLK_MAX_QUEUES 4
|
||||
#define UBLK_QUEUE_DEPTH 128
|
||||
|
||||
#define UBLK_DBG_DEV (1U << 0)
|
||||
#define UBLK_DBG_QUEUE (1U << 1)
|
||||
#define UBLK_DBG_IO_CMD (1U << 2)
|
||||
#define UBLK_DBG_IO (1U << 3)
|
||||
#define UBLK_DBG_CTRL_CMD (1U << 4)
|
||||
#define UBLK_LOG (1U << 5)
|
||||
|
||||
struct ublk_dev;
|
||||
struct ublk_queue;
|
||||
|
||||
struct dev_ctx {
|
||||
char tgt_type[16];
|
||||
unsigned long flags;
|
||||
unsigned nr_hw_queues;
|
||||
unsigned queue_depth;
|
||||
int dev_id;
|
||||
int nr_files;
|
||||
char *files[MAX_BACK_FILES];
|
||||
unsigned int logging:1;
|
||||
unsigned int all:1;
|
||||
unsigned int fg:1;
|
||||
|
||||
/* stripe */
|
||||
unsigned int chunk_size;
|
||||
|
||||
int _evtfd;
|
||||
};
|
||||
|
||||
struct ublk_ctrl_cmd_data {
|
||||
__u32 cmd_op;
|
||||
#define CTRL_CMD_HAS_DATA 1
|
||||
#define CTRL_CMD_HAS_BUF 2
|
||||
__u32 flags;
|
||||
|
||||
__u64 data[2];
|
||||
__u64 addr;
|
||||
__u32 len;
|
||||
};
|
||||
|
||||
struct ublk_io {
|
||||
char *buf_addr;
|
||||
|
||||
#define UBLKSRV_NEED_FETCH_RQ (1UL << 0)
|
||||
#define UBLKSRV_NEED_COMMIT_RQ_COMP (1UL << 1)
|
||||
#define UBLKSRV_IO_FREE (1UL << 2)
|
||||
unsigned short flags;
|
||||
unsigned short refs; /* used by target code only */
|
||||
|
||||
int result;
|
||||
|
||||
unsigned short tgt_ios;
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
struct ublk_tgt_ops {
|
||||
const char *name;
|
||||
int (*init_tgt)(const struct dev_ctx *ctx, struct ublk_dev *);
|
||||
void (*deinit_tgt)(struct ublk_dev *);
|
||||
|
||||
int (*queue_io)(struct ublk_queue *, int tag);
|
||||
void (*tgt_io_done)(struct ublk_queue *,
|
||||
int tag, const struct io_uring_cqe *);
|
||||
};
|
||||
|
||||
struct ublk_tgt {
|
||||
unsigned long dev_size;
|
||||
unsigned int sq_depth;
|
||||
unsigned int cq_depth;
|
||||
const struct ublk_tgt_ops *ops;
|
||||
struct ublk_params params;
|
||||
|
||||
int nr_backing_files;
|
||||
unsigned long backing_file_size[MAX_BACK_FILES];
|
||||
char backing_file[MAX_BACK_FILES][PATH_MAX];
|
||||
};
|
||||
|
||||
struct ublk_queue {
|
||||
int q_id;
|
||||
int q_depth;
|
||||
unsigned int cmd_inflight;
|
||||
unsigned int io_inflight;
|
||||
struct ublk_dev *dev;
|
||||
const struct ublk_tgt_ops *tgt_ops;
|
||||
char *io_cmd_buf;
|
||||
struct io_uring ring;
|
||||
struct ublk_io ios[UBLK_QUEUE_DEPTH];
|
||||
#define UBLKSRV_QUEUE_STOPPING (1U << 0)
|
||||
#define UBLKSRV_QUEUE_IDLE (1U << 1)
|
||||
#define UBLKSRV_NO_BUF (1U << 2)
|
||||
#define UBLKSRV_ZC (1U << 3)
|
||||
unsigned state;
|
||||
pid_t tid;
|
||||
pthread_t thread;
|
||||
};
|
||||
|
||||
struct ublk_dev {
|
||||
struct ublk_tgt tgt;
|
||||
struct ublksrv_ctrl_dev_info dev_info;
|
||||
struct ublk_queue q[UBLK_MAX_QUEUES];
|
||||
|
||||
int fds[MAX_BACK_FILES + 1]; /* fds[0] points to /dev/ublkcN */
|
||||
int nr_fds;
|
||||
int ctrl_fd;
|
||||
struct io_uring ring;
|
||||
|
||||
void *private_data;
|
||||
};
|
||||
|
||||
#ifndef offsetof
|
||||
#define offsetof(TYPE, MEMBER) ((size_t)&((TYPE *)0)->MEMBER)
|
||||
#endif
|
||||
|
||||
#ifndef container_of
|
||||
#define container_of(ptr, type, member) ({ \
|
||||
unsigned long __mptr = (unsigned long)(ptr); \
|
||||
((type *)(__mptr - offsetof(type, member))); })
|
||||
#endif
|
||||
|
||||
#define round_up(val, rnd) \
|
||||
(((val) + ((rnd) - 1)) & ~((rnd) - 1))
|
||||
|
||||
|
||||
extern unsigned int ublk_dbg_mask;
|
||||
extern int ublk_queue_io_cmd(struct ublk_queue *q, struct ublk_io *io, unsigned tag);
|
||||
|
||||
static inline int is_target_io(__u64 user_data)
|
||||
{
|
||||
return (user_data & (1ULL << 63)) != 0;
|
||||
}
|
||||
|
||||
/*
 * Pack CQE user_data: tag in bits 0-15, op in bits 16-23, tgt_data in
 * bits 24-39, and is_target_io in bit 63.
 *
 * Fix: tgt_data must be widened to __u64 *before* shifting.  The assert
 * permits 16-bit tgt_data, but '(unsigned)tgt_data << 24' is evaluated in
 * 32-bit arithmetic, silently discarding the upper 8 bits.
 */
static inline __u64 build_user_data(unsigned tag, unsigned op,
		unsigned tgt_data, unsigned is_target_io)
{
	assert(!(tag >> 16) && !(op >> 8) && !(tgt_data >> 16));

	return tag | (op << 16) | ((__u64)tgt_data << 24) |
		(__u64)is_target_io << 63;
}
|
||||
|
||||
/* Extract the tag (bits 0-15) packed by build_user_data(). */
static inline unsigned int user_data_to_tag(__u64 user_data)
{
	const __u64 tag_mask = 0xffff;

	return (unsigned int)(user_data & tag_mask);
}
|
||||
|
||||
/* Extract the op (bits 16-23) packed by build_user_data(). */
static inline unsigned int user_data_to_op(__u64 user_data)
{
	const unsigned shift = 16;

	return (unsigned int)((user_data >> shift) & 0xff);
}
|
||||
|
||||
static inline unsigned int user_data_to_tgt_data(__u64 user_data)
|
||||
{
|
||||
return (user_data >> 24) & 0xffff;
|
||||
}
|
||||
|
||||
/* Return the NR field of an ioctl-style ublk command code (e.g. the
 * distinguishing number of UBLK_U_IO_REGISTER_IO_BUF).
 */
static inline unsigned short ublk_cmd_op_nr(unsigned int op)
{
	return (unsigned short)_IOC_NR(op);
}
|
||||
|
||||
/*
 * Unconditional error logging to stderr, printf-style.
 *
 * Fix: the original never called va_end(); the C standard requires every
 * va_start() to be paired with va_end() before the function returns.
 */
static inline void ublk_err(const char *fmt, ...)
{
	va_list ap;

	va_start(ap, fmt);
	vfprintf(stderr, fmt, ap);
	va_end(ap);
}
|
||||
|
||||
static inline void ublk_log(const char *fmt, ...)
|
||||
{
|
||||
if (ublk_dbg_mask & UBLK_LOG) {
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stdout, fmt, ap);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void ublk_dbg(int level, const char *fmt, ...)
|
||||
{
|
||||
if (level & ublk_dbg_mask) {
|
||||
va_list ap;
|
||||
|
||||
va_start(ap, fmt);
|
||||
vfprintf(stdout, fmt, ap);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Grab nr_sqes SQEs from the queue's ring, submitting first if the SQ ring
 * does not have enough free slots.  Returns the number of SQEs actually
 * obtained (may be < nr_sqes if the ring is still full after submit).
 */
static inline int ublk_queue_alloc_sqes(struct ublk_queue *q,
		struct io_uring_sqe *sqes[], int nr_sqes)
{
	unsigned left = io_uring_sq_space_left(&q->ring);
	int i;

	/* flush pending SQEs to the kernel to free up ring space */
	if (left < nr_sqes)
		io_uring_submit(&q->ring);

	for (i = 0; i < nr_sqes; i++) {
		sqes[i] = io_uring_get_sqe(&q->ring);
		if (!sqes[i])
			return i;	/* partial allocation; caller should check */
	}

	return nr_sqes;
}
|
||||
|
||||
/*
 * Prepare a UBLK_U_IO_REGISTER_IO_BUF uring_cmd: register the kernel I/O
 * buffer of (q_id, tag) into this ring's fixed-buffer table at 'index'
 * (zero-copy path).  dev_fd is an index into the registered-file table.
 */
static inline void io_uring_prep_buf_register(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	/* start from a zeroed read SQE, then turn it into a uring_cmd */
	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode		= IORING_OP_URING_CMD;
	sqe->flags		|= IOSQE_FIXED_FILE;
	sqe->cmd_op		= UBLK_U_IO_REGISTER_IO_BUF;

	cmd->tag	= tag;
	cmd->addr	= index;	/* destination slot in the buffer table */
	cmd->q_id	= q_id;
}
|
||||
|
||||
/*
 * Prepare a UBLK_U_IO_UNREGISTER_IO_BUF uring_cmd: drop the buffer-table
 * entry created by io_uring_prep_buf_register() for (q_id, tag) at 'index'.
 */
static inline void io_uring_prep_buf_unregister(struct io_uring_sqe *sqe,
		int dev_fd, int tag, int q_id, __u64 index)
{
	struct ublksrv_io_cmd *cmd = (struct ublksrv_io_cmd *)sqe->cmd;

	/* start from a zeroed read SQE, then turn it into a uring_cmd */
	io_uring_prep_read(sqe, dev_fd, 0, 0, 0);
	sqe->opcode		= IORING_OP_URING_CMD;
	sqe->flags		|= IOSQE_FIXED_FILE;
	sqe->cmd_op		= UBLK_U_IO_UNREGISTER_IO_BUF;

	cmd->tag	= tag;
	cmd->addr	= index;	/* buffer-table slot to release */
	cmd->q_id	= q_id;
}
|
||||
|
||||
/* Return a pointer to the inline command payload area of a uring_cmd SQE. */
static inline void *ublk_get_sqe_cmd(const struct io_uring_sqe *sqe)
{
	return (void *)&sqe->cmd;
}
|
||||
|
||||
/* Record the completion result for the I/O identified by 'tag'. */
static inline void ublk_set_io_res(struct ublk_queue *q, int tag, int res)
{
	q->ios[tag].result = res;
}
|
||||
|
||||
/* Fetch the stored completion result for the I/O identified by 'tag'. */
static inline int ublk_get_io_res(const struct ublk_queue *q, unsigned tag)
{
	return q->ios[tag].result;
}
|
||||
|
||||
/*
 * Mark an I/O finished: its result must still be committed back to the
 * driver (COMMIT_RQ_COMP) and its slot is free for reuse.
 */
static inline void ublk_mark_io_done(struct ublk_io *io, int res)
{
	io->flags |= (UBLKSRV_NEED_COMMIT_RQ_COMP | UBLKSRV_IO_FREE);
	io->result = res;
}
|
||||
|
||||
/*
 * Return the driver-populated I/O descriptor for 'tag'.  io_cmd_buf is the
 * shared mmap'ed descriptor array, indexed by tag.
 */
static inline const struct ublksrv_io_desc *ublk_get_iod(const struct ublk_queue *q, int tag)
{
	return (struct ublksrv_io_desc *)&(q->io_cmd_buf[tag * sizeof(struct ublksrv_io_desc)]);
}
|
||||
|
||||
/*
 * Store cmd_op into the SQE.  cmd_op overlays the low 32 bits of sqe->off
 * in the uring_cmd SQE layout; the upper 32 bits must be zeroed.
 */
static inline void ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, __u32 cmd_op)
{
	__u32 *addr = (__u32 *)&sqe->off;

	addr[0] = cmd_op;
	addr[1] = 0;
}
|
||||
|
||||
/* Return the per-tag I/O state slot. */
static inline struct ublk_io *ublk_get_io(struct ublk_queue *q, unsigned tag)
{
	return &q->ios[tag];
}
|
||||
|
||||
/*
 * Complete the ublk I/O for 'tag' with result 'res': mark it done and queue
 * the commit-and-fetch uring_cmd back to the driver.
 */
static inline int ublk_complete_io(struct ublk_queue *q, unsigned tag, int res)
{
	struct ublk_io *io = &q->ios[tag];

	ublk_mark_io_done(io, res);

	return ublk_queue_io_cmd(q, io, tag);
}
|
||||
|
||||
/*
 * Account for target I/Os just queued on behalf of 'tag'.  A negative
 * 'queued' is an error code: complete the ublk I/O with it immediately.
 * Otherwise remember how many backing I/Os must complete before the ublk
 * I/O itself can complete (see ublk_completed_tgt_io()).
 */
static inline void ublk_queued_tgt_io(struct ublk_queue *q, unsigned tag, int queued)
{
	if (queued < 0)
		ublk_complete_io(q, tag, queued);
	else {
		struct ublk_io *io = ublk_get_io(q, tag);

		q->io_inflight += queued;
		io->tgt_ios = queued;
		io->result = 0;		/* accumulated by tgt_io_done callbacks */
	}
}
|
||||
|
||||
/*
 * One backing I/O for 'tag' finished.  Returns non-zero when it was the
 * last outstanding one, i.e. the ublk I/O may now be completed.
 */
static inline int ublk_completed_tgt_io(struct ublk_queue *q, unsigned tag)
{
	struct ublk_io *io = ublk_get_io(q, tag);

	q->io_inflight--;

	return --io->tgt_ios == 0;
}
|
||||
|
||||
/* Non-zero when the queue runs in zero-copy mode (-z / UBLK_F_SUPPORT_ZERO_COPY). */
static inline int ublk_queue_use_zc(const struct ublk_queue *q)
{
	return q->state & UBLKSRV_ZC;
}
|
||||
|
||||
extern const struct ublk_tgt_ops null_tgt_ops;
|
||||
extern const struct ublk_tgt_ops loop_tgt_ops;
|
||||
extern const struct ublk_tgt_ops stripe_tgt_ops;
|
||||
|
||||
void backing_file_tgt_deinit(struct ublk_dev *dev);
|
||||
int backing_file_tgt_init(struct ublk_dev *dev);
|
||||
|
||||
/*
 * Integer base-2 logarithm: floor(log2(x)) for x > 0, and 0 for x == 0
 * (matching the kernel convention used by callers here).
 */
static inline unsigned int ilog2(unsigned int x)
{
	unsigned int log = 0;

	while (x > 1) {
		x >>= 1;
		log++;
	}
	return log;
}
|
||||
#endif
|
106
tools/testing/selftests/ublk/null.c
Normal file
106
tools/testing/selftests/ublk/null.c
Normal file
@ -0,0 +1,106 @@
|
||||
/* SPDX-License-Identifier: GPL-2.0 */
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
#ifndef IORING_NOP_INJECT_RESULT
|
||||
#define IORING_NOP_INJECT_RESULT (1U << 0)
|
||||
#endif
|
||||
|
||||
#ifndef IORING_NOP_FIXED_BUFFER
|
||||
#define IORING_NOP_FIXED_BUFFER (1U << 3)
|
||||
#endif
|
||||
|
||||
/*
 * Initialize the "null" target: a fixed 250 GiB device with no backing
 * storage.  Fills in the basic block-device parameters only.
 */
static int ublk_null_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
	const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
	unsigned long dev_size = 250UL << 30;	/* 250 GiB, sectors are virtual */

	dev->tgt.dev_size = dev_size;
	dev->tgt.params = (struct ublk_params) {
		.types = UBLK_PARAM_TYPE_BASIC,
		.basic = {
			.logical_bs_shift	= 9,	/* 512B logical blocks */
			.physical_bs_shift	= 12,	/* 4KB physical blocks */
			.io_opt_shift		= 12,
			.io_min_shift		= 9,
			.max_sectors		= info->max_io_buf_bytes >> 9,
			.dev_sectors		= dev_size >> 9,
		},
	};

	/*
	 * Zero-copy issues up to 2 extra SQEs per I/O (buf register +
	 * unregister), so double the ring depth.
	 */
	if (info->flags & UBLK_F_SUPPORT_ZERO_COPY)
		dev->tgt.sq_depth = dev->tgt.cq_depth = 2 * info->queue_depth;
	return 0;
}
|
||||
|
||||
/*
 * Queue one zero-copy "null" I/O as a hard-linked 3-SQE chain:
 *   1) register the kernel I/O buffer into the buffer table (slot = tag),
 *   2) a NOP on that fixed buffer with an injected byte count as result,
 *   3) unregister the buffer.
 * Returns the number of CQEs the caller should expect (2: the register CQE
 * is suppressed via IOSQE_CQE_SKIP_SUCCESS unless it fails).
 */
static int null_queue_zc_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	unsigned ublk_op = ublksrv_get_op(iod);
	struct io_uring_sqe *sqe[3];

	/* NOTE(review): return value not checked; assumes ring depth sized for 3 SQEs */
	ublk_queue_alloc_sqes(q, sqe, 3);

	io_uring_prep_buf_register(sqe[0], 0, tag, q->q_id, tag);
	sqe[0]->user_data = build_user_data(tag,
			ublk_cmd_op_nr(sqe[0]->cmd_op), 0, 1);
	sqe[0]->flags |= IOSQE_CQE_SKIP_SUCCESS | IOSQE_IO_HARDLINK;

	io_uring_prep_nop(sqe[1]);
	sqe[1]->buf_index = tag;	/* fixed buffer registered by sqe[0] */
	sqe[1]->flags |= IOSQE_FIXED_FILE | IOSQE_IO_HARDLINK;
	sqe[1]->rw_flags = IORING_NOP_FIXED_BUFFER | IORING_NOP_INJECT_RESULT;
	sqe[1]->len = iod->nr_sectors << 9; /* injected result */
	sqe[1]->user_data = build_user_data(tag, ublk_op, 0, 1);

	io_uring_prep_buf_unregister(sqe[2], 0, tag, q->q_id, tag);
	sqe[2]->user_data = build_user_data(tag, ublk_cmd_op_nr(sqe[2]->cmd_op), 0, 1);

	// buf register is marked as IOSQE_CQE_SKIP_SUCCESS
	return 2;
}
|
||||
|
||||
/*
 * Completion handler for the zero-copy chain built by null_queue_zc_io().
 * Collects the first failure (or the NOP's injected byte count) into
 * io->result, and completes the ublk I/O when the expected number of CQEs
 * has arrived.
 */
static void ublk_null_io_done(struct ublk_queue *q, int tag,
			      const struct io_uring_cqe *cqe)
{
	unsigned op = user_data_to_op(cqe->user_data);
	struct ublk_io *io = ublk_get_io(q, tag);

	/* keep the first meaningful result; unregister success is ignored */
	if (cqe->res < 0 || op != ublk_cmd_op_nr(UBLK_U_IO_UNREGISTER_IO_BUF)) {
		if (!io->result)
			io->result = cqe->res;
		if (cqe->res < 0)
			ublk_err("%s: io failed op %x user_data %lx\n",
					__func__, op, cqe->user_data);
	}

	/* buffer register op is IOSQE_CQE_SKIP_SUCCESS */
	/* a register CQE only shows up on failure, so it wasn't counted */
	if (op == ublk_cmd_op_nr(UBLK_U_IO_REGISTER_IO_BUF))
		io->tgt_ios += 1;

	if (ublk_completed_tgt_io(q, tag))
		ublk_complete_io(q, tag, io->result);
}
|
||||
|
||||
/*
 * queue_io callback for the null target.  Non-zero-copy I/O completes
 * immediately with the full byte count; zero-copy goes through the
 * register/NOP/unregister SQE chain.
 */
static int ublk_null_queue_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	int zc = ublk_queue_use_zc(q);
	int queued;

	if (!zc) {
		/* nothing to do for /dev/null semantics: claim success */
		ublk_complete_io(q, tag, iod->nr_sectors << 9);
		return 0;
	}

	queued = null_queue_zc_io(q, tag);
	ublk_queued_tgt_io(q, tag, queued);
	return 0;
}
|
||||
|
||||
/* Target-type registration for "null"; no deinit needed (no resources). */
const struct ublk_tgt_ops null_tgt_ops = {
	.name = "null",
	.init_tgt = ublk_null_tgt_init,
	.queue_io = ublk_null_queue_io,
	.tgt_io_done = ublk_null_io_done,
};
|
318
tools/testing/selftests/ublk/stripe.c
Normal file
318
tools/testing/selftests/ublk/stripe.c
Normal file
@ -0,0 +1,318 @@
|
||||
// SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
#include "kublk.h"
|
||||
|
||||
#define NR_STRIPE MAX_BACK_FILES
|
||||
|
||||
/* Device-wide stripe geometry, stored in dev->private_data. */
struct stripe_conf {
	unsigned nr_files;	/* number of backing files (stripe members) */
	unsigned shift;		/* log2(chunk size in bytes) */
};

/* Per-member slice of one ublk I/O after splitting across the stripe. */
struct stripe {
	loff_t start;		/* starting sector within the member file */
	unsigned nr_sects;	/* total sectors routed to this member */
	int seq;		/* member index (0..nr_files-1) for this I/O */

	struct iovec *vec;	/* scatter list into the original I/O buffer */
	unsigned nr_vec;	/* entries used in vec */
	unsigned cap;		/* entries available in vec */
};

/* All member slices for one I/O; _vec[] backs every stripe's vec. */
struct stripe_array {
	struct stripe s[NR_STRIPE];
	unsigned nr;		/* members that actually received data */
	struct iovec _vec[];	/* flexible array: nr_files * per-member vec capacity */
};
|
||||
|
||||
/* Fetch the stripe configuration stashed in the owning device. */
static inline const struct stripe_conf *get_chunk_shift(const struct ublk_queue *q)
{
	return (struct stripe_conf *)q->dev->private_data;
}
|
||||
|
||||
/*
 * Upper bound on iovec entries needed per stripe member for this I/O:
 * one per full stripe unit the I/O spans, plus one for a partial unit.
 */
static inline unsigned calculate_nr_vec(const struct stripe_conf *conf,
		const struct ublksrv_io_desc *iod)
{
	const unsigned shift = conf->shift - 9;		/* chunk size in sectors (log2) */
	const unsigned unit_sects = conf->nr_files << shift; /* full stripe width in sectors */
	loff_t start = iod->start_sector;
	loff_t end = start + iod->nr_sectors;

	return (end / unit_sects) - (start / unit_sects) + 1;
}
|
||||
|
||||
static struct stripe_array *alloc_stripe_array(const struct stripe_conf *conf,
|
||||
const struct ublksrv_io_desc *iod)
|
||||
{
|
||||
unsigned nr_vecs = calculate_nr_vec(conf, iod);
|
||||
unsigned total = nr_vecs * conf->nr_files;
|
||||
struct stripe_array *s;
|
||||
int i;
|
||||
|
||||
s = malloc(sizeof(*s) + total * sizeof(struct iovec));
|
||||
|
||||
s->nr = 0;
|
||||
for (i = 0; i < conf->nr_files; i++) {
|
||||
struct stripe *t = &s->s[i];
|
||||
|
||||
t->nr_vec = 0;
|
||||
t->vec = &s->_vec[i * nr_vecs];
|
||||
t->nr_sects = 0;
|
||||
t->cap = nr_vecs;
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
/* Release a stripe_array from alloc_stripe_array(); the iovecs live inline. */
static void free_stripe_array(struct stripe_array *s)
{
	free(s);
}
|
||||
|
||||
/*
 * Split one I/O across the stripe members.  Walks the sector range chunk by
 * chunk (RAID0-style layout: chunk k of a stripe unit lives on member k),
 * appending an iovec per chunk to the member's slice and accumulating its
 * contiguous [start, nr_sects) range within the member file.
 */
static void calculate_stripe_array(const struct stripe_conf *conf,
		const struct ublksrv_io_desc *iod, struct stripe_array *s)
{
	const unsigned shift = conf->shift - 9;		/* chunk size in sectors (log2) */
	const unsigned chunk_sects = 1 << shift;
	const unsigned unit_sects = conf->nr_files << shift;
	off64_t start = iod->start_sector;
	off64_t end = start + iod->nr_sectors;
	unsigned long done = 0;		/* bytes of the I/O buffer consumed so far */
	unsigned idx = 0;		/* index into s->s[], in stripe order */

	while (start < end) {
		/* sectors left in the current chunk, capped to the I/O end */
		unsigned nr_sects = chunk_sects - (start & (chunk_sects - 1));
		loff_t unit_off = (start / unit_sects) * unit_sects;
		unsigned seq = (start - unit_off) >> shift;	/* member owning this chunk */
		struct stripe *this = &s->s[idx];
		/* position of this chunk inside the member file, in sectors */
		loff_t stripe_off = (unit_off / conf->nr_files) +
			(start & (chunk_sects - 1));

		if (nr_sects > end - start)
			nr_sects = end - start;
		if (this->nr_sects == 0) {
			/* first chunk routed to this member */
			this->nr_sects = nr_sects;
			this->start = stripe_off;
			this->seq = seq;
			s->nr += 1;
		} else {
			/* chunks for one member must stay contiguous in-file */
			assert(seq == this->seq);
			assert(this->start + this->nr_sects == stripe_off);
			this->nr_sects += nr_sects;
		}

		assert(this->nr_vec < this->cap);
		this->vec[this->nr_vec].iov_base = (void *)(iod->addr + done);
		this->vec[this->nr_vec++].iov_len = nr_sects << 9;

		start += nr_sects;
		done += nr_sects << 9;
		idx = (idx + 1) % conf->nr_files;
	}
}
|
||||
|
||||
static inline enum io_uring_op stripe_to_uring_op(const struct ublksrv_io_desc *iod)
|
||||
{
|
||||
unsigned ublk_op = ublksrv_get_op(iod);
|
||||
|
||||
if (ublk_op == UBLK_IO_OP_READ)
|
||||
return IORING_OP_READV;
|
||||
else if (ublk_op == UBLK_IO_OP_WRITE)
|
||||
return IORING_OP_WRITEV;
|
||||
assert(0);
|
||||
}
|
||||
|
||||
static int stripe_queue_tgt_rw_io(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
|
||||
{
|
||||
const struct stripe_conf *conf = get_chunk_shift(q);
|
||||
enum io_uring_op op = stripe_to_uring_op(iod);
|
||||
struct io_uring_sqe *sqe[NR_STRIPE];
|
||||
struct stripe_array *s = alloc_stripe_array(conf, iod);
|
||||
struct ublk_io *io = ublk_get_io(q, tag);
|
||||
int i;
|
||||
|
||||
io->private_data = s;
|
||||
calculate_stripe_array(conf, iod, s);
|
||||
|
||||
ublk_queue_alloc_sqes(q, sqe, s->nr);
|
||||
for (i = 0; i < s->nr; i++) {
|
||||
struct stripe *t = &s->s[i];
|
||||
|
||||
io_uring_prep_rw(op, sqe[i],
|
||||
t->seq + 1,
|
||||
(void *)t->vec,
|
||||
t->nr_vec,
|
||||
t->start << 9);
|
||||
io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
|
||||
/* bit63 marks us as tgt io */
|
||||
sqe[i]->user_data = build_user_data(tag, ublksrv_get_op(iod), i, 1);
|
||||
}
|
||||
return s->nr;
|
||||
}
|
||||
|
||||
/*
 * Fan a ublk FLUSH out as one fdatasync per stripe member.  Returns the
 * number of SQEs queued; completion is gathered in ublk_stripe_io_done().
 */
static int handle_flush(struct ublk_queue *q, const struct ublksrv_io_desc *iod, int tag)
{
	const struct stripe_conf *conf = get_chunk_shift(q);
	struct io_uring_sqe *sqe[NR_STRIPE];
	int i;

	/* NOTE(review): return value not checked; assumes ring sized for nr_files SQEs */
	ublk_queue_alloc_sqes(q, sqe, conf->nr_files);
	for (i = 0; i < conf->nr_files; i++) {
		/* registered file index 0 is /dev/ublkcN, members start at 1 */
		io_uring_prep_fsync(sqe[i], i + 1, IORING_FSYNC_DATASYNC);
		io_uring_sqe_set_flags(sqe[i], IOSQE_FIXED_FILE);
		sqe[i]->user_data = build_user_data(tag, UBLK_IO_OP_FLUSH, 0, 1);
	}
	return conf->nr_files;
}
|
||||
|
||||
/*
 * Dispatch one ublk I/O for the stripe target.  Returns the number of
 * backing I/Os queued, or a negative errno for unsupported/invalid ops.
 */
static int stripe_queue_tgt_io(struct ublk_queue *q, int tag)
{
	const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
	unsigned ublk_op = ublksrv_get_op(iod);
	int ret = 0;

	switch (ublk_op) {
	case UBLK_IO_OP_FLUSH:
		ret = handle_flush(q, iod, tag);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
	case UBLK_IO_OP_DISCARD:
		ret = -ENOTSUP;	/* not implemented for striped files */
		break;
	case UBLK_IO_OP_READ:
	case UBLK_IO_OP_WRITE:
		ret = stripe_queue_tgt_rw_io(q, iod, tag);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	ublk_dbg(UBLK_DBG_IO, "%s: tag %d ublk io %x %llx %u ret %d\n", __func__, tag,
			iod->op_flags, iod->start_sector, iod->nr_sectors << 9, ret);
	return ret;
}
|
||||
|
||||
/* queue_io callback: queue the backing I/Os and record how many to wait for. */
static int ublk_stripe_queue_io(struct ublk_queue *q, int tag)
{
	int queued = stripe_queue_tgt_io(q, tag);

	/* negative 'queued' completes the ublk I/O with that error */
	ublk_queued_tgt_io(q, tag, queued);
	return 0;
}
|
||||
|
||||
static void ublk_stripe_io_done(struct ublk_queue *q, int tag,
|
||||
const struct io_uring_cqe *cqe)
|
||||
{
|
||||
const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
|
||||
unsigned op = user_data_to_op(cqe->user_data);
|
||||
struct ublk_io *io = ublk_get_io(q, tag);
|
||||
int res = cqe->res;
|
||||
|
||||
if (res < 0) {
|
||||
if (!io->result)
|
||||
io->result = res;
|
||||
ublk_err("%s: io failure %d tag %u\n", __func__, res, tag);
|
||||
}
|
||||
|
||||
/* fail short READ/WRITE simply */
|
||||
if (op == UBLK_IO_OP_READ || op == UBLK_IO_OP_WRITE) {
|
||||
unsigned seq = user_data_to_tgt_data(cqe->user_data);
|
||||
struct stripe_array *s = io->private_data;
|
||||
|
||||
if (res < s->s[seq].vec->iov_len)
|
||||
io->result = -EIO;
|
||||
}
|
||||
|
||||
if (ublk_completed_tgt_io(q, tag)) {
|
||||
int res = io->result;
|
||||
|
||||
if (!res)
|
||||
res = iod->nr_sectors << 9;
|
||||
|
||||
ublk_complete_io(q, tag, res);
|
||||
|
||||
free_stripe_array(io->private_data);
|
||||
io->private_data = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
|
||||
{
|
||||
struct ublk_params p = {
|
||||
.types = UBLK_PARAM_TYPE_BASIC,
|
||||
.basic = {
|
||||
.attrs = UBLK_ATTR_VOLATILE_CACHE,
|
||||
.logical_bs_shift = 9,
|
||||
.physical_bs_shift = 12,
|
||||
.io_opt_shift = 12,
|
||||
.io_min_shift = 9,
|
||||
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
|
||||
},
|
||||
};
|
||||
unsigned chunk_size = ctx->chunk_size;
|
||||
struct stripe_conf *conf;
|
||||
unsigned chunk_shift;
|
||||
loff_t bytes = 0;
|
||||
int ret, i;
|
||||
|
||||
if ((chunk_size & (chunk_size - 1)) || !chunk_size) {
|
||||
ublk_err("invalid chunk size %u\n", chunk_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (chunk_size < 4096 || chunk_size > 512 * 1024) {
|
||||
ublk_err("invalid chunk size %u\n", chunk_size);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
chunk_shift = ilog2(chunk_size);
|
||||
|
||||
ret = backing_file_tgt_init(dev);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (!dev->tgt.nr_backing_files || dev->tgt.nr_backing_files > NR_STRIPE)
|
||||
return -EINVAL;
|
||||
|
||||
assert(dev->nr_fds == dev->tgt.nr_backing_files + 1);
|
||||
|
||||
for (i = 0; i < dev->tgt.nr_backing_files; i++)
|
||||
dev->tgt.backing_file_size[i] &= ~((1 << chunk_shift) - 1);
|
||||
|
||||
for (i = 0; i < dev->tgt.nr_backing_files; i++) {
|
||||
unsigned long size = dev->tgt.backing_file_size[i];
|
||||
|
||||
if (size != dev->tgt.backing_file_size[0])
|
||||
return -EINVAL;
|
||||
bytes += size;
|
||||
}
|
||||
|
||||
conf = malloc(sizeof(*conf));
|
||||
conf->shift = chunk_shift;
|
||||
conf->nr_files = dev->tgt.nr_backing_files;
|
||||
|
||||
dev->private_data = conf;
|
||||
dev->tgt.dev_size = bytes;
|
||||
p.basic.dev_sectors = bytes >> 9;
|
||||
dev->tgt.params = p;
|
||||
dev->tgt.sq_depth = dev->dev_info.queue_depth * conf->nr_files;
|
||||
dev->tgt.cq_depth = dev->dev_info.queue_depth * conf->nr_files;
|
||||
|
||||
printf("%s: shift %u files %u\n", __func__, conf->shift, conf->nr_files);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Tear down the stripe target: free the config and close backing files. */
static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
{
	free(dev->private_data);	/* the stripe_conf from init */
	backing_file_tgt_deinit(dev);
}
|
||||
|
||||
/* Target-type registration for "stripe" (RAID0 over backing files). */
const struct ublk_tgt_ops stripe_tgt_ops = {
	.name = "stripe",
	.init_tgt = ublk_stripe_tgt_init,
	.deinit_tgt = ublk_stripe_tgt_deinit,
	.queue_io = ublk_stripe_queue_io,
	.tgt_io_done = ublk_stripe_io_done,
};
|
246
tools/testing/selftests/ublk/test_common.sh
Executable file
246
tools/testing/selftests/ublk/test_common.sh
Executable file
@ -0,0 +1,246 @@
|
||||
#!/bin/bash
|
||||
# SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
UBLK_SKIP_CODE=4
|
||||
|
||||
# Succeed (0) when command "$1" is resolvable, fail (1) otherwise.
_have_program() {
	command -v "$1" > /dev/null 2>&1
}
|
||||
|
||||
# Print the dev_t of /dev/ublkb<dev_id> encoded the way the kernel packs it
# (major in bits 20+, minor in the low 20 bits) for use with bpftrace.
_get_disk_dev_t() {
	local dev_id=$1
	local dev
	local major
	local minor

	dev=/dev/ublkb"${dev_id}"
	major=$(stat -c '%Hr' "$dev")
	minor=$(stat -c '%Lr' "$dev")

	echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
|
||||
|
||||
# Create a sparse backing file of size $1 (e.g. 256M) in the current
# directory and print its name.
_create_backfile() {
	local my_size=$1
	local my_file

	my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
	truncate -s "${my_size}" "${my_file}"
	echo "$my_file"
}
|
||||
|
||||
# Delete backing file $1 if it exists (non-zero status when it does not).
_remove_backfile() {
	local file=$1

	[ -f "$file" ] && rm -f "$file"
}
|
||||
|
||||
# Create a scratch directory in the current directory and print its name.
_create_tmp_dir() {
	local my_file;

	my_file=$(mktemp -d ublk_dir_XXXXX)
	echo "$my_file"
}
|
||||
|
||||
# Remove scratch directory $1 if present (rmdir: must already be empty).
_remove_tmp_dir() {
	local dir=$1

	[ -d "$dir" ] && rmdir "$dir"
}
|
||||
|
||||
# Smoke-test block device $1: mkfs.ext4, mount, umount.  Returns the first
# failing step's exit code (mount failure surfaces via the umount status).
_mkfs_mount_test()
{
	local dev=$1
	local err_code=0
	local mnt_dir;

	mnt_dir=$(_create_tmp_dir)
	mkfs.ext4 -F "$dev" > /dev/null 2>&1
	err_code=$?
	if [ $err_code -ne 0 ]; then
		return $err_code
	fi

	mount -t ext4 "$dev" "$mnt_dir" > /dev/null 2>&1
	umount "$dev"
	err_code=$?
	_remove_tmp_dir "$mnt_dir"
	if [ $err_code -ne 0 ]; then
		return $err_code
	fi
}
|
||||
|
||||
# Exit with the kselftest skip code (4) unless running as root.
_check_root() {
	local ksft_skip=4

	if [ $UID != 0 ]; then
		echo please run this as root >&2
		exit $ksft_skip
	fi
}
|
||||
|
||||
# Delete every ublk device and unload the driver (best-effort).
_remove_ublk_devices() {
	${UBLK_PROG} del -a
	modprobe -r ublk_drv > /dev/null 2>&1
}
|
||||
|
||||
# Print the state (e.g. LIVE/DEAD) of ublk device $1, scraped from
# 'kublk list' output; field position is tied to that output format.
_get_ublk_dev_state() {
	${UBLK_PROG} list -n "$1" | grep "state" | awk '{print $11}'
}
|
||||
|
||||
# Print the daemon pid of ublk device $1, scraped from 'kublk list' output.
_get_ublk_daemon_pid() {
	${UBLK_PROG} list -n "$1" | grep "pid" | awk '{print $7}'
}
|
||||
|
||||
# Common test preamble: require root, load the driver, and announce the
# test ("$1" = target type, rest = description) unless quiet.
_prep_test() {
	_check_root
	local type=$1
	shift 1
	modprobe ublk_drv > /dev/null 2>&1
	[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
}
|
||||
|
||||
# Remove each listed regular file, ignoring names that do not exist.
_remove_test_files()
{
	local files=$*

	for file in ${files}; do
		[ -f "${file}" ] && rm -f "${file}"
	done
}
|
||||
|
||||
# Report test "$1" with status "$2" (0=PASS, 4=SKIP, else FAIL) and exit
# with that status when non-zero.
_show_result()
{
	if [ "$UBLK_TEST_SHOW_RESULT" -ne 0 ]; then
		if [ "$2" -eq 0 ]; then
			echo "$1 : [PASS]"
		elif [ "$2" -eq 4 ]; then
			echo "$1 : [SKIP]"
		else
			echo "$1 : [FAIL]"
		fi
	fi
	[ "$2" -ne 0 ] && exit "$2"
	return 0
}
|
||||
|
||||
# don't call from sub-shell, otherwise can't exit
|
||||
# After _add_ublk_dev: on failure code $2, clean up the remaining args
# (backing files) and report/exit via _show_result for test $1.
_check_add_dev()
{
	local tid=$1
	local code=$2
	shift 2
	if [ "${code}" -ne 0 ]; then
		_remove_test_files "$@"
		_show_result "${tid}" "${code}"
	fi
}
|
||||
|
||||
# Common test epilogue: delete all ublk devices and the shared temp file.
_cleanup_test() {
	"${UBLK_PROG}" del -a
	rm -f "$UBLK_TMP"
}
|
||||
|
||||
# Succeed when the ublk driver advertises feature "$1" in 'kublk features'.
_have_feature()
{
	if $UBLK_PROG "features" | grep "$1" > /dev/null 2>&1; then
		return 0
	fi
	return 1
}
|
||||
|
||||
# Create a ublk device with 'kublk add "$@"' and print its dev id.
# Skips (code 4) when the control node is missing or -z is requested
# without ZERO_COPY support; returns 255 on add failure.
_add_ublk_dev() {
	local kublk_temp;
	local dev_id;

	if [ ! -c /dev/ublk-control ]; then
		return ${UBLK_SKIP_CODE}
	fi
	if echo "$@" | grep -q "\-z"; then
		if ! _have_feature "ZERO_COPY"; then
			return ${UBLK_SKIP_CODE}
		fi
	fi

	kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
	if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
		echo "fail to add ublk dev $*"
		rm -f "${kublk_temp}"
		return 255
	fi

	# parse "dev id N" from kublk's output
	dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
	udevadm settle
	rm -f "${kublk_temp}"
	echo "${dev_id}"
}
|
||||
|
||||
# kill the ublk daemon and return ublk device state
|
||||
# kill the ublk daemon and return ublk device state
# Repeatedly SIGKILLs the daemon of device $1 until the device reaches
# state $2 or ~50s elapse; prints the final state.
__ublk_kill_daemon()
{
	local dev_id=$1
	local exp_state=$2
	local daemon_pid
	local state

	daemon_pid=$(_get_ublk_daemon_pid "${dev_id}")
	state=$(_get_ublk_dev_state "${dev_id}")

	for ((j=0;j<50;j++)); do
		[ "$state" == "$exp_state" ] && break
		kill -9 "$daemon_pid" > /dev/null 2>&1
		sleep 1
		state=$(_get_ublk_dev_state "${dev_id}")
	done
	echo "$state"
}
|
||||
|
||||
# Delete ublk device $1, wait for udev, and return kublk's exit status.
__remove_ublk_dev_return() {
	local dev_id=$1

	${UBLK_PROG} del -n "${dev_id}"
	local res=$?
	udevadm settle
	return ${res}
}
|
||||
|
||||
# Run background fio against /dev/ublkb$1 (size $2), optionally kill the
# daemon ($3 = "yes", expecting state DEAD), then delete the device while
# I/O is in flight.  Returns 255 on any failure.
__run_io_and_remove()
{
	local dev_id=$1
	local size=$2
	local kill_server=$3

	fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
		--rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
		--runtime=20 --time_based > /dev/null 2>&1 &
	sleep 2
	if [ "${kill_server}" = "yes" ]; then
		local state
		state=$(__ublk_kill_daemon "${dev_id}" "DEAD")
		if [ "$state" != "DEAD" ]; then
			echo "device isn't dead($state) after killing daemon"
			return 255
		fi
	fi
	if ! __remove_ublk_dev_return "${dev_id}"; then
		echo "delete dev ${dev_id} failed"
		return 255
	fi
	# reap the background fio before returning
	wait
}
|
||||
|
||||
# Print the absolute directory containing the running test script.
_ublk_test_top_dir()
{
	cd "$(dirname "$0")" && pwd
}
|
||||
|
||||
UBLK_TMP=$(mktemp ublk_test_XXXXX)
|
||||
UBLK_PROG=$(_ublk_test_top_dir)/kublk
|
||||
UBLK_TEST_QUIET=1
|
||||
UBLK_TEST_SHOW_RESULT=1
|
||||
export UBLK_PROG
|
||||
export UBLK_TEST_QUIET
|
||||
export UBLK_TEST_SHOW_RESULT
|
44
tools/testing/selftests/ublk/test_generic_01.sh
Executable file
44
tools/testing/selftests/ublk/test_generic_01.sh
Executable file
@ -0,0 +1,44 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# generic_01: verify the ublk driver dispatches sequential writes in order,
# using a bpftrace probe (trace/seq_io.bt) that flags out-of-order I/O.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="generic_01"
ERR_CODE=0

if ! _have_program bpftrace; then
	exit "$UBLK_SKIP_CODE"
fi

_prep_test "null" "sequential io order"

dev_id=$(_add_ublk_dev -t null)
_check_add_dev $TID $?

# watch writes ("W") on this device's dev_t; hits land in $UBLK_TMP
dev_t=$(_get_disk_dev_t "$dev_id")
bpftrace trace/seq_io.bt "$dev_t" "W" 1 > "$UBLK_TMP" 2>&1 &
btrace_pid=$!
sleep 2

# skip if bpftrace failed to attach
if ! kill -0 "$btrace_pid" > /dev/null 2>&1; then
	_cleanup_test "null"
	exit "$UBLK_SKIP_CODE"
fi

# run fio over this ublk disk
fio --name=write_seq \
	--filename=/dev/ublkb"${dev_id}" \
	--ioengine=libaio --iodepth=16 \
	--rw=write \
	--size=512M \
	--direct=1 \
	--bs=4k > /dev/null 2>&1
ERR_CODE=$?
kill "$btrace_pid"
wait
# any "io_out_of_order" line from the probe means the test failed
if grep -q "io_out_of_order" "$UBLK_TMP"; then
	cat "$UBLK_TMP"
	ERR_CODE=255
fi
_cleanup_test "null"
_show_result $TID $ERR_CODE
|
32
tools/testing/selftests/ublk/test_loop_01.sh
Executable file
32
tools/testing/selftests/ublk/test_loop_01.sh
Executable file
@ -0,0 +1,32 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# loop_01: loop-backed ublk device, write + CRC32C verify via fio.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="loop_01"
ERR_CODE=0

_prep_test "loop" "write and verify test"

# 256M sparse file backs the loop target
backfile_0=$(_create_backfile 256M)

dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "${backfile_0}"

# run fio over the ublk disk
fio --name=write_and_verify \
    --filename=/dev/ublkb"${dev_id}" \
    --ioengine=libaio --iodepth=16 \
    --rw=write \
    --size=256M \
    --direct=1 \
    --verify=crc32c \
    --do_verify=1 \
    --bs=4k > /dev/null 2>&1
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE
|
22
tools/testing/selftests/ublk/test_loop_02.sh
Executable file
22
tools/testing/selftests/ublk/test_loop_02.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# loop_02: loop-backed ublk device must host an ext4 fs (mkfs/mount/umount).

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="loop_02"
ERR_CODE=0

_prep_test "loop" "mkfs & mount & umount"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop "$backfile_0")
_check_add_dev $TID $? "$backfile_0"

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE
|
31
tools/testing/selftests/ublk/test_loop_03.sh
Executable file
31
tools/testing/selftests/ublk/test_loop_03.sh
Executable file
@ -0,0 +1,31 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# loop_03: same as loop_01 but with zero-copy (-z); skipped automatically
# by _add_ublk_dev when the driver lacks ZERO_COPY.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="loop_03"
ERR_CODE=0

_prep_test "loop" "write and verify over zero copy"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"

# run fio over the ublk disk
fio --name=write_and_verify \
	--filename=/dev/ublkb"${dev_id}" \
	--ioengine=libaio --iodepth=64 \
	--rw=write \
	--size=256M \
	--direct=1 \
	--verify=crc32c \
	--do_verify=1 \
	--bs=4k > /dev/null 2>&1
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE
|
22
tools/testing/selftests/ublk/test_loop_04.sh
Executable file
22
tools/testing/selftests/ublk/test_loop_04.sh
Executable file
@ -0,0 +1,22 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# loop_04: same as loop_02 (mkfs/mount/umount) but with zero-copy (-z).

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="loop_04"
ERR_CODE=0

_prep_test "loop" "mkfs & mount & umount with zero copy"

backfile_0=$(_create_backfile 256M)
dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
_check_add_dev $TID $? "$backfile_0"

_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "loop"

_remove_backfile "$backfile_0"

_show_result $TID $ERR_CODE
|
20
tools/testing/selftests/ublk/test_null_01.sh
Executable file
20
tools/testing/selftests/ublk/test_null_01.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# null_01: basic read/write fio pass against a null ublk target.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="null_01"
ERR_CODE=0

_prep_test "null" "basic IO test"

dev_id=$(_add_ublk_dev -t null)
_check_add_dev $TID $?

# run fio over the two disks
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
ERR_CODE=$?

_cleanup_test "null"

_show_result $TID $ERR_CODE
|
20
tools/testing/selftests/ublk/test_null_02.sh
Executable file
20
tools/testing/selftests/ublk/test_null_02.sh
Executable file
@ -0,0 +1,20 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# null_02: basic read/write fio pass against a null target with zero-copy.

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="null_02"
ERR_CODE=0

_prep_test "null" "basic IO test with zero copy"

dev_id=$(_add_ublk_dev -t null -z)
_check_add_dev $TID $?

# run fio over the two disks
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio --rw=readwrite --iodepth=32 --size=256M > /dev/null 2>&1
ERR_CODE=$?

_cleanup_test "null"

_show_result $TID $ERR_CODE
|
47
tools/testing/selftests/ublk/test_stress_01.sh
Executable file
47
tools/testing/selftests/ublk/test_stress_01.sh
Executable file
@ -0,0 +1,47 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
# stress_01: remove a ublk device while fio I/O is in flight, for the
# null target and for loop (with and without zero-copy).

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
DEV_ID=-1

# Create a device from "$@", start background I/O of $1 bytes, then delete
# the device without stopping the I/O first.
ublk_io_and_remove()
{
	local size=$1
	shift 1
	local backfile=""
	if echo "$@" | grep -q "loop"; then
		backfile=${*: -1}	# loop targets pass the backing file last
	fi
	DEV_ID=$(_add_ublk_dev "$@")
	_check_add_dev $TID $? "${backfile}"

	[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
	if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
		echo "/dev/ublkc${DEV_ID} isn't removed"
		_remove_backfile "${backfile}"
		exit 255
	fi
}

_prep_test "stress" "run IO and remove device"

ublk_io_and_remove 8G -t null
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
	_show_result $TID $ERR_CODE
fi

BACK_FILE=$(_create_backfile 256M)
ublk_io_and_remove 256M -t loop "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
	_show_result $TID $ERR_CODE
fi

ublk_io_and_remove 256M -t loop -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
|
47
tools/testing/selftests/ublk/test_stress_02.sh
Executable file
47
tools/testing/selftests/ublk/test_stress_02.sh
Executable file
@ -0,0 +1,47 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
DEV_ID=-1

# ublk_io_and_kill_daemon <size> <ublk add args...>
# Create a ublk device with the given arguments, run IO against it and
# kill the ublk server while IO is inflight. Exits 255 if the char
# device /dev/ublkc<id> is still present afterwards.
ublk_io_and_kill_daemon()
{
	local size=$1
	shift 1
	local backfile=""
	# the loop target carries its backing file as the last argument
	if echo "$@" | grep -q "loop"; then
		backfile=${*: -1}
	fi
	DEV_ID=$(_add_ublk_dev "$@")
	_check_add_dev $TID $? "${backfile}"

	[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
	if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
		# fix: the original message referenced ${res}, which is never
		# defined in this script
		echo "/dev/ublkc${DEV_ID} isn't removed"
		_remove_backfile "${backfile}"
		exit 255
	fi
}

_prep_test "stress" "run IO and kill ublk server"

ublk_io_and_kill_daemon 8G -t null
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
	_show_result $TID $ERR_CODE
fi

BACK_FILE=$(_create_backfile 256M)
ublk_io_and_kill_daemon 256M -t loop "${BACK_FILE}"
ERR_CODE=$?
if [ ${ERR_CODE} -ne 0 ]; then
	_show_result $TID $ERR_CODE
fi

ublk_io_and_kill_daemon 256M -t loop -z "${BACK_FILE}"
ERR_CODE=$?
_cleanup_test "stress"
_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
|
34
tools/testing/selftests/ublk/test_stripe_01.sh
Executable file
34
tools/testing/selftests/ublk/test_stripe_01.sh
Executable file
@ -0,0 +1,34 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="stripe_01"
ERR_CODE=0

_prep_test "stripe" "write and verify test"

backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)

dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
# fix: pass both backing files so _check_add_dev can clean them both up
# on failure (matches test_stripe_02.sh); the original only passed
# backfile_0 and leaked backfile_1
_check_add_dev $TID $? "${backfile_0}" "${backfile_1}"

# run fio write+verify over the striped ublk disk
fio --name=write_and_verify \
    --filename=/dev/ublkb"${dev_id}" \
    --ioengine=libaio --iodepth=32 \
    --rw=write \
    --size=512M \
    --direct=1 \
    --verify=crc32c \
    --do_verify=1 \
    --bs=4k > /dev/null 2>&1
ERR_CODE=$?

_cleanup_test "stripe"

_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE
|
24
tools/testing/selftests/ublk/test_stripe_02.sh
Executable file
24
tools/testing/selftests/ublk/test_stripe_02.sh
Executable file
@ -0,0 +1,24 @@
|
||||
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0

. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh

TID="stripe_02"
ERR_CODE=0

_prep_test "stripe" "mkfs & mount & umount"

# two backing files make up the striped device
backfile_0=$(_create_backfile 256M)
backfile_1=$(_create_backfile 256M)

dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
_check_add_dev $TID $? "$backfile_0" "$backfile_1"

# exercise the block device through a filesystem round trip
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?

_cleanup_test "stripe"
_remove_backfile "$backfile_0"
_remove_backfile "$backfile_1"

_show_result $TID $ERR_CODE
|
25
tools/testing/selftests/ublk/trace/seq_io.bt
Normal file
25
tools/testing/selftests/ublk/trace/seq_io.bt
Normal file
@ -0,0 +1,25 @@
|
||||
/*
 * Check that completions for a given (device, rwbs) pair arrive in
 * sequential sector order.
 *
 * $1: dev_t
 * $2: RWBS
 * $3: strlen($2)
 */
BEGIN {
	@last_rw[$1, str($2)] = 0;
}
tracepoint:block:block_rq_complete
{
	$dev = $1;
	if ((int64)args.dev == $1 && !strncmp(args.rwbs, str($2), $3)) {
		$last = @last_rw[$dev, str($2)];
		if ((uint64)args.sector != $last) {
			/*
			 * fix: arguments were swapped relative to the
			 * labels — $last is the expected next sector,
			 * args.sector is the one actually observed
			 */
			printf("io_out_of_order: exp %llu actual %llu\n",
			    $last, args.sector);
		}
		@last_rw[$dev, str($2)] = (args.sector + args.nr_sector);
	}
	@ios = count();
}

END {
	clear(@last_rw);
}
|
18
tools/testing/selftests/ublk/ublk_dep.h
Normal file
18
tools/testing/selftests/ublk/ublk_dep.h
Normal file
@ -0,0 +1,18 @@
|
||||
#ifndef UBLK_DEP_H
#define UBLK_DEP_H

/*
 * Fallback definitions so the selftests build against older kernel UAPI
 * headers that predate these ublk additions. Each is only defined when
 * the installed header does not already provide it.
 */

/* presumably the zero-copy buffer (un)register commands — TODO confirm
 * values against include/uapi/linux/ublk_cmd.h */
#ifndef UBLK_U_IO_REGISTER_IO_BUF
#define UBLK_U_IO_REGISTER_IO_BUF \
	_IOWR('u', 0x23, struct ublksrv_io_cmd)
#define UBLK_U_IO_UNREGISTER_IO_BUF \
	_IOWR('u', 0x24, struct ublksrv_io_cmd)
#endif

/* feature flag bit 9 */
#ifndef UBLK_F_USER_RECOVERY_FAIL_IO
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)
#endif

/* feature flag bit 8 */
#ifndef UBLK_F_ZONED
#define UBLK_F_ZONED (1ULL << 8)
#endif
#endif
|
Loading…
x
Reference in New Issue
Block a user