mirror of
https://github.com/torvalds/linux.git
synced 2025-04-09 14:45:27 +00:00

This eliminates several redundant atomic reads and therefore reduces
the duration the surrounding spinlocks are held.

In several io_uring benchmarks, this reduced the CPU time spent in
queued_spin_lock_slowpath() considerably:

io_uring benchmark with a flood of `IORING_OP_NOP` and `IOSQE_ASYNC`:

    38.86%     -1.49%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     6.75%     +0.36%  [kernel.kallsyms]  [k] io_worker_handle_work
     2.60%     +0.19%  [kernel.kallsyms]  [k] io_nop
     3.92%     +0.18%  [kernel.kallsyms]  [k] io_req_task_complete
     6.34%     -0.18%  [kernel.kallsyms]  [k] io_wq_submit_work

HTTP server, static file:

    42.79%     -2.77%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     2.08%     +0.23%  [kernel.kallsyms]  [k] io_wq_submit_work
     1.19%     +0.20%  [kernel.kallsyms]  [k] amd_iommu_iotlb_sync_map
     1.46%     +0.15%  [kernel.kallsyms]  [k] ep_poll_callback
     1.80%     +0.15%  [kernel.kallsyms]  [k] io_worker_handle_work

HTTP server, PHP:

    35.03%     -1.80%  [kernel.kallsyms]  [k] queued_spin_lock_slowpath
     0.84%     +0.21%  [kernel.kallsyms]  [k] amd_iommu_iotlb_sync_map
     1.39%     +0.12%  [kernel.kallsyms]  [k] _copy_to_iter
     0.21%     +0.10%  [kernel.kallsyms]  [k] update_sd_lb_stats

Signed-off-by: Max Kellermann <max.kellermann@ionos.com>
Link: https://lore.kernel.org/r/20250128133927.3989681-5-max.kellermann@ionos.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
90 lines
2.2 KiB
C
90 lines
2.2 KiB
C
#ifndef INTERNAL_IO_WQ_H
#define INTERNAL_IO_WQ_H

#include <linux/refcount.h>
#include <linux/io_uring_types.h>

/* Only pointers to struct io_wq are used in this header. */
struct io_wq;
|
|
|
|
/*
 * Bit flags for a work item's flags word; io_wq_is_hashed() tests
 * IO_WQ_WORK_HASHED against work->flags. The value 8 is not assigned
 * in this header.
 */
enum {
	IO_WQ_WORK_CANCEL	= 1,
	IO_WQ_WORK_HASHED	= 2,
	IO_WQ_WORK_UNBOUND	= 4,
	IO_WQ_WORK_CONCURRENT	= 16,

	IO_WQ_HASH_SHIFT	= 24,	/* upper 8 bits are used for hash key */
};
|
|
|
|
/* Result codes returned by io_wq_cancel_cb(). */
enum io_wq_cancel {
	IO_WQ_CANCEL_OK,	/* cancelled before started */
	IO_WQ_CANCEL_RUNNING,	/* found, running, and attempted cancelled */
	IO_WQ_CANCEL_NOTFOUND,	/* work not found */
};
|
|
|
|
/*
 * Callback types stored in struct io_wq_data.
 *
 * free_work_fn takes a work item and returns a work item pointer.
 * NOTE(review): the meaning of the returned pointer is decided by the
 * out-of-view implementation; confirm before relying on it.
 *
 * io_wq_work_fn takes a work item and returns nothing.
 */
typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *work);
typedef void (io_wq_work_fn)(struct io_wq_work *work);
|
|
|
|
struct io_wq_hash {
|
|
refcount_t refs;
|
|
unsigned long map;
|
|
struct wait_queue_head wait;
|
|
};
|
|
|
|
static inline void io_wq_put_hash(struct io_wq_hash *hash)
|
|
{
|
|
if (refcount_dec_and_test(&hash->refs))
|
|
kfree(hash);
|
|
}
|
|
|
|
struct io_wq_data {
|
|
struct io_wq_hash *hash;
|
|
struct task_struct *task;
|
|
io_wq_work_fn *do_work;
|
|
free_work_fn *free_work;
|
|
};
|
|
|
|
/*
 * Workqueue creation and exit entry points. The definitions are not in
 * this header.
 */
struct io_wq *io_wq_create(unsigned int bounded, struct io_wq_data *data);
void io_wq_exit_start(struct io_wq *wq);
void io_wq_put_and_exit(struct io_wq *wq);
|
|
|
|
/* Work submission helpers. The definitions are not in this header. */
void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
void io_wq_hash_work(struct io_wq_work *work, void *val);
|
|
|
|
/*
 * Worker configuration and state queries. The definitions are not in
 * this header.
 */
int io_wq_cpu_affinity(struct io_uring_task *tctx, cpumask_var_t mask);
int io_wq_max_workers(struct io_wq *wq, int *new_count);
bool io_wq_worker_stopped(void);
|
|
|
|
static inline bool __io_wq_is_hashed(unsigned int work_flags)
|
|
{
|
|
return work_flags & IO_WQ_WORK_HASHED;
|
|
}
|
|
|
|
static inline bool io_wq_is_hashed(struct io_wq_work *work)
|
|
{
|
|
return __io_wq_is_hashed(atomic_read(&work->flags));
|
|
}
|
|
|
|
/*
 * Callback type passed to io_wq_cancel_cb(): receives a work item and
 * an untyped pointer, and returns a bool.
 * NOTE(review): presumably returns true for work that should be
 * cancelled, with the caller's @data forwarded as the second argument;
 * confirm against the implementation.
 */
typedef bool (work_cancel_fn)(struct io_wq_work *work, void *data);

enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
				  void *data, bool cancel_all);
|
|
|
|
/*
 * Worker sleeping/running hooks. With CONFIG_IO_WQ they are defined
 * outside this header; without it they compile to empty inline stubs,
 * so callers need no #ifdef of their own.
 */
#if defined(CONFIG_IO_WQ)
void io_wq_worker_sleeping(struct task_struct *tsk);
void io_wq_worker_running(struct task_struct *tsk);
#else
static inline void io_wq_worker_sleeping(struct task_struct *tsk)
{
}
static inline void io_wq_worker_running(struct task_struct *tsk)
{
}
#endif
|
|
|
|
static inline bool io_wq_current_is_worker(void)
|
|
{
|
|
return in_task() && (current->flags & PF_IO_WORKER) &&
|
|
current->worker_private;
|
|
}
|
|
#endif
|