mirror of
https://github.com/torvalds/linux.git
synced 2025-04-12 16:47:42 +00:00

A task wakeup can be either processed on the waker's CPU or bounced to the wakee's previous CPU using an IPI (ttwu_queue). Bouncing to the wakee's CPU avoids the waker's CPU locking and accessing the wakee's rq which can be expensive across cache and node boundaries. When ttwu_queue path is taken, select_task_rq() and thus ops.select_cpu() may be skipped in some cases (racing against the wakee switching out). As this confused some BPF schedulers, there wasn't a good way for a BPF scheduler to tell whether idle CPU selection has been skipped, ops.enqueue() couldn't insert tasks into foreign local DSQs, and the performance difference on machines with simple topologies was minimal, sched_ext disabled ttwu_queue. However, this optimization makes a noticeable difference on more complex topologies, and a BPF scheduler now has an easy way to tell whether ops.select_cpu() was skipped since 9b671793c7d9 ("sched_ext, scx_qmap: Add and use SCX_ENQ_CPU_SELECTED") and can insert tasks into foreign local DSQs since 5b26f7b920f7 ("sched_ext: Allow SCX_DSQ_LOCAL_ON for direct dispatches"). Implement SCX_OPS_ALLOW_QUEUED_WAKEUP which allows BPF schedulers to choose to enable the ttwu_queue optimization. v2: Update the patch description and comment re. ops.select_cpu() being skipped in some cases as opposed to always as per Neel. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Neel Natu <neelnatu@google.com> Reported-by: Barret Rhoden <brho@google.com> Cc: Peter Zijlstra (Intel) <peterz@infradead.org> Acked-by: Andrea Righi <arighi@nvidia.com>
102 lines
3.8 KiB
C
102 lines
3.8 KiB
C
/* SPDX-License-Identifier: GPL-2.0 */
|
|
/*
|
|
* BPF extensible scheduler class: Documentation/scheduler/sched-ext.rst
|
|
*
|
|
* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
|
|
* Copyright (c) 2022 Tejun Heo <tj@kernel.org>
|
|
* Copyright (c) 2022 David Vernet <dvernet@meta.com>
|
|
*/
|
|
#ifdef CONFIG_SCHED_CLASS_EXT
|
|
|
|
DECLARE_STATIC_KEY_FALSE(scx_ops_allow_queued_wakeup);
|
|
|
|
void scx_tick(struct rq *rq);
|
|
void init_scx_entity(struct sched_ext_entity *scx);
|
|
void scx_pre_fork(struct task_struct *p);
|
|
int scx_fork(struct task_struct *p);
|
|
void scx_post_fork(struct task_struct *p);
|
|
void scx_cancel_fork(struct task_struct *p);
|
|
bool scx_can_stop_tick(struct rq *rq);
|
|
void scx_rq_activate(struct rq *rq);
|
|
void scx_rq_deactivate(struct rq *rq);
|
|
int scx_check_setscheduler(struct task_struct *p, int policy);
|
|
bool task_should_scx(int policy);
|
|
void init_sched_ext_class(void);
|
|
|
|
/*
 * Return the cpuperf target set for @cpu's rq by the BPF scheduler, or 0
 * when sched_ext is not enabled.
 */
static inline u32 scx_cpuperf_target(s32 cpu)
{
	if (!scx_enabled())
		return 0;

	return cpu_rq(cpu)->scx.cpuperf_target;
}
|
|
|
|
static inline bool task_on_scx(const struct task_struct *p)
|
|
{
|
|
return scx_enabled() && p->sched_class == &ext_sched_class;
|
|
}
|
|
|
|
static inline bool scx_allow_ttwu_queue(const struct task_struct *p)
|
|
{
|
|
return !scx_enabled() ||
|
|
static_branch_likely(&scx_ops_allow_queued_wakeup) ||
|
|
p->sched_class != &ext_sched_class;
|
|
}
|
|
|
|
#ifdef CONFIG_SCHED_CORE
|
|
bool scx_prio_less(const struct task_struct *a, const struct task_struct *b,
|
|
bool in_fi);
|
|
#endif
|
|
|
|
#else /* CONFIG_SCHED_CLASS_EXT */
|
|
|
|
/*
 * !CONFIG_SCHED_CLASS_EXT: no-op stubs so that callers in the core
 * scheduler don't need to be #ifdef'd.
 */
static inline void scx_tick(struct rq *rq) {}
static inline void scx_pre_fork(struct task_struct *p) {}
static inline int scx_fork(struct task_struct *p) { return 0; }
static inline void scx_post_fork(struct task_struct *p) {}
static inline void scx_cancel_fork(struct task_struct *p) {}
static inline u32 scx_cpuperf_target(s32 cpu) { return 0; }
static inline bool scx_can_stop_tick(struct rq *rq) { return true; }
static inline void scx_rq_activate(struct rq *rq) {}
static inline void scx_rq_deactivate(struct rq *rq) {}
static inline int scx_check_setscheduler(struct task_struct *p, int policy) { return 0; }
static inline bool task_on_scx(const struct task_struct *p) { return false; }
static inline bool scx_allow_ttwu_queue(const struct task_struct *p) { return true; }
static inline void init_sched_ext_class(void) {}
|
|
|
|
#endif /* CONFIG_SCHED_CLASS_EXT */
|
|
|
|
#if defined(CONFIG_SCHED_CLASS_EXT) && defined(CONFIG_SMP)
|
|
void __scx_update_idle(struct rq *rq, bool idle, bool do_notify);
|
|
|
|
/*
 * Propagate @rq's idle state transition to sched_ext. No-op unless
 * sched_ext is enabled.
 */
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify)
{
	if (!scx_enabled())
		return;

	__scx_update_idle(rq, idle, do_notify);
}
|
|
#else
|
|
/* !CONFIG_SCHED_CLASS_EXT || !CONFIG_SMP: idle tracking is a no-op */
static inline void scx_update_idle(struct rq *rq, bool idle, bool do_notify) {}
|
|
#endif
|
|
|
|
#ifdef CONFIG_CGROUP_SCHED
|
|
#ifdef CONFIG_EXT_GROUP_SCHED
|
|
int scx_tg_online(struct task_group *tg);
|
|
void scx_tg_offline(struct task_group *tg);
|
|
int scx_cgroup_can_attach(struct cgroup_taskset *tset);
|
|
void scx_cgroup_move_task(struct task_struct *p);
|
|
void scx_cgroup_finish_attach(void);
|
|
void scx_cgroup_cancel_attach(struct cgroup_taskset *tset);
|
|
void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight);
|
|
void scx_group_set_idle(struct task_group *tg, bool idle);
|
|
#else /* CONFIG_EXT_GROUP_SCHED */
|
|
/*
 * !CONFIG_EXT_GROUP_SCHED: no-op stubs so that cgroup scheduling code
 * doesn't need to be #ifdef'd.
 */
static inline int scx_tg_online(struct task_group *tg) { return 0; }
static inline void scx_tg_offline(struct task_group *tg) {}
static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; }
static inline void scx_cgroup_move_task(struct task_struct *p) {}
static inline void scx_cgroup_finish_attach(void) {}
static inline void scx_cgroup_cancel_attach(struct cgroup_taskset *tset) {}
static inline void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight) {}
static inline void scx_group_set_idle(struct task_group *tg, bool idle) {}
|
|
#endif /* CONFIG_EXT_GROUP_SCHED */
|
|
#endif /* CONFIG_CGROUP_SCHED */
|