sched_ext: Fixes for v6.15-rc0

Merge tag 'sched_ext-for-6.15-rc0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext

Pull sched_ext fixes from Tejun Heo:

 - Calling scx_bpf_create_dsq() with an ID that is already in use would
   succeed, silently creating a duplicate DSQ. Fix it to return -EEXIST.

 - scx_select_cpu_dfl() fixes and cleanups.

 - Synchronize tools/sched_ext with the external scheduler repo. While
   this isn't a fix, there's no risk to the kernel and it's better if
   they stay closely synced.

* tag 'sched_ext-for-6.15-rc0-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/sched_ext:
  tools/sched_ext: Sync with scx repo
  sched_ext: initialize built-in idle state before ops.init()
  sched_ext: create_dsq: Return -EEXIST on duplicate request
  sched_ext: Remove a meaningless conditional goto in scx_select_cpu_dfl()
  sched_ext: idle: Fix return code of scx_select_cpu_dfl()
Commit ea59cb7423 by Linus Torvalds, 2025-04-03 10:03:38 -07:00
7 changed files with 103 additions and 40 deletions


@@ -4171,8 +4171,8 @@ static struct scx_dispatch_q *create_dsq(u64 dsq_id, int node)
 
 	init_dsq(dsq, dsq_id);
 
-	ret = rhashtable_insert_fast(&dsq_hash, &dsq->hash_node,
-				     dsq_hash_params);
+	ret = rhashtable_lookup_insert_fast(&dsq_hash, &dsq->hash_node,
+					    dsq_hash_params);
 	if (ret) {
 		kfree(dsq);
 		return ERR_PTR(ret);
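
The underlying switch is from rhashtable_insert_fast(), which does not
reject duplicate keys, to rhashtable_lookup_insert_fast(), which
atomically fails with -EEXIST when an entry with the same key is already
hashed. From the BPF side the error can now be handled explicitly. A
minimal sketch, assuming a hypothetical scheduler ("mysched") with a
private DSQ ID:

	#define MY_DSQ_ID	0	/* hypothetical scheduler-private ID */

	s32 BPF_STRUCT_OPS_SLEEPABLE(mysched_init)
	{
		s32 ret = scx_bpf_create_dsq(MY_DSQ_ID, -1);

		/* Before the fix, a duplicate ID silently created a second
		 * DSQ; now the call fails cleanly and can be handled. */
		if (ret && ret != -EEXIST)
			return ret;

		return 0;
	}
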
@@ -5361,6 +5361,8 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	 */
 	cpus_read_lock();
 
+	scx_idle_enable(ops);
+
 	if (scx_ops.init) {
 		ret = SCX_CALL_OP_RET(SCX_KF_UNLOCKED, init);
 		if (ret) {
@@ -5427,8 +5429,6 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
 	if (scx_ops.cpu_acquire || scx_ops.cpu_release)
 		static_branch_enable(&scx_ops_cpu_preempt);
 
-	scx_idle_enable(ops);
-
 	/*
 	 * Lock out forks, cgroup on/offlining and moves before opening the
 	 * floodgate so that they don't wander into the operations prematurely.
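
These two hunks move scx_idle_enable() from the tail of scx_ops_enable()
to before the ops.init() call, so the built-in idle state is ready by the
time the scheduler's init callback runs. That matters because init may
legitimately inspect idle state. A hedged sketch (scheduler name
hypothetical, kfuncs as declared in common.bpf.h):

	s32 BPF_STRUCT_OPS_SLEEPABLE(mysched_init)
	{
		const struct cpumask *idle = scx_bpf_get_idle_cpumask();

		/* Meaningful only if the idle masks were initialized
		 * before ops.init() -- which this fix guarantees. */
		if (bpf_cpumask_test_cpu(0, idle))
			bpf_printk("CPU 0 idle at init");

		scx_bpf_put_idle_cpumask(idle);
		return 0;
	}
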


@@ -544,7 +544,7 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
 		 * core.
 		 */
 		if (flags & SCX_PICK_IDLE_CORE) {
-			cpu = prev_cpu;
+			cpu = -EBUSY;
 			goto out_unlock;
 		}
 	}
@@ -584,8 +584,6 @@ s32 scx_select_cpu_dfl(struct task_struct *p, s32 prev_cpu, u64 wake_flags, u64 flags)
 	 * increasing distance.
 	 */
 	cpu = scx_pick_idle_cpu(p->cpus_ptr, node, flags);
-	if (cpu >= 0)
-		goto out_unlock;
 
 out_unlock:
 	rcu_read_unlock();
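
The first hunk makes scx_select_cpu_dfl() report "no idle core found" as
-EBUSY instead of silently handing back prev_cpu; the second drops a
conditional goto that jumped to the label it would fall through to
anyway. BPF schedulers are shielded from the errno by the
scx_bpf_select_cpu_dfl() kfunc, which signals idleness through a flag.
Roughly (scheduler name hypothetical):

	s32 BPF_STRUCT_OPS(mysched_select_cpu, struct task_struct *p,
			   s32 prev_cpu, u64 wake_flags)
	{
		bool is_idle = false;
		s32 cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);

		/* On internal failure the kfunc hands back prev_cpu with
		 * is_idle == false, so no errno leaks into BPF. */
		if (is_idle)
			scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);

		return cpu;
	}
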
@@ -723,14 +721,14 @@ static void reset_idle_masks(struct sched_ext_ops *ops)
 void scx_idle_enable(struct sched_ext_ops *ops)
 {
 	if (!ops->update_idle || (ops->flags & SCX_OPS_KEEP_BUILTIN_IDLE))
-		static_branch_enable(&scx_builtin_idle_enabled);
+		static_branch_enable_cpuslocked(&scx_builtin_idle_enabled);
 	else
-		static_branch_disable(&scx_builtin_idle_enabled);
+		static_branch_disable_cpuslocked(&scx_builtin_idle_enabled);
 
 	if (ops->flags & SCX_OPS_BUILTIN_IDLE_PER_NODE)
-		static_branch_enable(&scx_builtin_idle_per_node);
+		static_branch_enable_cpuslocked(&scx_builtin_idle_per_node);
 	else
-		static_branch_disable(&scx_builtin_idle_per_node);
+		static_branch_disable_cpuslocked(&scx_builtin_idle_per_node);
 
 #ifdef CONFIG_SMP
 	reset_idle_masks(ops);
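
The *_cpuslocked() conversions follow from the reordering in ext.c above:
scx_idle_enable() is now invoked inside the cpus_read_lock() section of
scx_ops_enable(), and the plain static_branch_enable()/
static_branch_disable() helpers would try to take the CPU hotplug lock
that the caller already holds.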


@@ -586,36 +586,48 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int size)
 	}
 }
 
-#define READ_ONCE(x)							\
-({									\
-	union { typeof(x) __val; char __c[1]; } __u =			\
-		{ .__c = { 0 } };					\
-	__read_once_size(&(x), __u.__c, sizeof(x));			\
-	__u.__val;							\
+/*
+ * __unqual_typeof(x) - Declare an unqualified scalar type, leaving
+ * non-scalar types unchanged,
+ *
+ * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char'
+ * is not type-compatible with 'signed char', and we define a separate case.
+ *
+ * This is copied verbatim from kernel's include/linux/compiler_types.h, but
+ * with default expression (for pointers) changed from (x) to (typeof(x)0).
+ *
+ * This is because LLVM has a bug where for lvalue (x), it does not get rid of
+ * an extra address_space qualifier, but does in case of rvalue (typeof(x)0).
+ * Hence, for pointers, we need to create an rvalue expression to get the
+ * desired type. See https://github.com/llvm/llvm-project/issues/53400.
+ */
+#define __scalar_type_to_expr_cases(type)				\
+	unsigned type: (unsigned type)0, signed type: (signed type)0
+
+#define __unqual_typeof(x)						\
+	typeof(_Generic((x),						\
+		char: (char)0,						\
+		__scalar_type_to_expr_cases(char),			\
+		__scalar_type_to_expr_cases(short),			\
+		__scalar_type_to_expr_cases(int),			\
+		__scalar_type_to_expr_cases(long),			\
+		__scalar_type_to_expr_cases(long long),			\
+		default: (typeof(x))0))
+
+#define READ_ONCE(x)							\
+({									\
+	union { __unqual_typeof(x) __val; char __c[1]; } __u =		\
+		{ .__c = { 0 } };					\
+	__read_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+	__u.__val;							\
 })
 
-#define WRITE_ONCE(x, val)						\
-({									\
-	union { typeof(x) __val; char __c[1]; } __u =			\
-		{ .__val = (val) };					\
-	__write_once_size(&(x), __u.__c, sizeof(x));			\
-	__u.__val;							\
-})
-
-#define READ_ONCE_ARENA(type, x)					\
-({									\
-	union { type __val; char __c[1]; } __u =			\
-		{ .__c = { 0 } };					\
-	__read_once_size((void *)&(x), __u.__c, sizeof(x));		\
-	__u.__val;							\
-})
-
-#define WRITE_ONCE_ARENA(type, x, val)					\
-({									\
-	union { type __val; char __c[1]; } __u =			\
-		{ .__val = (val) };					\
-	__write_once_size((void *)&(x), __u.__c, sizeof(x));		\
-	__u.__val;							\
+#define WRITE_ONCE(x, val)						\
+({									\
+	union { __unqual_typeof(x) __val; char __c[1]; } __u =		\
+		{ .__val = (val) };					\
+	__write_once_size((__unqual_typeof(x) *)&(x), __u.__c, sizeof(x)); \
+	__u.__val;							\
 })
 
 /*
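
This rework is also what lets the *_ARENA macro variants disappear: once
__unqual_typeof() strips the address_space qualifier that LLVM leaves on
arena lvalues (see the llvm-project issue cited in the comment above),
the generic macros work on arena pointers directly. A sketch, assuming
the __arena address-space annotation from the BPF arena headers is in
scope and a hypothetical arena-resident struct:

	struct counter { u64 val; };	/* hypothetical arena-resident struct */

	static void bump(struct counter __arena *c)
	{
		/* Previously required READ_ONCE_ARENA(u64, c->val) and
		 * WRITE_ONCE_ARENA(u64, c->val, ...). */
		u64 v = READ_ONCE(c->val);

		WRITE_ONCE(c->val, v + 1);
	}
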
@@ -648,6 +660,23 @@ static inline u32 log2_u64(u64 v)
 	return log2_u32(v) + 1;
 }
 
+/*
+ * Return a value proportionally scaled to the task's weight.
+ */
+static inline u64 scale_by_task_weight(const struct task_struct *p, u64 value)
+{
+	return (value * p->scx.weight) / 100;
+}
+
+/*
+ * Return a value inversely proportional to the task's weight.
+ */
+static inline u64 scale_by_task_weight_inverse(const struct task_struct *p, u64 value)
+{
+	return value * 100 / p->scx.weight;
+}
+
 #include "compat.bpf.h"
 #include "enums.bpf.h"


@@ -88,6 +88,8 @@
 #define HAVE_SCX_OPS_ENQ_LAST
 #define HAVE_SCX_OPS_ENQ_EXITING
 #define HAVE_SCX_OPS_SWITCH_PARTIAL
+#define HAVE_SCX_OPS_ENQ_MIGRATION_DISABLED
+#define HAVE_SCX_OPS_ALLOW_QUEUED_WAKEUP
 #define HAVE_SCX_OPS_HAS_CGROUP_WEIGHT
 #define HAVE_SCX_OPS_ALL_FLAGS
 #define HAVE_SCX_OPSS_NONE
@@ -104,6 +106,7 @@
 #define HAVE_SCX_RQ_BAL_PENDING
 #define HAVE_SCX_RQ_BAL_KEEP
 #define HAVE_SCX_RQ_BYPASSING
+#define HAVE_SCX_RQ_CLK_VALID
 #define HAVE_SCX_RQ_IN_WAKEUP
 #define HAVE_SCX_RQ_IN_BALANCE
 #define HAVE_SCX_TASK_NONE


@@ -13,6 +13,30 @@
 const volatile u64 __SCX_SLICE_DFL __weak;
 #define SCX_SLICE_DFL __SCX_SLICE_DFL
 
 const volatile u64 __SCX_SLICE_INF __weak;
 #define SCX_SLICE_INF __SCX_SLICE_INF
 
+const volatile u64 __SCX_RQ_ONLINE __weak;
+#define SCX_RQ_ONLINE __SCX_RQ_ONLINE
+
+const volatile u64 __SCX_RQ_CAN_STOP_TICK __weak;
+#define SCX_RQ_CAN_STOP_TICK __SCX_RQ_CAN_STOP_TICK
+
+const volatile u64 __SCX_RQ_BAL_PENDING __weak;
+#define SCX_RQ_BAL_PENDING __SCX_RQ_BAL_PENDING
+
+const volatile u64 __SCX_RQ_BAL_KEEP __weak;
+#define SCX_RQ_BAL_KEEP __SCX_RQ_BAL_KEEP
+
+const volatile u64 __SCX_RQ_BYPASSING __weak;
+#define SCX_RQ_BYPASSING __SCX_RQ_BYPASSING
+
+const volatile u64 __SCX_RQ_CLK_VALID __weak;
+#define SCX_RQ_CLK_VALID __SCX_RQ_CLK_VALID
+
+const volatile u64 __SCX_RQ_IN_WAKEUP __weak;
+#define SCX_RQ_IN_WAKEUP __SCX_RQ_IN_WAKEUP
+
+const volatile u64 __SCX_RQ_IN_BALANCE __weak;
+#define SCX_RQ_IN_BALANCE __SCX_RQ_IN_BALANCE
+
 const volatile u64 __SCX_DSQ_FLAG_BUILTIN __weak;
 #define SCX_DSQ_FLAG_BUILTIN __SCX_DSQ_FLAG_BUILTIN


@@ -8,6 +8,14 @@
 	SCX_ENUM_SET(skel, scx_public_consts, SCX_OPS_NAME_LEN);	\
 	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_DFL);		\
 	SCX_ENUM_SET(skel, scx_public_consts, SCX_SLICE_INF);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_ONLINE);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CAN_STOP_TICK);	\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_PENDING);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BAL_KEEP);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_BYPASSING);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_CLK_VALID);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_WAKEUP);		\
+	SCX_ENUM_SET(skel, scx_rq_flags, SCX_RQ_IN_BALANCE);		\
 	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_BUILTIN);	\
 	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_FLAG_LOCAL_ON);	\
 	SCX_ENUM_SET(skel, scx_dsq_id_flags, SCX_DSQ_INVALID);		\


@@ -14,7 +14,8 @@ static inline void __ENUM_set(u64 *val, char *type, char *name)
 	bool res;
 
 	res = __COMPAT_read_enum(type, name, val);
-	SCX_BUG_ON(!res, "enum not found(%s)", name);
+	if (!res)
+		*val = 0;
 }
 
 #define SCX_ENUM_SET(skel, type, name) do {				\
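
Note the behavioral change here: __ENUM_set() now zero-fills the value
when __COMPAT_read_enum() cannot find the named enum on the running
kernel, instead of aborting via SCX_BUG_ON(). That lets the newly
exported SCX_RQ_* entries above resolve gracefully on older kernels that
predate them.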