[PATCH 3/3] tools/sched_ext: scx_sdt: Fix BPF verifier rejection on older LLVMs
From: Zhao Mengmeng
Date: Fri Mar 06 2026 - 02:32:34 EST
From: Zhao Mengmeng <zhaomengmeng@xxxxxxxxxx>
Under Clang 17/18, when running scx_sdt scheduler, it fails with:
libbpf: prog 'sdt_init_task': BPF program load failed: -EACCES
libbpf: prog 'sdt_init_task': -- BEGIN PROG LOAD LOG --
...
; desc = desc_find_empty(alloc->root, &idx); @ scx_sdt.bpf.c:479
43: (79) r8 = *(u64 *)(r6 +32) ; frame1: R6=map_value(map=scx_sdt.bss,ks=4,vs=200,off=120) R8=scalar()
; for (level = zero; level < SDT_TASK_LEVELS && can_loop; level++) { @ scx_sdt.bpf.c:407
44: (e5) may_goto pc+51
; idx |= pos; @ scx_sdt.bpf.c:418
96: (bf) r7 = r2 ; frame1: R2=0 R7=0
97: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; @ scx_sdt.bpf.c:0
98: (07) r1 += -56 ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
99: (b4) w2 = 0 ; frame1: R2=0
100: (b4) w3 = 3 ; frame1: R3=3
101: (85) call bpf_iter_num_new#82234 ; frame1: R0=scalar() fp-56=iter_num(ref_id=2,state=active,depth=0)
102: (18) r9 = 0x1ffffffffffffff8 ; frame1: R9=0x1ffffffffffffff8
104: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; @ scx_sdt.bpf.c:0
105: (07) r1 += -56 ; frame1: R1=fp-56
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
106: (85) call bpf_iter_num_next#82235 ; frame1: R0=0 fp-56=iter_num(ref_id=2,state=drained,depth=0)
107: (15) if r0 == 0x0 goto pc+29 ; frame1: R0=0
; if (tmp->nr_free > 0) @ scx_sdt.bpf.c:456
137: (bf) r1 = r10 ; frame1: R1=fp0 R10=fp0
; bpf_for(u, 0, SDT_TASK_LEVELS) { @ scx_sdt.bpf.c:447
138: (07) r1 += -56 ; frame1: R1=fp-56
139: (85) call bpf_iter_num_destroy#82232 ; frame1:
140: (b7) r9 = 0 ; frame1: R9=0
; if (unlikely(desc == NULL)) { @ scx_sdt.bpf.c:480
141: (15) if r8 == 0x0 goto pc+15 ; frame1: R8=scalar(umin=1)
; chunk = desc->chunk; @ scx_sdt.bpf.c:485
142: (79) r4 = *(u64 *)(r8 +72)
R8 invalid mem access 'scalar'
The reason is these older compilers lacks native support for
__BPF_FEATURE_ADDR_SPACE_CAST, __arena macro is defined as empty.
Fix it by adding cast_kern when dereferencing variables with __arena tag.
Signed-off-by: Zhao Mengmeng <zhaomengmeng@xxxxxxxxxx>
---
tools/sched_ext/scx_sdt.bpf.c | 20 +++++++++++++++++++-
1 file changed, 19 insertions(+), 1 deletion(-)
diff --git a/tools/sched_ext/scx_sdt.bpf.c b/tools/sched_ext/scx_sdt.bpf.c
index 31b09958e8d5..caacc55bd7a5 100644
--- a/tools/sched_ext/scx_sdt.bpf.c
+++ b/tools/sched_ext/scx_sdt.bpf.c
@@ -148,6 +148,7 @@ static sdt_desc_t *scx_alloc_chunk(void)
out = desc;
+ cast_kern(desc);
desc->nr_free = SDT_TASK_ENTS_PER_CHUNK;
desc->chunk = chunk;
@@ -244,6 +245,7 @@ int mark_nodes_avail(sdt_desc_t *lv_desc[SDT_TASK_LEVELS], __u64 lv_pos[SDT_TASK
/* Only propagate upwards if we are the parent's only free chunk. */
desc = lv_desc[level];
+ cast_kern(desc);
ret = set_idx_state(desc, lv_pos[level], false);
if (unlikely(ret != 0))
return ret;
@@ -298,20 +300,26 @@ int scx_alloc_free_idx(struct scx_allocator *alloc, __u64 idx)
if (level == SDT_TASK_LEVELS - 1)
break;
+ cast_kern(desc);
chunk = desc->chunk;
+ cast_kern(chunk);
desc_children = (sdt_desc_t * __arena *)chunk->descs;
+ cast_kern(desc_children);
desc = desc_children[pos];
if (unlikely(!desc))
return -EINVAL;
}
+ cast_kern(desc);
chunk = desc->chunk;
pos = idx & mask;
+ cast_kern(chunk);
data = chunk->data[pos];
if (likely(data)) {
+ cast_kern(data);
*data = (struct sdt_data) {
.tid.genn = data->tid.genn + 1,
};
@@ -378,6 +386,7 @@ __u64 chunk_find_empty(sdt_desc_t __arg_arena *desc)
__u64 freeslots;
__u64 i;
+ cast_kern(desc);
for (i = 0; i < SDT_TASK_CHUNK_BITMAP_U64S; i++) {
freeslots = ~desc->allocated[i];
if (freeslots == (__u64)0)
@@ -426,9 +435,12 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
break;
/* Allocate an internal node if necessary. */
+ cast_kern(desc);
chunk = desc->chunk;
+ cast_kern(chunk);
desc_children = (sdt_desc_t * __arena *)chunk->descs;
+ cast_kern(desc_children);
desc = desc_children[pos];
if (!desc) {
desc = scx_alloc_chunk();
@@ -448,6 +460,7 @@ static sdt_desc_t * desc_find_empty(sdt_desc_t *desc, __u64 *idxp)
level = SDT_TASK_LEVELS - 1 - u;
tmp = lv_desc[level];
+ cast_kern(tmp);
ret = set_idx_state(tmp, lv_pos[level], true);
if (ret != 0)
break;
@@ -482,10 +495,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
return NULL;
}
+ cast_kern(desc);
chunk = desc->chunk;
/* Populate the leaf node if necessary. */
pos = idx & (SDT_TASK_ENTS_PER_CHUNK - 1);
+ cast_kern(chunk);
data = chunk->data[pos];
if (!data) {
data = scx_alloc_from_pool(&alloc->pool);
@@ -503,10 +518,12 @@ void __arena *scx_alloc(struct scx_allocator *alloc)
alloc_stats.alloc_ops += 1;
alloc_stats.active_allocs += 1;
+ cast_kern(data);
data->tid.idx = idx;
bpf_spin_unlock(&alloc_lock);
+ cast_user(data);
return data;
}
@@ -544,9 +561,10 @@ void __arena *scx_task_alloc(struct task_struct *p)
if (unlikely(!data))
return NULL;
+ mval->data = data;
+ cast_kern(data);
mval->tid = data->tid;
mval->tptr = (__u64) p;
- mval->data = data;
return (void __arena *)data->payload;
}
--
2.43.0