Re: [PATCH 1/2] x86/segment: Introduce storesegment() helper to write segment selectors to memory

From: Uros Bizjak

Date: Tue Mar 31 2026 - 06:05:20 EST


On Tue, Mar 31, 2026 at 11:53 AM Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
>
> On Tue, Mar 31, 2026 at 8:56 AM Ingo Molnar <mingo@xxxxxxxxxx> wrote:
> >
> >
> > * Uros Bizjak <ubizjak@xxxxxxxxx> wrote:
> >
> > > Introduce a new helper, storesegment(), that stores a segment selector
> > > directly into a u16 (or compatible) memory location without using an
> > > intermediate general-purpose register.
> > >
> > > To support this, split the existing SAVE_SEGMENT macro into two parts:
> > >
> > > SAVE_SEGMENT_VAR(): retains the current behavior of reading a segment
> > > register into an unsigned long via a register.
> > > SAVE_SEGMENT_PTR(): adds a new variant that writes the 16-bit selector
> > > directly to memory.
> > >
> > > The combined SAVE_SEGMENT() macro now generates both helpers for each
> > > segment register.
> > >
> > > The new storesegment() interface is preferred over savesegment() when
> > > the value only needs to be stored (e.g. into a struct field), avoiding
> > > an unnecessary register move and making the intent clearer.
> > >
> > > No functional change for existing users of savesegment().
> >
> > Why does the API have to be split into =r and =m variants?
> >
> > Couldn't we use a more generic constraint and let the compiler
> > decide what the target is? Would that negatively impact
> > other aspects of code generation?
>
> The "=r" variant actually outputs zero-extended value to the whole
> register width. So, the "=r" variant is used to eliminate
> zero-extensions when the value is used in the follow-up calculations,
> comparisons, or when the value is stored to a location that is more
> than 16 bits wide. Additionally, the "r" variant always uses MOVL, so
> the operand-size prefix byte (0x66) is not needed.
>
> The "=m" variant only outputs to a 16-bit location. Having "=rm" here
> would always emit a 0x66 operand size prefix when register is used as
> an output, and there would be many zero-extensions emitted, because
> the compiler needs to zero-extend the value from 'unsigned short' to
> anything wider.
>
> Other than that, GCC (and Clang, too) has serious problems with "=rm"
> output constraints. Forward propagation (AKA combine pass) does not
> work reliably with assembly outputs (due to the always-present clobbers
> for assembly clauses), so there will be many cases of moves to a
> temporary register and even to a temporary stack location with this
> constraint. Having two separate functions (each with a clear and
> informative function comment) leaves the decision of which function is
> optimal to the programmer.

I forgot to say that there are more opportunities for storesegment()
in other subsystems, please see the attached patch.

Uros.
diff --git a/arch/x86/hyperv/hv_crash.c b/arch/x86/hyperv/hv_crash.c
index 5ffcc23255de..1d29ada3d9f1 100644
--- a/arch/x86/hyperv/hv_crash.c
+++ b/arch/x86/hyperv/hv_crash.c
@@ -207,12 +207,12 @@ static void hv_hvcrash_ctxt_save(void)
asm volatile("movq %%cr2, %0" : "=r"(ctxt->cr2));
asm volatile("movq %%cr8, %0" : "=r"(ctxt->cr8));

- asm volatile("movw %%cs, %0" : "=m"(ctxt->cs));
- asm volatile("movw %%ss, %0" : "=m"(ctxt->ss));
- asm volatile("movw %%ds, %0" : "=m"(ctxt->ds));
- asm volatile("movw %%es, %0" : "=m"(ctxt->es));
- asm volatile("movw %%fs, %0" : "=m"(ctxt->fs));
- asm volatile("movw %%gs, %0" : "=m"(ctxt->gs));
+ storesegment(cs, ctxt->cs);
+ storesegment(ss, ctxt->ss);
+ storesegment(ds, ctxt->ds);
+ storesegment(es, ctxt->es);
+ storesegment(fs, ctxt->fs);
+ storesegment(gs, ctxt->gs);

native_store_gdt(&ctxt->gdtr);
store_idt(&ctxt->idtr);
diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
index 5cfb27f26583..9b20eb699c54 100644
--- a/arch/x86/include/asm/kexec.h
+++ b/arch/x86/include/asm/kexec.h
@@ -106,11 +106,11 @@ static inline void crash_setup_regs(struct pt_regs *newregs,
asm volatile("mov %%r14,%0" : "=m"(newregs->r14));
asm volatile("mov %%r15,%0" : "=m"(newregs->r15));
#endif
- asm volatile("mov %%ss,%k0" : "=a"(newregs->ss));
- asm volatile("mov %%cs,%k0" : "=a"(newregs->cs));
+ storesegment(ss, newregs->ss);
+ storesegment(cs, newregs->cs);
#ifdef CONFIG_X86_32
- asm volatile("mov %%ds,%k0" : "=a"(newregs->ds));
- asm volatile("mov %%es,%k0" : "=a"(newregs->es));
+ storesegment(ds, newregs->ds);
+ storesegment(es, newregs->es);
#endif
asm volatile("pushf\n\t"
"pop %0" : "=m"(newregs->flags));
diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
index 8b24e682535b..4e9939a8ff30 100644
--- a/arch/x86/kvm/vmx/vmx.c
+++ b/arch/x86/kvm/vmx/vmx.c
@@ -1390,8 +1390,8 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
host_state->ldt_sel = kvm_read_ldt();

#ifdef CONFIG_X86_64
- savesegment(ds, host_state->ds_sel);
- savesegment(es, host_state->es_sel);
+ storesegment(ds, host_state->ds_sel);
+ storesegment(es, host_state->es_sel);

gs_base = cpu_kernelmode_gs_base(cpu);
if (likely(is_64bit_mm(current->mm))) {
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index 702f30eaf9c4..24253f40fa52 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -105,11 +105,11 @@ static void __save_processor_state(struct saved_context *ctxt)
/*
* segment registers
*/
- savesegment(gs, ctxt->gs);
+ storesegment(gs, ctxt->gs);
#ifdef CONFIG_X86_64
- savesegment(fs, ctxt->fs);
- savesegment(ds, ctxt->ds);
- savesegment(es, ctxt->es);
+ storesegment(fs, ctxt->fs);
+ storesegment(ds, ctxt->ds);
+ storesegment(es, ctxt->es);

rdmsrq(MSR_FS_BASE, ctxt->fs_base);
rdmsrq(MSR_GS_BASE, ctxt->kernelmode_gs_base);