Re: [tip: x86/fred] x86/ptrace: Cleanup the definition of the pt_regs structure

From: H. Peter Anvin
Date: Sat Feb 03 2024 - 18:53:09 EST


On January 31, 2024 1:14:52 PM PST, tip-bot2 for Xin Li <tip-bot2@xxxxxxxxxxxxx> wrote:
>The following commit has been merged into the x86/fred branch of tip:
>
>Commit-ID: ee63291aa8287cb7ded767d340155fe8681fc075
>Gitweb: https://git.kernel.org/tip/ee63291aa8287cb7ded767d340155fe8681fc075
>Author: Xin Li <xin3.li@xxxxxxxxx>
>AuthorDate: Tue, 05 Dec 2023 02:50:02 -08:00
>Committer: Borislav Petkov (AMD) <bp@xxxxxxxxx>
>CommitterDate: Wed, 31 Jan 2024 22:01:13 +01:00
>
>x86/ptrace: Cleanup the definition of the pt_regs structure
>
>struct pt_regs is hard to read because the member or section related
>comments are not aligned with the members.
>
>The 'cs' and 'ss' members of pt_regs are type of 'unsigned long' while
>in reality they are only 16-bit wide. This works so far as the
>remaining space is unused, but FRED will use the remaining bits for
>other purposes.
>
>To prepare for FRED:
>
> - Cleanup the formatting
> - Convert 'cs' and 'ss' to u16 and embed them into an union
> with a u64
> - Fixup the related printk() format strings
>
>Suggested-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>Originally-by: H. Peter Anvin (Intel) <hpa@xxxxxxxxx>
>Signed-off-by: Xin Li <xin3.li@xxxxxxxxx>
>Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
>Signed-off-by: Borislav Petkov (AMD) <bp@xxxxxxxxx>
>Tested-by: Shan Kang <shan.kang@xxxxxxxxx>
>Link: https://lore.kernel.org/r/20231205105030.8698-14-xin3.li@xxxxxxxxx
>---
> arch/x86/entry/vsyscall/vsyscall_64.c | 2 +-
> arch/x86/include/asm/ptrace.h | 48 ++++++++++++++++++--------
> arch/x86/kernel/process_64.c | 2 +-
> 3 files changed, 37 insertions(+), 15 deletions(-)
>
>diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c
>index e0ca812..a3c0df1 100644
>--- a/arch/x86/entry/vsyscall/vsyscall_64.c
>+++ b/arch/x86/entry/vsyscall/vsyscall_64.c
>@@ -76,7 +76,7 @@ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
> if (!show_unhandled_signals)
> return;
>
>- printk_ratelimited("%s%s[%d] %s ip:%lx cs:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
>+ printk_ratelimited("%s%s[%d] %s ip:%lx cs:%x sp:%lx ax:%lx si:%lx di:%lx\n",
> level, current->comm, task_pid_nr(current),
> message, regs->ip, regs->cs,
> regs->sp, regs->ax, regs->si, regs->di);
>diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h
>index f4db78b..b268cd2 100644
>--- a/arch/x86/include/asm/ptrace.h
>+++ b/arch/x86/include/asm/ptrace.h
>@@ -57,17 +57,19 @@ struct pt_regs {
> #else /* __i386__ */
>
> struct pt_regs {
>-/*
>- * C ABI says these regs are callee-preserved. They aren't saved on kernel entry
>- * unless syscall needs a complete, fully filled "struct pt_regs".
>- */
>+ /*
>+ * C ABI says these regs are callee-preserved. They aren't saved on
>+ * kernel entry unless syscall needs a complete, fully filled
>+ * "struct pt_regs".
>+ */
> unsigned long r15;
> unsigned long r14;
> unsigned long r13;
> unsigned long r12;
> unsigned long bp;
> unsigned long bx;
>-/* These regs are callee-clobbered. Always saved on kernel entry. */
>+
>+ /* These regs are callee-clobbered. Always saved on kernel entry. */
> unsigned long r11;
> unsigned long r10;
> unsigned long r9;
>@@ -77,18 +79,38 @@ struct pt_regs {
> unsigned long dx;
> unsigned long si;
> unsigned long di;
>-/*
>- * On syscall entry, this is syscall#. On CPU exception, this is error code.
>- * On hw interrupt, it's IRQ number:
>- */
>+
>+ /*
>+ * orig_ax is used on entry for:
>+ * - the syscall number (syscall, sysenter, int80)
>+ * - error_code stored by the CPU on traps and exceptions
>+ * - the interrupt number for device interrupts
>+ */
> unsigned long orig_ax;
>-/* Return frame for iretq */
>+
>+ /* The IRETQ return frame starts here */
> unsigned long ip;
>- unsigned long cs;
>+
>+ union {
>+ /* The full 64-bit data slot containing CS */
>+ u64 csx;
>+ /* CS selector */
>+ u16 cs;
>+ };
>+
> unsigned long flags;
> unsigned long sp;
>- unsigned long ss;
>-/* top of stack page */
>+
>+ union {
>+ /* The full 64-bit data slot containing SS */
>+ u64 ssx;
>+ /* SS selector */
>+ u16 ss;
>+ };
>+
>+ /*
>+ * Top of stack on IDT systems.
>+ */
> };
>
> #endif /* !__i386__ */
>diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
>index 33b2687..0f78b58 100644
>--- a/arch/x86/kernel/process_64.c
>+++ b/arch/x86/kernel/process_64.c
>@@ -117,7 +117,7 @@ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode,
>
> printk("%sFS: %016lx(%04x) GS:%016lx(%04x) knlGS:%016lx\n",
> log_lvl, fs, fsindex, gs, gsindex, shadowgs);
>- printk("%sCS: %04lx DS: %04x ES: %04x CR0: %016lx\n",
>+ printk("%sCS: %04x DS: %04x ES: %04x CR0: %016lx\n",
> log_lvl, regs->cs, ds, es, cr0);
> printk("%sCR2: %016lx CR3: %016lx CR4: %016lx\n",
> log_lvl, cr2, cr3, cr4);

Incidentally, the comment about callee-saved registers has long been obsolete and is now outright wrong.

The next version of GCC (14, I think) will have an attribute to turn off saving registers, which we can use for top-level C functions.