Re: [PATCH] checkpatch: Check for .byte-spelled insn opcodes documentation on x86

From: Joe Perches
Date: Sat Oct 10 2020 - 18:52:07 EST


On Sat, 2020-10-10 at 12:54 +0200, Borislav Petkov wrote:
> On Fri, Oct 09, 2020 at 11:01:18AM -0700, Joe Perches wrote:
> > Given the location, this only works on .c and .h files.
> > It does not work on .S files. Should it?
>
> Probably not because there will be too many false positives. .byte is
> used not only to spell instruction opcodes in .S files. And the main
> case we're addressing here is using those .byte spelled opcodes in asm
> volatile constructs so...

[]

> > So it looks like the regex would be more complete as:
> >
> > if ($realfile =~ m@^arch/x86/@ &&
> > $rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
>
> This is much less readable than what I have now (yes, realfile test
> should come first):

I care less about readability than completeness, correctness,
and minimal false positives.

> if ($realfile =~ m@^arch/x86/@ && $rawline =~ /\.byte[\s0-9a-fx,]+/) {
>
> Also, this
>
> $rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
>
> matches
>
> ".byte 0x66"

And? So does the regex you propose.

> This
>
> $rawline =~ /\.byte[\s0-9a-fx,]+)) {
>
> matches
>
> ".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
>
> which is what it needs to match.

Is that the only case you are trying to match?
two or more "0x<hex>," ?

Then this could use:

/"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}/

$ git grep -P '"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}' arch/x86
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/segment.h: ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h: ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");

vs:

$ git grep -P '\.byte[\s0-9a-fx,]+' -- 'arch/x86/*.[ch]' | cat
arch/x86/include/asm/alternative-asm.h: .byte \orig_len
arch/x86/include/asm/alternative-asm.h: .byte \alt_len
arch/x86/include/asm/alternative-asm.h: .byte \pad_len
arch/x86/include/asm/alternative.h: " .byte " alt_total_slen "\n" /* source len */ \
arch/x86/include/asm/alternative.h: " .byte " alt_rlen(num) "\n" /* replacement len */ \
arch/x86/include/asm/alternative.h: " .byte " alt_pad_len "\n" /* pad len */
arch/x86/include/asm/bug.h:#define ASM_UD0 ".byte 0x0f, 0xff" /* + ModRM (for Intel) */
arch/x86/include/asm/bug.h:#define ASM_UD1 ".byte 0x0f, 0xb9" /* + ModRM */
arch/x86/include/asm/bug.h:#define ASM_UD2 ".byte 0x0f, 0x0b"
arch/x86/include/asm/cpufeature.h: " .byte 3b - 1b\n" /* src len */
arch/x86/include/asm/cpufeature.h: " .byte 5f - 4f\n" /* repl len */
arch/x86/include/asm/cpufeature.h: " .byte 3b - 2b\n" /* pad len */
arch/x86/include/asm/cpufeature.h: " .byte 3b - 1b\n" /* src len */
arch/x86/include/asm/cpufeature.h: " .byte 0\n" /* repl len */
arch/x86/include/asm/cpufeature.h: " .byte 0\n" /* pad len */
arch/x86/include/asm/fpu/internal.h:#define XSAVE ".byte " REX_PREFIX "0x0f,0xae,0x27"
arch/x86/include/asm/fpu/internal.h:#define XSAVEOPT ".byte " REX_PREFIX "0x0f,0xae,0x37"
arch/x86/include/asm/fpu/internal.h:#define XSAVES ".byte " REX_PREFIX "0x0f,0xc7,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTOR ".byte " REX_PREFIX "0x0f,0xae,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTORS ".byte " REX_PREFIX "0x0f,0xc7,0x1f"
arch/x86/include/asm/idtentry.h: * Note, that the 'pushq imm8' is emitted via '.byte 0x6a, vector' because
arch/x86/include/asm/idtentry.h: * .byte achieves the same thing and the only fixup needed in the C entry
arch/x86/include/asm/idtentry.h: .byte 0x6a, vector
arch/x86/include/asm/idtentry.h: .byte 0x6a, vector
arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not supported by old
arch/x86/include/asm/inst.h: .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) >> 1) | (\W << 3)
arch/x86/include/asm/inst.h: .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
arch/x86/include/asm/inst.h: .byte 0xf3
arch/x86/include/asm/inst.h: .byte 0x0f, 0xc7
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/jump_label.h: ".byte " __stringify(STATIC_KEY_INIT_NOP) "\n\t"
arch/x86/include/asm/jump_label.h: ".byte 0xe9\n\t .long %l[l_yes] - 2f\n\t"
arch/x86/include/asm/jump_label.h: .byte 0xe9
arch/x86/include/asm/jump_label.h: .byte STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h: .byte STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h: .byte 0xe9
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h: asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h: asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) .byte x
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
arch/x86/include/asm/paravirt.h: .byte ptype; \
arch/x86/include/asm/paravirt.h: .byte 772b-771b; \
arch/x86/include/asm/paravirt_types.h: " .byte " type "\n" \
arch/x86/include/asm/paravirt_types.h: " .byte 772b-771b\n" \
arch/x86/include/asm/segment.h: ".byte 0xf3,0x0f,0xc7,0xf8", /* RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC ".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC ".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h: alternative_io(".byte " __stringify(NOP_DS_PREFIX) "; clflush %P0",
arch/x86/include/asm/special_insns.h: ".byte 0x66; clflush %P0",
arch/x86/include/asm/special_insns.h: ".byte " __stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
arch/x86/include/asm/special_insns.h: ".byte 0x66; clflush (%[pax])", /* clflushopt (%%rax) */
arch/x86/include/asm/special_insns.h: ".byte 0x66, 0x0f, 0xae, 0x30", /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0xf3, 0x0f, 0x38, 0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h: asm volatile(".byte 0xf, 0x1, 0xe8" ::: "memory");
arch/x86/include/asm/static_call.h: __ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
arch/x86/include/asm/xen/interface.h:#define XEN_EMULATE_PREFIX __ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
arch/x86/kvm/vmx/ops.h: ".byte 0x3e\n\t" /* branch taken hint */
arch/x86/kvm/vmx/ops.h: ".byte 0x2e\n\t" /* branch not taken hint */ \
arch/x86/kvm/vmx/ops.h: ".byte 0x2e\n\t" /* branch not taken hint */ \
arch/x86/realmode/rm/realmode.h:#define LJMPW_RM(to) .byte 0xea ; .word (to), real_mode_seg

and if lines like:

arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not supported by old
arch/x86/include/asm/paravirt_types.h: " .byte " type "\n" \

are false positives then perhaps change the regex
to add required spaces after the \.byte before the
match.