[BUG] x86: failed to boot a kernel on a Ryzen machine

From: Satoru Takeuchi
Date: Mon Apr 24 2017 - 02:58:30 EST


Recently I bought a new Ryzen machine. When I tried to test v4.11-rc8 on it, it failed to boot
with the following panic log.

```
...
[ 0.227720] raid6: sse2x1 gen() 7985 MB/s
[ 0.295709] raid6: sse2x1 xor() 8181 MB/s
[ 0.363706] raid6: sse2x2 gen() 17531 MB/s
[ 0.431699] raid6: sse2x2 xor() 11098 MB/s
[ 0.499693] raid6: sse2x4 gen() 18509 MB/s
[ 0.567688] raid6: sse2x4 xor() 10177 MB/s
[ 0.571692] invalid opcode: 0000 [#1] SMP
[ 0.572312] Modules linked in:
[ 0.572822] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc8-ktest #1
[ 0.573734] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
[ 0.575040] task: ffff8f791e1c0000 task.stack: ffff9c72c00d0000
[ 0.575865] RIP: 0010:raid6_avx21_gen_syndrome+0x3d/0x120
[ 0.576634] RSP: 0018:ffff9c72c00d3d70 EFLAGS: 00010246
[ 0.577376] RAX: 0000000000000000 RBX: ffff9c72c00d3dc0 RCX: 00000000fffedb97
[ 0.578327] RDX: 0000000000000000 RSI: 0000000000001000 RDI: 0000000000000012
[ 0.579283] RBP: ffff9c72c00d3da0 R08: 0000000000000000 R09: 00000000000000cd
[ 0.580243] R10: 00000000fffedb86 R11: ffffffffa617008d R12: 0000000000001000
[ 0.581211] R13: ffff8f791e39e000 R14: ffff8f791e39f000 R15: 0000000000000012
[ 0.582163] FS: 0000000000000000(0000) GS:ffff8f791fc00000(0000) knlGS:0000000000000000
[ 0.583324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 0.584128] CR2: 0000000000000000 CR3: 000000001be09000 CR4: 00000000003006f0
[ 0.585078] Call Trace:
[ 0.594952] raid6_select_algo+0x116/0x30b
[ 0.595592] ? libcrc32c_mod_init+0x2b/0x2b
[ 0.596240] do_one_initcall+0x53/0x1a0
[ 0.596843] ? parse_args+0x2cf/0x490
[ 0.597421] kernel_init_freeable+0x182/0x21c
[ 0.598077] ? rest_init+0x80/0x80
[ 0.598626] kernel_init+0xe/0x100
[ 0.599175] ret_from_fork+0x2c/0x40
[ 0.599741] Code: 55 41 54 53 48 89 d3 48 8d 14 c5 00 00 00 00 41 89 ff 49 89 f4 48 83 ec 08 4c 8b 2c c3 4c 8b 74 13 08 48 89 55 d0 e8 53 ed a9 ff <c5> fd 6f 05 2b 2d 4e 00 c5 e5 ef db 4d 85 e4 48 8b 55 d0 0f 84
[ 0.602215] RIP: raid6_avx21_gen_syndrome+0x3d/0x120 RSP: ffff9c72c00d3d70
[ 0.603154] ---[ end trace 17ee01f86b8fc548 ]---
[ 0.603850] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
[ 0.603850]
[ 0.605276] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
...
```

This panic occurred at lib/raid6/avx2.c#raid6_avx21_gen_syndrome() and this
function consists of many AVX instructions.

lib/raid6/avx2.c:
```
/*
* Plain AVX2 implementation
*
* Generates the XOR parity (P) and RS syndrome (Q) blocks from the data
* blocks referenced by ptrs[], handling 32 bytes (one YMM register) per
* outer-loop iteration.
*
* disks: number of entries in ptrs[]. Entries 0..disks-3 are read as data,
*        entry disks-2 is written as P and entry disks-1 is written as Q.
* bytes: number of bytes to process in each buffer, walked in steps of 32.
* ptrs:  array of buffer pointers laid out as described above.
*
* NOTE(review): vmovdqa and vmovntdq are the aligned move forms, so every
* buffer is presumably 32-byte aligned and bytes a multiple of 32 - confirm
* against the callers.
* NOTE(review): dptr[z0-1] is read unconditionally, so this presumably
* expects at least two data disks (disks >= 4) - confirm against the callers.
*/
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;

z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */

/* Every YMM register access below sits between kernel_fpu_begin() and kernel_fpu_end(). */
kernel_fpu_begin();

/*
* ymm0 = x1d constant; it is ANDed with the per-byte mask in each doubling
* step below. NOTE(review): presumably the GF(2^8) reduction value 0x1d -
* confirm against the definition of raid6_avx2_constants.
*/
asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */

/* NOTE(review): d is int and bytes is size_t, so this comparison is unsigned. */
for (d = 0; d < bytes; d += 32) {
asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
/* ymm6 holds the data block that the next doubling step folds into P and Q. */
asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
/* Walk the remaining data disks from z0-2 down to 0. */
for (z = z0-2; z >= 0; z--) {
asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
/* ymm5 = per-byte mask of the bytes in ymm4 that are negative (0 > ymm4, signed). */
asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
/* ymm4 = ymm4 + ymm4 per byte, i.e. each byte doubled with the carry dropped. */
asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
/* Keep the x1d constant only in the bytes selected by the mask. */
asm volatile("vpand %ymm0,%ymm5,%ymm5");
/* XOR the masked constant into the doubled value. */
asm volatile("vpxor %ymm5,%ymm4,%ymm4");
/* Fold the pending data block into the P accumulator (ymm2)... */
asm volatile("vpxor %ymm6,%ymm2,%ymm2");
/* ...and into the Q accumulator (ymm4). */
asm volatile("vpxor %ymm6,%ymm4,%ymm4");
/* Load the block for disk z; it is folded in on the next step. */
asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
}
/* Same doubling-and-fold step once more for the last block left in ymm6. */
asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
asm volatile("vpand %ymm0,%ymm5,%ymm5");
asm volatile("vpxor %ymm5,%ymm4,%ymm4");
asm volatile("vpxor %ymm6,%ymm2,%ymm2");
asm volatile("vpxor %ymm6,%ymm4,%ymm4");

/* Write P and Q for this 32-byte column with non-temporal stores, then clear the accumulators. */
asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
asm volatile("vpxor %ymm2,%ymm2,%ymm2");
asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
asm volatile("vpxor %ymm4,%ymm4,%ymm4");
}

/* Store fence issued after the non-temporal stores above, before the FPU section ends. */
asm volatile("sfence" : : : "memory");
kernel_fpu_end();
}

```

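This problem can be worked around by the following patch, which comments out the
assembler's AVX2 probe so that -DCONFIG_AS_AVX2=1 is no longer passed to the build.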

```
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index 2d44933..b589097 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -162,7 +162,7 @@ asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
-avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
+#avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)

```

I suspect that there is a problem with AVX2 handling, either in the Linux kernel or in Ryzen.

# Build environment

## Hardware

CPU: Ryzen 1800x

## Software

- distro: Ubuntu 16.04 x86_64
- gcc: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609

## Build target kernel version

Linux v4.11-rc8

# Test environment

## Hardware

A VM running on the above mentioned hardware.

## Software

- distro: Ubuntu 16.04

# Additional information

- .config is attached in this mail
- This problem also happens on v4.10
- When I tested v4.10 on my previous Core i5 machine, it booted successfully.

Thanks,
Satoru

Attachment: dot_config.xz
Description: Binary data