soft lockup when passing vvar address to write(2)

From: Rasmus Villemoes
Date: Thu Mar 03 2016 - 17:58:51 EST


Looking up the vvar mapping and dumping its contents sometimes results
in a soft lockup. On 4.5-rc6+ (master from earlier today) it seems to be
a little harder to trigger than on the 4.2something Ubuntu kernel I
first saw it on, but in both cases it's easy to reproduce (10s of
iterations at most) by running the below program in a loop.

The process is running at 100% cpu according to top and doesn't respond
to signals. But waiting long enough makes it recover. The first time I
saw this khugepaged also started consuming 100% cpu, and the load
average climbed to over 20 (despite no other processes apparently doing
anything).

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <unistd.h>

static unsigned long vvar_start, vvar_end;

void read_maps()
{
char *line = NULL;
size_t cap = 0;
FILE *f = fopen("/proc/self/maps", "r");

while (getline(&line, &cap, f) > 0) {
unsigned long start, end;
char name[256];
int ret = sscanf(line, "%lx-%lx %*s %*s %*s %*s [%255[a-z]]", &start, &end, name);

if (ret == 3 && !strcmp(name, "vvar")) {
vvar_start = start;
vvar_end = end;
}
}
free(line);
fclose(f);
}

void dump_mem(const char *name, unsigned long start, unsigned long end)
{
int fd;

if (!start)
return;

fprintf(stderr, "%s, %lx-%lx, %lu\n", name, start, end, end-start);

fd = open(name, O_CREAT | O_WRONLY, 0666);
write(fd, (void*)start, end-start);
close(fd);
}

int main(void)
{
read_maps();
dump_mem("vvar.bin", vvar_start, vvar_end);

return 0;
}


dmesg from 4.5-rc6+:

[ 240.083864] NMI watchdog: BUG: soft lockup - CPU#4 stuck for 22s! [dumpvvar:2681]
[ 240.083868] Modules linked in: rfcomm ctr ccm pci_stub vboxpci(OE) vboxnetadp(OE) vboxnetflt(OE) vboxdrv(OE) binfmt_misc bnep synaptics_usb hid_generic hid_logitech ff_memless usbhid xfs libcrc32c arc4 nls_iso8859_1 drbg ansi_cprng dm_crypt dell_wmi sparse_keymap intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel dell_laptop dcdbas dell_smm_hwmon kvm irqbypass crct10dif_pclmul uvcvideo crc32_pclmul btusb ghash_clmulni_intel btrtl btbcm aesni_intel iwlmvm videobuf2_vmalloc aes_x86_64 videobuf2_memops lrw btintel videobuf2_v4l2 bluetooth videobuf2_core mac80211 gf128mul videodev glue_helper ablk_helper cryptd media iwlwifi input_leds snd_hda_codec_realtek snd_hda_codec_generic snd_hda_codec_hdmi joydev cfg80211 snd_soc_rt5640 serio_raw lpc_ich snd_soc_rl6231 snd_hda_intel snd_soc_core
[ 240.083900] snd_hda_codec snd_compress snd_hda_core ac97_bus snd_pcm_dmaengine snd_hwdep snd_pcm mei_me mei ie31200_edac snd_seq_midi edac_core shpchp snd_seq_midi_event snd_rawmidi snd_seq snd_seq_device snd_timer snd elan_i2c 8250_fintek soundcore snd_soc_sst_acpi dw_dmac i2c_designware_platform snd_soc_sst_match dw_dmac_core dell_smo8800 8250_dw i2c_designware_core dell_rbtn spi_pxa2xx_platform mac_hid parport_pc ppdev lp parport autofs4 amdkfd amd_iommu_v2 radeon i915 ttm i2c_algo_bit drm_kms_helper psmouse syscopyarea ahci sysfillrect libahci sysimgblt sdhci_pci fb_sys_fops e1000e drm ptp pps_core wmi sdhci_acpi video sdhci i2c_hid hid
[ 240.083926] CPU: 4 PID: 2681 Comm: dumpvvar Tainted: G OE 4.5.0-rc6+ #1
[ 240.083927] Hardware name: Dell Inc. Precision M2800/0JTY19, BIOS A06 05/18/2015
[ 240.083928] task: ffff8803e1d1e3c0 ti: ffff880415688000 task.ti: ffff880415688000
[ 240.083929] RIP: 0010:[<ffffffff813e7c05>] [<ffffffff813e7c05>] copy_user_enhanced_fast_string+0x5/0x10
[ 240.083934] RSP: 0018:ffff88041568bcf0 EFLAGS: 00010286
[ 240.083935] RAX: 0000000000000001 RBX: 0000000000001000 RCX: 000000000000031f
[ 240.083936] RDX: 0000000000001000 RSI: 00007ffd11ac1ce1 RDI: ffff88042e6e3ce1
[ 240.083937] RBP: ffff88041568bd20 R08: ffffea0010b9b8dc R09: ffff8803e589f478
[ 240.083938] R10: ffff88041568bc48 R11: 0000000000001000 R12: 0000000000001000
[ 240.083939] R13: ffff88042e6e4000 R14: ffff88041568be48 R15: ffff8803e1fd3168
[ 240.083940] FS: 00007fa068c37700(0000) GS:ffff88042eb00000(0000) knlGS:0000000000000000
[ 240.083941] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 240.083942] CR2: 00007ffd119fce3c CR3: 00000003f0877000 CR4: 00000000001406e0
[ 240.083943] Stack:
[ 240.083944] ffffffff813ee43d 0000000000001000 0000000000001000 ffff88041568be80
[ 240.083945] 0000000000000000 ffff8803e1fd3168 ffff88041568bdb0 ffffffff8118b962
[ 240.083947] ffff88041568bd78 0000000000001000 00000000cbbe8800 0000000000001000
[ 240.083948] Call Trace:
[ 240.083951] [<ffffffff813ee43d>] ? iov_iter_copy_from_user_atomic+0x8d/0x220
[ 240.083955] [<ffffffff8118b962>] generic_perform_write+0xf2/0x1c0
[ 240.083957] [<ffffffff8118e272>] __generic_file_write_iter+0x1a2/0x1e0
[ 240.083959] [<ffffffff8118e396>] generic_file_write_iter+0xe6/0x1e0
[ 240.083961] [<ffffffff812119a9>] __vfs_write+0xc9/0x120
[ 240.083963] [<ffffffff81212249>] vfs_write+0xa9/0x1a0
[ 240.083965] [<ffffffff81213175>] SyS_write+0x55/0xc0
[ 240.083968] [<ffffffff81813536>] entry_SYSCALL_64_fastpath+0x16/0x75
[ 240.083969] Code: 48 ff c6 48 ff c7 ff c9 75 f2 89 d1 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 31 c0 0f 1f 00 c3 0f 1f 80 00 00 00 00 0f 1f 00 89 d1 <f3> a4 31 c0 0f 1f 00 c3 0f 1f 00 0f 1f 00 83 fa 08 0f 82 95 00

dmesg from Ubuntu 4.2 ():

[ 132.261436] NMI watchdog: BUG: soft lockup - CPU#7 stuck for 23s! [dumpvvar:2959]
[ 132.261448] Modules linked in: rfcomm ctr ccm xt_conntrack ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter ip_tables x_tables nf_nat nf_conntrack br_netfilter bridge stp llc aufs pci_stub vboxpci(OE) vboxnetadp(OE) vboxnetflt(OE) vboxdrv(OE) bnep binfmt_misc xfs libcrc32c arc4 drbg ansi_cprng dm_crypt dell_laptop dell_wmi dcdbas sparse_keymap nls_iso8859_1 dell_smm_hwmon intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp uvcvideo videobuf2_vmalloc videobuf2_memops kvm_intel videobuf2_core v4l2_common videodev kvm btusb media btrtl btbcm btintel crct10dif_pclmul synaptics_usb crc32_pclmul bluetooth iwlmvm aesni_intel aes_x86_64 lrw mac80211 gf128mul glue_helper ablk_helper cryptd snd_hda_codec_hdmi snd_soc_rt5640 snd_soc_rl6231
[ 132.261476] snd_soc_core joydev input_leds snd_compress snd_hda_codec_realtek serio_raw iwlwifi ac97_bus snd_hda_codec_generic snd_pcm_dmaengine snd_seq_midi cfg80211 snd_hda_intel mei_me snd_hda_codec lpc_ich snd_seq_midi_event snd_hda_core mei snd_hwdep snd_rawmidi snd_pcm ie31200_edac shpchp edac_core snd_seq snd_seq_device snd_timer snd 8250_fintek snd_soc_sst_acpi soundcore dw_dmac dw_dmac_core dell_rbtn 8250_dw i2c_designware_platform dell_smo8800 spi_pxa2xx_platform i2c_designware_core mac_hid parport_pc ppdev lp parport autofs4 hid_generic hid_logitech ff_memless usbhid amdkfd amd_iommu_v2 radeon i915 ttm i2c_algo_bit psmouse drm_kms_helper ahci e1000e libahci sdhci_pci drm ptp pps_core wmi sdhci_acpi video sdhci i2c_hid hid
[ 132.261510] CPU: 7 PID: 2959 Comm: dumpvvar Tainted: G OE 4.2.0-30-generic #36-Ubuntu
[ 132.261511] Hardware name: Dell Inc. Precision M2800/0JTY19, BIOS A06 05/18/2015
[ 132.261512] task: ffff8803e7d90dc0 ti: ffff880419eb0000 task.ti: ffff880419eb0000
[ 132.261513] RIP: 0010:[<ffffffff813d37a5>] [<ffffffff813d37a5>] copy_user_enhanced_fast_string+0x5/0x10
[ 132.261517] RSP: 0018:ffff880419eb3c80 EFLAGS: 00010286
[ 132.261518] RAX: 0000000000000001 RBX: 0000000000000000 RCX: 0000000000000123
[ 132.261519] RDX: 0000000000001000 RSI: 00007ffdf95f7edd RDI: ffff8803cacd5edd
[ 132.261520] RBP: ffff880419eb3cb8 R08: ffffea000f2b355c R09: ffff8803dcdb8000
[ 132.261520] R10: ffff880419eb3bb8 R11: 0000000000001000 R12: ffffffff811834b0
[ 132.261521] R13: ffff880419eb3c38 R14: 000000000000000f R15: 0000000000001000
[ 132.261522] FS: 00007fdb3cec6700(0000) GS:ffff88042ebc0000(0000) knlGS:0000000000000000
[ 132.261523] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 132.261523] CR2: 00007ffdf95a4ed0 CR3: 00000003eab61000 CR4: 00000000001406e0
[ 132.261524] Stack:
[ 132.261525] ffffffff813d9ed1 ffff880419eb3db8 0000000000001000 0000000000001000
[ 132.261526] ffff880419eb3e58 0000000000000000 ffff8803f6340568 ffff880419eb3d68
[ 132.261527] ffffffff81182595 ffff880419eb3d28 ffffffff81218989 ffff880419eb3da8
[ 132.261528] Call Trace:
[ 132.261531] [<ffffffff813d9ed1>] ? iov_iter_copy_from_user_atomic+0x91/0x220
[ 132.261535] [<ffffffff81182595>] generic_perform_write+0xf5/0x1d0
[ 132.261537] [<ffffffff81218989>] ? generic_update_time+0x79/0xd0
[ 132.261539] [<ffffffff81184752>] __generic_file_write_iter+0x1a2/0x1e0
[ 132.261540] [<ffffffff8118487a>] generic_file_write_iter+0xea/0x1e0
[ 132.261542] [<ffffffff811fd2eb>] new_sync_write+0x9b/0xe0
[ 132.261543] [<ffffffff811fd356>] __vfs_write+0x26/0x40
[ 132.261545] [<ffffffff811fdcd6>] vfs_write+0xa6/0x1a0
[ 132.261546] [<ffffffff811fcc8f>] ? do_sys_open+0x1bf/0x280
[ 132.261547] [<ffffffff811fe9c5>] SyS_write+0x55/0xc0
[ 132.261549] [<ffffffff817f2d72>] entry_SYSCALL_64_fastpath+0x16/0x75
[ 132.261550] Code: 48 ff c6 48 ff c7 ff c9 75 f2 89 d1 c1 e9 03 83 e2 07 f3 48 a5 89 d1 f3 a4 31 c0 0f 1f 00 c3 0f 1f 80 00 00 00 00 0f 1f 00 89 d1 <f3> a4 31 c0 0f 1f 00 c3 0f 1f 00 0f 1f 00 83 fa 08 0f 82 95 00
[ 140.254994] NMI watchdog: BUG: soft lockup - CPU#1 stuck for 22s! [kworker/1:2:441]
[ 140.255006] Modules linked in: rfcomm ctr ccm xt_conntrack ipt_MASQUERADE nf_nat_masquerade_ipv4 iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 xt_addrtype iptable_filter ip_tables x_tables nf_nat nf_conntrack br_netfilter bridge stp llc aufs pci_stub vboxpci(OE) vboxnetadp(OE) vboxnetflt(OE) vboxdrv(OE) bnep binfmt_misc xfs libcrc32c arc4 drbg ansi_cprng dm_crypt dell_laptop dell_wmi dcdbas sparse_keymap nls_iso8859_1 dell_smm_hwmon intel_rapl x86_pkg_temp_thermal intel_powerclamp coretemp uvcvideo videobuf2_vmalloc videobuf2_memops kvm_intel videobuf2_core v4l2_common videodev kvm btusb media btrtl btbcm btintel crct10dif_pclmul synaptics_usb crc32_pclmul bluetooth iwlmvm aesni_intel aes_x86_64 lrw mac80211 gf128mul glue_helper ablk_helper cryptd snd_hda_codec_hdmi snd_soc_rt5640 snd_soc_rl6231
[ 140.255034] snd_soc_core joydev input_leds snd_compress snd_hda_codec_realtek serio_raw iwlwifi ac97_bus snd_hda_codec_generic snd_pcm_dmaengine snd_seq_midi cfg80211 snd_hda_intel mei_me snd_hda_codec lpc_ich snd_seq_midi_event snd_hda_core mei snd_hwdep snd_rawmidi snd_pcm ie31200_edac shpchp edac_core snd_seq snd_seq_device snd_timer snd 8250_fintek snd_soc_sst_acpi soundcore dw_dmac dw_dmac_core dell_rbtn 8250_dw i2c_designware_platform dell_smo8800 spi_pxa2xx_platform i2c_designware_core mac_hid parport_pc ppdev lp parport autofs4 hid_generic hid_logitech ff_memless usbhid amdkfd amd_iommu_v2 radeon i915 ttm i2c_algo_bit psmouse drm_kms_helper ahci e1000e libahci sdhci_pci drm ptp pps_core wmi sdhci_acpi video sdhci i2c_hid hid
[ 140.255071] CPU: 1 PID: 441 Comm: kworker/1:2 Tainted: G OEL 4.2.0-30-generic #36-Ubuntu
[ 140.255072] Hardware name: Dell Inc. Precision M2800/0JTY19, BIOS A06 05/18/2015
[ 140.255075] Workqueue: events __net_random_once_deferred
[ 140.255076] task: ffff880415b58000 ti: ffff88041952c000 task.ti: ffff88041952c000
[ 140.255077] RIP: 0010:[<ffffffff810fcd88>] [<ffffffff810fcd88>] smp_call_function_many+0x1f8/0x260
[ 140.255080] RSP: 0018:ffff88041952fc38 EFLAGS: 00000202
[ 140.255081] RAX: 0000000000000003 RBX: ffff88042ea57780 RCX: 0000000000000007
[ 140.255082] RDX: ffff88042ebda858 RSI: 0000000000000100 RDI: ffff88042ea57788
[ 140.255082] RBP: ffff88041952fc78 R08: 0000000000000000 R09: 00000000000000fd
[ 140.255083] R10: 0000000000000004 R11: ffff88042ea57788 R12: ffffffff81d2ac40
[ 140.255084] R13: 000000000000024c R14: 0000000000000001 R15: 0000010000000001
[ 140.255085] FS: 0000000000000000(0000) GS:ffff88042ea40000(0000) knlGS:0000000000000000
[ 140.255085] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 140.255086] CR2: 0000564bc15691e0 CR3: 0000000002c0c000 CR4: 00000000001406e0
[ 140.255087] Stack:
[ 140.255087] 0000000000017740 01ffffff00000001 ffff88041952fc58 ffffffff817220eb
[ 140.255089] ffffffff8101c210 0000000000000000 ffffffff817220ec ffff8803dd20a340
[ 140.255090] ffff88041952fca8 ffffffff810fced1 0000000000000005 ffffffff817220eb
[ 140.255091] Call Trace:
[ 140.255094] [<ffffffff817220eb>] ? ipqhashfn+0x1b/0xb0
[ 140.255096] [<ffffffff8101c210>] ? arch_unregister_cpu+0x30/0x30
[ 140.255097] [<ffffffff817220ec>] ? ipqhashfn+0x1c/0xb0
[ 140.255098] [<ffffffff810fced1>] on_each_cpu+0x31/0x60
[ 140.255099] [<ffffffff817220eb>] ? ipqhashfn+0x1b/0xb0
[ 140.255101] [<ffffffff8101d20a>] text_poke_bp+0x6a/0xf0
[ 140.255102] [<ffffffff817220eb>] ? ipqhashfn+0x1b/0xb0
[ 140.255103] [<ffffffff81019cbb>] arch_jump_label_transform+0x9b/0x120
[ 140.255105] [<ffffffff81181068>] __jump_label_update+0x58/0x70
[ 140.255106] [<ffffffff81181113>] jump_label_update+0x93/0xa0
[ 140.255108] [<ffffffff811813b5>] __static_key_slow_dec+0x75/0xb0
[ 140.255109] [<ffffffff81181416>] static_key_slow_dec+0x26/0x50
[ 140.255110] [<ffffffff816fa983>] __net_random_once_deferred+0x23/0x40
[ 140.255113] [<ffffffff810947aa>] process_one_work+0x1aa/0x440
[ 140.255115] [<ffffffff81094a8b>] worker_thread+0x4b/0x4c0
[ 140.255116] [<ffffffff81094a40>] ? process_one_work+0x440/0x440
[ 140.255117] [<ffffffff81094a40>] ? process_one_work+0x440/0x440
[ 140.255119] [<ffffffff8109ae28>] kthread+0xd8/0xf0
[ 140.255121] [<ffffffff8109ad50>] ? kthread_create_on_node+0x1f0/0x1f0
[ 140.255123] [<ffffffff817f319f>] ret_from_fork+0x3f/0x70
[ 140.255124] [<ffffffff8109ad50>] ? kthread_create_on_node+0x1f0/0x1f0
[ 140.255124] Code: d2 e8 0d e9 2d 00 3b 05 ab f0 c2 00 89 c1 0f 8d 99 fe ff ff 48 98 49 8b 55 00 48 03 14 c5 80 9d d2 81 8b 42 18 a8 01 74 ca f3 90 <8b> 42 18 a8 01 75 f7 eb bf 0f b6 4d c8 4c 89 fa 4c 89 f6 44 89