[rfc/patch] mm/slub: restore/expand unfreeze_partials() local exclusion scope
From: Mike Galbraith
Date: Fri Jul 09 2021 - 01:21:10 EST
Well, a bug report in patch form, actually.
Empirical evidence that PREEMPT_RT exclusion around unfreeze_partials()
is buggy lies in the readily reproducible PREEMPT_RT+SLUB_CPU_PARTIAL=y
explosion below, which this patch precludes. Slub expertise required.
Dirt simple reproduction method:
terminal1:
while ! [ -f "/stop" ]; do
./runltp -f zram; hackbench.sh; ./runltp -f controllers;
done
terminal2:
while ! [ -f "/stop" ]; do
ccache -C; make clean; make -j8;
done
Wait for it...
[ 1321.540157] general protection fault, maybe for address 0xffffea0004624ea8: 0000 [#1] PREEMPT_RT SMP NOPTI
[ 1321.540162] CPU: 3 PID: 18442 Comm: dd Kdump: loaded Tainted: G E 5.13.1-rt1-rt #5
[ 1321.540165] Hardware name: MEDION MS-7848/MS-7848, BIOS M7848W08.20C 09/23/2013
[ 1321.540166] RIP: 0010:___slab_alloc.constprop.95+0x102/0xb00
[ 1321.540172] Code: 18 4d 85 db 66 89 55 b8 0f 95 c2 83 e0 7f c1 e2 07 09 d0 88 45 bb 41 f6 45 0b 40 4c 8b 65 b8 74 19 4c 89 d8 4c 89 f2 4c 89 e1 <f0> 49 0f c7 4f 20 0f 84 c6 00 00 00 f3 90 eb ab 4c 89 5d 80 9c 8f
[ 1321.540174] RSP: 0018:ffff8883008b3b70 EFLAGS: 00010202
[ 1321.540176] RAX: 0000000000190015 RBX: 0000000000000000 RCX: 00000001ffff7fff
[ 1321.540177] RDX: 00000001ffffffff RSI: 0000000000000023 RDI: ffffffff81e52c48
[ 1321.540179] RBP: ffff8883008b3c50 R08: ffffffffffffffff R09: 0000000000000000
[ 1321.540180] R10: ffff8883008b3c70 R11: 0000000000190015 R12: 00000001ffff7fff
[ 1321.540181] R13: ffff88810019dd00 R14: 00000001ffffffff R15: ffffea0004624e88
[ 1321.540182] FS: 00007fb763c4b580(0000) GS:ffff88840ecc0000(0000) knlGS:0000000000000000
[ 1321.540184] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 1321.540185] CR2: 00007fb763c8b000 CR3: 0000000193f2c002 CR4: 00000000001706e0
[ 1321.540187] Call Trace:
[ 1321.540191] ? __alloc_file+0x26/0xe0
[ 1321.540196] ? migrate_enable+0x9c/0x100
[ 1321.540201] ? __alloc_file+0x26/0xe0
[ 1321.540204] ? __slab_alloc.isra.81.constprop.94+0x3d/0x50
[ 1321.540206] ? __alloc_file+0x26/0xe0
[ 1321.540208] __slab_alloc.isra.81.constprop.94+0x3d/0x50
[ 1321.540210] ? __alloc_file+0x26/0xe0
[ 1321.540212] kmem_cache_alloc+0xba/0x450
[ 1321.540215] __alloc_file+0x26/0xe0
[ 1321.540218] alloc_empty_file+0x43/0xe0
[ 1321.540221] path_openat+0x35/0xe30
[ 1321.540224] ? ___slab_alloc.constprop.95+0x48e/0xb00
[ 1321.540227] ? filemap_map_pages+0xf0/0x3e0
[ 1321.540230] ? getname_flags+0x32/0x170
[ 1321.540233] do_filp_open+0xa2/0x100
[ 1321.540237] ? getname_flags+0x32/0x170
[ 1321.540240] ? migrate_enable+0x9c/0x100
[ 1321.540242] ? __slab_alloc.isra.81.constprop.94+0x45/0x50
[ 1321.540245] ? alloc_fd+0xe2/0x1b0
[ 1321.540249] ? do_sys_openat2+0x248/0x310
[ 1321.540250] do_sys_openat2+0x248/0x310
[ 1321.540253] do_sys_open+0x47/0x60
[ 1321.540255] do_syscall_64+0x39/0x80
[ 1321.540259] entry_SYSCALL_64_after_hwframe+0x44/0xae
[ 1321.540262] RIP: 0033:0x7fb76378cb41
[ 1321.540264] Code: 41 83 e2 40 48 89 54 24 30 75 3e 89 f0 25 00 00 41 00 3d 00 00 41 00 74 30 89 f2 b8 01 01 00 00 48 89 fe bf 9c ff ff ff 0f 05 <48> 3d 00 f0 ff ff 77 3f 48 8b 4c 24 18 64 48 33 0c 25 28 00 00 00
[ 1321.540266] RSP: 002b:00007fff10f2ad30 EFLAGS: 00000287 ORIG_RAX: 0000000000000101
[ 1321.540268] RAX: ffffffffffffffda RBX: 0000563c1c09e170 RCX: 00007fb76378cb41
[ 1321.540269] RDX: 0000000000080000 RSI: 0000563c1c09e140 RDI: 00000000ffffff9c
[ 1321.540271] RBP: 00007fff10f2ae70 R08: 0000000000000000 R09: 00007fff10f2ae83
[ 1321.540272] R10: 0000000000000000 R11: 0000000000000287 R12: 0000563c1c09df48
[ 1321.540273] R13: 000000000000000a R14: 0000563c1c09df20 R15: 000000000000000a
[ 1321.540275] Modules linked in: zram(E) sr_mod(E) cdrom(E) btrfs(E) blake2b_generic(E) xor(E) raid6_pq(E) xfs(E) libcrc32c(E) af_packet(E) ip6table_mangle(E) ip6table_raw(E) iptable_raw(E) bridge(E) stp(E) llc(E) iscsi_ibft(E) iscsi_boot_sysfs(E) nfnetlink(E) ebtable_filter(E) rfkill(E) ebtables(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E) ip_tables(E) x_tables(E) bpfilter(E) joydev(E) usblp(E) intel_rapl_msr(E) mei_hdcp(E) at24(E) regmap_i2c(E) iTCO_wdt(E) intel_pmc_bxt(E) iTCO_vendor_support(E) intel_rapl_common(E) x86_pkg_temp_thermal(E) intel_powerclamp(E) coretemp(E) nls_iso8859_1(E) nls_cp437(E) kvm_intel(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E) kvm(E) ledtrig_audio(E) snd_hda_codec_hdmi(E) irqbypass(E) snd_hda_intel(E) crct10dif_pclmul(E) snd_intel_dspcfg(E) crc32_pclmul(E) ghash_clmulni_intel(E) snd_hda_codec(E) aesni_intel(E) snd_hwdep(E) crypto_simd(E) snd_hda_core(E) cryptd(E) r8169(E) snd_pcm(E) snd_timer(E) realtek(E) mei_me(E) mdio_devres(E) i2c_i801(E)
[ 1321.540320] lpc_ich(E) snd(E) pcspkr(E) i2c_smbus(E) mfd_core(E) libphy(E) soundcore(E) mei(E) fan(E) thermal(E) intel_smartconnect(E) nfsd(E) auth_rpcgss(E) nfs_acl(E) lockd(E) grace(E) sch_fq_codel(E) sunrpc(E) fuse(E) configfs(E) hid_logitech_hidpp(E) hid_logitech_dj(E) uas(E) usb_storage(E) hid_generic(E) usbhid(E) nouveau(E) wmi(E) drm_ttm_helper(E) ttm(E) i2c_algo_bit(E) drm_kms_helper(E) syscopyarea(E) sysfillrect(E) sysimgblt(E) fb_sys_fops(E) ahci(E) cec(E) xhci_pci(E) libahci(E) rc_core(E) ehci_pci(E) xhci_hcd(E) ehci_hcd(E) libata(E) drm(E) usbcore(E) video(E) button(E) sd_mod(E) t10_pi(E) vfat(E) fat(E) virtio_blk(E) virtio_mmio(E) virtio_ring(E) virtio(E) ext4(E) crc32c_intel(E) crc16(E) mbcache(E) jbd2(E) loop(E) sg(E) dm_multipath(E) dm_mod(E) scsi_dh_rdac(E) scsi_dh_emc(E) scsi_dh_alua(E) scsi_mod(E) msr(E) autofs4(E) [last unloaded: zram]
[ 1321.540366] Dumping ftrace buffer:
[ 1321.540369] (ftrace buffer empty)
Not-signed-off-by: Mike Galbraith <efault@xxxxxx>
---
mm/slub.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2497,7 +2497,9 @@ static void put_cpu_partial(struct kmem_
* partial array is full. Move the existing
* set to the per node partial list.
*/
+ local_lock(&s->cpu_slab->lock);
unfreeze_partials(s);
+ local_unlock(&s->cpu_slab->lock);
oldpage = NULL;
pobjects = 0;
pages = 0;
@@ -2579,7 +2581,9 @@ static void flush_cpu_slab(struct work_s
if (c->page)
flush_slab(s, c, true);
+ local_lock(&s->cpu_slab->lock);
unfreeze_partials(s);
+ local_unlock(&s->cpu_slab->lock);
}
static bool has_cpu_slab(int cpu, struct kmem_cache *s)
@@ -2632,8 +2636,11 @@ static int slub_cpu_dead(unsigned int cp
struct kmem_cache *s;
mutex_lock(&slab_mutex);
- list_for_each_entry(s, &slab_caches, list)
+ list_for_each_entry(s, &slab_caches, list) {
+ local_lock(&s->cpu_slab->lock);
__flush_cpu_slab(s, cpu);
+ local_unlock(&s->cpu_slab->lock);
+ }
mutex_unlock(&slab_mutex);
return 0;
}