Re: [BUG] general protection fault when reading /proc/kcore

From: Mike Rapoport
Date: Tue Aug 17 2021 - 03:56:12 EST


On Mon, Aug 16, 2021 at 10:13:18PM +0300, Mike Rapoport wrote:
> On Mon, Aug 16, 2021 at 08:38:43PM +0200, David Hildenbrand wrote:
> > On 16.08.21 20:12, Jiri Olsa wrote:
> > > On Mon, Aug 16, 2021 at 07:49:15PM +0200, David Hildenbrand wrote:
> > > > On 16.08.21 19:34, Jiri Olsa wrote:
> > > > > hi,
> > > > > I'm getting fault below when running:
> > > > >
> > > > > # cat /proc/kallsyms | grep ksys_read
> > > > > ffffffff8136d580 T ksys_read
> > > > > # objdump -d --start-address=0xffffffff8136d580 --stop-address=0xffffffff8136d590 /proc/kcore
> > > > >
> > > > > /proc/kcore: file format elf64-x86-64
> > > > >
> > > > > Segmentation fault
> > > > >
> > > > > any idea? config is attached
> > > >
> > > > Just tried with a different config on 5.14.0-rc6+
> > > >
> > > > [root@localhost ~]# cat /proc/kallsyms | grep ksys_read
> > > > ffffffff8927a800 T ksys_readahead
> > > > ffffffff89333660 T ksys_read
> > > >
> > > > [root@localhost ~]# objdump -d --start-address=0xffffffff89333660
> > > > --stop-address=0xffffffff89333670
> > > >
> > > > a.out: file format elf64-x86-64
> > > >
> > > >
> > > >
> > > > The kern_addr_valid(start) seems to fault in your case, which is weird,
> > > > because it merely walks the page tables. But it seems to complain about a
> > > > non-canonical address 0xf887ffcbff000
> > > >
> > > > Can you post your QEMU cmdline? Did you test this on other kernel versions?
> > >
> > > I'm using virt-manager so:
> > >
> > > /usr/bin/qemu-system-x86_64 -name guest=fedora33,debug-threads=on -S -object secret,id=masterKey0,format=raw,file=/var/lib/libvirt/qemu/domain-13-fedora33/master-key.aes -machine pc-q35-5.1,accel=kvm,usb=off,vmport=off,dump-guest-core=off,memory-backend=pc.ram -cpu Skylake-Server-IBRS,ss=on,vmx=on,pdcm=on,hypervisor=on,tsc-adjust=on,clflushopt=on,umip=on,pku=on,stibp=on,arch-capabilities=on,ssbd=on,xsaves=on,ibpb=on,amd-stibp=on,amd-ssbd=on,skip-l1dfl-vmentry=on,pschange-mc-no=on -m 8192 -object memory-backend-ram,id=pc.ram,size=8589934592 -overcommit mem-lock=off -smp 20,sockets=20,cores=1,threads=1 -uuid 2185d5a9-dbad-4d61-aa4e-97af9fd7ebca -no-user-config -nodefaults -chardev socket,id=charmonitor,fd=36,server,nowait -mon chardev=charmonitor,id=monitor,mode=control -rtc base=utc,driftfix=slew -global kvm-pit.lost_tick_policy=delay -no-hpet -no-shutdown -global ICH9-LPC.disable_s3=1 -global ICH9-LPC.disable_s4=1 -boot strict=on -kernel /home/jolsa/qemu/run/vmlinux -initrd /home/jolsa/qemu/run/initrd -append root=/dev/mapper/fedora_fedora-root ro rd.lvm.lv=fedora_fedora/root console=tty0 console=ttyS0,115200 -device pcie-root-port,port=0x10,chassis=1,id=pci.1,bus=pcie.0,multifunction=on,addr=0x2 -device pcie-root-port,port=0x11,chassis=2,id=pci.2,bus=pcie.0,addr=0x2.0x1 -device pcie-root-port,port=0x12,chassis=3,id=pci.3,bus=pcie.0,addr=0x2.0x2 -device pcie-root-port,port=0x13,chassis=4,id=pci.4,bus=pcie.0,addr=0x2.0x3 -device pcie-root-port,port=0x14,chassis=5,id=pci.5,bus=pcie.0,addr=0x2.0x4 -device pcie-root-port,port=0x15,chassis=6,id=pci.6,bus=pcie.0,addr=0x2.0x5 -device pcie-root-port,port=0x16,chassis=7,id=pci.7,bus=pcie.0,addr=0x2.0x6 -device qemu-xhci,p2=15,p3=15,id=usb,bus=pci.2,addr=0x0 -device virtio-serial-pci,id=virtio-serial0,bus=pci.3,addr=0x0 -blockdev {"driver":"file","filename":"/var/lib/libvirt/images/fedora33.qcow2","node-name":"libvirt-2-storage","auto-read-only":true,"discard":"unmap"} -blockdev 
{"node-name":"libvirt-2-format","read-only":false,"driver":"qcow2","file":"libvirt-2-storage","backing":null} -device virtio-blk-pci,bus=pci.4,addr=0x0,drive=libvirt-2-format,id=virtio-disk0,bootindex=1 -device ide-cd,bus=ide.0,id=sata0-0-0 -netdev tap,fd=38,id=hostnet0,vhost=on,vhostfd=39 -device virtio-net-pci,netdev=hostnet0,id=net0,mac=52:54:00:f3:c6:e7,bus=pci.1,addr=0x0 -chardev pty,id=charserial0 -device isa-serial,chardev=charserial0,id=serial0 -chardev socket,id=charchannel0,fd=40,server,nowait -device virtserialport,bus=virtio-serial0.0,nr=1,chardev=charchannel0,id=channel0,name=org.qemu.guest_agent.0 -chardev spicevmc,id=charchannel1,name=vdagent -device virtserialport,bus=virtio-serial0.0,nr=2,chardev=charchannel1,id=channel1,name=com.redhat.spice.0 -device usb-tablet,id=input0,bus=usb.0,port=1 -spice port=5900,addr=127.0.0.1,disable-ticketing,image-compression=off,seamless-migration=on -device qxl-vga,id=video0,ram_size=67108864,vram_size=67108864,vram64_size_mb=0,vgamem_mb=16,max_outputs=1,bus=pcie.0,addr=0x1 -device ich9-intel-hda,id=sound0,bus=pcie.0,addr=0x1b -device hda-duplex,id=sound0-codec0,bus=sound0.0,cad=0 -chardev spicevmc,id=charredir0,name=usbredir -device usb-redir,chardev=charredir0,id=redir0,bus=usb.0,port=2 -chardev spicevmc,id=charredir1,name=usbredir -device usb-redir,chardev=charredir1,id=redir1,bus=usb.0,port=3 -device virtio-balloon-pci,id=balloon0,bus=pci.5,addr=0x0 -object rng-random,id=objrng0,filename=/dev/urandom -device virtio-rng-pci,rng=objrng0,id=rng0,bus=pci.6,addr=0x0 -sandbox on,obsolete=deny,elevateprivileges=deny,spawn=deny,resourcecontrol=deny -msg timestamp=on
>
> > > so far I tested just bpf-next/master:
> > > git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
> > >
> >
> > Just tried with upstream Linux (5.14.0-rc6) and your config without
> > triggering it. I'm using "-cpu host", though, on an AMD Ryzen 9 3900X
>
> With Jiri's config and '-cpu <very long string>' it triggers for me on
> v5.14-rc6.
>
> I'll also try to take a look tomorrow.

There are some PMD entries in the high kernel mappings that are non-zero
but not present, so the pmd_none() check alone does not reject them and
kern_addr_valid() goes on to access a not-present PMD. The patch below
fixes the issue for me. Jiri, can you check if it works for you?

diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index ddeaba947eb3..07b56e90db5d 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1433,18 +1433,18 @@ int kern_addr_valid(unsigned long addr)
return 0;

p4d = p4d_offset(pgd, addr);
- if (p4d_none(*p4d))
+ if (p4d_none(*p4d) || !p4d_present(*p4d))
return 0;

pud = pud_offset(p4d, addr);
- if (pud_none(*pud))
+ if (pud_none(*pud) || !pud_present(*pud))
return 0;

if (pud_large(*pud))
return pfn_valid(pud_pfn(*pud));

pmd = pmd_offset(pud, addr);
- if (pmd_none(*pmd))
+ if (pmd_none(*pmd) || !pmd_present(*pmd))
return 0;

if (pmd_large(*pmd))

--
Sincerely yours,
Mike.