Re: [PATCH] fs/proc/kcore.c: add mmap interface

From: Andrew Morton
Date: Mon May 31 2021 - 21:23:56 EST


On Wed, 26 May 2021 15:51:42 +0800 Feng zhou <zhoufeng.zf@xxxxxxxxxxxxx> wrote:

> From: ZHOUFENG <zhoufeng.zf@xxxxxxxxxxxxx>
>
> When monitoring the kernel with drgn
> (https://github.com/osandov/drgn) to access kernel data structures,
> we found that it issues a very large number of system calls. drgn is
> implemented by reading /proc/kcore. After looking at the kcore code, we
> found that kcore does not implement mmap, so every access goes through
> read, which triggers frequent context switches. Therefore, we want to
> add an mmap interface to improve performance. Since the vmalloc and
> module areas change as memory is allocated and released, their
> consistency cannot be guaranteed, so the mmap interface only maps
> KCORE_TEXT and KCORE_RAM.
>
> ...
>
> --- a/fs/proc/kcore.c
> +++ b/fs/proc/kcore.c
> @@ -573,11 +573,81 @@ static int release_kcore(struct inode *inode, struct file *file)
> return 0;
> }
>
> +static vm_fault_t mmap_kcore_fault(struct vm_fault *vmf)
> +{
> + return VM_FAULT_SIGBUS;
> +}
> +
> +static const struct vm_operations_struct kcore_mmap_ops = {
> + .fault = mmap_kcore_fault,
> +};
> +
> +static int mmap_kcore(struct file *file, struct vm_area_struct *vma)
> +{
> + size_t size = vma->vm_end - vma->vm_start;
> + u64 start, pfn;
> + int nphdr;
> + size_t data_offset;
> + size_t phdrs_len, notes_len;
> + struct kcore_list *m = NULL;
> + int ret = 0;
> +
> + down_read(&kclist_lock);
> +
> + get_kcore_size(&nphdr, &phdrs_len, &notes_len, &data_offset);
> +
> + start = kc_offset_to_vaddr(((u64)vma->vm_pgoff << PAGE_SHIFT) -
> + ((data_offset >> PAGE_SHIFT) << PAGE_SHIFT));
> +
> + list_for_each_entry(m, &kclist_head, list) {
> + if (start >= m->addr && size <= m->size)
> + break;
> + }
> +
> + if (&m->list == &kclist_head) {
> + ret = -EINVAL;
> + goto out;
> + }
> +
> + if (vma->vm_flags & (VM_WRITE | VM_EXEC)) {
> + ret = -EPERM;
> + goto out;
> + }
> +
> + vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
> + vma->vm_flags |= VM_MIXEDMAP;
> + vma->vm_ops = &kcore_mmap_ops;
> +
> + if (kern_addr_valid(start)) {
> + if (m->type == KCORE_RAM || m->type == KCORE_REMAP)
> + pfn = __pa(start) >> PAGE_SHIFT;
> + else if (m->type == KCORE_TEXT)
> + pfn = __pa_symbol(start) >> PAGE_SHIFT;
> + else {
> + ret = -EFAULT;
> + goto out;
> + }
> +
> + if (remap_pfn_range(vma, vma->vm_start, pfn, size,
> + vma->vm_page_prot)) {
> + ret = -EAGAIN;

EAGAIN seems a strange errno for this case.  The mmap man page says:

	EAGAIN  The file has been locked, or too much memory has been
	        locked (see setrlimit(2)).


remap_pfn_range() already returns an errno - why not return whatever
that code was?

> + goto out;
> + }
> + } else {
> + ret = -EFAULT;
> + }
> +
> +out:
> + up_read(&kclist_lock);
> + return ret;
> +}
> +
> static const struct proc_ops kcore_proc_ops = {
> .proc_read = read_kcore,
> .proc_open = open_kcore,
> .proc_release = release_kcore,
> .proc_lseek = default_llseek,
> + .proc_mmap = mmap_kcore,
> };
>
> /* just remember that we have to update kcore */

Otherwise looks OK to me. Please update the changelog to reflect the
discussion thus far and send a v2?