Re: [PATCH] LoongArch: Add support to clone a time namespace

From: Huacai Chen
Date: Sun May 21 2023 - 23:58:03 EST


Hi, Tiezhu,

On Sat, May 20, 2023 at 6:35 PM Tiezhu Yang <yangtiezhu@xxxxxxxxxxx> wrote:
>
>
>
> On 05/18/2023 10:25 AM, Huacai Chen wrote:
> > Hi, Tiezhu,
> >
> > The layout of vdso data (loongarch_vdso_data):
> >
> > struct vdso_pcpu_data pdata[NR_CPUS];
> > struct vdso_data data[CS_BASES];
> >
> > VDSO_DATA_SIZE is the page aligned size of loongarch_vdso_data, and in
> > memory, vdso code is above vdso data.
> >
> > Then, get_vdso_base() returns the start of vdso code, and
> > get_vdso_data() returns the start of vdso data.
> >
> > In your patch, __arch_get_timens_vdso_data() returns get_vdso_data() +
> > PAGE_SIZE, but you don't increase the size of loongarch_vdso_data. The
> > result is it returns an address in vdso code.
> >
> > Now, do you know what the problem is? Or still insist that "I have tested"?
>
> Please review the following changes based on the current patch,
> modify the layout of vvar to expand a page size for timens_data,
> and also map it to zero pfn before creating time namespace, then
> the last thing is to add the callback function vvar_fault().
>
> $ git diff
> diff --git a/arch/loongarch/include/asm/page.h
> b/arch/loongarch/include/asm/page.h
> index fb5338b..26e8dcc 100644
> --- a/arch/loongarch/include/asm/page.h
> +++ b/arch/loongarch/include/asm/page.h
> @@ -81,6 +81,7 @@ typedef struct { unsigned long pgprot; } pgprot_t;
> #define __va(x) ((void *)((unsigned long)(x) +
> PAGE_OFFSET - PHYS_OFFSET))
>
> #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT)
> +#define sym_to_pfn(x) __phys_to_pfn(__pa_symbol(x))
>
> #define virt_to_pfn(kaddr) PFN_DOWN(PHYSADDR(kaddr))
> #define virt_to_page(kaddr) pfn_to_page(virt_to_pfn(kaddr))
> diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c
> index cf62103..3e89aca 100644
> --- a/arch/loongarch/kernel/vdso.c
> +++ b/arch/loongarch/kernel/vdso.c
> @@ -23,7 +23,27 @@
> #include <vdso/vsyscall.h>
> #include <generated/vdso-offsets.h>
>
> +/*
> + * The layout of vvar:
> + *
> + * high
> + * +----------------+----------------+
> + * | timens_data | PAGE_SIZE |
> + * +----------------+----------------+
> + * | vdso_data | |
> + * | vdso_pcpu_data | VDSO_DATA_SIZE |
> + * +----------------+----------------+
> + * low
> + */
> +#define VVAR_SIZE (VDSO_DATA_SIZE + PAGE_SIZE)
> +
> +enum vvar_pages {
> + VVAR_DATA_PAGE_OFFSET,
> + VVAR_TIMENS_PAGE_OFFSET,
> +};
You suppose that vdso_data+vdso_pcpu_data can fit in one page, but
this isn't always the case.

> +
> extern char vdso_start[], vdso_end[];
> +extern unsigned long zero_pfn;
>
> /* Kernel-provided data used by the VDSO. */
> static union {
> @@ -42,6 +62,40 @@ static int vdso_mremap(const struct
> vm_special_mapping *sm, struct vm_area_struc
> return 0;
> }
>
> +static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
> + struct vm_area_struct *vma, struct vm_fault
> *vmf)
> +{
> + struct page *timens_page = find_timens_vvar_page(vma);
> + unsigned long pfn;
> +
> + switch (vmf->pgoff) {
> + case VVAR_DATA_PAGE_OFFSET:
> + if (timens_page)
> + pfn = page_to_pfn(timens_page);
> + else
> + pfn = sym_to_pfn(vdso_data);
> + break;
> +#ifdef CONFIG_TIME_NS
> + case VVAR_TIMENS_PAGE_OFFSET:
> + /*
> + * If a task belongs to a time namespace then a namespace
> + * specific VVAR is mapped with the
> VVAR_DATA_PAGE_OFFSET and
> + * the real VVAR page is mapped with the
> VVAR_TIMENS_PAGE_OFFSET
> + * offset.
> + * See also the comment near timens_setup_vdso_data().
> + */
> + if (!timens_page)
> + return VM_FAULT_SIGBUS;
> + pfn = sym_to_pfn(vdso_data);
> + break;
> +#endif /* CONFIG_TIME_NS */
> + default:
> + return VM_FAULT_SIGBUS;
> + }
> +
> + return vmf_insert_pfn(vma, vmf->address, pfn);
> +}
> +
> struct loongarch_vdso_info vdso_info = {
> .vdso = vdso_start,
> .size = PAGE_SIZE,
> @@ -52,6 +106,7 @@ struct loongarch_vdso_info vdso_info = {
> },
> .data_mapping = {
> .name = "[vvar]",
> + .fault = vvar_fault,
I prefer pre-allocate than page-fault if possible.

Huacai
> },
> .offset_sigreturn = vdso_offset_sigreturn,
> };
> @@ -120,7 +175,7 @@ static unsigned long vdso_base(void)
> int arch_setup_additional_pages(struct linux_binprm *bprm, int
> uses_interp)
> {
> int ret;
> - unsigned long vvar_size, size, data_addr, vdso_addr;
> + unsigned long size, data_addr, vdso_addr;
> struct mm_struct *mm = current->mm;
> struct vm_area_struct *vma;
> struct loongarch_vdso_info *info = current->thread.vdso;
> @@ -132,17 +187,16 @@ int arch_setup_additional_pages(struct
> linux_binprm *bprm, int uses_interp)
> * Determine total area size. This includes the VDSO data itself
> * and the data pages.
> */
> - vvar_size = VDSO_DATA_SIZE;
> - size = vvar_size + info->size;
> + size = VVAR_SIZE + info->size;
>
> data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0);
> if (IS_ERR_VALUE(data_addr)) {
> ret = data_addr;
> goto out;
> }
> - vdso_addr = data_addr + VDSO_DATA_SIZE;
> + vdso_addr = data_addr + VVAR_SIZE;
>
> - vma = _install_special_mapping(mm, data_addr, vvar_size,
> + vma = _install_special_mapping(mm, data_addr, VVAR_SIZE,
> VM_READ | VM_MAYREAD,
> &info->data_mapping);
> if (IS_ERR(vma)) {
> @@ -153,7 +207,12 @@ int arch_setup_additional_pages(struct linux_binprm
> *bprm, int uses_interp)
> /* Map VDSO data page. */
> ret = remap_pfn_range(vma, data_addr,
> virt_to_phys(&loongarch_vdso_data) >>
> PAGE_SHIFT,
> - vvar_size, PAGE_READONLY);
> + VDSO_DATA_SIZE, PAGE_READONLY);
> + if (ret)
> + goto out;
> +
> + ret = remap_pfn_range(vma, data_addr + VDSO_DATA_SIZE, zero_pfn,
> + PAGE_SIZE, PAGE_READONLY);
> if (ret)
> goto out;
>
> If you have any more comments, please let me know, thank you.
> I will send v2 after waiting for some more feedbacks.
>
> Thanks,
> Tiezhu
>