Re: [PATCH V4 2/8] fork: move PF_IO_WORKER's kernel frame setup to new flag

From: Michael S. Tsirkin
Date: Fri Oct 22 2021 - 05:55:49 EST


On Thu, Oct 07, 2021 at 04:44:42PM -0500, Mike Christie wrote:
> The vhost worker threads need the same frame setup as io_uring's worker
> threads, but handle signals differently and do not need the same
> scheduling behavior. This patch separate's the frame setup parts of
> PF_IO_WORKER into a new PF flag PF_USER_WORKER.
>
> Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx>
> ---
> arch/alpha/kernel/process.c | 2 +-
> arch/arc/kernel/process.c | 2 +-
> arch/arm/kernel/process.c | 2 +-
> arch/arm64/kernel/process.c | 2 +-
> arch/csky/kernel/process.c | 2 +-
> arch/h8300/kernel/process.c | 2 +-
> arch/hexagon/kernel/process.c | 2 +-
> arch/ia64/kernel/process.c | 2 +-
> arch/m68k/kernel/process.c | 2 +-
> arch/microblaze/kernel/process.c | 2 +-
> arch/mips/kernel/process.c | 2 +-
> arch/nds32/kernel/process.c | 2 +-
> arch/nios2/kernel/process.c | 2 +-
> arch/openrisc/kernel/process.c | 2 +-
> arch/parisc/kernel/process.c | 2 +-
> arch/powerpc/kernel/process.c | 2 +-
> arch/riscv/kernel/process.c | 2 +-
> arch/s390/kernel/process.c | 2 +-
> arch/sh/kernel/process_32.c | 2 +-
> arch/sparc/kernel/process_32.c | 2 +-
> arch/sparc/kernel/process_64.c | 2 +-
> arch/um/kernel/process.c | 2 +-
> arch/x86/kernel/process.c | 2 +-
> arch/xtensa/kernel/process.c | 2 +-
> include/linux/sched.h | 1 +
> include/linux/sched/task.h | 1 +
> kernel/fork.c | 4 +++-
> 27 files changed, 29 insertions(+), 25 deletions(-)


For something that's touching include/linux/sched.h
and all arches at once, this has not been CC'd widely enough.


> diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c
> index a5123ea426ce..e350fff2ea14 100644
> --- a/arch/alpha/kernel/process.c
> +++ b/arch/alpha/kernel/process.c
> @@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
> childti->pcb.ksp = (unsigned long) childstack;
> childti->pcb.flags = 1; /* set FEN, clear everything else */
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> memset(childstack, 0,
> sizeof(struct switch_stack) + sizeof(struct pt_regs));
> diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c
> index 3793876f42d9..c3f4952cce17 100644
> --- a/arch/arc/kernel/process.c
> +++ b/arch/arc/kernel/process.c
> @@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
> childksp[0] = 0; /* fp */
> childksp[1] = (unsigned long)ret_from_fork; /* blink */
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(c_regs, 0, sizeof(struct pt_regs));
>
> c_callee->r13 = kthread_arg;
> diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
> index 0e2d3051741e..449c9db3942a 100644
> --- a/arch/arm/kernel/process.c
> +++ b/arch/arm/kernel/process.c
> @@ -247,7 +247,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
> thread->cpu_domain = get_domain();
> #endif
>
> - if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
> + if (likely(!(p->flags & (PF_KTHREAD | PF_USER_WORKER)))) {
> *childregs = *current_pt_regs();
> childregs->ARM_r0 = 0;
> if (stack_start)
> diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c
> index 40adb8cdbf5a..e2fe88a3ae90 100644
> --- a/arch/arm64/kernel/process.c
> +++ b/arch/arm64/kernel/process.c
> @@ -333,7 +333,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
>
> ptrauth_thread_init_kernel(p);
>
> - if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
> + if (likely(!(p->flags & (PF_KTHREAD | PF_USER_WORKER)))) {
> *childregs = *current_pt_regs();
> childregs->regs[0] = 0;
>
> diff --git a/arch/csky/kernel/process.c b/arch/csky/kernel/process.c
> index 3d0ca22cd0e2..509f2bfe4ace 100644
> --- a/arch/csky/kernel/process.c
> +++ b/arch/csky/kernel/process.c
> @@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags,
> /* setup thread.sp for switch_to !!! */
> p->thread.sp = (unsigned long)childstack;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childregs, 0, sizeof(struct pt_regs));
> childstack->r15 = (unsigned long) ret_from_kernel_thread;
> childstack->r10 = kthread_arg;
> diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c
> index 2ac27e4248a4..11baf058b6c5 100644
> --- a/arch/h8300/kernel/process.c
> +++ b/arch/h8300/kernel/process.c
> @@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
>
> childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childregs, 0, sizeof(struct pt_regs));
> childregs->retpc = (unsigned long) ret_from_kernel_thread;
> childregs->er4 = topstk; /* arg */
> diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c
> index 6a6835fb4242..f17573b66303 100644
> --- a/arch/hexagon/kernel/process.c
> +++ b/arch/hexagon/kernel/process.c
> @@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> sizeof(*ss));
> ss->lr = (unsigned long)ret_from_fork;
> p->thread.switch_sp = ss;
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childregs, 0, sizeof(struct pt_regs));
> /* r24 <- fn, r25 <- arg */
> ss->r24 = usp;
> diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c
> index e56d63f4abf9..4a58daa56af4 100644
> --- a/arch/ia64/kernel/process.c
> +++ b/arch/ia64/kernel/process.c
> @@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
>
> ia64_drop_fpu(p); /* don't pick up stale state from a CPU's fph */
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> if (unlikely(!user_stack_base)) {
> /* fork_idle() called us */
> return 0;
> diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c
> index 1ab692b952cd..e7474a118410 100644
> --- a/arch/m68k/kernel/process.c
> +++ b/arch/m68k/kernel/process.c
> @@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> */
> p->thread.fc = USER_DATA;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> memset(frame, 0, sizeof(struct fork_frame));
> frame->regs.sr = PS_S;
> diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c
> index 62aa237180b6..5b543be324d4 100644
> --- a/arch/microblaze/kernel/process.c
> +++ b/arch/microblaze/kernel/process.c
> @@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> struct pt_regs *childregs = task_pt_regs(p);
> struct thread_info *ti = task_thread_info(p);
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* if we're creating a new kernel thread then just zeroing all
> * the registers. That's OK for a brand new thread.*/
> memset(childregs, 0, sizeof(struct pt_regs));
> diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
> index 95aa86fa6077..d9ca11dd544f 100644
> --- a/arch/mips/kernel/process.c
> +++ b/arch/mips/kernel/process.c
> @@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
> /* Put the stack after the struct pt_regs. */
> childksp = (unsigned long) childregs;
> p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> unsigned long status = p->thread.cp0_status;
> memset(childregs, 0, sizeof(struct pt_regs));
> diff --git a/arch/nds32/kernel/process.c b/arch/nds32/kernel/process.c
> index 391895b54d13..2dba51d1889c 100644
> --- a/arch/nds32/kernel/process.c
> +++ b/arch/nds32/kernel/process.c
> @@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
>
> memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childregs, 0, sizeof(struct pt_regs));
> /* kernel thread fn */
> p->thread.cpu_context.r6 = stack_start;
> diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c
> index 9ff37ba2bb60..ce6ad177da15 100644
> --- a/arch/nios2/kernel/process.c
> +++ b/arch/nios2/kernel/process.c
> @@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> struct switch_stack *childstack =
> ((struct switch_stack *)childregs) - 1;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childstack, 0,
> sizeof(struct switch_stack) + sizeof(struct pt_regs));
>
> diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c
> index b0698d9ce14f..d1d189c16676 100644
> --- a/arch/openrisc/kernel/process.c
> +++ b/arch/openrisc/kernel/process.c
> @@ -172,7 +172,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> sp -= sizeof(struct pt_regs);
> kregs = (struct pt_regs *)sp;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(kregs, 0, sizeof(struct pt_regs));
> kregs->gpr[20] = usp; /* fn, kernel thread */
> kregs->gpr[22] = arg;
> diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c
> index 38ec4ae81239..257bec7e67d4 100644
> --- a/arch/parisc/kernel/process.c
> +++ b/arch/parisc/kernel/process.c
> @@ -197,7 +197,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
> extern void * const ret_from_kernel_thread;
> extern void * const child_return;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> memset(cregs, 0, sizeof(struct pt_regs));
> if (!usp) /* idle thread */
> diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
> index 50436b52c213..817847723bff 100644
> --- a/arch/powerpc/kernel/process.c
> +++ b/arch/powerpc/kernel/process.c
> @@ -1700,7 +1700,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
> /* Copy registers */
> sp -= sizeof(struct pt_regs);
> childregs = (struct pt_regs *) sp;
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> memset(childregs, 0, sizeof(struct pt_regs));
> childregs->gpr[1] = sp + sizeof(struct pt_regs);
> diff --git a/arch/riscv/kernel/process.c b/arch/riscv/kernel/process.c
> index 03ac3aa611f5..8deeb94eb51e 100644
> --- a/arch/riscv/kernel/process.c
> +++ b/arch/riscv/kernel/process.c
> @@ -125,7 +125,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
> struct pt_regs *childregs = task_pt_regs(p);
>
> /* p->thread holds context to be restored by __switch_to() */
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* Kernel thread */
> memset(childregs, 0, sizeof(struct pt_regs));
> childregs->gp = gp_in_global;
> diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c
> index 350e94d0cac2..f596843ab55c 100644
> --- a/arch/s390/kernel/process.c
> +++ b/arch/s390/kernel/process.c
> @@ -130,7 +130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
> frame->sf.gprs[9] = (unsigned long)frame;
>
> /* Store access registers to kernel stack of new process. */
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> /* kernel thread */
> memset(&frame->childregs, 0, sizeof(struct pt_regs));
> frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
> diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c
> index 717de05c81f4..e74906f53c3e 100644
> --- a/arch/sh/kernel/process_32.c
> +++ b/arch/sh/kernel/process_32.c
> @@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
>
> childregs = task_pt_regs(p);
> p->thread.sp = (unsigned long) childregs;
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(childregs, 0, sizeof(struct pt_regs));
> p->thread.pc = (unsigned long) ret_from_kernel_thread;
> childregs->regs[4] = arg;
> diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c
> index bbbe0cfef746..978e0bc10ad4 100644
> --- a/arch/sparc/kernel/process_32.c
> +++ b/arch/sparc/kernel/process_32.c
> @@ -296,7 +296,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
> ti->ksp = (unsigned long) new_stack;
> p->thread.kregs = childregs;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> extern int nwindows;
> unsigned long psr;
> memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c
> index d1cc410d2f64..1c45cd5089f4 100644
> --- a/arch/sparc/kernel/process_64.c
> +++ b/arch/sparc/kernel/process_64.c
> @@ -594,7 +594,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
> sizeof(struct sparc_stackf));
> t->fpsaved[0] = 0;
>
> - if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (unlikely(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> memset(child_trap_frame, 0, child_stack_sz);
> __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] =
> (current_pt_regs()->tstate + 1) & TSTATE_CWP;
> diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c
> index 457a38db368b..2bc3141cbf01 100644
> --- a/arch/um/kernel/process.c
> +++ b/arch/um/kernel/process.c
> @@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
> unsigned long arg, struct task_struct * p, unsigned long tls)
> {
> void (*handler)(void);
> - int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
> + int kthread = current->flags & (PF_KTHREAD | PF_USER_WORKER);
> int ret = 0;
>
> p->thread = (struct thread_struct) INIT_THREAD;
> diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
> index 1d9463e3096b..d88be9dd5dfd 100644
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -178,7 +178,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
> task_user_gs(p) = get_user_gs(current_pt_regs());
> #endif
>
> - if (unlikely(p->flags & PF_IO_WORKER)) {
> + if (unlikely(p->flags & PF_USER_WORKER)) {
> /*
> * An IO thread is a user space thread, but it doesn't
> * return to ret_after_fork().
> diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c
> index 060165340612..61ad0bfbd7ea 100644
> --- a/arch/xtensa/kernel/process.c
> +++ b/arch/xtensa/kernel/process.c
> @@ -217,7 +217,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
>
> p->thread.sp = (unsigned long)childregs;
>
> - if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
> + if (!(p->flags & (PF_KTHREAD | PF_USER_WORKER))) {
> struct pt_regs *regs = current_pt_regs();
> unsigned long usp = usp_thread_fn ?
> usp_thread_fn : regs->areg[1];
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index c1a927ddec64..b1027e916be4 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1665,6 +1665,7 @@ extern struct pid *cad_pid;
> #define PF_VCPU 0x00000001 /* I'm a virtual CPU */
> #define PF_IDLE 0x00000002 /* I am an IDLE thread */
> #define PF_EXITING 0x00000004 /* Getting shut down */
> +#define PF_USER_WORKER 0x00000008 /* Kernel thread cloned from userspace thread */
> #define PF_IO_WORKER 0x00000010 /* Task is an IO worker */
> #define PF_WQ_WORKER 0x00000020 /* I'm a workqueue worker */
> #define PF_FORKNOEXEC 0x00000040 /* Forked but didn't exec */
> diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
> index 48417c735438..53599a99d7e0 100644
> --- a/include/linux/sched/task.h
> +++ b/include/linux/sched/task.h
> @@ -19,6 +19,7 @@ struct css_set;
> #define CLONE_LEGACY_FLAGS 0xffffffffULL
>
> #define KERN_WORKER_IO BIT(0)
> +#define KERN_WORKER_USER BIT(1)
>
> struct kernel_clone_args {
> u64 flags;
> diff --git a/kernel/fork.c b/kernel/fork.c
> index 3988106e9609..4f780424de46 100644
> --- a/kernel/fork.c
> +++ b/kernel/fork.c
> @@ -2035,6 +2035,8 @@ static __latent_entropy struct task_struct *copy_process(
> siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
> }
>
> + if (args->worker_flags & KERN_WORKER_USER)
> + p->flags |= PF_USER_WORKER;
> /*
> * This _must_ happen before we call free_task(), i.e. before we jump
> * to any of the bad_fork_* labels. This is to avoid freeing
> @@ -2526,7 +2528,7 @@ struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
> .exit_signal = (lower_32_bits(flags) & CSIGNAL),
> .stack = (unsigned long)fn,
> .stack_size = (unsigned long)arg,
> - .worker_flags = KERN_WORKER_IO,
> + .worker_flags = KERN_WORKER_IO | KERN_WORKER_USER,
> };
>
> return copy_process(NULL, 0, node, &args);
> --
> 2.25.1