Re: [GIT PULL] First batch of KVM changes for 4.1

From: Peter Zijlstra
Date: Fri Apr 17 2015 - 09:10:57 EST


On Fri, Apr 17, 2015 at 02:46:57PM +0200, Paolo Bonzini wrote:
> On 17/04/2015 12:55, Peter Zijlstra wrote:
> > Also, it looks like you already do exactly this for other things, look
> > at:
> >
> > kvm_sched_in()
> > kvm_arch_vcpu_load()
> > if (unlikely(vcpu->cpu != cpu) ... )
> >
> > So no, I don't believe for one second you need this.
>
> You're missing that this snippet is running in the host, while this
> patch is concerned with the guest (paravirt).

This doesn't make sense; but it brings us back to where we were last
time. There is _0_ justification for this in the patches, that alone is
grounds enough to reject it.

Why should the guest task care about the physical cpu of the vcpu;
that's a layering fail if ever there was one.

Furthermore, the only thing that migration handler seems to do is
increment a variable that is not actually used in that file.

> And frankly, I think the static key is snake oil. The cost of task
> migration in terms of cache misses and TLB misses is in no way
> comparable to the cost of filling in a structure on the stack,
> dereferencing the head of the notifiers list and seeing that it's NULL.

The path this notifier is called from has nothing to do with those
costs. And the fact you're inflicting these costs on _everyone_ for a
single x86_64-paravirt case is insane.

I've had enough of this, the below goes into sched/urgent and you can
come back with sane patches if and when you're ready.

---
arch/x86/kernel/pvclock.c | 24 ------------------------
include/linux/sched.h | 8 --------
kernel/sched/core.c | 15 ---------------
3 files changed, 47 deletions(-)

diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c
index e5ecd20e72dd..82f116de3835 100644
--- a/arch/x86/kernel/pvclock.c
+++ b/arch/x86/kernel/pvclock.c
@@ -160,27 +160,6 @@ struct pvclock_vcpu_time_info *pvclock_get_vsyscall_time_info(int cpu)
}

#ifdef CONFIG_X86_64
-static int pvclock_task_migrate(struct notifier_block *nb, unsigned long l,
- void *v)
-{
- struct task_migration_notifier *mn = v;
- struct pvclock_vsyscall_time_info *pvti;
-
- pvti = pvclock_get_vsyscall_user_time_info(mn->from_cpu);
-
- /* this is NULL when pvclock vsyscall is not initialized */
- if (unlikely(pvti == NULL))
- return NOTIFY_DONE;
-
- pvti->migrate_count++;
-
- return NOTIFY_DONE;
-}
-
-static struct notifier_block pvclock_migrate = {
- .notifier_call = pvclock_task_migrate,
-};
-
/*
* Initialize the generic pvclock vsyscall state. This will allocate
* a/some page(s) for the per-vcpu pvclock information, set up a
@@ -202,9 +181,6 @@ int __init pvclock_init_vsyscall(struct pvclock_vsyscall_time_info *i,
PAGE_KERNEL_VVAR);
}

-
- register_task_migration_notifier(&pvclock_migrate);
-
return 0;
}
#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 3f3308824fa4..0eabab99e126 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -176,14 +176,6 @@ extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
extern void calc_global_load(unsigned long ticks);
extern void update_cpu_load_nohz(void);

-/* Notifier for when a task gets migrated to a new CPU */
-struct task_migration_notifier {
- struct task_struct *task;
- int from_cpu;
- int to_cpu;
-};
-extern void register_task_migration_notifier(struct notifier_block *n);
-
extern unsigned long get_parent_ip(unsigned long addr);

extern void dump_cpu_task(int cpu);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 1d0bc4fe266d..dbfc93d40292 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1013,13 +1013,6 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
rq_clock_skip_update(rq, true);
}

-static ATOMIC_NOTIFIER_HEAD(task_migration_notifier);
-
-void register_task_migration_notifier(struct notifier_block *n)
-{
- atomic_notifier_chain_register(&task_migration_notifier, n);
-}
-
#ifdef CONFIG_SMP
void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
{
@@ -1050,18 +1043,10 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
trace_sched_migrate_task(p, new_cpu);

if (task_cpu(p) != new_cpu) {
- struct task_migration_notifier tmn;
-
if (p->sched_class->migrate_task_rq)
p->sched_class->migrate_task_rq(p, new_cpu);
p->se.nr_migrations++;
perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
-
- tmn.task = p;
- tmn.from_cpu = task_cpu(p);
- tmn.to_cpu = new_cpu;
-
- atomic_notifier_call_chain(&task_migration_notifier, 0, &tmn);
}

__set_task_cpu(p, new_cpu);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/