Re: [PATCH RFC V9 1/19] x86/spinlock: Replace pv spinlocks with pv ticketlocks

From: Jeremy Fitzhardinge
Date: Sat Jun 01 2013 - 16:42:21 EST


On 06/01/2013 12:21 PM, Raghavendra K T wrote:
> x86/spinlock: Replace pv spinlocks with pv ticketlocks
>
> From: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
I'm not sure what the etiquette is here; I did the work while at Citrix,
but jeremy@xxxxxxxx is my canonical email address. The Citrix address
is dead and bounces, so is useless for anything. Probably best to
change it.

J

>
> Rather than outright replacing the entire spinlock implementation in
> order to paravirtualize it, keep the ticket lock implementation but add
> a couple of pvops hooks on the slow path (long spin on lock, unlocking
> a contended lock).
>
> Ticket locks have a number of nice properties, but they also have some
> surprising behaviours in virtual environments. They enforce a strict
> FIFO ordering on cpus trying to take a lock; however, if the hypervisor
> scheduler does not schedule the cpus in the correct order, the system can
> waste a huge amount of time spinning until the next cpu can take the lock.
>
> (See Thomas Friebel's talk "Prevent Guests from Spinning Around"
> http://www.xen.org/files/xensummitboston08/LHP.pdf for more details.)
>
> To address this, we add two hooks:
> - __ticket_spin_lock which is called after the cpu has been
> spinning on the lock for a significant number of iterations but has
> failed to take the lock (presumably because the cpu holding the lock
> has been descheduled). The lock_spinning pvop is expected to block
> the cpu until it has been kicked by the current lock holder.
> - __ticket_spin_unlock, which on releasing a contended lock
> (there are more cpus with tail tickets), looks to see if the next
> cpu is blocked and wakes it if so.
>
> When compiled with CONFIG_PARAVIRT_SPINLOCKS disabled, a set of stub
> functions causes all the extra code to go away.
>
> Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@xxxxxxxxxx>
> Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@xxxxxxxxxx>
> Tested-by: Attilio Rao <attilio.rao@xxxxxxxxxx>
> [ Raghavendra: Changed SPIN_THRESHOLD ]
> Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/paravirt.h | 32 ++++----------------
> arch/x86/include/asm/paravirt_types.h | 10 ++----
> arch/x86/include/asm/spinlock.h | 53 +++++++++++++++++++++++++++------
> arch/x86/include/asm/spinlock_types.h | 4 --
> arch/x86/kernel/paravirt-spinlocks.c | 15 +--------
> arch/x86/xen/spinlock.c | 8 ++++-
> 6 files changed, 61 insertions(+), 61 deletions(-)
>
> diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
> index cfdc9ee..040e72d 100644
> --- a/arch/x86/include/asm/paravirt.h
> +++ b/arch/x86/include/asm/paravirt.h
> @@ -712,36 +712,16 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx,
>
> #if defined(CONFIG_SMP) && defined(CONFIG_PARAVIRT_SPINLOCKS)
>
> -static inline int arch_spin_is_locked(struct arch_spinlock *lock)
> +static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
> + __ticket_t ticket)
> {
> - return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock);
> + PVOP_VCALL2(pv_lock_ops.lock_spinning, lock, ticket);
> }
>
> -static inline int arch_spin_is_contended(struct arch_spinlock *lock)
> +static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
> + __ticket_t ticket)
> {
> - return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock);
> -}
> -#define arch_spin_is_contended arch_spin_is_contended
> -
> -static __always_inline void arch_spin_lock(struct arch_spinlock *lock)
> -{
> - PVOP_VCALL1(pv_lock_ops.spin_lock, lock);
> -}
> -
> -static __always_inline void arch_spin_lock_flags(struct arch_spinlock *lock,
> - unsigned long flags)
> -{
> - PVOP_VCALL2(pv_lock_ops.spin_lock_flags, lock, flags);
> -}
> -
> -static __always_inline int arch_spin_trylock(struct arch_spinlock *lock)
> -{
> - return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock);
> -}
> -
> -static __always_inline void arch_spin_unlock(struct arch_spinlock *lock)
> -{
> - PVOP_VCALL1(pv_lock_ops.spin_unlock, lock);
> + PVOP_VCALL2(pv_lock_ops.unlock_kick, lock, ticket);
> }
>
> #endif
> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h
> index 0db1fca..d5deb6d 100644
> --- a/arch/x86/include/asm/paravirt_types.h
> +++ b/arch/x86/include/asm/paravirt_types.h
> @@ -327,13 +327,11 @@ struct pv_mmu_ops {
> };
>
> struct arch_spinlock;
> +#include <asm/spinlock_types.h>
> +
> struct pv_lock_ops {
> - int (*spin_is_locked)(struct arch_spinlock *lock);
> - int (*spin_is_contended)(struct arch_spinlock *lock);
> - void (*spin_lock)(struct arch_spinlock *lock);
> - void (*spin_lock_flags)(struct arch_spinlock *lock, unsigned long flags);
> - int (*spin_trylock)(struct arch_spinlock *lock);
> - void (*spin_unlock)(struct arch_spinlock *lock);
> + void (*lock_spinning)(struct arch_spinlock *lock, __ticket_t ticket);
> + void (*unlock_kick)(struct arch_spinlock *lock, __ticket_t ticket);
> };
>
> /* This contains all the paravirt structures: we get a convenient
> diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
> index 33692ea..4d54244 100644
> --- a/arch/x86/include/asm/spinlock.h
> +++ b/arch/x86/include/asm/spinlock.h
> @@ -34,6 +34,35 @@
> # define UNLOCK_LOCK_PREFIX
> #endif
>
> +/* How long a lock should spin before we consider blocking */
> +#define SPIN_THRESHOLD (1 << 15)
> +
> +#ifndef CONFIG_PARAVIRT_SPINLOCKS
> +
> +static __always_inline void __ticket_lock_spinning(struct arch_spinlock *lock,
> + __ticket_t ticket)
> +{
> +}
> +
> +static __always_inline void ____ticket_unlock_kick(struct arch_spinlock *lock,
> + __ticket_t ticket)
> +{
> +}
> +
> +#endif /* CONFIG_PARAVIRT_SPINLOCKS */
> +
> +
> +/*
> + * If a spinlock has someone waiting on it, then kick the appropriate
> + * waiting cpu.
> + */
> +static __always_inline void __ticket_unlock_kick(struct arch_spinlock *lock,
> + __ticket_t next)
> +{
> + if (unlikely(lock->tickets.tail != next))
> + ____ticket_unlock_kick(lock, next);
> +}
> +
> /*
> * Ticket locks are conceptually two parts, one indicating the current head of
> * the queue, and the other indicating the current tail. The lock is acquired
> @@ -47,19 +76,24 @@
> * in the high part, because a wide xadd increment of the low part would carry
> * up and contaminate the high part.
> */
> -static __always_inline void __ticket_spin_lock(arch_spinlock_t *lock)
> +static __always_inline void __ticket_spin_lock(struct arch_spinlock *lock)
> {
> register struct __raw_tickets inc = { .tail = 1 };
>
> inc = xadd(&lock->tickets, inc);
>
> for (;;) {
> - if (inc.head == inc.tail)
> - break;
> - cpu_relax();
> - inc.head = ACCESS_ONCE(lock->tickets.head);
> + unsigned count = SPIN_THRESHOLD;
> +
> + do {
> + if (inc.head == inc.tail)
> + goto out;
> + cpu_relax();
> + inc.head = ACCESS_ONCE(lock->tickets.head);
> + } while (--count);
> + __ticket_lock_spinning(lock, inc.tail);
> }
> - barrier(); /* make sure nothing creeps before the lock is taken */
> +out: barrier(); /* make sure nothing creeps before the lock is taken */
> }
>
> static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
> @@ -78,7 +112,10 @@ static __always_inline int __ticket_spin_trylock(arch_spinlock_t *lock)
>
> static __always_inline void __ticket_spin_unlock(arch_spinlock_t *lock)
> {
> + __ticket_t next = lock->tickets.head + 1;
> +
> __add(&lock->tickets.head, 1, UNLOCK_LOCK_PREFIX);
> + __ticket_unlock_kick(lock, next);
> }
>
> static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
> @@ -95,8 +132,6 @@ static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
> return (__ticket_t)(tmp.tail - tmp.head) > 1;
> }
>
> -#ifndef CONFIG_PARAVIRT_SPINLOCKS
> -
> static inline int arch_spin_is_locked(arch_spinlock_t *lock)
> {
> return __ticket_spin_is_locked(lock);
> @@ -129,8 +164,6 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
> arch_spin_lock(lock);
> }
>
> -#endif /* CONFIG_PARAVIRT_SPINLOCKS */
> -
> static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
> {
> while (arch_spin_is_locked(lock))
> diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
> index ad0ad07..83fd3c7 100644
> --- a/arch/x86/include/asm/spinlock_types.h
> +++ b/arch/x86/include/asm/spinlock_types.h
> @@ -1,10 +1,6 @@
> #ifndef _ASM_X86_SPINLOCK_TYPES_H
> #define _ASM_X86_SPINLOCK_TYPES_H
>
> -#ifndef __LINUX_SPINLOCK_TYPES_H
> -# error "please don't include this file directly"
> -#endif
> -
> #include <linux/types.h>
>
> #if (CONFIG_NR_CPUS < 256)
> diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
> index 676b8c7..c2e010e 100644
> --- a/arch/x86/kernel/paravirt-spinlocks.c
> +++ b/arch/x86/kernel/paravirt-spinlocks.c
> @@ -7,21 +7,10 @@
>
> #include <asm/paravirt.h>
>
> -static inline void
> -default_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags)
> -{
> - arch_spin_lock(lock);
> -}
> -
> struct pv_lock_ops pv_lock_ops = {
> #ifdef CONFIG_SMP
> - .spin_is_locked = __ticket_spin_is_locked,
> - .spin_is_contended = __ticket_spin_is_contended,
> -
> - .spin_lock = __ticket_spin_lock,
> - .spin_lock_flags = default_spin_lock_flags,
> - .spin_trylock = __ticket_spin_trylock,
> - .spin_unlock = __ticket_spin_unlock,
> + .lock_spinning = paravirt_nop,
> + .unlock_kick = paravirt_nop,
> #endif
> };
> EXPORT_SYMBOL(pv_lock_ops);
> diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
> index 3002ec1..d6481a9 100644
> --- a/arch/x86/xen/spinlock.c
> +++ b/arch/x86/xen/spinlock.c
> @@ -138,6 +138,9 @@ struct xen_spinlock {
> xen_spinners_t spinners; /* count of waiting cpus */
> };
>
> +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
> +
> +#if 0
> static int xen_spin_is_locked(struct arch_spinlock *lock)
> {
> struct xen_spinlock *xl = (struct xen_spinlock *)lock;
> @@ -165,7 +168,6 @@ static int xen_spin_trylock(struct arch_spinlock *lock)
> return old == 0;
> }
>
> -static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
> static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
>
> /*
> @@ -352,6 +354,7 @@ static void xen_spin_unlock(struct arch_spinlock *lock)
> if (unlikely(xl->spinners))
> xen_spin_unlock_slow(xl);
> }
> +#endif
>
> static irqreturn_t dummy_handler(int irq, void *dev_id)
> {
> @@ -413,13 +416,14 @@ void __init xen_init_spinlocks(void)
> return;
>
> BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
> -
> +#if 0
> pv_lock_ops.spin_is_locked = xen_spin_is_locked;
> pv_lock_ops.spin_is_contended = xen_spin_is_contended;
> pv_lock_ops.spin_lock = xen_spin_lock;
> pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
> pv_lock_ops.spin_trylock = xen_spin_trylock;
> pv_lock_ops.spin_unlock = xen_spin_unlock;
> +#endif
> }
>
> #ifdef CONFIG_XEN_DEBUG_FS
>

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/