[PATCH 2/2] ipc/sem: sem_lock with hysteresis

From: Manfred Spraul
Date: Sat Jun 18 2016 - 16:03:18 EST


sysv sem has two lock modes: One with per-semaphore locks, one lock mode
with a single big lock for the whole array.
When switching from the per-semaphore locks to the big lock, all
per-semaphore locks must be scanned for ongoing operations.

The patch adds a hysteresis for switching from the big lock to the per
semaphore locks. This reduces how often the per-semaphore locks must
be scanned.

Passed stress testing with sem-scalebench.

Signed-off-by: Manfred Spraul <manfred@xxxxxxxxxxxxxxxx>

---
include/linux/sem.h | 2 +-
ipc/sem.c | 91 ++++++++++++++++++++++++++++-------------------------
2 files changed, 49 insertions(+), 44 deletions(-)

diff --git a/include/linux/sem.h b/include/linux/sem.h
index d0efd6e..6fb3227 100644
--- a/include/linux/sem.h
+++ b/include/linux/sem.h
@@ -21,7 +21,7 @@ struct sem_array {
struct list_head list_id; /* undo requests on this array */
int sem_nsems; /* no. of semaphores in array */
int complex_count; /* pending complex operations */
- bool complex_mode; /* no parallel simple ops */
+ int complex_mode; /* >0: no parallel simple ops */
};

#ifdef CONFIG_SYSVIPC
diff --git a/ipc/sem.c b/ipc/sem.c
index 11d9e60..1f43fb8 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -161,6 +161,13 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it);
#define SEMOPM_FAST 64 /* ~ 372 bytes on stack */

/*
+ * Switching from the mode suitable for simple ops
+ * to the mode for complex ops is costly. Therefore:
+ * use some hysteresis
+ */
+#define COMPLEX_MODE_ENTER 10
+
+/*
* Locking:
* a) global sem_lock() for read/write
* sem_undo.id_next,
@@ -279,17 +286,25 @@ static void sem_rcu_free(struct rcu_head *head)
/*
* Enter the mode suitable for non-simple operations:
* Caller must own sem_perm.lock.
+ * Note:
+ * There is no leave complex mode function. Leaving
+ * happens in sem_lock, with some hysteresis.
*/
static void complexmode_enter(struct sem_array *sma)
{
int i;
struct sem *sem;

- if (sma->complex_mode) {
- /* We are already in complex_mode. Nothing to do */
+ if (sma->complex_mode > 0) {
+ /*
+ * We are already in complex_mode.
+ * Nothing to do, just increase
+ * counter until we return to simple mode
+ */
+ WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);
return;
}
- WRITE_ONCE(sma->complex_mode, true);
+ WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);

/* We need a full barrier:
* The write to complex_mode must be visible
@@ -305,29 +320,6 @@ static void complexmode_enter(struct sem_array *sma)
}

/*
- * Try to leave the mode that disallows simple operations:
- * Caller must own sem_perm.lock.
- */
-static void complexmode_tryleave(struct sem_array *sma)
-{
- if (sma->complex_count) {
- /* Complex ops are sleeping.
- * We must stay in complex mode
- */
- return;
- }
- /*
- * Immediately after setting complex_mode to false,
- * a simple op can start. Thus: all memory writes
- * performed by the current operation must be visible
- * before we set complex_mode to false.
- */
- smp_wmb();
-
- WRITE_ONCE(sma->complex_mode, false);
-}
-
-/*
* If the request contains only one semaphore operation, and there are
* no complex transactions pending, lock only the semaphore involved.
* Otherwise, lock the entire semaphore array, since we either have
@@ -383,27 +375,42 @@ static inline int sem_lock(struct sem_array *sma, struct sembuf *sops,
ipc_lock_object(&sma->sem_perm);

if (sma->complex_count == 0) {
- /* False alarm:
- * There is no complex operation, thus we can switch
- * back to the fast path.
- */
- spin_lock(&sem->lock);
- ipc_unlock_object(&sma->sem_perm);
- return sops->sem_num;
- } else {
- /* Not a false alarm, thus complete the sequence for a
- * full lock.
+ /*
+ * Check if fast path is possible:
+ * There is no complex operation, check hysteresis
+ * If 0, switch back to the fast path.
*/
- complexmode_enter(sma);
- return -1;
+ if (sma->complex_mode > 0) {
+ /* Note:
+ * Immediately after setting complex_mode to 0,
+ * a simple op could start.
+ * The data it would access was written by the
+ * previous owner of sem->sem_perm.lock, i.e
+ * a release and an acquire memory barrier ago.
+ * No need for another barrier.
+ */
+ WRITE_ONCE(sma->complex_mode, sma->complex_mode-1);
+ }
+ if (sma->complex_mode == 0) {
+ spin_lock(&sem->lock);
+ ipc_unlock_object(&sma->sem_perm);
+ return sops->sem_num;
+ }
}
+ /*
+ * Not a false alarm, full lock is required.
+ * Since we are already in complex_mode (either because of waiting
+ * complex ops or due to hysteresis), there is not need for a
+ * complexmode_enter().
+ */
+ WARN_ON(sma->complex_mode == 0);
+ return -1;
}

static inline void sem_unlock(struct sem_array *sma, int locknum)
{
if (locknum == -1) {
unmerge_queues(sma);
- complexmode_tryleave(sma);
ipc_unlock_object(&sma->sem_perm);
} else {
struct sem *sem = sma->sem_base + locknum;
@@ -555,7 +562,7 @@ static int newary(struct ipc_namespace *ns, struct ipc_params *params)
}

sma->complex_count = 0;
- sma->complex_mode = true; /* dropped by sem_unlock below */
+ WRITE_ONCE(sma->complex_mode, COMPLEX_MODE_ENTER);
INIT_LIST_HEAD(&sma->pending_alter);
INIT_LIST_HEAD(&sma->pending_const);
INIT_LIST_HEAD(&sma->list_id);
@@ -2212,7 +2219,7 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
* The proc interface isn't aware of sem_lock(), it calls
* ipc_lock_object() directly (in sysvipc_find_ipc).
* In order to stay compatible with sem_lock(), we must
- * enter / leave complex_mode.
+ * enter complex_mode.
*/
complexmode_enter(sma);

@@ -2231,8 +2238,6 @@ static int sysvipc_sem_proc_show(struct seq_file *s, void *it)
sem_otime,
sma->sem_ctime);

- complexmode_tryleave(sma);
-
return 0;
}
#endif
--
2.5.5