On Thu, Sep 09, 2021 at 11:00:05AM -0700, Paul E. McKenney wrote:
[...]
Boqun, I vaguely remember a suggested change from you along these lines,
but now I cannot find it. Could you please send it as a formal patch
if you have not already done so or point me at it if you have?
Here is a draft patch based on the change I made when I discussed this
with Peter, and I really want to hear Alan's thoughts first. Ideally, we
should also have related litmus tests and send them to the linux-arch
list so that we know the ordering is provided by every architecture.
Regards,
Boqun
--------------------------------->8
Subject: [PATCH] tools/memory-model: Provide extra ordering for
lock-{release,acquire} on the same CPU
A recent discussion[1] shows that we are in favor of strengthening the
ordering of lock-release + lock-acquire on the same CPU: a lock-release
and a po-after lock-acquire should provide the so-called RCtso ordering,
that is a memory access S po-before the lock-release should be ordered
against a memory access R po-after the lock-acquire, unless S is a store
and R is a load.
The strengthening meets programmers' expectation that "sequence of two
locked regions to be ordered wrt each other" (from Linus), and can
reduce the mental burden when using locks. Therefore add it to LKMM.
[1]: https://lore.kernel.org/lkml/20210909185937.GA12379@xxxxxxxxxxxxxxxxxxx/
Signed-off-by: Boqun Feng <boqun.feng@xxxxxxxxx>
---
.../Documentation/explanation.txt | 28 +++++++++++++++++++
tools/memory-model/linux-kernel.cat | 6 ++--
2 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt
index 5d72f3112e56..d62de21f32c4 100644
--- a/tools/memory-model/Documentation/explanation.txt
+++ b/tools/memory-model/Documentation/explanation.txt
@@ -1847,6 +1847,34 @@ therefore the load of x must execute before the load of y. Thus we
cannot have r1 = 1 and r2 = 0 at the end (this is an instance of the
MP pattern).
+This requirement also applies to a lock-release and a lock-acquire
+on different locks, as long as the lock-acquire is po-after the
+lock-release. Note that "po-after" means the lock-acquire and the
+lock-release are on the same CPU. An example similar to the above:
+
+ int x, y;
+ spinlock_t s;
+ spinlock_t t;
+
+ P0()
+ {
+ int r1, r2;
+
+ spin_lock(&s);
+ r1 = READ_ONCE(x);
+ spin_unlock(&s);
+ spin_lock(&t);
+ r2 = READ_ONCE(y);
+ spin_unlock(&t);
+ }
+
+ P1()
+ {
+ WRITE_ONCE(y, 1);
+ smp_wmb();
+ WRITE_ONCE(x, 1);
+ }
+
This requirement does not apply to ordinary release and acquire
fences, only to lock-related operations. For instance, suppose P0()
in the example had been written as:
diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat
index 2a9b4fe4a84e..d70315fddef6 100644
--- a/tools/memory-model/linux-kernel.cat
+++ b/tools/memory-model/linux-kernel.cat
@@ -27,7 +27,7 @@ include "lock.cat"
(* Release Acquire *)
let acq-po = [Acquire] ; po ; [M]
let po-rel = [M] ; po ; [Release]
-let po-unlock-rf-lock-po = po ; [UL] ; rf ; [LKR] ; po
+let po-unlock-lock-po = po ; [UL] ; (po|rf) ; [LKR] ; po
(* Fences *)
let R4rmb = R \ Noreturn (* Reads for which rmb works *)
@@ -70,12 +70,12 @@ let rwdep = (dep | ctrl) ; [W]
let overwrite = co | fr
let to-w = rwdep | (overwrite & int) | (addr ; [Plain] ; wmb)
let to-r = addr | (dep ; [Marked] ; rfi)
-let ppo = to-r | to-w | fence | (po-unlock-rf-lock-po & int)
+let ppo = to-r | to-w | fence | (po-unlock-lock-po & int)
(* Propagation: Ordering from release operations and strong fences. *)
let A-cumul(r) = (rfe ; [Marked])? ; r
let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb |
- po-unlock-rf-lock-po) ; [Marked]
+ po-unlock-lock-po) ; [Marked]
let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ;
[Marked] ; rfe? ; [Marked]