[PATCH 0/5][Bionic][SRU Artful] Switch arm64 over to qrwlock


dann frazier-4
BugLink: http://bugs.launchpad.net/bugs/1732238

These patches comprise all clean cherry picks that landed in the 4.15
merge window. Verified on a ThunderX2-based Sabre board, and regression
tested on a 128-cpu x86 system using stress-ng and locktorture. (The other
Ubuntu architectures don't use qrwlocks).

Will Deacon (5):
  locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'
  locking/atomic: Add atomic_cond_read_acquire()
  locking/qrwlock: Use atomic_cond_read_acquire() when spinning in
    qrwlock
  locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks
  locking/qrwlock: Prevent slowpath writers getting held up by fastpath

 arch/arm64/Kconfig                      |  17 ++++
 arch/arm64/include/asm/Kbuild           |   1 +
 arch/arm64/include/asm/spinlock.h       | 164 +-------------------------------
 arch/arm64/include/asm/spinlock_types.h |   6 +-
 include/asm-generic/atomic-long.h       |   3 +
 include/asm-generic/qrwlock.h           |  37 ++-----
 include/asm-generic/qrwlock_types.h     |  15 ++-
 include/linux/atomic.h                  |   4 +
 kernel/locking/qrwlock.c                |  86 +++--------------
 9 files changed, 61 insertions(+), 272 deletions(-)

--
2.15.1



[PATCH 1/5][Bionic][SRU Artful] locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'

dann frazier-4
From: Will Deacon <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1732238

There's no good reason to keep the internal structure of struct qrwlock
hidden from qrwlock.h, particularly as it's actually needed for unlock
and ends up being abstracted independently behind the __qrwlock_write_byte()
function.

Stop pretending we can hide this stuff, and move the __qrwlock definition
into qrwlock, removing the __qrwlock_write_byte() nastiness and using the
same struct definition everywhere instead.
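
As a concrete illustration (distilled from the diff below, not part of the
upstream commit message), queued_write_unlock() goes from computing the
writer byte's address by hand to storing through a named field:

    /* before: locate the writer byte via pointer arithmetic */
    smp_store_release((u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN), 0);

    /* after: the union now exposed in struct qrwlock names that byte */
    smp_store_release(&lock->wmode, 0);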

Signed-off-by: Will Deacon <[hidden email]>
Acked-by: Peter Zijlstra <[hidden email]>
Cc: Boqun Feng <[hidden email]>
Cc: [hidden email]
Cc: Linus Torvalds <[hidden email]>
Cc: Paul E. McKenney <[hidden email]>
Cc: Thomas Gleixner <[hidden email]>
Cc: Waiman Long <[hidden email]>
Cc: [hidden email]
Link: http://lkml.kernel.org/r/1507810851-306-2-git-send-email-will.deacon@...
Signed-off-by: Ingo Molnar <[hidden email]>
(cherry picked from commit e0d02285f16e8d5810f3d5d5e8a5886ca0015d3b)
Signed-off-by: dann frazier <[hidden email]>
---
 include/asm-generic/qrwlock.h       | 12 +-----------
 include/asm-generic/qrwlock_types.h | 15 +++++++++++++--
 kernel/locking/qrwlock.c            | 26 ++------------------------
 3 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 7d026bf27713..5a571469f324 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -146,23 +146,13 @@ static inline void queued_read_unlock(struct qrwlock *lock)
  (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts);
 }
 
-/**
- * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock
- * @lock : Pointer to queue rwlock structure
- * Return: the write byte address of a queue rwlock
- */
-static inline u8 *__qrwlock_write_byte(struct qrwlock *lock)
-{
- return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN);
-}
-
 /**
  * queued_write_unlock - release write lock of a queue rwlock
  * @lock : Pointer to queue rwlock structure
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
- smp_store_release(__qrwlock_write_byte(lock), 0);
+ smp_store_release(&lock->wmode, 0);
 }
 
 /*
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
index 0abc6b6062fb..507f2dc51bba 100644
--- a/include/asm-generic/qrwlock_types.h
+++ b/include/asm-generic/qrwlock_types.h
@@ -9,12 +9,23 @@
  */
 
 typedef struct qrwlock {
- atomic_t cnts;
+ union {
+ atomic_t cnts;
+ struct {
+#ifdef __LITTLE_ENDIAN
+ u8 wmode; /* Writer mode   */
+ u8 rcnts[3]; /* Reader counts */
+#else
+ u8 rcnts[3]; /* Reader counts */
+ u8 wmode; /* Writer mode   */
+#endif
+ };
+ };
  arch_spinlock_t wait_lock;
 } arch_rwlock_t;
 
 #define __ARCH_RW_LOCK_UNLOCKED { \
- .cnts = ATOMIC_INIT(0), \
+ { .cnts = ATOMIC_INIT(0), }, \
  .wait_lock = __ARCH_SPIN_LOCK_UNLOCKED, \
 }
 
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 2655f26ec882..1af791e37348 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -23,26 +23,6 @@
 #include <linux/spinlock.h>
 #include <asm/qrwlock.h>
 
-/*
- * This internal data structure is used for optimizing access to some of
- * the subfields within the atomic_t cnts.
- */
-struct __qrwlock {
- union {
- atomic_t cnts;
- struct {
-#ifdef __LITTLE_ENDIAN
- u8 wmode; /* Writer mode   */
- u8 rcnts[3]; /* Reader counts */
-#else
- u8 rcnts[3]; /* Reader counts */
- u8 wmode; /* Writer mode   */
-#endif
- };
- };
- arch_spinlock_t lock;
-};
-
 /**
  * rspin_until_writer_unlock - inc reader count & spin until writer is gone
  * @lock  : Pointer to queue rwlock structure
@@ -125,10 +105,8 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
  * or wait for a previous writer to go away.
  */
  for (;;) {
- struct __qrwlock *l = (struct __qrwlock *)lock;
-
- if (!READ_ONCE(l->wmode) &&
-   (cmpxchg_relaxed(&l->wmode, 0, _QW_WAITING) == 0))
+ if (!READ_ONCE(lock->wmode) &&
+   (cmpxchg_relaxed(&lock->wmode, 0, _QW_WAITING) == 0))
  break;
 
  cpu_relax();
--
2.15.1



[PATCH 2/5][Bionic][SRU Artful] locking/atomic: Add atomic_cond_read_acquire()

dann frazier-4
In reply to this post by dann frazier-4
From: Will Deacon <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1732238

smp_cond_load_acquire() provides a way to spin on a variable with acquire
semantics until some conditional expression involving the variable is
satisfied. Architectures such as arm64 can potentially enter a low-power
state, waking up only when the value of the variable changes, which
reduces the system impact of tight polling loops.

This patch makes the same interface available to users of atomic_t,
atomic64_t and atomic_long_t, rather than require messy accesses to the
structure internals.
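
As a rough usage sketch (a hypothetical caller, not code added by this patch),
the new wrapper lets qrwlock code spin on an atomic_t with acquire semantics
without reaching into ->counter itself; VAL is bound to the current value of
the variable inside the condition expression:

    /* wait until no writer holds the lock, idling where the arch allows */
    atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK) != _QW_LOCKED);

The macro simply forwards to smp_cond_load_acquire() on the embedded counter,
as the one-line definitions in the hunks below show.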

Signed-off-by: Will Deacon <[hidden email]>
Acked-by: Peter Zijlstra <[hidden email]>
Cc: Boqun Feng <[hidden email]>
Cc: [hidden email]
Cc: Linus Torvalds <[hidden email]>
Cc: Paul E. McKenney <[hidden email]>
Cc: Thomas Gleixner <[hidden email]>
Cc: Waiman Long <[hidden email]>
Cc: [hidden email]
Link: http://lkml.kernel.org/r/1507810851-306-3-git-send-email-will.deacon@...
Signed-off-by: Ingo Molnar <[hidden email]>
(cherry picked from commit 4df714be4dcf40bfb0d4af0f851a6e1977afa02e)
Signed-off-by: dann frazier <[hidden email]>
---
 include/asm-generic/atomic-long.h | 3 +++
 include/linux/atomic.h            | 4 ++++
 2 files changed, 7 insertions(+)

diff --git a/include/asm-generic/atomic-long.h b/include/asm-generic/atomic-long.h
index 288cc9e96395..f2d97b782031 100644
--- a/include/asm-generic/atomic-long.h
+++ b/include/asm-generic/atomic-long.h
@@ -243,4 +243,7 @@ static inline long atomic_long_add_unless(atomic_long_t *l, long a, long u)
 #define atomic_long_inc_not_zero(l) \
  ATOMIC_LONG_PFX(_inc_not_zero)((ATOMIC_LONG_PFX(_t) *)(l))
 
+#define atomic_long_cond_read_acquire(v, c) \
+ ATOMIC_LONG_PFX(_cond_read_acquire)((ATOMIC_LONG_PFX(_t) *)(v), (c))
+
 #endif  /*  _ASM_GENERIC_ATOMIC_LONG_H  */
diff --git a/include/linux/atomic.h b/include/linux/atomic.h
index c56be7410130..b848b91c3137 100644
--- a/include/linux/atomic.h
+++ b/include/linux/atomic.h
@@ -650,6 +650,8 @@ static inline int atomic_dec_if_positive(atomic_t *v)
 }
 #endif
 
+#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
 #ifdef CONFIG_GENERIC_ATOMIC64
 #include <asm-generic/atomic64.h>
 #endif
@@ -1069,6 +1071,8 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v
 }
 #endif
 
+#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c))
+
 #include <asm-generic/atomic-long.h>
 
 #endif /* _LINUX_ATOMIC_H */
--
2.15.1



[PATCH 3/5][Bionic][SRU Artful] locking/qrwlock: Use atomic_cond_read_acquire() when spinning in qrwlock

dann frazier-4
In reply to this post by dann frazier-4
From: Will Deacon <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1732238

The qrwlock slowpaths involve spinning when either a prospective reader
is waiting for a concurrent writer to drain, or a prospective writer is
waiting for concurrent readers to drain. In both of these situations,
atomic_cond_read_acquire() can be used to avoid busy-waiting and make use
of any backoff functionality provided by the architecture.

This patch replaces the open-coded loops and the rspin_until_writer_unlock()
implementation with atomic_cond_read_acquire(). The write-mode transition from
zero to _QW_WAITING is left alone, since (a) it doesn't need acquire
semantics and (b) it should be fast.
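
Concretely, both forms appear in the diff below: the reader-side wait that
used to be the open-coded loop

    while ((cnts & _QW_WMASK) == _QW_LOCKED) {
            cpu_relax();
            cnts = atomic_read_acquire(&lock->cnts);
    }

collapses into a single call that the architecture can back with a low-power
wait:

    atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK) != _QW_LOCKED);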

Tested-by: Waiman Long <[hidden email]>
Tested-by: Jeremy Linton <[hidden email]>
Tested-by: Adam Wallis <[hidden email]>
Tested-by: Jan Glauber <[hidden email]>
Signed-off-by: Will Deacon <[hidden email]>
Acked-by: Peter Zijlstra <[hidden email]>
Cc: Boqun Feng <[hidden email]>
Cc: [hidden email]
Cc: Linus Torvalds <[hidden email]>
Cc: Paul E. McKenney <[hidden email]>
Cc: Thomas Gleixner <[hidden email]>
Cc: [hidden email]
Link: http://lkml.kernel.org/r/1507810851-306-4-git-send-email-will.deacon@...
Signed-off-by: Ingo Molnar <[hidden email]>
(cherry picked from commit b519b56e378ee82caf9b079b04f5db87dedc3251)
Signed-off-by: dann frazier <[hidden email]>
---
 include/asm-generic/qrwlock.h |  4 ++--
 kernel/locking/qrwlock.c      | 50 +++++++++++--------------------------------
 2 files changed, 14 insertions(+), 40 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 5a571469f324..814d5b64528c 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -49,7 +49,7 @@
 /*
  * External function declarations
  */
-extern void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts);
+extern void queued_read_lock_slowpath(struct qrwlock *lock);
 extern void queued_write_lock_slowpath(struct qrwlock *lock);
 
 /**
@@ -118,7 +118,7 @@ static inline void queued_read_lock(struct qrwlock *lock)
  return;
 
  /* The slowpath will decrement the reader count, if necessary. */
- queued_read_lock_slowpath(lock, cnts);
+ queued_read_lock_slowpath(lock);
 }
 
 /**
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 1af791e37348..5825e0fc1a8e 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -23,29 +23,11 @@
 #include <linux/spinlock.h>
 #include <asm/qrwlock.h>
 
-/**
- * rspin_until_writer_unlock - inc reader count & spin until writer is gone
- * @lock  : Pointer to queue rwlock structure
- * @writer: Current queue rwlock writer status byte
- *
- * In interrupt context or at the head of the queue, the reader will just
- * increment the reader count & wait until the writer releases the lock.
- */
-static __always_inline void
-rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
-{
- while ((cnts & _QW_WMASK) == _QW_LOCKED) {
- cpu_relax();
- cnts = atomic_read_acquire(&lock->cnts);
- }
-}
-
 /**
  * queued_read_lock_slowpath - acquire read lock of a queue rwlock
  * @lock: Pointer to queue rwlock structure
- * @cnts: Current qrwlock lock value
  */
-void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
+void queued_read_lock_slowpath(struct qrwlock *lock)
 {
  /*
  * Readers come here when they cannot get the lock without waiting
@@ -53,13 +35,12 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
  if (unlikely(in_interrupt())) {
  /*
  * Readers in interrupt context will get the lock immediately
- * if the writer is just waiting (not holding the lock yet).
- * The rspin_until_writer_unlock() function returns immediately
- * in this case. Otherwise, they will spin (with ACQUIRE
- * semantics) until the lock is available without waiting in
- * the queue.
+ * if the writer is just waiting (not holding the lock yet),
+ * so spin with ACQUIRE semantics until the lock is available
+ * without waiting in the queue.
  */
- rspin_until_writer_unlock(lock, cnts);
+ atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK)
+ != _QW_LOCKED);
  return;
  }
  atomic_sub(_QR_BIAS, &lock->cnts);
@@ -68,14 +49,14 @@ void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts)
  * Put the reader into the wait queue
  */
  arch_spin_lock(&lock->wait_lock);
+ atomic_add(_QR_BIAS, &lock->cnts);
 
  /*
  * The ACQUIRE semantics of the following spinning code ensure
  * that accesses can't leak upwards out of our subsequent critical
  * section in the case that the lock is currently held for write.
  */
- cnts = atomic_fetch_add_acquire(_QR_BIAS, &lock->cnts);
- rspin_until_writer_unlock(lock, cnts);
+ atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK) != _QW_LOCKED);
 
  /*
  * Signal the next one in queue to become queue head
@@ -90,8 +71,6 @@ EXPORT_SYMBOL(queued_read_lock_slowpath);
  */
 void queued_write_lock_slowpath(struct qrwlock *lock)
 {
- u32 cnts;
-
  /* Put the writer into the wait queue */
  arch_spin_lock(&lock->wait_lock);
 
@@ -113,15 +92,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
  }
 
  /* When no more readers, set the locked flag */
- for (;;) {
- cnts = atomic_read(&lock->cnts);
- if ((cnts == _QW_WAITING) &&
-    (atomic_cmpxchg_acquire(&lock->cnts, _QW_WAITING,
-    _QW_LOCKED) == _QW_WAITING))
- break;
-
- cpu_relax();
- }
+ do {
+ atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING);
+ } while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING,
+ _QW_LOCKED) != _QW_WAITING);
 unlock:
  arch_spin_unlock(&lock->wait_lock);
 }
--
2.15.1



[PATCH 4/5][Bionic][SRU Artful] locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks

dann frazier-4
In reply to this post by dann frazier-4
From: Will Deacon <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1732238

Now that the qrwlock can make use of WFE, remove our homebrewed rwlock
code in favour of the generic queued implementation.
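
The arch-side glue is small; pulling just the additions out of the diff below,
arm64 now needs little more than:

    # arch/arm64/Kconfig
    select ARCH_USE_QUEUED_RWLOCKS

    # arch/arm64/include/asm/Kbuild
    generic-y += qrwlock.h

    /* arch/arm64/include/asm/spinlock.h */
    #include <asm/qrwlock.h>

    /* arch/arm64/include/asm/spinlock_types.h */
    #include <asm-generic/qrwlock_types.h>

plus the ARCH_INLINE_{READ,WRITE}_* selections that keep the now-generic
lock/unlock entry points inlined on !PREEMPT kernels.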

Tested-by: Waiman Long <[hidden email]>
Tested-by: Jeremy Linton <[hidden email]>
Tested-by: Adam Wallis <[hidden email]>
Tested-by: Jan Glauber <[hidden email]>
Signed-off-by: Will Deacon <[hidden email]>
Acked-by: Peter Zijlstra <[hidden email]>
Cc: [hidden email]
Cc: Linus Torvalds <[hidden email]>
Cc: Thomas Gleixner <[hidden email]>
Cc: [hidden email]
Cc: [hidden email]
Cc: [hidden email]
Link: http://lkml.kernel.org/r/1507810851-306-5-git-send-email-will.deacon@...
Signed-off-by: Ingo Molnar <[hidden email]>
(cherry picked from commit 087133ac90763cd339b6b67f2998f87dcc136c52)
Signed-off-by: dann frazier <[hidden email]>
---
 arch/arm64/Kconfig                      |  17 ++++
 arch/arm64/include/asm/Kbuild           |   1 +
 arch/arm64/include/asm/spinlock.h       | 164 +-------------------------------
 arch/arm64/include/asm/spinlock_types.h |   6 +-
 4 files changed, 20 insertions(+), 168 deletions(-)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 98dcaf82fbe6..6b232b38d022 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -22,7 +22,24 @@ config ARM64
  select ARCH_HAS_STRICT_MODULE_RWX
  select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
  select ARCH_HAVE_NMI_SAFE_CMPXCHG if ACPI_APEI_SEA
+ select ARCH_INLINE_READ_LOCK if !PREEMPT
+ select ARCH_INLINE_READ_LOCK_BH if !PREEMPT
+ select ARCH_INLINE_READ_LOCK_IRQ if !PREEMPT
+ select ARCH_INLINE_READ_LOCK_IRQSAVE if !PREEMPT
+ select ARCH_INLINE_READ_UNLOCK if !PREEMPT
+ select ARCH_INLINE_READ_UNLOCK_BH if !PREEMPT
+ select ARCH_INLINE_READ_UNLOCK_IRQ if !PREEMPT
+ select ARCH_INLINE_READ_UNLOCK_IRQRESTORE if !PREEMPT
+ select ARCH_INLINE_WRITE_LOCK if !PREEMPT
+ select ARCH_INLINE_WRITE_LOCK_BH if !PREEMPT
+ select ARCH_INLINE_WRITE_LOCK_IRQ if !PREEMPT
+ select ARCH_INLINE_WRITE_LOCK_IRQSAVE if !PREEMPT
+ select ARCH_INLINE_WRITE_UNLOCK if !PREEMPT
+ select ARCH_INLINE_WRITE_UNLOCK_BH if !PREEMPT
+ select ARCH_INLINE_WRITE_UNLOCK_IRQ if !PREEMPT
+ select ARCH_INLINE_WRITE_UNLOCK_IRQRESTORE if !PREEMPT
  select ARCH_USE_CMPXCHG_LOCKREF
+ select ARCH_USE_QUEUED_RWLOCKS
  select ARCH_SUPPORTS_MEMORY_FAILURE
  select ARCH_SUPPORTS_ATOMIC_RMW
  select ARCH_SUPPORTS_NUMA_BALANCING
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index f81c7b685fc6..67dcb793a17c 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,6 +16,7 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += preempt.h
+generic-y += qrwlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..60140d5b328e 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -187,169 +187,7 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 }
 #define arch_spin_is_contended arch_spin_is_contended
 
-/*
- * Write lock implementation.
- *
- * Write locks set bit 31. Unlocking, is done by writing 0 since the lock is
- * exclusively held.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- */
-
-static inline void arch_write_lock(arch_rwlock_t *rw)
-{
- unsigned int tmp;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " sevl\n"
- "1: wfe\n"
- "2: ldaxr %w0, %1\n"
- " cbnz %w0, 1b\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 2b\n"
- __nops(1),
- /* LSE atomics */
- "1: mov %w0, wzr\n"
- "2: casa %w0, %w2, %1\n"
- " cbz %w0, 3f\n"
- " ldxr %w0, %1\n"
- " cbz %w0, 2b\n"
- " wfe\n"
- " b 1b\n"
- "3:")
- : "=&r" (tmp), "+Q" (rw->lock)
- : "r" (0x80000000)
- : "memory");
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *rw)
-{
- unsigned int tmp;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- "1: ldaxr %w0, %1\n"
- " cbnz %w0, 2f\n"
- " stxr %w0, %w2, %1\n"
- " cbnz %w0, 1b\n"
- "2:",
- /* LSE atomics */
- " mov %w0, wzr\n"
- " casa %w0, %w2, %1\n"
- __nops(2))
- : "=&r" (tmp), "+Q" (rw->lock)
- : "r" (0x80000000)
- : "memory");
-
- return !tmp;
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *rw)
-{
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- " stlr wzr, %0",
- " swpl wzr, wzr, %0")
- : "=Q" (rw->lock) :: "memory");
-}
-
-/* write_can_lock - would write_trylock() succeed? */
-#define arch_write_can_lock(x) ((x)->lock == 0)
-
-/*
- * Read lock implementation.
- *
- * It exclusively loads the lock value, increments it and stores the new value
- * back if positive and the CPU still exclusively owns the location. If the
- * value is negative, the lock is already held.
- *
- * During unlocking there may be multiple active read locks but no write lock.
- *
- * The memory barriers are implicit with the load-acquire and store-release
- * instructions.
- *
- * Note that in UNDEFINED cases, such as unlocking a lock twice, the LL/SC
- * and LSE implementations may exhibit different behaviour (although this
- * will have no effect on lockdep).
- */
-static inline void arch_read_lock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2;
-
- asm volatile(
- " sevl\n"
- ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- "1: wfe\n"
- "2: ldaxr %w0, %2\n"
- " add %w0, %w0, #1\n"
- " tbnz %w0, #31, 1b\n"
- " stxr %w1, %w0, %2\n"
- " cbnz %w1, 2b\n"
- __nops(1),
- /* LSE atomics */
- "1: wfe\n"
- "2: ldxr %w0, %2\n"
- " adds %w1, %w0, #1\n"
- " tbnz %w1, #31, 1b\n"
- " casa %w0, %w1, %2\n"
- " sbc %w0, %w1, %w0\n"
- " cbnz %w0, 2b")
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
- :
- : "cc", "memory");
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- "1: ldxr %w0, %2\n"
- " sub %w0, %w0, #1\n"
- " stlxr %w1, %w0, %2\n"
- " cbnz %w1, 1b",
- /* LSE atomics */
- " movn %w0, #0\n"
- " staddl %w0, %2\n"
- __nops(2))
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
- :
- : "memory");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *rw)
-{
- unsigned int tmp, tmp2;
-
- asm volatile(ARM64_LSE_ATOMIC_INSN(
- /* LL/SC */
- " mov %w1, #1\n"
- "1: ldaxr %w0, %2\n"
- " add %w0, %w0, #1\n"
- " tbnz %w0, #31, 2f\n"
- " stxr %w1, %w0, %2\n"
- " cbnz %w1, 1b\n"
- "2:",
- /* LSE atomics */
- " ldr %w0, %2\n"
- " adds %w1, %w0, #1\n"
- " tbnz %w1, #31, 1f\n"
- " casa %w0, %w1, %2\n"
- " sbc %w1, %w1, %w0\n"
- __nops(1)
- "1:")
- : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock)
- :
- : "cc", "memory");
-
- return !tmp2;
-}
-
-/* read_can_lock - would read_trylock() succeed? */
-#define arch_read_can_lock(x) ((x)->lock < 0x80000000)
+#include <asm/qrwlock.h>
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/arm64/include/asm/spinlock_types.h b/arch/arm64/include/asm/spinlock_types.h
index 55be59a35e3f..6b856012c51b 100644
--- a/arch/arm64/include/asm/spinlock_types.h
+++ b/arch/arm64/include/asm/spinlock_types.h
@@ -36,10 +36,6 @@ typedef struct {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED { 0 , 0 }
 
-typedef struct {
- volatile unsigned int lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED { 0 }
+#include <asm-generic/qrwlock_types.h>
 
 #endif
--
2.15.1



[PATCH 5/5][Bionic][SRU Artful] locking/qrwlock: Prevent slowpath writers getting held up by fastpath

dann frazier-4
In reply to this post by dann frazier-4
From: Will Deacon <[hidden email]>

BugLink: https://bugs.launchpad.net/bugs/1732238

When a prospective writer takes the qrwlock locking slowpath due to the
lock being held, it attempts to cmpxchg the wmode field from 0 to
_QW_WAITING so that concurrent lockers also take the slowpath and queue
on the spinlock accordingly, allowing the lockers to drain.

Unfortunately, this isn't fair, because a fastpath writer that comes in
after the lock is made available but before the _QW_WAITING flag is set
can effectively jump the queue. If there is a steady stream of prospective
writers, then the waiter will be held off indefinitely.

This patch restores fairness by separating _QW_WAITING and _QW_LOCKED
into two distinct fields: _QW_LOCKED continues to occupy the bottom byte
of the lockword so that it can be cleared unconditionally when unlocking,
but _QW_WAITING now occupies what used to be the bottom bit of the reader
count. This then forces the slow-path for concurrent lockers.
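
Spelled out as a layout sketch (derived from the new constants in the hunk
below; the comment is illustrative rather than code from the patch):

    /*
     * bits 9..31 : reader count  (_QR_SHIFT = 9, _QR_BIAS = 1 << 9)
     * bit  8     : _QW_WAITING   (0x100) - a writer is queued
     * bits 0..7  : _QW_LOCKED    (0x0ff) - the 'wlocked' byte, cleared on unlock
     * _QW_WMASK  = 0x1ff, covering both writer states
     */

With _QW_WAITING outside the bottom byte, a queued writer can set it with a
plain atomic_add(), and the fastpath cmpxchg() of the whole word from 0 to
_QW_LOCKED can no longer succeed until the waiter has been granted the lock.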

Tested-by: Waiman Long <[hidden email]>
Tested-by: Jeremy Linton <[hidden email]>
Tested-by: Adam Wallis <[hidden email]>
Tested-by: Jan Glauber <[hidden email]>
Signed-off-by: Will Deacon <[hidden email]>
Acked-by: Peter Zijlstra <[hidden email]>
Cc: Boqun Feng <[hidden email]>
Cc: [hidden email]
Cc: Linus Torvalds <[hidden email]>
Cc: Paul E. McKenney <[hidden email]>
Cc: Thomas Gleixner <[hidden email]>
Cc: [hidden email]
Link: http://lkml.kernel.org/r/1507810851-306-6-git-send-email-will.deacon@...
Signed-off-by: Ingo Molnar <[hidden email]>
(cherry picked from commit d133166146333e1f13fc81c0e6c43c8d99290a8a)
Signed-off-by: dann frazier <[hidden email]>
---
 include/asm-generic/qrwlock.h       | 23 +++++------------------
 include/asm-generic/qrwlock_types.h |  8 ++++----
 kernel/locking/qrwlock.c            | 20 +++++---------------
 3 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
index 814d5b64528c..c39a93a6d91d 100644
--- a/include/asm-generic/qrwlock.h
+++ b/include/asm-generic/qrwlock.h
@@ -26,24 +26,11 @@
 
 /*
  * Writer states & reader shift and bias.
- *
- *       | +0 | +1 | +2 | +3 |
- *   ----+----+----+----+----+
- *    LE | 78 | 56 | 34 | 12 | 0x12345678
- *   ----+----+----+----+----+
- *       | wr |      rd      |
- *       +----+----+----+----+
- *
- *   ----+----+----+----+----+
- *    BE | 12 | 34 | 56 | 78 | 0x12345678
- *   ----+----+----+----+----+
- *       |      rd      | wr |
- *       +----+----+----+----+
  */
-#define _QW_WAITING 1 /* A writer is waiting   */
-#define _QW_LOCKED 0xff /* A writer holds the lock */
-#define _QW_WMASK 0xff /* Writer mask   */
-#define _QR_SHIFT 8 /* Reader count shift   */
+#define _QW_WAITING 0x100 /* A writer is waiting   */
+#define _QW_LOCKED 0x0ff /* A writer holds the lock */
+#define _QW_WMASK 0x1ff /* Writer mask   */
+#define _QR_SHIFT 9 /* Reader count shift   */
 #define _QR_BIAS (1U << _QR_SHIFT)
 
 /*
@@ -152,7 +139,7 @@ static inline void queued_read_unlock(struct qrwlock *lock)
  */
 static inline void queued_write_unlock(struct qrwlock *lock)
 {
- smp_store_release(&lock->wmode, 0);
+ smp_store_release(&lock->wlocked, 0);
 }
 
 /*
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
index 507f2dc51bba..8af752acbdc0 100644
--- a/include/asm-generic/qrwlock_types.h
+++ b/include/asm-generic/qrwlock_types.h
@@ -13,11 +13,11 @@ typedef struct qrwlock {
  atomic_t cnts;
  struct {
 #ifdef __LITTLE_ENDIAN
- u8 wmode; /* Writer mode   */
- u8 rcnts[3]; /* Reader counts */
+ u8 wlocked; /* Locked for write? */
+ u8 __lstate[3];
 #else
- u8 rcnts[3]; /* Reader counts */
- u8 wmode; /* Writer mode   */
+ u8 __lstate[3];
+ u8 wlocked; /* Locked for write? */
 #endif
  };
  };
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
index 5825e0fc1a8e..c7471c3fb798 100644
--- a/kernel/locking/qrwlock.c
+++ b/kernel/locking/qrwlock.c
@@ -39,8 +39,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
  * so spin with ACQUIRE semantics until the lock is available
  * without waiting in the queue.
  */
- atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK)
- != _QW_LOCKED);
+ atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
  return;
  }
  atomic_sub(_QR_BIAS, &lock->cnts);
@@ -56,7 +55,7 @@ void queued_read_lock_slowpath(struct qrwlock *lock)
  * that accesses can't leak upwards out of our subsequent critical
  * section in the case that the lock is currently held for write.
  */
- atomic_cond_read_acquire(&lock->cnts, (VAL & _QW_WMASK) != _QW_LOCKED);
+ atomic_cond_read_acquire(&lock->cnts, !(VAL & _QW_LOCKED));
 
  /*
  * Signal the next one in queue to become queue head
@@ -79,19 +78,10 @@ void queued_write_lock_slowpath(struct qrwlock *lock)
     (atomic_cmpxchg_acquire(&lock->cnts, 0, _QW_LOCKED) == 0))
  goto unlock;
 
- /*
- * Set the waiting flag to notify readers that a writer is pending,
- * or wait for a previous writer to go away.
- */
- for (;;) {
- if (!READ_ONCE(lock->wmode) &&
-   (cmpxchg_relaxed(&lock->wmode, 0, _QW_WAITING) == 0))
- break;
-
- cpu_relax();
- }
+ /* Set the waiting flag to notify readers that a writer is pending */
+ atomic_add(_QW_WAITING, &lock->cnts);
 
- /* When no more readers, set the locked flag */
+ /* When no more readers or writers, set the locked flag */
  do {
  atomic_cond_read_acquire(&lock->cnts, VAL == _QW_WAITING);
  } while (atomic_cmpxchg_relaxed(&lock->cnts, _QW_WAITING,
--
2.15.1



ACK/APPLIED[B]: [PATCH 0/5][Bionic][SRU Artful] Switch arm64 over to qrwlock

Seth Forshee
In reply to this post by dann frazier-4
On Thu, Jan 04, 2018 at 06:48:32PM -0700, dann frazier wrote:
> BugLink: http://bugs.launchpad.net/bugs/1732238
>
> These patches comprise all clean cherry picks that landed in the 4.15
> merge window. Verified on a ThunderX2-based Sabre board, and regression
> tested on a 128-cpu x86 system using stress-ng and locktorture. (The other
> Ubuntu architectures don't use qrwlocks).

Clean cherry picks. It's a little frightening to be making significant
changes to locking code in an SRU, but this is limited to arm64 and
appears to have been well tested there.

Acked-by: Seth Forshee <[hidden email]>

Applied to bionic/master-next, thanks!


ACK: [PATCH 0/5][Bionic][SRU Artful] Switch arm64 over to qrwlock

Marcelo Henrique Cerri
In reply to this post by dann frazier-4
Clean cherry-picks with scope restricted to arm64.

Acked-by: Marcelo Henrique Cerri <[hidden email]>



Re: [PATCH 0/5][Bionic][SRU Artful] Switch arm64 over to qrwlock

Paolo Pisati-5
In reply to this post by dann frazier-4
All clean cherry-picks from upstream.

Acked-by: Paolo Pisati <[hidden email]>

On Fri, Jan 5, 2018 at 2:48 AM, dann frazier <[hidden email]> wrote:

> BugLink: http://bugs.launchpad.net/bugs/1732238
>
> These patches comprise all clean cherry picks that landed in the 4.15
> merge window. Verified on a ThunderX2-based Sabre board, and regression
> tested on a 128-cpu x86 system using stress-ng and locktorture. (The other
> Ubuntu architectures don't use qrwlocks).
>
> Will Deacon (5):
>   locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'
>   locking/atomic: Add atomic_cond_read_acquire()
>   locking/qrwlock: Use atomic_cond_read_acquire() when spinning in
>     qrwlock
>   locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks
>   locking/qrwlock: Prevent slowpath writers getting held up by fastpath
>
>  arch/arm64/Kconfig                      |  17 ++++
>  arch/arm64/include/asm/Kbuild           |   1 +
>  arch/arm64/include/asm/spinlock.h       | 164 +-------------------------------
>  arch/arm64/include/asm/spinlock_types.h |   6 +-
>  include/asm-generic/atomic-long.h       |   3 +
>  include/asm-generic/qrwlock.h           |  37 ++-----
>  include/asm-generic/qrwlock_types.h     |  15 ++-
>  include/linux/atomic.h                  |   4 +
>  kernel/locking/qrwlock.c                |  86 +++--------------
>  9 files changed, 61 insertions(+), 272 deletions(-)
>
> --
> 2.15.1
>
>
> --
> kernel-team mailing list
> [hidden email]
> https://lists.ubuntu.com/mailman/listinfo/kernel-team



--
bye,
p.


APPLIED: [PATCH 0/5][Bionic][SRU Artful] Switch arm64 over to qrwlock

Khalid Elmously
In reply to this post by dann frazier-4
Applied to Artful

On 2018-01-04 18:48:32, dann frazier wrote:

> BugLink: http://bugs.launchpad.net/bugs/1732238
>
> These patches comprise all clean cherry picks that landed in the 4.15
> merge window. Verified on a ThunderX2-based Sabre board, and regression
> tested on a 128-cpu x86 system using stress-ng and locktorture. (The other
> Ubuntu architectures don't use qrwlocks).
>
> Will Deacon (5):
>   locking/qrwlock: Use 'struct qrwlock' instead of 'struct __qrwlock'
>   locking/atomic: Add atomic_cond_read_acquire()
>   locking/qrwlock: Use atomic_cond_read_acquire() when spinning in
>     qrwlock
>   locking/qrwlock, arm64: Move rwlock implementation over to qrwlocks
>   locking/qrwlock: Prevent slowpath writers getting held up by fastpath
>
>  arch/arm64/Kconfig                      |  17 ++++
>  arch/arm64/include/asm/Kbuild           |   1 +
>  arch/arm64/include/asm/spinlock.h       | 164 +-------------------------------
>  arch/arm64/include/asm/spinlock_types.h |   6 +-
>  include/asm-generic/atomic-long.h       |   3 +
>  include/asm-generic/qrwlock.h           |  37 ++-----
>  include/asm-generic/qrwlock_types.h     |  15 ++-
>  include/linux/atomic.h                  |   4 +
>  kernel/locking/qrwlock.c                |  86 +++--------------
>  9 files changed, 61 insertions(+), 272 deletions(-)
>
> --
> 2.15.1
>
>
> --
> kernel-team mailing list
> [hidden email]
> https://lists.ubuntu.com/mailman/listinfo/kernel-team
