arch/powerpc/include/asm/qspinlock.h
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_POWERPC_QSPINLOCK_H
#define _ASM_POWERPC_QSPINLOCK_H

#include <linux/compiler.h>
#include <asm/qspinlock_types.h>
#include <asm/paravirt.h>

#ifdef CONFIG_PPC64
/*
 * Use the EH=1 hint for accesses that result in the lock being acquired.
 * The hardware is supposed to optimise this pattern by holding the lock
 * cacheline longer, and releasing when a store to the same memory (the
 * unlock) is performed.
 */
#define _Q_SPIN_EH_HINT 1
#else
#define _Q_SPIN_EH_HINT 0
#endif

/*
 * The trylock itself may steal. This makes trylocks slightly stronger, and
 * makes locks slightly more efficient when stealing.
 *
 * This is compile-time, so if true then there may always be stealers, so the
 * nosteal paths become unused.
 */
#define _Q_SPIN_TRY_LOCK_STEAL 1

/*
 * Put a speculation barrier after testing the lock/node and finding it
 * busy. Try to prevent pointless speculation in slow paths.
 *
 * Slows down the lockstorm microbenchmark with no stealing, where locking
 * is purely FIFO through the queue. May have more benefit in real workloads
 * where speculating into the wrong place could have a greater cost.
 */
#define _Q_SPIN_SPEC_BARRIER 0

#ifdef CONFIG_PPC64
/*
 * Execute a miso instruction after passing the MCS lock ownership to the
 * queue head. Miso is intended to make stores visible to other CPUs sooner.
 *
 * This seems to make the lockstorm microbenchmark nospin test go slightly
 * faster on POWER10, but it is disabled for now.
 */
#define _Q_SPIN_MISO 0
#else
#define _Q_SPIN_MISO 0
#endif

#ifdef CONFIG_PPC64
/*
 * This executes miso after an unlock of the lock word, having ownership
 * pass to the next CPU sooner. This will slow the uncontended path to some
 * degree. There is no evidence yet that it helps.
 */
#define _Q_SPIN_MISO_UNLOCK 0
#else
#define _Q_SPIN_MISO_UNLOCK 0
#endif

/*
 * This seems to slow down the lockstorm microbenchmark; the queue node
 * presumably just has to become shared again right afterwards, when its
 * waiter spins on the lock field.
 */
#define _Q_SPIN_PREFETCH_NEXT 0
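/*
 * _Q_SPIN_SPEC_BARRIER, _Q_SPIN_MISO and _Q_SPIN_PREFETCH_NEXT are not
 * referenced in this header; they are tuning knobs for the out-of-line
 * queueing slowpath (see queued_spin_lock_slowpath() below).
 */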

static __always_inline int queued_spin_is_locked(struct qspinlock *lock)
{
	return READ_ONCE(lock->val);
}

static __always_inline int queued_spin_value_unlocked(struct qspinlock lock)
{
	return !lock.val;
}

static __always_inline int queued_spin_is_contended(struct qspinlock *lock)
{
	return !!(READ_ONCE(lock->val) & _Q_TAIL_CPU_MASK);
}

static __always_inline u32 queued_spin_encode_locked_val(void)
{
	/* XXX: make this use lock value in paca like simple spinlocks? */
	return _Q_LOCKED_VAL | (smp_processor_id() << _Q_OWNER_CPU_OFFSET);
}
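
/*
 * The encoded value sets _Q_LOCKED_VAL and records the acquiring CPU in
 * the owner field, so the lock word identifies the current holder (used,
 * for example, by the paravirt yield logic in the slowpath).
 */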

static __always_inline int __queued_spin_trylock_nosteal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev;

	/* Trylock succeeds only when unlocked and no queued nodes */
	asm volatile(
"1:	lwarx	%0,0,%1,%3	# __queued_spin_trylock_nosteal	\n"
"	cmpwi	0,%0,0						\n"
"	bne-	2f						\n"
"	stwcx.	%2,0,%1						\n"
"	bne-	1b						\n"
"\t"	PPC_ACQUIRE_BARRIER "					\n"
"2:								\n"
	: "=&r" (prev)
	: "r" (&lock->val), "r" (new),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(prev == 0);
}
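
/*
 * Roughly equivalent C for the sequence above, ignoring the EH hint
 * (illustrative sketch only): the trylock succeeds only if the whole word
 * was zero, i.e. unlocked with no queued tail:
 *
 *	return cmpxchg_acquire(&lock->val, 0, new) == 0;
 */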

static __always_inline int __queued_spin_trylock_steal(struct qspinlock *lock)
{
	u32 new = queued_spin_encode_locked_val();
	u32 prev, tmp;

	/* Trylock may get ahead of queued nodes if it finds unlocked */
	asm volatile(
"1:	lwarx	%0,0,%2,%5	# __queued_spin_trylock_steal	\n"
"	andc.	%1,%0,%4					\n"
"	bne-	2f						\n"
"	and	%1,%0,%4					\n"
"	or	%1,%1,%3					\n"
"	stwcx.	%1,0,%2						\n"
"	bne-	1b						\n"
"\t"	PPC_ACQUIRE_BARRIER "					\n"
"2:								\n"
	: "=&r" (prev), "=&r" (tmp)
	: "r" (&lock->val), "r" (new), "r" (_Q_TAIL_CPU_MASK),
	  "i" (_Q_SPIN_EH_HINT)
	: "cr0", "memory");

	return likely(!(prev & ~_Q_TAIL_CPU_MASK));
}
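
/*
 * Roughly equivalent C for the sequence above, ignoring the EH hint
 * (illustrative sketch only). The attempt only proceeds while no bits
 * outside _Q_TAIL_CPU_MASK are set, and any queued tail already recorded
 * in the word is preserved:
 *
 *	u32 val, new_val;
 *
 *	do {
 *		val = READ_ONCE(lock->val);
 *		if (val & ~_Q_TAIL_CPU_MASK)
 *			return 0;
 *		new_val = (val & _Q_TAIL_CPU_MASK) | new;
 *	} while (cmpxchg_acquire(&lock->val, val, new_val) != val);
 *	return 1;
 */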

static __always_inline int queued_spin_trylock(struct qspinlock *lock)
{
	if (!_Q_SPIN_TRY_LOCK_STEAL)
		return __queued_spin_trylock_nosteal(lock);
	else
		return __queued_spin_trylock_steal(lock);
}

void queued_spin_lock_slowpath(struct qspinlock *lock);

static __always_inline void queued_spin_lock(struct qspinlock *lock)
{
	if (!queued_spin_trylock(lock))
		queued_spin_lock_slowpath(lock);
}
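
/*
 * Fast path: the inline trylock above (the stealing variant, since
 * _Q_SPIN_TRY_LOCK_STEAL is set). On failure, control falls back to the
 * out-of-line queueing slowpath declared above, which is implemented
 * outside this header.
 */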

static inline void queued_spin_unlock(struct qspinlock *lock)
{
	smp_store_release(&lock->locked, 0);
	if (_Q_SPIN_MISO_UNLOCK)
		asm volatile("miso" ::: "memory");
}
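
/*
 * Unlock is a release store of zero to the locked field only; the full
 * lock word is not rewritten, so a queue tail recorded by waiters stays
 * in place for the slowpath to act on.
 */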

#define arch_spin_is_locked(l)		queued_spin_is_locked(l)
#define arch_spin_is_contended(l)	queued_spin_is_contended(l)
#define arch_spin_value_unlocked(l)	queued_spin_value_unlocked(l)
#define arch_spin_lock(l)		queued_spin_lock(l)
#define arch_spin_trylock(l)		queued_spin_trylock(l)
#define arch_spin_unlock(l)		queued_spin_unlock(l)
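
/*
 * These are the arch hooks consumed by the generic spinlock code; kernel
 * code normally uses the spin_lock()/spin_unlock() family rather than
 * calling these directly.
 */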

#ifdef CONFIG_PARAVIRT_SPINLOCKS
void pv_spinlocks_init(void);
#else
static inline void pv_spinlocks_init(void) { }
#endif

#endif /* _ASM_POWERPC_QSPINLOCK_H */