// SPDX-License-Identifier: GPL-2.0
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/osq_lock.h>

/*
 * An MCS-like lock especially tailored for optimistic spinning for sleeping
 * lock implementations (mutex, rwsem, etc).
 *
 * Using a single mcs node per CPU is safe because sleeping locks should not be
 * called from interrupt context and we have preemption disabled while
 * spinning.
 */
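
/*
 * Illustrative only, not part of this file: a sleeping lock's slowpath
 * would typically bracket its owner-spin loop with osq_lock()/osq_unlock(),
 * so that only the head of the queue spins on the lock word itself. The
 * lock type and helpers below (my_mutex, my_mutex_trylock(),
 * my_mutex_owner_running()) are hypothetical. osq_lock() returning false
 * means we were cancelled (need_resched() or a preempted vCPU) and should
 * block instead.
 *
 *	static bool my_mutex_optimistic_spin(struct my_mutex *m)
 *	{
 *		bool acquired = false;
 *
 *		if (!osq_lock(&m->osq))
 *			return false;
 *
 *		while (my_mutex_owner_running(m)) {
 *			if (my_mutex_trylock(m)) {
 *				acquired = true;
 *				break;
 *			}
 *			cpu_relax();
 *		}
 *
 *		osq_unlock(&m->osq);
 *		return acquired;
 *	}
 */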

struct optimistic_spin_node {
	struct optimistic_spin_node *next, *prev;
	int locked; /* 1 if lock acquired */
	int cpu; /* encoded CPU # + 1 value */
};

static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node);

/*
 * We use the value 0 to represent "no CPU", thus the encoded value
 * will be the CPU number incremented by 1.
 */
static inline int encode_cpu(int cpu_nr)
{
	return cpu_nr + 1;
}

static inline int node_cpu(struct optimistic_spin_node *node)
{
	return node->cpu - 1;
}

static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val)
{
	int cpu_nr = encoded_cpu_val - 1;

	return per_cpu_ptr(&osq_node, cpu_nr);
}
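
/*
 * Sketch of what the encoding buys us (osq_lock_init() and osq_is_locked()
 * live in include/linux/osq_lock.h): @lock->tail is a plain atomic_t holding
 * an encoded CPU number rather than a node pointer, so the empty queue is
 * simply the value 0:
 *
 *	struct optimistic_spin_queue q;
 *
 *	osq_lock_init(&q);		// atomic_set(&q.tail, OSQ_UNLOCKED_VAL)
 *	WARN_ON(osq_is_locked(&q));	// tail == OSQ_UNLOCKED_VAL: unlocked
 */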

/*
 * Get a stable @node->next pointer, either for unlock() or unqueue() purposes.
 * Can return NULL in case we were the last queued and we updated @lock instead.
 *
 * If osq_lock() is being cancelled there must be a previous node
 * and 'old_cpu' is its CPU #.
 * For osq_unlock() there is never a previous node and old_cpu is
 * set to OSQ_UNLOCKED_VAL.
 */
static inline struct optimistic_spin_node *
osq_wait_next(struct optimistic_spin_queue *lock,
	      struct optimistic_spin_node *node,
	      int old_cpu)
{
	int curr = encode_cpu(smp_processor_id());

	for (;;) {
		if (atomic_read(&lock->tail) == curr &&
		    atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) {
			/*
			 * We were the last queued, we moved @lock back. @prev
			 * will now observe @lock and will complete its
			 * unlock()/unqueue().
			 */
			return NULL;
		}

		/*
		 * We must xchg() the @node->next value, because if we were to
		 * leave it in, a concurrent unlock()/unqueue() from
		 * @node->next might complete Step-A and think its @prev is
		 * still valid.
		 *
		 * If the concurrent unlock()/unqueue() wins the race, we'll
		 * wait for either @lock to point to us, through its Step-B, or
		 * wait for a new @node->next from its Step-C.
		 */
		if (node->next) {
			struct optimistic_spin_node *next;

			next = xchg(&node->next, NULL);
			if (next)
				return next;
		}

		cpu_relax();
	}
}
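
/*
 * Illustrative timeline (hypothetical CPUs): CPU1 holds the OSQ and CPU2,
 * the current tail, cancels. CPU2 calls osq_wait_next(lock, node2,
 * node1->cpu): the tail still encodes CPU2, so the cmpxchg() above moves it
 * back to CPU1 and NULL is returned. CPU1's eventual osq_unlock() then takes
 * the uncontended fast path, exactly as if CPU2 had never queued.
 */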

bool osq_lock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node = this_cpu_ptr(&osq_node);
	struct optimistic_spin_node *prev, *next;
	int curr = encode_cpu(smp_processor_id());
	int old;

	node->locked = 0;
	node->next = NULL;
	node->cpu = curr;

	/*
	 * We need both ACQUIRE (pairs with the corresponding RELEASE in
	 * the uncontended unlock() fastpath) and RELEASE (to publish
	 * the node fields we just initialised) semantics when updating
	 * the lock tail.
	 */
	old = atomic_xchg(&lock->tail, curr);
	if (old == OSQ_UNLOCKED_VAL)
		return true;

	prev = decode_cpu(old);
	node->prev = prev;

	/*
	 * osq_lock()			unqueue
	 *
	 * node->prev = prev		osq_wait_next()
	 * WMB				MB
	 * prev->next = node		next->prev = prev // unqueue-C
	 *
	 * Here 'node->prev' and 'next->prev' are the same variable and we need
	 * to ensure these stores happen in-order to avoid corrupting the list.
	 */
	smp_wmb();

	WRITE_ONCE(prev->next, node);

	/*
	 * Normally @prev is untouchable after the above store: at that
	 * moment unlock() can proceed and wipe the node element from the stack.
	 *
	 * However, since our nodes are static per-cpu storage, we're
	 * guaranteed their existence -- this allows us to apply
	 * cmpxchg in an attempt to undo our queueing.
	 */

	/*
	 * Wait to acquire the lock or for cancellation. Note that need_resched()
	 * will come with an IPI, which will wake smp_cond_load_relaxed() if it
	 * is implemented with a monitor-wait. vcpu_is_preempted() relies on
	 * polling; be careful.
	 */
	if (smp_cond_load_relaxed(&node->locked, VAL || need_resched() ||
				  vcpu_is_preempted(node_cpu(node->prev))))
		return true;

	/* unqueue */
	/*
	 * Step - A -- stabilize @prev
	 *
	 * Undo our @prev->next assignment; this will make @prev's
	 * unlock()/unqueue() wait for a next pointer since @lock points to us
	 * (or later).
	 */

	for (;;) {
		/*
		 * cpu_relax() below implies a compiler barrier which would
		 * prevent this comparison from being optimized away.
		 */
		if (data_race(prev->next) == node &&
		    cmpxchg(&prev->next, node, NULL) == node)
			break;

		/*
		 * We can only fail the cmpxchg() racing against an unlock(),
		 * in which case we should observe @node->locked becoming
		 * true.
		 */
		if (smp_load_acquire(&node->locked))
			return true;

		cpu_relax();

		/*
		 * Or we race against a concurrent unqueue()'s step-B, in which
		 * case its step-C will write us a new @node->prev pointer.
		 */
		prev = READ_ONCE(node->prev);
	}

	/*
	 * Step - B -- stabilize @next
	 *
	 * Similar to unlock(), wait for @node->next or move @lock from @node
	 * back to @prev.
	 */

	next = osq_wait_next(lock, node, prev->cpu);
	if (!next)
		return false;

	/*
	 * Step - C -- unlink
	 *
	 * @prev is stable because it's still waiting for a new @prev->next
	 * pointer, @next is stable because our @node->next pointer is NULL and
	 * it will wait in Step-A.
	 */

	WRITE_ONCE(next->prev, prev);
	WRITE_ONCE(prev->next, next);

	return false;
}
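
/*
 * Illustrative unqueue timeline (hypothetical CPUs): CPU2 queued behind CPU1
 * and observes need_resched(). Step-A clears CPU1's ->next so CPU1 cannot
 * hand CPU2 the lock mid-unqueue. Step-B either moves @lock->tail back to
 * CPU1 (CPU2 was the tail; osq_lock() simply returns false) or yields CPU3,
 * a stable successor. Step-C then links CPU1 and CPU3 to each other, leaving
 * the queue exactly as if CPU2 had never been in it.
 */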

void osq_unlock(struct optimistic_spin_queue *lock)
{
	struct optimistic_spin_node *node, *next;
	int curr = encode_cpu(smp_processor_id());

	/*
	 * Fast path for the uncontended case.
	 */
	if (likely(atomic_cmpxchg_release(&lock->tail, curr,
					  OSQ_UNLOCKED_VAL) == curr))
		return;

	/*
	 * Second most likely case.
	 */
	node = this_cpu_ptr(&osq_node);
	next = xchg(&node->next, NULL);
	if (next) {
		WRITE_ONCE(next->locked, 1);
		return;
	}

	next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL);
	if (next)
		WRITE_ONCE(next->locked, 1);
}
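
/*
 * Reading aid, not upstream commentary: the slow path above is reached when
 * some successor has xchg()ed itself into @lock->tail but its
 * WRITE_ONCE(prev->next, node) in osq_lock() is not yet visible.
 * osq_wait_next() spins until that successor either publishes itself through
 * @node->next, so the lock can be handed on via ->locked, or backs out via
 * unqueue, leaving nobody to hand the lock to.
 */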