x86/percpu: Clean up percpu_from_op()
arch/x86/include/asm/percpu.h

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_PERCPU_H
#define _ASM_X86_PERCPU_H

#ifdef CONFIG_X86_64
#define __percpu_seg		gs
#define __percpu_mov_op		movq
#else
#define __percpu_seg		fs
#define __percpu_mov_op		movl
#endif

#ifdef __ASSEMBLY__

/*
 * PER_CPU finds an address of a per-cpu variable.
 *
 * Args:
 *	var - variable name
 *	reg - 32bit register
 *
 * The resulting address is stored in the "reg" argument.
 *
 * Example:
 *	PER_CPU(cpu_gdt_descr, %ebx)
 */
#ifdef CONFIG_SMP
#define PER_CPU(var, reg)						\
	__percpu_mov_op %__percpu_seg:this_cpu_off, reg;		\
	lea var(reg), reg
#define PER_CPU_VAR(var)	%__percpu_seg:var
#else /* ! SMP */
#define PER_CPU(var, reg)	__percpu_mov_op $var, reg
#define PER_CPU_VAR(var)	var
#endif	/* SMP */

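/*
 * Illustrative expansion (not part of this header): under CONFIG_SMP
 * on 32-bit, PER_CPU(cpu_gdt_descr, %ebx) becomes roughly
 *
 *	movl %fs:this_cpu_off, %ebx
 *	lea cpu_gdt_descr(%ebx), %ebx
 *
 * i.e. the per-cpu base offset is loaded first and the variable's
 * address is then formed relative to it.
 */
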
#ifdef CONFIG_X86_64_SMP
#define INIT_PER_CPU_VAR(var)  init_per_cpu__##var
#else
#define INIT_PER_CPU_VAR(var)  var
#endif

#else /* ...!ASSEMBLY */

#include <linux/kernel.h>
#include <linux/stringify.h>

#ifdef CONFIG_SMP
#define __percpu_prefix		"%%"__stringify(__percpu_seg)":"
#define __my_cpu_offset		this_cpu_read(this_cpu_off)

/*
 * Compared to the generic __my_cpu_offset version, the following
 * saves one instruction and avoids clobbering a temp register.
 */
#define arch_raw_cpu_ptr(ptr)				\
({							\
	unsigned long tcp_ptr__;			\
	asm volatile("add " __percpu_arg(1) ", %0"	\
		     : "=r" (tcp_ptr__)			\
		     : "m" (this_cpu_off), "0" (ptr));	\
	(typeof(*(ptr)) __kernel __force *)tcp_ptr__;	\
})
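
/*
 * Illustrative code generation (not part of this header): for a
 * pointer already held in a register, arch_raw_cpu_ptr(ptr) compiles
 * to a single
 *
 *	add %gs:this_cpu_off, %rax	(%fs on 32-bit)
 *
 * whereas the generic version first reads this_cpu_off into a
 * temporary register and then adds it, costing one more instruction
 * and a clobbered register.
 */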
#else
#define __percpu_prefix		""
#endif

#define __percpu_arg(x)		__percpu_prefix "%" #x

/*
 * Initialized pointers to per-cpu variables needed for the boot
 * processor must use these macros to get the proper address
 * offset from __per_cpu_load on SMP.
 *
 * There must also be an entry in vmlinux_64.lds.S.
 */
#define DECLARE_INIT_PER_CPU(var) \
	extern typeof(var) init_per_cpu_var(var)

#ifdef CONFIG_X86_64_SMP
#define init_per_cpu_var(var)  init_per_cpu__##var
#else
#define init_per_cpu_var(var)  var
#endif

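/*
 * Illustrative usage (hypothetical variable name, not part of this
 * header): a per-cpu variable "foo" that the boot CPU touches before
 * the per-cpu areas are set up would be declared with
 *
 *	DECLARE_INIT_PER_CPU(foo);
 *
 * and accessed through init_per_cpu_var(foo), which resolves to
 * init_per_cpu__foo on 64-bit SMP and to plain foo otherwise.
 */
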
/* For arch-specific code, we can use direct single-insn ops (they
 * don't give an lvalue though). */
extern void __bad_percpu_size(void);

#define __pcpu_type_1 u8
#define __pcpu_type_2 u16
#define __pcpu_type_4 u32
#define __pcpu_type_8 u64

#define __pcpu_cast_1(val) ((u8)(((unsigned long) val) & 0xff))
#define __pcpu_cast_2(val) ((u16)(((unsigned long) val) & 0xffff))
#define __pcpu_cast_4(val) ((u32)(((unsigned long) val) & 0xffffffff))
#define __pcpu_cast_8(val) ((u64)(val))

#define __pcpu_op1_1(op, dst) op "b " dst
#define __pcpu_op1_2(op, dst) op "w " dst
#define __pcpu_op1_4(op, dst) op "l " dst
#define __pcpu_op1_8(op, dst) op "q " dst

#define __pcpu_op2_1(op, src, dst) op "b " src ", " dst
#define __pcpu_op2_2(op, src, dst) op "w " src ", " dst
#define __pcpu_op2_4(op, src, dst) op "l " src ", " dst
#define __pcpu_op2_8(op, src, dst) op "q " src ", " dst

#define __pcpu_reg_1(mod, x) mod "q" (x)
#define __pcpu_reg_2(mod, x) mod "r" (x)
#define __pcpu_reg_4(mod, x) mod "r" (x)
#define __pcpu_reg_8(mod, x) mod "r" (x)

#define __pcpu_reg_imm_1(x) "qi" (x)
#define __pcpu_reg_imm_2(x) "ri" (x)
#define __pcpu_reg_imm_4(x) "ri" (x)
#define __pcpu_reg_imm_8(x) "re" (x)

#define percpu_to_op(size, qual, op, _var, _val)			\
do {									\
	__pcpu_type_##size pto_val__ = __pcpu_cast_##size(_val);	\
	if (0) {							\
		typeof(_var) pto_tmp__;					\
		pto_tmp__ = (_val);					\
		(void)pto_tmp__;					\
	}								\
	asm qual(__pcpu_op2_##size(op, "%[val]", __percpu_arg([var]))	\
	    : [var] "+m" (_var)						\
	    : [val] __pcpu_reg_imm_##size(pto_val__));			\
} while (0)

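/*
 * Illustrative expansion (not part of this header): for a 4-byte
 * per-cpu variable, percpu_to_op(4, , "mov", var, val) pastes the
 * size helpers above together into roughly
 *
 *	asm("movl %[val], " __percpu_arg([var])
 *	    : [var] "+m" (var)
 *	    : [val] "ri" ((u32)(val)));
 *
 * The dead "if (0)" assignment exists only so the compiler still
 * type-checks _val against _var.
 */
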
/*
 * Generate a percpu add-to-memory instruction, and optimize the code
 * when the value added or subtracted is 1 or -1.
 */
#define percpu_add_op(qual, var, val)					\
do {									\
	typedef typeof(var) pao_T__;					\
	const int pao_ID__ = (__builtin_constant_p(val) &&		\
			      ((val) == 1 || (val) == -1)) ?		\
				(int)(val) : 0;				\
	if (0) {							\
		pao_T__ pao_tmp__;					\
		pao_tmp__ = (val);					\
		(void)pao_tmp__;					\
	}								\
	switch (sizeof(var)) {						\
	case 1:								\
		if (pao_ID__ == 1)					\
			asm qual ("incb "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decb "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addb %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "qi" ((pao_T__)(val)));			\
		break;							\
	case 2:								\
		if (pao_ID__ == 1)					\
			asm qual ("incw "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decw "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addw %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 4:								\
		if (pao_ID__ == 1)					\
			asm qual ("incl "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decl "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addl %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "ri" ((pao_T__)(val)));			\
		break;							\
	case 8:								\
		if (pao_ID__ == 1)					\
			asm qual ("incq "__percpu_arg(0) : "+m" (var));	\
		else if (pao_ID__ == -1)				\
			asm qual ("decq "__percpu_arg(0) : "+m" (var));	\
		else							\
			asm qual ("addq %1, "__percpu_arg(0)		\
			    : "+m" (var)				\
			    : "re" ((pao_T__)(val)));			\
		break;							\
	default: __bad_percpu_size();					\
	}								\
} while (0)

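/*
 * Illustrative effect (not part of this header, "cnt" is a made-up
 * per-cpu counter): because pao_ID__ is a compile-time constant,
 * this_cpu_add(cnt, 1) on a 4-byte counter emits a single
 * "incl %gs:..." on 64-bit, while a non-constant or non-unit delta
 * falls back to "addl <val>, %gs:...".
 */
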
#define percpu_from_op(size, qual, op, _var)				\
({									\
	__pcpu_type_##size pfo_val__;					\
	asm qual (__pcpu_op2_##size(op, __percpu_arg([var]), "%[val]")	\
	    : [val] __pcpu_reg_##size("=", pfo_val__)			\
	    : [var] "m" (_var));					\
	(typeof(_var))(unsigned long) pfo_val__;			\
})

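/*
 * Illustrative expansion (not part of this header): raw_cpu_read_4(x)
 * below becomes percpu_from_op(4, , "mov", x), i.e. roughly
 *
 *	asm("movl " __percpu_arg([var]) ", %[val]"
 *	    : [val] "=r" (pfo_val__)
 *	    : [var] "m" (x));
 *
 * with the result cast back to typeof(x).
 */
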
#define percpu_stable_op(op, var)			\
({							\
	typeof(var) pfo_ret__;				\
	switch (sizeof(var)) {				\
	case 1:						\
		asm(op "b "__percpu_arg(P1)",%0"	\
		    : "=q" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 2:						\
		asm(op "w "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 4:						\
		asm(op "l "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	case 8:						\
		asm(op "q "__percpu_arg(P1)",%0"	\
		    : "=r" (pfo_ret__)			\
		    : "p" (&(var)));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	pfo_ret__;					\
})

#define percpu_unary_op(qual, op, var)			\
({							\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual (op "b "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 2:						\
		asm qual (op "w "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 4:						\
		asm qual (op "l "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	case 8:						\
		asm qual (op "q "__percpu_arg(0)	\
		    : "+m" (var));			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
})

/*
 * Add return operation
 */
#define percpu_add_return_op(qual, var, val)		\
({							\
	typeof(var) paro_ret__ = val;			\
	switch (sizeof(var)) {				\
	case 1:						\
		asm qual ("xaddb %0, "__percpu_arg(1)	\
		    : "+q" (paro_ret__), "+m" (var)	\
		    : : "memory");			\
		break;					\
	case 2:						\
		asm qual ("xaddw %0, "__percpu_arg(1)	\
		    : "+r" (paro_ret__), "+m" (var)	\
		    : : "memory");			\
		break;					\
	case 4:						\
		asm qual ("xaddl %0, "__percpu_arg(1)	\
		    : "+r" (paro_ret__), "+m" (var)	\
		    : : "memory");			\
		break;					\
	case 8:						\
		asm qual ("xaddq %0, "__percpu_arg(1)	\
		    : "+re" (paro_ret__), "+m" (var)	\
		    : : "memory");			\
		break;					\
	default: __bad_percpu_size();			\
	}						\
	paro_ret__ += val;				\
	paro_ret__;					\
})

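/*
 * Illustrative semantics (not part of this header, "v" is a made-up
 * per-cpu variable): xadd leaves the *old* value in paro_ret__, so the
 * trailing "paro_ret__ += val" turns it into the post-add value, e.g.
 * with v == 5, this_cpu_add_return(v, 3) stores 8 and returns 8.
 */
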
/*
 * xchg is implemented using cmpxchg without a lock prefix. A plain
 * xchg carries an implied lock prefix, which is expensive and keeps
 * the processor from prefetching cachelines.
 */
#define percpu_xchg_op(qual, var, nval)					\
({									\
	typeof(var) pxo_ret__;						\
	typeof(var) pxo_new__ = (nval);					\
	switch (sizeof(var)) {						\
	case 1:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%al"		\
		    "\n1:\tcmpxchgb %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
		    : "=&a" (pxo_ret__), "+m" (var)			\
		    : "q" (pxo_new__)					\
		    : "memory");					\
		break;							\
	case 2:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%ax"		\
		    "\n1:\tcmpxchgw %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
		    : "=&a" (pxo_ret__), "+m" (var)			\
		    : "r" (pxo_new__)					\
		    : "memory");					\
		break;							\
	case 4:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%eax"		\
		    "\n1:\tcmpxchgl %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
		    : "=&a" (pxo_ret__), "+m" (var)			\
		    : "r" (pxo_new__)					\
		    : "memory");					\
		break;							\
	case 8:								\
		asm qual ("\n\tmov "__percpu_arg(1)",%%rax"		\
		    "\n1:\tcmpxchgq %2, "__percpu_arg(1)		\
		    "\n\tjnz 1b"					\
		    : "=&a" (pxo_ret__), "+m" (var)			\
		    : "r" (pxo_new__)					\
		    : "memory");					\
		break;							\
	default: __bad_percpu_size();					\
	}								\
	pxo_ret__;							\
})

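/*
 * Illustrative flow (not part of this header, register names chosen by
 * the compiler): for a 4-byte variable the emitted loop is roughly
 *
 *	mov	%gs:var, %eax		# current value into eax
 * 1:	cmpxchgl %edx, %gs:var		# store nval iff var still == eax
 *	jnz	1b			# an interrupt changed var: retry
 *
 * The retry can only trigger if an interrupt on this CPU modified the
 * variable between the load and the cmpxchg.
 */
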
/*
 * cmpxchg has no such implied lock semantics; as a result it is much
 * more efficient for CPU-local operations.
 */
#define percpu_cmpxchg_op(qual, var, oval, nval)		\
({								\
	typeof(var) pco_ret__;					\
	typeof(var) pco_old__ = (oval);				\
	typeof(var) pco_new__ = (nval);				\
	switch (sizeof(var)) {					\
	case 1:							\
		asm qual ("cmpxchgb %2, "__percpu_arg(1)	\
		    : "=a" (pco_ret__), "+m" (var)		\
		    : "q" (pco_new__), "0" (pco_old__)		\
		    : "memory");				\
		break;						\
	case 2:							\
		asm qual ("cmpxchgw %2, "__percpu_arg(1)	\
		    : "=a" (pco_ret__), "+m" (var)		\
		    : "r" (pco_new__), "0" (pco_old__)		\
		    : "memory");				\
		break;						\
	case 4:							\
		asm qual ("cmpxchgl %2, "__percpu_arg(1)	\
		    : "=a" (pco_ret__), "+m" (var)		\
		    : "r" (pco_new__), "0" (pco_old__)		\
		    : "memory");				\
		break;						\
	case 8:							\
		asm qual ("cmpxchgq %2, "__percpu_arg(1)	\
		    : "=a" (pco_ret__), "+m" (var)		\
		    : "r" (pco_new__), "0" (pco_old__)		\
		    : "memory");				\
		break;						\
	default: __bad_percpu_size();				\
	}							\
	pco_ret__;						\
})

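/*
 * Illustrative usage (not part of this header, names made up): the
 * macro returns the value seen in the variable before the operation,
 * so callers test for success by comparing against the old value:
 *
 *	if (this_cpu_cmpxchg(v, old, new) == old)
 *		handle_success();
 */
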
/*
 * this_cpu_read() makes gcc load the percpu variable every time it is
 * accessed, while this_cpu_read_stable() allows the value to be cached.
 * this_cpu_read_stable() is more efficient and can be used if its value
 * is guaranteed to be valid across CPUs. The current users include
 * get_current() and get_thread_info(), both of which are actually
 * per-thread variables implemented as per-cpu variables and thus
 * stable for the duration of the respective task.
 */
#define this_cpu_read_stable(var)	percpu_stable_op("mov", var)
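
/*
 * Illustrative consequence (not part of this header): because
 * percpu_stable_op() passes only the variable's address via a "p"
 * constraint instead of an "m" memory operand, the compiler may CSE
 * repeated reads; several get_current() calls in one function can
 * collapse into a single "mov %gs:current_task, %rax".
 */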

#define raw_cpu_read_1(pcp)		percpu_from_op(1, , "mov", pcp)
#define raw_cpu_read_2(pcp)		percpu_from_op(2, , "mov", pcp)
#define raw_cpu_read_4(pcp)		percpu_from_op(4, , "mov", pcp)

#define raw_cpu_write_1(pcp, val)	percpu_to_op(1, , "mov", (pcp), val)
#define raw_cpu_write_2(pcp, val)	percpu_to_op(2, , "mov", (pcp), val)
#define raw_cpu_write_4(pcp, val)	percpu_to_op(4, , "mov", (pcp), val)
#define raw_cpu_add_1(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_2(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_add_4(pcp, val)		percpu_add_op(, (pcp), val)
#define raw_cpu_and_1(pcp, val)		percpu_to_op(1, , "and", (pcp), val)
#define raw_cpu_and_2(pcp, val)		percpu_to_op(2, , "and", (pcp), val)
#define raw_cpu_and_4(pcp, val)		percpu_to_op(4, , "and", (pcp), val)
#define raw_cpu_or_1(pcp, val)		percpu_to_op(1, , "or", (pcp), val)
#define raw_cpu_or_2(pcp, val)		percpu_to_op(2, , "or", (pcp), val)
#define raw_cpu_or_4(pcp, val)		percpu_to_op(4, , "or", (pcp), val)

/*
 * raw_cpu_xchg() can use a load-store since
 * it is not required to be IRQ-safe.
 */
#define raw_percpu_xchg_op(var, nval)			\
({							\
	typeof(var) pxo_ret__ = raw_cpu_read(var);	\
	raw_cpu_write(var, (nval));			\
	pxo_ret__;					\
})

#define raw_cpu_xchg_1(pcp, val)	raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_2(pcp, val)	raw_percpu_xchg_op(pcp, val)
#define raw_cpu_xchg_4(pcp, val)	raw_percpu_xchg_op(pcp, val)

#define this_cpu_read_1(pcp)		percpu_from_op(1, volatile, "mov", pcp)
#define this_cpu_read_2(pcp)		percpu_from_op(2, volatile, "mov", pcp)
#define this_cpu_read_4(pcp)		percpu_from_op(4, volatile, "mov", pcp)
#define this_cpu_write_1(pcp, val)	percpu_to_op(1, volatile, "mov", (pcp), val)
#define this_cpu_write_2(pcp, val)	percpu_to_op(2, volatile, "mov", (pcp), val)
#define this_cpu_write_4(pcp, val)	percpu_to_op(4, volatile, "mov", (pcp), val)
#define this_cpu_add_1(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_2(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_add_4(pcp, val)	percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_1(pcp, val)	percpu_to_op(1, volatile, "and", (pcp), val)
#define this_cpu_and_2(pcp, val)	percpu_to_op(2, volatile, "and", (pcp), val)
#define this_cpu_and_4(pcp, val)	percpu_to_op(4, volatile, "and", (pcp), val)
#define this_cpu_or_1(pcp, val)		percpu_to_op(1, volatile, "or", (pcp), val)
#define this_cpu_or_2(pcp, val)		percpu_to_op(2, volatile, "or", (pcp), val)
#define this_cpu_or_4(pcp, val)		percpu_to_op(4, volatile, "or", (pcp), val)
#define this_cpu_xchg_1(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_2(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_xchg_4(pcp, nval)	percpu_xchg_op(volatile, pcp, nval)

#define raw_cpu_add_return_1(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_2(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_add_return_4(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)
#define raw_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

#define this_cpu_add_return_1(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_2(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_add_return_4(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_cmpxchg_1(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_2(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)
#define this_cpu_cmpxchg_4(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)

#ifdef CONFIG_X86_CMPXCHG64
#define percpu_cmpxchg8b_double(pcp1, pcp2, o1, o2, n1, n2)		\
({									\
	bool __ret;							\
	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
	asm volatile("cmpxchg8b "__percpu_arg(1)			\
		     CC_SET(z)						\
		     : CC_OUT(z) (__ret), "+m" (pcp1), "+m" (pcp2), "+a" (__o1), "+d" (__o2) \
		     : "b" (__n1), "c" (__n2));				\
	__ret;								\
})

#define raw_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
#define this_cpu_cmpxchg_double_4	percpu_cmpxchg8b_double
#endif /* CONFIG_X86_CMPXCHG64 */

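/*
 * Illustrative usage (not part of this header, names made up): two
 * adjacent 4-byte per-cpu words "head" and "gen" can be replaced
 * together, atomically with respect to this CPU:
 *
 *	if (this_cpu_cmpxchg_double(head, gen, old_head, old_gen,
 *				    new_head, new_gen))
 *		handle_success();	// one cmpxchg8b updated both
 */
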
/*
 * Per cpu atomic 64 bit operations are only available under 64 bit.
 * 32 bit must fall back to generic operations.
 */
#ifdef CONFIG_X86_64
#define raw_cpu_read_8(pcp)			percpu_from_op(8, , "mov", pcp)
#define raw_cpu_write_8(pcp, val)		percpu_to_op(8, , "mov", (pcp), val)
#define raw_cpu_add_8(pcp, val)			percpu_add_op(, (pcp), val)
#define raw_cpu_and_8(pcp, val)			percpu_to_op(8, , "and", (pcp), val)
#define raw_cpu_or_8(pcp, val)			percpu_to_op(8, , "or", (pcp), val)
#define raw_cpu_add_return_8(pcp, val)		percpu_add_return_op(, pcp, val)
#define raw_cpu_xchg_8(pcp, nval)		raw_percpu_xchg_op(pcp, nval)
#define raw_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(, pcp, oval, nval)

#define this_cpu_read_8(pcp)			percpu_from_op(8, volatile, "mov", pcp)
#define this_cpu_write_8(pcp, val)		percpu_to_op(8, volatile, "mov", (pcp), val)
#define this_cpu_add_8(pcp, val)		percpu_add_op(volatile, (pcp), val)
#define this_cpu_and_8(pcp, val)		percpu_to_op(8, volatile, "and", (pcp), val)
#define this_cpu_or_8(pcp, val)			percpu_to_op(8, volatile, "or", (pcp), val)
#define this_cpu_add_return_8(pcp, val)		percpu_add_return_op(volatile, pcp, val)
#define this_cpu_xchg_8(pcp, nval)		percpu_xchg_op(volatile, pcp, nval)
#define this_cpu_cmpxchg_8(pcp, oval, nval)	percpu_cmpxchg_op(volatile, pcp, oval, nval)

/*
 * Pretty complex macro to generate the cmpxchg16b instruction. The
 * instruction is not supported on early AMD64 processors, so we must be
 * able to emulate it in software. The address used in the cmpxchg16b
 * instruction must be aligned to a 16 byte boundary.
 */
#define percpu_cmpxchg16b_double(pcp1, pcp2, o1, o2, n1, n2)		\
({									\
	bool __ret;							\
	typeof(pcp1) __o1 = (o1), __n1 = (n1);				\
	typeof(pcp2) __o2 = (o2), __n2 = (n2);				\
	alternative_io("leaq %P1,%%rsi\n\tcall this_cpu_cmpxchg16b_emu\n\t", \
		       "cmpxchg16b " __percpu_arg(1) "\n\tsetz %0\n\t",	\
		       X86_FEATURE_CX16,				\
		       ASM_OUTPUT2("=a" (__ret), "+m" (pcp1),		\
				   "+m" (pcp2), "+d" (__o2)),		\
		       "b" (__n1), "c" (__n2), "a" (__o1) : "rsi");	\
	__ret;								\
})

#define raw_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double
#define this_cpu_cmpxchg_double_8	percpu_cmpxchg16b_double

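/*
 * Illustrative note (not part of this header): alternative_io() emits
 * the "call this_cpu_cmpxchg16b_emu" form by default; on CPUs that
 * advertise X86_FEATURE_CX16 the alternatives patching machinery
 * rewrites it at boot into the inline "cmpxchg16b" sequence, so the
 * feature check costs nothing at run time.
 */
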
#endif

static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
			const unsigned long __percpu *addr)
{
	unsigned long __percpu *a =
		(unsigned long __percpu *)addr + nr / BITS_PER_LONG;

#ifdef CONFIG_X86_64
	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
#else
	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
#endif
}

static inline bool x86_this_cpu_variable_test_bit(int nr,
			const unsigned long __percpu *addr)
{
	bool oldbit;

	asm volatile("btl "__percpu_arg(2)",%1"
			CC_SET(c)
			: CC_OUT(c) (oldbit)
			: "m" (*(unsigned long __percpu *)addr), "Ir" (nr));

	return oldbit;
}

#define x86_this_cpu_test_bit(nr, addr)				\
	(__builtin_constant_p((nr))				\
	 ? x86_this_cpu_constant_test_bit((nr), (addr))		\
	 : x86_this_cpu_variable_test_bit((nr), (addr)))

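/*
 * Illustrative dispatch (not part of this header, "flags" is a made-up
 * per-cpu bitmap): x86_this_cpu_test_bit(3, flags) has a constant bit
 * number, so it reads the containing word and masks it in C, while
 * x86_this_cpu_test_bit(n, flags) with a variable n emits a "btl"
 * against the %gs/%fs-based address.
 */
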
#include <asm-generic/percpu.h>

/* We can use this directly for local CPU (faster). */
DECLARE_PER_CPU_READ_MOSTLY(unsigned long, this_cpu_off);

#endif /* !__ASSEMBLY__ */

#ifdef CONFIG_SMP

/*
 * Define the "EARLY_PER_CPU" macros. These are used for some per_cpu
 * variables that are initialized and accessed before the per_cpu areas
 * are allocated.
 */

#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)			\
	DEFINE_PER_CPU(_type, _name) = _initvalue;			\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map

#define	DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue;		\
	__typeof__(_type) _name##_early_map[NR_CPUS] __initdata =	\
				{ [0 ... NR_CPUS-1] = _initvalue };	\
	__typeof__(_type) *_name##_early_ptr __refdata = _name##_early_map

#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
	EXPORT_PER_CPU_SYMBOL(_name)

#define DECLARE_EARLY_PER_CPU(_type, _name)			\
	DECLARE_PER_CPU(_type, _name);				\
	extern __typeof__(_type) *_name##_early_ptr;		\
	extern __typeof__(_type) _name##_early_map[]

#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
	DECLARE_PER_CPU_READ_MOSTLY(_type, _name);		\
	extern __typeof__(_type) *_name##_early_ptr;		\
	extern __typeof__(_type) _name##_early_map[]

#define	early_per_cpu_ptr(_name) (_name##_early_ptr)
#define	early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
#define	early_per_cpu(_name, _cpu)				\
	*(early_per_cpu_ptr(_name) ?				\
		&early_per_cpu_ptr(_name)[_cpu] :		\
		&per_cpu(_name, _cpu))

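/*
 * Illustrative flow (not part of this header): x86_cpu_to_apicid is
 * defined with DEFINE_EARLY_PER_CPU_READ_MOSTLY(). Before the per-cpu
 * areas exist, early_per_cpu(x86_cpu_to_apicid, cpu) indexes the
 * static _early_map[] array; once setup_per_cpu_areas() copies the map
 * and clears the _early_ptr, the same expression falls through to
 * per_cpu(x86_cpu_to_apicid, cpu).
 */
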
#else	/* !CONFIG_SMP */
#define	DEFINE_EARLY_PER_CPU(_type, _name, _initvalue)		\
	DEFINE_PER_CPU(_type, _name) = _initvalue

#define DEFINE_EARLY_PER_CPU_READ_MOSTLY(_type, _name, _initvalue)	\
	DEFINE_PER_CPU_READ_MOSTLY(_type, _name) = _initvalue

#define EXPORT_EARLY_PER_CPU_SYMBOL(_name)			\
	EXPORT_PER_CPU_SYMBOL(_name)

#define DECLARE_EARLY_PER_CPU(_type, _name)			\
	DECLARE_PER_CPU(_type, _name)

#define DECLARE_EARLY_PER_CPU_READ_MOSTLY(_type, _name)		\
	DECLARE_PER_CPU_READ_MOSTLY(_type, _name)

#define	early_per_cpu(_name, _cpu) per_cpu(_name, _cpu)
#define	early_per_cpu_ptr(_name) NULL
/* no early_per_cpu_map() */

#endif	/* !CONFIG_SMP */

#endif /* _ASM_X86_PERCPU_H */