#ifndef _ASM_X86_BARRIER_H
#define _ASM_X86_BARRIER_H

#include <asm/alternative.h>
#include <asm/nops.h>

/*
 * Force strict CPU ordering.
 * And yes, this is required on UP too when we're talking
 * to devices.
 */

#ifdef CONFIG_X86_32
/*
 * Some non-Intel clones support out-of-order stores. wmb() ceases to be
 * a nop for these.
 */
#define mb() alternative("lock; addl $0,0(%%esp)", "mfence", X86_FEATURE_XMM2)
#define rmb() alternative("lock; addl $0,0(%%esp)", "lfence", X86_FEATURE_XMM2)
#define wmb() alternative("lock; addl $0,0(%%esp)", "sfence", X86_FEATURE_XMM)
#else
#define mb()	asm volatile("mfence" ::: "memory")
#define rmb()	asm volatile("lfence" ::: "memory")
#define wmb()	asm volatile("sfence" ::: "memory")
#endif
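
/*
 * A sketch of the device case the comment above refers to: a driver for
 * a hypothetical DMA-capable device must order its descriptor stores
 * before the MMIO doorbell write ("desc", "dev" and the doorbell layout
 * are illustrative, not part of this header):
 *
 * <programlisting>
 *	desc->addr = buf_dma_addr;
 *	desc->len  = buf_len;
 *	wmb();				// descriptor visible before ...
 *	writel(1, dev->doorbell);	// ... the device is told to fetch it
 * </programlisting>
 */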

/**
 * read_barrier_depends - Flush all pending reads that subsequent reads
 * depend on.
 *
 * No data-dependent reads from memory-like regions are ever reordered
 * over this barrier. All reads preceding this primitive are guaranteed
 * to access memory (but not necessarily other CPUs' caches) before any
 * reads following this primitive that depend on the data returned by
 * any of the preceding reads. This primitive is much lighter weight than
 * rmb() on most CPUs, and is never heavier weight than rmb().
 *
 * These ordering constraints are respected by both the local CPU
 * and the compiler.
 *
 * Ordering is not guaranteed by anything other than these primitives,
 * not even by data dependencies. See the documentation for
 * memory_barrier() for examples and URLs to more information.
 *
 * For example, the following code would force ordering (the initial
 * value of "a" is zero, "b" is one, and "p" is "&a"):
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	b = 2;
 *	memory_barrier();
 *	p = &b;				q = p;
 *					read_barrier_depends();
 *					d = *q;
 * </programlisting>
 *
 * because the read of "*q" depends on the read of "p" and these
 * two reads are separated by a read_barrier_depends(). However,
 * the following code, with the same initial values for "a" and "b":
 *
 * <programlisting>
 *	CPU 0				CPU 1
 *
 *	a = 2;
 *	memory_barrier();
 *	b = 3;				y = b;
 *					read_barrier_depends();
 *					x = a;
 * </programlisting>
 *
 * does not enforce ordering, since there is no data dependency between
 * the read of "a" and the read of "b". Therefore, on some CPUs, such
 * as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
 * in cases like this where there are no data dependencies.
 **/

/* x86 never reorders dependent loads, so this can be a no-op. */
#define read_barrier_depends()	do { } while (0)

#ifdef CONFIG_SMP
#define smp_mb()	mb()
#ifdef CONFIG_X86_PPRO_FENCE
# define smp_rmb()	rmb()
#else
# define smp_rmb()	barrier()
#endif
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	read_barrier_depends()
#define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
#else /* !SMP */
#define smp_mb()	barrier()
#define smp_rmb()	barrier()
#define smp_wmb()	barrier()
#define smp_read_barrier_depends()	do { } while (0)
#define set_mb(var, value) do { var = value; barrier(); } while (0)
#endif /* SMP */
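
/*
 * set_mb() exists for flag-setting patterns where a store must be
 * ordered before a subsequent load. A sketch of the classic
 * sleeper/waker pairing (the "sleep_flag" and "wake_cond" variables
 * are illustrative, not part of this header):
 *
 * <programlisting>
 *	sleeper:			waker:
 *
 *	set_mb(sleep_flag, 1);		wake_cond = 1;
 *	if (!wake_cond)			smp_mb();
 *		sleep();		if (sleep_flag)
 *						wake_sleeper();
 * </programlisting>
 *
 * The full barrier on each side guarantees that at least one of the
 * two loads observes the other CPU's store, so the wakeup cannot be
 * lost.
 */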

#if defined(CONFIG_X86_PPRO_FENCE)

/*
 * With CONFIG_X86_PPRO_FENCE x86 doesn't have a strong TSO memory
 * model and we should fall back to full barriers.
 */

#define smp_store_release(p, v)					\
do {								\
	compiletime_assert_atomic_type(*p);			\
	smp_mb();						\
	ACCESS_ONCE(*p) = (v);					\
} while (0)

#define smp_load_acquire(p)					\
({								\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);			\
	compiletime_assert_atomic_type(*p);			\
	smp_mb();						\
	___p1;							\
})

#else /* regular x86 TSO memory ordering */

#define smp_store_release(p, v)					\
do {								\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	ACCESS_ONCE(*p) = (v);					\
} while (0)

#define smp_load_acquire(p)					\
({								\
	typeof(*p) ___p1 = ACCESS_ONCE(*p);			\
	compiletime_assert_atomic_type(*p);			\
	barrier();						\
	___p1;							\
})

#endif
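
/*
 * A minimal message-passing sketch of how the two are paired (the
 * "msg" and "ready" variables are illustrative, not part of this
 * header):
 *
 * <programlisting>
 *	producer:			consumer:
 *
 *	msg = 42;			if (smp_load_acquire(&ready))
 *	smp_store_release(&ready, 1);		r = msg;	// r == 42
 * </programlisting>
 *
 * On regular TSO x86 the release/acquire only have to stop compiler
 * reordering; the hardware already provides the required store-store
 * and load-load ordering.
 */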

/*
 * Stop RDTSC speculation. This is needed when you need to use RDTSC
 * (or get_cycles or vread that possibly accesses the TSC) in a defined
 * code region.
 *
 * (Could use a three-way alternative() for this if one existed.)
 */
static __always_inline void rdtsc_barrier(void)
{
	alternative(ASM_NOP3, "mfence", X86_FEATURE_MFENCE_RDTSC);
	alternative(ASM_NOP3, "lfence", X86_FEATURE_LFENCE_RDTSC);
}
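
/*
 * A sketch of fencing a timed region ("rdtsc()" and the measured work
 * are illustrative, not defined here):
 *
 * <programlisting>
 *	rdtsc_barrier();
 *	t0 = rdtsc();		// cannot execute before prior work is done
 *	do_measured_work();
 *	rdtsc_barrier();
 *	t1 = rdtsc();		// waits for the measured work to finish
 * </programlisting>
 */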

#endif /* _ASM_X86_BARRIER_H */