Commit | Line | Data |
---|---|---|
0c867537 | 1 | /* |
ae02679c | 2 | * x86 FPU boot time init code: |
0c867537 | 3 | */ |
78f7f1e5 | 4 | #include <asm/fpu/internal.h> |
0c867537 IM |
5 | #include <asm/tlbflush.h> |
6 | ||
5aaeb5c0 IM |
7 | #include <linux/sched.h> |
8 | ||
ae02679c IM |
9 | /* |
10 | * Initialize the TS bit in CR0 according to the style of context-switches | |
11 | * we are using: | |
12 | */ | |
41e78410 IM |
13 | static void fpu__init_cpu_ctx_switch(void) |
14 | { | |
15 | if (!cpu_has_eager_fpu) | |
16 | stts(); | |
17 | else | |
18 | clts(); | |
19 | } | |
20 | ||
21 | /* | |
22 | * Initialize the registers found in all CPUs, CR0 and CR4: | |
23 | */ | |
24 | static void fpu__init_cpu_generic(void) | |
25 | { | |
26 | unsigned long cr0; | |
27 | unsigned long cr4_mask = 0; | |
28 | ||
29 | if (cpu_has_fxsr) | |
30 | cr4_mask |= X86_CR4_OSFXSR; | |
31 | if (cpu_has_xmm) | |
32 | cr4_mask |= X86_CR4_OSXMMEXCPT; | |
33 | if (cr4_mask) | |
34 | cr4_set_bits(cr4_mask); | |
35 | ||
36 | cr0 = read_cr0(); | |
37 | cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */ | |
38 | if (!cpu_has_fpu) | |
39 | cr0 |= X86_CR0_EM; | |
40 | write_cr0(cr0); | |
b1276c48 IM |
41 | |
42 | /* Flush out any pending x87 state: */ | |
43 | asm volatile ("fninit"); | |
41e78410 IM |
44 | } |
45 | ||
/*
 * Per-CPU FPU bringup - enables all supported FPU features.
 * Called when a CPU is brought online.
 *
 * The steps run in this order: generic CR0/CR4 setup, xstate
 * setup and finally the CR0::TS setup for the context-switch
 * style in use.
 */
void fpu__init_cpu(void)
{
	fpu__init_cpu_generic();
	fpu__init_cpu_xstate();
	fpu__init_cpu_ctx_switch();
}
55 | ||
2e2f3da7 | 56 | /* |
dd863880 IM |
57 | * The earliest FPU detection code. |
58 | * | |
59 | * Set the X86_FEATURE_FPU CPU-capability bit based on | |
60 | * trying to execute an actual sequence of FPU instructions: | |
2e2f3da7 IM |
61 | */ |
62 | static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) | |
63 | { | |
64 | unsigned long cr0; | |
65 | u16 fsw, fcw; | |
66 | ||
67 | fsw = fcw = 0xffff; | |
68 | ||
69 | cr0 = read_cr0(); | |
70 | cr0 &= ~(X86_CR0_TS | X86_CR0_EM); | |
71 | write_cr0(cr0); | |
72 | ||
73 | asm volatile("fninit ; fnstsw %0 ; fnstcw %1" | |
74 | : "+m" (fsw), "+m" (fcw)); | |
75 | ||
76 | if (fsw == 0 && (fcw & 0x103f) == 0x003f) | |
77 | set_cpu_cap(c, X86_FEATURE_FPU); | |
78 | else | |
79 | clear_cpu_cap(c, X86_FEATURE_FPU); | |
e83ab9ad IM |
80 | |
81 | #ifndef CONFIG_MATH_EMULATION | |
82 | if (!cpu_has_fpu) { | |
ae02679c | 83 | pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); |
e83ab9ad IM |
84 | for (;;) |
85 | asm volatile("hlt"); | |
86 | } | |
87 | #endif | |
2e2f3da7 IM |
88 | } |
89 | ||
4d164092 IM |
90 | /* |
91 | * Boot time FPU feature detection code: | |
92 | */ | |
0c867537 | 93 | unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; |
91a8c2a5 | 94 | |
32231879 | 95 | static void __init fpu__init_system_mxcsr(void) |
0c867537 | 96 | { |
91a8c2a5 | 97 | unsigned int mask = 0; |
0c867537 IM |
98 | |
99 | if (cpu_has_fxsr) { | |
b96fecbf IM |
100 | /* Static because GCC does not get 16-byte stack alignment right: */ |
101 | static struct fxregs_state fxregs __initdata; | |
91a8c2a5 | 102 | |
b96fecbf | 103 | asm volatile("fxsave %0" : "+m" (fxregs)); |
91a8c2a5 | 104 | |
b96fecbf | 105 | mask = fxregs.mxcsr_mask; |
91a8c2a5 IM |
106 | |
107 | /* | |
108 | * If zero then use the default features mask, | |
109 | * which has all features set, except the | |
110 | * denormals-are-zero feature bit: | |
111 | */ | |
0c867537 IM |
112 | if (mask == 0) |
113 | mask = 0x0000ffbf; | |
114 | } | |
115 | mxcsr_feature_mask &= mask; | |
116 | } | |
117 | ||
7218e8b7 IM |
118 | /* |
119 | * Once per bootup FPU initialization sequences that will run on most x86 CPUs: | |
120 | */ | |
32231879 | 121 | static void __init fpu__init_system_generic(void) |
7218e8b7 IM |
122 | { |
123 | /* | |
124 | * Set up the legacy init FPU context. (xstate init might overwrite this | |
125 | * with a more modern format, if the CPU supports it.) | |
126 | */ | |
6f575023 | 127 | fpstate_init_fxstate(&init_fpstate.fxsave); |
7218e8b7 IM |
128 | |
129 | fpu__init_system_mxcsr(); | |
130 | } | |
131 | ||
/*
 * Size of the FPU context state.
 *
 * Every task in the system uses the same context size, no matter
 * what portion of it the task actually uses. This is inherent to
 * the XSAVE architecture, which puts all state components into a
 * single, continuous memory block.
 */
unsigned int xstate_size;
EXPORT_SYMBOL_GPL(xstate_size);
140 | ||
5aaeb5c0 IM |
141 | /* Enforce that 'MEMBER' is the last field of 'TYPE': */ |
142 | #define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \ | |
143 | BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER)) | |
0c8c0f03 DH |
144 | |
145 | /* | |
5aaeb5c0 | 146 | * We append the 'struct fpu' to the task_struct: |
0c8c0f03 | 147 | */ |
5aaeb5c0 | 148 | static void __init fpu__init_task_struct_size(void) |
0c8c0f03 DH |
149 | { |
150 | int task_size = sizeof(struct task_struct); | |
151 | ||
152 | /* | |
153 | * Subtract off the static size of the register state. | |
154 | * It potentially has a bunch of padding. | |
155 | */ | |
156 | task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state); | |
157 | ||
158 | /* | |
159 | * Add back the dynamically-calculated register state | |
160 | * size. | |
161 | */ | |
162 | task_size += xstate_size; | |
163 | ||
164 | /* | |
165 | * We dynamically size 'struct fpu', so we require that | |
166 | * it be at the end of 'thread_struct' and that | |
167 | * 'thread_struct' be at the end of 'task_struct'. If | |
168 | * you hit a compile error here, check the structure to | |
169 | * see if something got added to the end. | |
170 | */ | |
171 | CHECK_MEMBER_AT_END_OF(struct fpu, state); | |
172 | CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu); | |
173 | CHECK_MEMBER_AT_END_OF(struct task_struct, thread); | |
174 | ||
5aaeb5c0 | 175 | arch_task_struct_size = task_size; |
0c8c0f03 DH |
176 | } |
177 | ||
41e78410 IM |
178 | /* |
179 | * Set up the xstate_size based on the legacy FPU context size. | |
180 | * | |
181 | * We set this up first, and later it will be overwritten by | |
182 | * fpu__init_system_xstate() if the CPU knows about xstates. | |
183 | */ | |
32231879 | 184 | static void __init fpu__init_system_xstate_size_legacy(void) |
0c867537 | 185 | { |
e97131a8 IM |
186 | static int on_boot_cpu = 1; |
187 | ||
188 | WARN_ON_FPU(!on_boot_cpu); | |
189 | on_boot_cpu = 0; | |
190 | ||
0c867537 IM |
191 | /* |
192 | * Note that xstate_size might be overwriten later during | |
c42103b2 | 193 | * fpu__init_system_xstate(). |
0c867537 IM |
194 | */ |
195 | ||
196 | if (!cpu_has_fpu) { | |
197 | /* | |
198 | * Disable xsave as we do not support it if i387 | |
199 | * emulation is enabled. | |
200 | */ | |
201 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); | |
202 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
c47ada30 | 203 | xstate_size = sizeof(struct swregs_state); |
6a133207 IM |
204 | } else { |
205 | if (cpu_has_fxsr) | |
c47ada30 | 206 | xstate_size = sizeof(struct fxregs_state); |
6a133207 | 207 | else |
c47ada30 | 208 | xstate_size = sizeof(struct fregs_state); |
0c867537 | 209 | } |
6f56a8d0 IM |
210 | /* |
211 | * Quirk: we don't yet handle the XSAVES* instructions | |
212 | * correctly, as we don't correctly convert between | |
213 | * standard and compacted format when interfacing | |
214 | * with user-space - so disable it for now. | |
215 | * | |
216 | * The difference is small: with recent CPUs the | |
217 | * compacted format is only marginally smaller than | |
218 | * the standard FPU state format. | |
219 | * | |
220 | * ( This is easy to backport while we are fixing | |
221 | * XSAVES* support. ) | |
222 | */ | |
223 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | |
0c867537 IM |
224 | } |
225 | ||
ae02679c IM |
226 | /* |
227 | * FPU context switching strategies: | |
228 | * | |
229 | * Contrary to popular belief, we don't do lazy FPU saves, due to the | |
230 | * task migration complications it brings on SMP - we only do | |
231 | * lazy FPU restores. | |
232 | * | |
233 | * 'lazy' is the traditional strategy, which is based on setting | |
234 | * CR0::TS to 1 during context-switch (instead of doing a full | |
235 | * restore of the FPU state), which causes the first FPU instruction | |
236 | * after the context switch (whenever it is executed) to fault - at | |
237 | * which point we lazily restore the FPU state into FPU registers. | |
238 | * | |
239 | * Tasks are of course under no obligation to execute FPU instructions, | |
240 | * so it can easily happen that another context-switch occurs without | |
241 | * a single FPU instruction being executed. If we eventually switch | |
242 | * back to the original task (that still owns the FPU) then we have | |
243 | * not only saved the restores along the way, but we also have the | |
244 | * FPU ready to be used for the original task. | |
245 | * | |
246 | * 'eager' switching is used on modern CPUs, there we switch the FPU | |
247 | * state during every context switch, regardless of whether the task | |
248 | * has used FPU instructions in that time slice or not. This is done | |
249 | * because modern FPU context saving instructions are able to optimize | |
250 | * state saving and restoration in hardware: they can detect both | |
251 | * unused and untouched FPU state and optimize accordingly. | |
252 | * | |
253 | * [ Note that even in 'lazy' mode we might optimize context switches | |
254 | * to use 'eager' restores, if we detect that a task is using the FPU | |
255 | * frequently. See the fpu->counter logic in fpu/internal.h for that. ] | |
256 | */ | |
6f5d265a IM |
257 | static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO; |
258 | ||
259 | static int __init eager_fpu_setup(char *s) | |
260 | { | |
261 | if (!strcmp(s, "on")) | |
262 | eagerfpu = ENABLE; | |
263 | else if (!strcmp(s, "off")) | |
264 | eagerfpu = DISABLE; | |
265 | else if (!strcmp(s, "auto")) | |
266 | eagerfpu = AUTO; | |
267 | return 1; | |
268 | } | |
269 | __setup("eagerfpu=", eager_fpu_setup); | |
270 | ||
271 | /* | |
ae02679c | 272 | * Pick the FPU context switching strategy: |
6f5d265a | 273 | */ |
32231879 | 274 | static void __init fpu__init_system_ctx_switch(void) |
6f5d265a | 275 | { |
e97131a8 IM |
276 | static bool on_boot_cpu = 1; |
277 | ||
278 | WARN_ON_FPU(!on_boot_cpu); | |
279 | on_boot_cpu = 0; | |
280 | ||
281 | WARN_ON_FPU(current->thread.fpu.fpstate_active); | |
6f5d265a IM |
282 | current_thread_info()->status = 0; |
283 | ||
284 | /* Auto enable eagerfpu for xsaveopt */ | |
285 | if (cpu_has_xsaveopt && eagerfpu != DISABLE) | |
286 | eagerfpu = ENABLE; | |
287 | ||
288 | if (xfeatures_mask & XSTATE_EAGER) { | |
289 | if (eagerfpu == DISABLE) { | |
290 | pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n", | |
291 | xfeatures_mask & XSTATE_EAGER); | |
292 | xfeatures_mask &= ~XSTATE_EAGER; | |
293 | } else { | |
294 | eagerfpu = ENABLE; | |
295 | } | |
296 | } | |
297 | ||
298 | if (eagerfpu == ENABLE) | |
299 | setup_force_cpu_cap(X86_FEATURE_EAGER_FPU); | |
300 | ||
32231879 | 301 | printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy"); |
6f5d265a IM |
302 | } |
303 | ||
e35f6f14 | 304 | /* |
ae02679c IM |
305 | * Called on the boot CPU once per system bootup, to set up the initial |
306 | * FPU state that is later cloned into all processes: | |
e35f6f14 | 307 | */ |
32231879 | 308 | void __init fpu__init_system(struct cpuinfo_x86 *c) |
e35f6f14 | 309 | { |
dd863880 IM |
310 | fpu__init_system_early_generic(c); |
311 | ||
ae02679c IM |
312 | /* |
313 | * The FPU has to be operational for some of the | |
314 | * later FPU init activities: | |
315 | */ | |
e35f6f14 | 316 | fpu__init_cpu(); |
0c867537 | 317 | |
530b37e4 | 318 | /* |
ae02679c IM |
319 | * But don't leave CR0::TS set yet, as some of the FPU setup |
320 | * methods depend on being able to execute FPU instructions | |
321 | * that will fault on a set TS, such as the FXSAVE in | |
322 | * fpu__init_system_mxcsr(). | |
530b37e4 IM |
323 | */ |
324 | clts(); | |
325 | ||
7218e8b7 | 326 | fpu__init_system_generic(); |
7638b74b | 327 | fpu__init_system_xstate_size_legacy(); |
c42103b2 | 328 | fpu__init_system_xstate(); |
5aaeb5c0 | 329 | fpu__init_task_struct_size(); |
997578b1 | 330 | |
011545b5 | 331 | fpu__init_system_ctx_switch(); |
0c867537 | 332 | } |
146ed598 | 333 | |
ae02679c IM |
334 | /* |
335 | * Boot parameter to turn off FPU support and fall back to math-emu: | |
336 | */ | |
146ed598 IM |
337 | static int __init no_387(char *s) |
338 | { | |
339 | setup_clear_cpu_cap(X86_FEATURE_FPU); | |
340 | return 1; | |
341 | } | |
146ed598 | 342 | __setup("no387", no_387); |
7cf82d33 | 343 | |
5856afed IM |
344 | /* |
345 | * Disable all xstate CPU features: | |
346 | */ | |
347 | static int __init x86_noxsave_setup(char *s) | |
7cf82d33 IM |
348 | { |
349 | if (strlen(s)) | |
350 | return 0; | |
5856afed | 351 | |
7cf82d33 IM |
352 | setup_clear_cpu_cap(X86_FEATURE_XSAVE); |
353 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
354 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | |
355 | setup_clear_cpu_cap(X86_FEATURE_AVX); | |
356 | setup_clear_cpu_cap(X86_FEATURE_AVX2); | |
5856afed | 357 | |
7cf82d33 IM |
358 | return 1; |
359 | } | |
5856afed | 360 | __setup("noxsave", x86_noxsave_setup); |
7cf82d33 | 361 | |
5856afed IM |
362 | /* |
363 | * Disable the XSAVEOPT instruction specifically: | |
364 | */ | |
365 | static int __init x86_noxsaveopt_setup(char *s) | |
7cf82d33 IM |
366 | { |
367 | setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT); | |
5856afed | 368 | |
7cf82d33 IM |
369 | return 1; |
370 | } | |
5856afed | 371 | __setup("noxsaveopt", x86_noxsaveopt_setup); |
7cf82d33 | 372 | |
5856afed IM |
373 | /* |
374 | * Disable the XSAVES instruction: | |
375 | */ | |
376 | static int __init x86_noxsaves_setup(char *s) | |
7cf82d33 IM |
377 | { |
378 | setup_clear_cpu_cap(X86_FEATURE_XSAVES); | |
5856afed | 379 | |
7cf82d33 IM |
380 | return 1; |
381 | } | |
5856afed | 382 | __setup("noxsaves", x86_noxsaves_setup); |
7cf82d33 | 383 | |
5856afed IM |
384 | /* |
385 | * Disable FX save/restore and SSE support: | |
386 | */ | |
387 | static int __init x86_nofxsr_setup(char *s) | |
7cf82d33 IM |
388 | { |
389 | setup_clear_cpu_cap(X86_FEATURE_FXSR); | |
390 | setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT); | |
391 | setup_clear_cpu_cap(X86_FEATURE_XMM); | |
5856afed | 392 | |
7cf82d33 IM |
393 | return 1; |
394 | } | |
5856afed | 395 | __setup("nofxsr", x86_nofxsr_setup); |