x86/fpu, sched: Introduce CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT and use it on x86
[linux-2.6-block.git] / arch / x86 / kernel / fpu / init.c
CommitLineData
0c867537 1/*
ae02679c 2 * x86 FPU boot time init code:
0c867537 3 */
78f7f1e5 4#include <asm/fpu/internal.h>
0c867537
IM
5#include <asm/tlbflush.h>
6
5aaeb5c0
IM
7#include <linux/sched.h>
8
ae02679c
IM
9/*
10 * Initialize the TS bit in CR0 according to the style of context-switches
11 * we are using:
12 */
41e78410
IM
13static void fpu__init_cpu_ctx_switch(void)
14{
15 if (!cpu_has_eager_fpu)
16 stts();
17 else
18 clts();
19}
20
21/*
22 * Initialize the registers found in all CPUs, CR0 and CR4:
23 */
24static void fpu__init_cpu_generic(void)
25{
26 unsigned long cr0;
27 unsigned long cr4_mask = 0;
28
29 if (cpu_has_fxsr)
30 cr4_mask |= X86_CR4_OSFXSR;
31 if (cpu_has_xmm)
32 cr4_mask |= X86_CR4_OSXMMEXCPT;
33 if (cr4_mask)
34 cr4_set_bits(cr4_mask);
35
36 cr0 = read_cr0();
37 cr0 &= ~(X86_CR0_TS|X86_CR0_EM); /* clear TS and EM */
38 if (!cpu_has_fpu)
39 cr0 |= X86_CR0_EM;
40 write_cr0(cr0);
b1276c48
IM
41
42 /* Flush out any pending x87 state: */
43 asm volatile ("fninit");
41e78410
IM
44}
/*
 * Enable all supported FPU features. Called when a CPU is brought online:
 *
 * Generic CR0/CR4 setup runs first, then xstate enabling, then the
 * TS bit is set up according to the context switch model in use.
 */
void fpu__init_cpu(void)
{
	fpu__init_cpu_generic();
	fpu__init_cpu_xstate();
	fpu__init_cpu_ctx_switch();
}
55
2e2f3da7 56/*
dd863880
IM
57 * The earliest FPU detection code.
58 *
59 * Set the X86_FEATURE_FPU CPU-capability bit based on
60 * trying to execute an actual sequence of FPU instructions:
2e2f3da7
IM
61 */
62static void fpu__init_system_early_generic(struct cpuinfo_x86 *c)
63{
64 unsigned long cr0;
65 u16 fsw, fcw;
66
67 fsw = fcw = 0xffff;
68
69 cr0 = read_cr0();
70 cr0 &= ~(X86_CR0_TS | X86_CR0_EM);
71 write_cr0(cr0);
72
73 asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
74 : "+m" (fsw), "+m" (fcw));
75
76 if (fsw == 0 && (fcw & 0x103f) == 0x003f)
77 set_cpu_cap(c, X86_FEATURE_FPU);
78 else
79 clear_cpu_cap(c, X86_FEATURE_FPU);
e83ab9ad
IM
80
81#ifndef CONFIG_MATH_EMULATION
82 if (!cpu_has_fpu) {
ae02679c 83 pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n");
e83ab9ad
IM
84 for (;;)
85 asm volatile("hlt");
86 }
87#endif
2e2f3da7
IM
88}
89
4d164092
IM
90/*
91 * Boot time FPU feature detection code:
92 */
0c867537 93unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
91a8c2a5 94
32231879 95static void __init fpu__init_system_mxcsr(void)
0c867537 96{
91a8c2a5 97 unsigned int mask = 0;
0c867537
IM
98
99 if (cpu_has_fxsr) {
b96fecbf
IM
100 /* Static because GCC does not get 16-byte stack alignment right: */
101 static struct fxregs_state fxregs __initdata;
91a8c2a5 102
b96fecbf 103 asm volatile("fxsave %0" : "+m" (fxregs));
91a8c2a5 104
b96fecbf 105 mask = fxregs.mxcsr_mask;
91a8c2a5
IM
106
107 /*
108 * If zero then use the default features mask,
109 * which has all features set, except the
110 * denormals-are-zero feature bit:
111 */
0c867537
IM
112 if (mask == 0)
113 mask = 0x0000ffbf;
114 }
115 mxcsr_feature_mask &= mask;
116}
117
7218e8b7
IM
118/*
119 * Once per bootup FPU initialization sequences that will run on most x86 CPUs:
120 */
32231879 121static void __init fpu__init_system_generic(void)
7218e8b7
IM
122{
123 /*
124 * Set up the legacy init FPU context. (xstate init might overwrite this
125 * with a more modern format, if the CPU supports it.)
126 */
6f575023 127 fpstate_init_fxstate(&init_fpstate.fxsave);
7218e8b7
IM
128
129 fpu__init_system_mxcsr();
130}
131
ae02679c
IM
132/*
133 * Size of the FPU context state. All tasks in the system use the
134 * same context size, regardless of what portion they use.
135 * This is inherent to the XSAVE architecture which puts all state
136 * components into a single, continuous memory block:
137 */
41e78410
IM
138unsigned int xstate_size;
139EXPORT_SYMBOL_GPL(xstate_size);
140
5aaeb5c0
IM
141/* Enforce that 'MEMBER' is the last field of 'TYPE': */
142#define CHECK_MEMBER_AT_END_OF(TYPE, MEMBER) \
143 BUILD_BUG_ON(sizeof(TYPE) != offsetofend(TYPE, MEMBER))
0c8c0f03
DH
144
145/*
5aaeb5c0 146 * We append the 'struct fpu' to the task_struct:
0c8c0f03 147 */
5aaeb5c0 148static void __init fpu__init_task_struct_size(void)
0c8c0f03
DH
149{
150 int task_size = sizeof(struct task_struct);
151
152 /*
153 * Subtract off the static size of the register state.
154 * It potentially has a bunch of padding.
155 */
156 task_size -= sizeof(((struct task_struct *)0)->thread.fpu.state);
157
158 /*
159 * Add back the dynamically-calculated register state
160 * size.
161 */
162 task_size += xstate_size;
163
164 /*
165 * We dynamically size 'struct fpu', so we require that
166 * it be at the end of 'thread_struct' and that
167 * 'thread_struct' be at the end of 'task_struct'. If
168 * you hit a compile error here, check the structure to
169 * see if something got added to the end.
170 */
171 CHECK_MEMBER_AT_END_OF(struct fpu, state);
172 CHECK_MEMBER_AT_END_OF(struct thread_struct, fpu);
173 CHECK_MEMBER_AT_END_OF(struct task_struct, thread);
174
5aaeb5c0 175 arch_task_struct_size = task_size;
0c8c0f03
DH
176}
177
41e78410
IM
178/*
179 * Set up the xstate_size based on the legacy FPU context size.
180 *
181 * We set this up first, and later it will be overwritten by
182 * fpu__init_system_xstate() if the CPU knows about xstates.
183 */
32231879 184static void __init fpu__init_system_xstate_size_legacy(void)
0c867537 185{
e97131a8
IM
186 static int on_boot_cpu = 1;
187
188 WARN_ON_FPU(!on_boot_cpu);
189 on_boot_cpu = 0;
190
0c867537
IM
191 /*
192 * Note that xstate_size might be overwriten later during
c42103b2 193 * fpu__init_system_xstate().
0c867537
IM
194 */
195
196 if (!cpu_has_fpu) {
197 /*
198 * Disable xsave as we do not support it if i387
199 * emulation is enabled.
200 */
201 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
202 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
c47ada30 203 xstate_size = sizeof(struct swregs_state);
6a133207
IM
204 } else {
205 if (cpu_has_fxsr)
c47ada30 206 xstate_size = sizeof(struct fxregs_state);
6a133207 207 else
c47ada30 208 xstate_size = sizeof(struct fregs_state);
0c867537 209 }
6f56a8d0
IM
210 /*
211 * Quirk: we don't yet handle the XSAVES* instructions
212 * correctly, as we don't correctly convert between
213 * standard and compacted format when interfacing
214 * with user-space - so disable it for now.
215 *
216 * The difference is small: with recent CPUs the
217 * compacted format is only marginally smaller than
218 * the standard FPU state format.
219 *
220 * ( This is easy to backport while we are fixing
221 * XSAVES* support. )
222 */
223 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
0c867537
IM
224}
225
ae02679c
IM
226/*
227 * FPU context switching strategies:
228 *
229 * Against popular belief, we don't do lazy FPU saves, due to the
230 * task migration complications it brings on SMP - we only do
231 * lazy FPU restores.
232 *
233 * 'lazy' is the traditional strategy, which is based on setting
234 * CR0::TS to 1 during context-switch (instead of doing a full
235 * restore of the FPU state), which causes the first FPU instruction
236 * after the context switch (whenever it is executed) to fault - at
237 * which point we lazily restore the FPU state into FPU registers.
238 *
239 * Tasks are of course under no obligation to execute FPU instructions,
240 * so it can easily happen that another context-switch occurs without
241 * a single FPU instruction being executed. If we eventually switch
242 * back to the original task (that still owns the FPU) then we have
243 * not only saved the restores along the way, but we also have the
244 * FPU ready to be used for the original task.
245 *
246 * 'eager' switching is used on modern CPUs, there we switch the FPU
247 * state during every context switch, regardless of whether the task
248 * has used FPU instructions in that time slice or not. This is done
249 * because modern FPU context saving instructions are able to optimize
250 * state saving and restoration in hardware: they can detect both
251 * unused and untouched FPU state and optimize accordingly.
252 *
253 * [ Note that even in 'lazy' mode we might optimize context switches
254 * to use 'eager' restores, if we detect that a task is using the FPU
255 * frequently. See the fpu->counter logic in fpu/internal.h for that. ]
256 */
6f5d265a
IM
257static enum { AUTO, ENABLE, DISABLE } eagerfpu = AUTO;
258
259static int __init eager_fpu_setup(char *s)
260{
261 if (!strcmp(s, "on"))
262 eagerfpu = ENABLE;
263 else if (!strcmp(s, "off"))
264 eagerfpu = DISABLE;
265 else if (!strcmp(s, "auto"))
266 eagerfpu = AUTO;
267 return 1;
268}
269__setup("eagerfpu=", eager_fpu_setup);
270
271/*
ae02679c 272 * Pick the FPU context switching strategy:
6f5d265a 273 */
32231879 274static void __init fpu__init_system_ctx_switch(void)
6f5d265a 275{
e97131a8
IM
276 static bool on_boot_cpu = 1;
277
278 WARN_ON_FPU(!on_boot_cpu);
279 on_boot_cpu = 0;
280
281 WARN_ON_FPU(current->thread.fpu.fpstate_active);
6f5d265a
IM
282 current_thread_info()->status = 0;
283
284 /* Auto enable eagerfpu for xsaveopt */
285 if (cpu_has_xsaveopt && eagerfpu != DISABLE)
286 eagerfpu = ENABLE;
287
288 if (xfeatures_mask & XSTATE_EAGER) {
289 if (eagerfpu == DISABLE) {
290 pr_err("x86/fpu: eagerfpu switching disabled, disabling the following xstate features: 0x%llx.\n",
291 xfeatures_mask & XSTATE_EAGER);
292 xfeatures_mask &= ~XSTATE_EAGER;
293 } else {
294 eagerfpu = ENABLE;
295 }
296 }
297
298 if (eagerfpu == ENABLE)
299 setup_force_cpu_cap(X86_FEATURE_EAGER_FPU);
300
32231879 301 printk(KERN_INFO "x86/fpu: Using '%s' FPU context switches.\n", eagerfpu == ENABLE ? "eager" : "lazy");
6f5d265a
IM
302}
303
e35f6f14 304/*
ae02679c
IM
305 * Called on the boot CPU once per system bootup, to set up the initial
306 * FPU state that is later cloned into all processes:
e35f6f14 307 */
32231879 308void __init fpu__init_system(struct cpuinfo_x86 *c)
e35f6f14 309{
dd863880
IM
310 fpu__init_system_early_generic(c);
311
ae02679c
IM
312 /*
313 * The FPU has to be operational for some of the
314 * later FPU init activities:
315 */
e35f6f14 316 fpu__init_cpu();
0c867537 317
530b37e4 318 /*
ae02679c
IM
319 * But don't leave CR0::TS set yet, as some of the FPU setup
320 * methods depend on being able to execute FPU instructions
321 * that will fault on a set TS, such as the FXSAVE in
322 * fpu__init_system_mxcsr().
530b37e4
IM
323 */
324 clts();
325
7218e8b7 326 fpu__init_system_generic();
7638b74b 327 fpu__init_system_xstate_size_legacy();
c42103b2 328 fpu__init_system_xstate();
5aaeb5c0 329 fpu__init_task_struct_size();
997578b1 330
011545b5 331 fpu__init_system_ctx_switch();
0c867537 332}
146ed598 333
ae02679c
IM
334/*
335 * Boot parameter to turn off FPU support and fall back to math-emu:
336 */
146ed598
IM
337static int __init no_387(char *s)
338{
339 setup_clear_cpu_cap(X86_FEATURE_FPU);
340 return 1;
341}
146ed598 342__setup("no387", no_387);
7cf82d33 343
5856afed
IM
344/*
345 * Disable all xstate CPU features:
346 */
347static int __init x86_noxsave_setup(char *s)
7cf82d33
IM
348{
349 if (strlen(s))
350 return 0;
5856afed 351
7cf82d33
IM
352 setup_clear_cpu_cap(X86_FEATURE_XSAVE);
353 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
354 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
355 setup_clear_cpu_cap(X86_FEATURE_AVX);
356 setup_clear_cpu_cap(X86_FEATURE_AVX2);
5856afed 357
7cf82d33
IM
358 return 1;
359}
5856afed 360__setup("noxsave", x86_noxsave_setup);
7cf82d33 361
5856afed
IM
362/*
363 * Disable the XSAVEOPT instruction specifically:
364 */
365static int __init x86_noxsaveopt_setup(char *s)
7cf82d33
IM
366{
367 setup_clear_cpu_cap(X86_FEATURE_XSAVEOPT);
5856afed 368
7cf82d33
IM
369 return 1;
370}
5856afed 371__setup("noxsaveopt", x86_noxsaveopt_setup);
7cf82d33 372
5856afed
IM
373/*
374 * Disable the XSAVES instruction:
375 */
376static int __init x86_noxsaves_setup(char *s)
7cf82d33
IM
377{
378 setup_clear_cpu_cap(X86_FEATURE_XSAVES);
5856afed 379
7cf82d33
IM
380 return 1;
381}
5856afed 382__setup("noxsaves", x86_noxsaves_setup);
7cf82d33 383
5856afed
IM
384/*
385 * Disable FX save/restore and SSE support:
386 */
387static int __init x86_nofxsr_setup(char *s)
7cf82d33
IM
388{
389 setup_clear_cpu_cap(X86_FEATURE_FXSR);
390 setup_clear_cpu_cap(X86_FEATURE_FXSR_OPT);
391 setup_clear_cpu_cap(X86_FEATURE_XMM);
5856afed 392
7cf82d33
IM
393 return 1;
394}
5856afed 395__setup("nofxsr", x86_nofxsr_setup);