x86, clockevents: add C1E aware idle function
[linux-2.6-block.git] / arch / x86 / kernel / process.c
CommitLineData
61c4628b
SS
1#include <linux/errno.h>
2#include <linux/kernel.h>
3#include <linux/mm.h>
4#include <linux/smp.h>
5#include <linux/slab.h>
6#include <linux/sched.h>
7f424a8b
PZ
7#include <linux/module.h>
8#include <linux/pm.h>
aa276e1c 9#include <linux/clockchips.h>
61c4628b 10
aa283f49 11struct kmem_cache *task_xstate_cachep;
61c4628b
SS
12
13int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
14{
15 *dst = *src;
aa283f49
SS
16 if (src->thread.xstate) {
17 dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep,
18 GFP_KERNEL);
19 if (!dst->thread.xstate)
20 return -ENOMEM;
21 WARN_ON((unsigned long)dst->thread.xstate & 15);
22 memcpy(dst->thread.xstate, src->thread.xstate, xstate_size);
23 }
61c4628b
SS
24 return 0;
25}
26
aa283f49 27void free_thread_xstate(struct task_struct *tsk)
61c4628b 28{
aa283f49
SS
29 if (tsk->thread.xstate) {
30 kmem_cache_free(task_xstate_cachep, tsk->thread.xstate);
31 tsk->thread.xstate = NULL;
32 }
33}
34
aa283f49
SS
35void free_thread_info(struct thread_info *ti)
36{
37 free_thread_xstate(ti->task);
1679f271 38 free_pages((unsigned long)ti, get_order(THREAD_SIZE));
61c4628b
SS
39}
40
41void arch_task_cache_init(void)
42{
43 task_xstate_cachep =
44 kmem_cache_create("task_xstate", xstate_size,
45 __alignof__(union thread_xstate),
46 SLAB_PANIC, NULL);
47}
7f424a8b 48
00dba564
TG
/*
 * Idle related variables and functions
 *
 * boot_option_idle_override is set to 1 once an "idle=" boot option has
 * been accepted (see idle_setup()); it starts out as zero.
 */
unsigned long boot_option_idle_override;
EXPORT_SYMBOL(boot_option_idle_override);

/*
 * Power management idle function, if any.  NULL until either the "idle="
 * boot option or select_idle_routine() installs a handler.
 */
void (*pm_idle)(void);
EXPORT_SYMBOL(pm_idle);
60
#ifdef CONFIG_X86_32
/*
 * This halt magic was a workaround for ancient floppy DMA
 * wreckage. It should be safe to remove.
 *
 * hlt_counter is the number of disable_hlt() calls not yet undone by
 * enable_hlt(); default_idle() only halts while it is zero.
 */
static int hlt_counter;

/* Ask the idle loop not to use HLT until enable_hlt() is called. */
void disable_hlt(void)
{
	++hlt_counter;
}
EXPORT_SYMBOL(disable_hlt);

/* Undo one disable_hlt(). */
void enable_hlt(void)
{
	--hlt_counter;
}
EXPORT_SYMBOL(enable_hlt);

/*
 * Returns 1 when HLT may be used: nobody has disabled it and the boot
 * CPU reports that HLT works; 0 otherwise.
 */
static inline int hlt_use_halt(void)
{
	if (hlt_counter)
		return 0;

	return boot_cpu_data.hlt_works_ok != 0;
}
#else
/* Without CONFIG_X86_32 there is no HLT workaround: always allow HLT. */
static inline int hlt_use_halt(void)
{
	return 1;
}
#endif
89
90/*
91 * We use this if we don't have any better
92 * idle routine..
93 */
94void default_idle(void)
95{
96 if (hlt_use_halt()) {
97 current_thread_info()->status &= ~TS_POLLING;
98 /*
99 * TS_POLLING-cleared state must be visible before we
100 * test NEED_RESCHED:
101 */
102 smp_mb();
103
104 if (!need_resched())
105 safe_halt(); /* enables interrupts racelessly */
106 else
107 local_irq_enable();
108 current_thread_info()->status |= TS_POLLING;
109 } else {
110 local_irq_enable();
111 /* loop is done by the caller */
112 cpu_relax();
113 }
114}
115#ifdef CONFIG_APM_MODULE
116EXPORT_SYMBOL(default_idle);
117#endif
118
7f424a8b
PZ
119static void do_nothing(void *unused)
120{
121}
122
123/*
124 * cpu_idle_wait - Used to ensure that all the CPUs discard old value of
125 * pm_idle and update to new pm_idle value. Required while changing pm_idle
126 * handler on SMP systems.
127 *
128 * Caller must have changed pm_idle to the new value before the call. Old
129 * pm_idle value will not be used by any CPU after the return of this function.
130 */
131void cpu_idle_wait(void)
132{
133 smp_mb();
134 /* kick all the CPUs so that they exit out of pm_idle */
135 smp_call_function(do_nothing, NULL, 0, 1);
136}
137EXPORT_SYMBOL_GPL(cpu_idle_wait);
138
139/*
140 * This uses new MONITOR/MWAIT instructions on P4 processors with PNI,
141 * which can obviate IPI to trigger checking of need_resched.
142 * We execute MONITOR against need_resched and enter optimized wait state
143 * through MWAIT. Whenever someone changes need_resched, we would be woken
144 * up from MWAIT (without an IPI).
145 *
146 * New with Core Duo processors, MWAIT can take some hints based on CPU
147 * capability.
148 */
149void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
150{
151 if (!need_resched()) {
152 __monitor((void *)&current_thread_info()->flags, 0, 0);
153 smp_mb();
154 if (!need_resched())
155 __mwait(ax, cx);
156 }
157}
158
159/* Default MONITOR/MWAIT with no hints, used for default C1 state */
160static void mwait_idle(void)
161{
162 if (!need_resched()) {
163 __monitor((void *)&current_thread_info()->flags, 0, 0);
164 smp_mb();
165 if (!need_resched())
166 __sti_mwait(0, 0);
167 else
168 local_irq_enable();
169 } else
170 local_irq_enable();
171}
172
7f424a8b
PZ
/*
 * poll_idle - polling idle routine (selected with "idle=poll")
 *
 * On SMP it's slightly faster (but much more power-consuming!)
 * to poll the need_resched flag instead of waiting for the
 * cross-CPU IPI to arrive. Use this option with caution.
 *
 * Only enables interrupts and issues one cpu_relax(); there is no loop
 * in here.
 */
static void poll_idle(void)
{
	local_irq_enable();
	cpu_relax();
}
183
e9623b35
TG
184/*
185 * mwait selection logic:
186 *
187 * It depends on the CPU. For AMD CPUs that support MWAIT this is
188 * wrong. Family 0x10 and 0x11 CPUs will enter C1 on HLT. Powersavings
189 * then depend on a clock divisor and current Pstate of the core. If
190 * all cores of a processor are in halt state (C1) the processor can
191 * enter the C1E (C1 enhanced) state. If mwait is used this will never
192 * happen.
193 *
194 * idle=mwait overrides this decision and forces the usage of mwait.
195 */
09fd4b4e
TG
196
197#define MWAIT_INFO 0x05
198#define MWAIT_ECX_EXTENDED_INFO 0x01
199#define MWAIT_EDX_C1 0xf0
200
e9623b35
TG
201static int __cpuinit mwait_usable(const struct cpuinfo_x86 *c)
202{
09fd4b4e
TG
203 u32 eax, ebx, ecx, edx;
204
e9623b35
TG
205 if (force_mwait)
206 return 1;
207
09fd4b4e
TG
208 if (c->cpuid_level < MWAIT_INFO)
209 return 0;
210
211 cpuid(MWAIT_INFO, &eax, &ebx, &ecx, &edx);
212 /* Check, whether EDX has extended info about MWAIT */
213 if (!(ecx & MWAIT_ECX_EXTENDED_INFO))
214 return 1;
215
216 /*
217 * edx enumeratios MONITOR/MWAIT extensions. Check, whether
218 * C1 supports MWAIT
219 */
220 return (edx & MWAIT_EDX_C1);
e9623b35
TG
221}
222
aa276e1c
TG
223/*
224 * Check for AMD CPUs, which have potentially C1E support
225 */
226static int __cpuinit check_c1e_idle(const struct cpuinfo_x86 *c)
227{
228 if (c->x86_vendor != X86_VENDOR_AMD)
229 return 0;
230
231 if (c->x86 < 0x0F)
232 return 0;
233
234 /* Family 0x0f models < rev F do not have C1E */
235 if (c->x86 == 0x0f && c->x86_model < 0x40)
236 return 0;
237
238 return 1;
239}
240
241/*
242 * C1E aware idle routine. We check for C1E active in the interrupt
243 * pending message MSR. If we detect C1E, then we handle it the same
244 * way as C3 power states (local apic timer and TSC stop)
245 */
246static void c1e_idle(void)
247{
248 static cpumask_t c1e_mask = CPU_MASK_NONE;
249 static int c1e_detected;
250
251 if (need_resched())
252 return;
253
254 if (!c1e_detected) {
255 u32 lo, hi;
256
257 rdmsr(MSR_K8_INT_PENDING_MSG, lo, hi);
258 if (lo & K8_INTP_C1E_ACTIVE_MASK) {
259 c1e_detected = 1;
260 mark_tsc_unstable("TSC halt in C1E");
261 printk(KERN_INFO "System has C1E enabled\n");
262 }
263 }
264
265 if (c1e_detected) {
266 int cpu = smp_processor_id();
267
268 if (!cpu_isset(cpu, c1e_mask)) {
269 cpu_set(cpu, c1e_mask);
270 /* Force broadcast so ACPI can not interfere */
271 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_FORCE,
272 &cpu);
273 printk(KERN_INFO "Switch to broadcast mode on CPU%d\n",
274 cpu);
275 }
276 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_ENTER, &cpu);
277 default_idle();
278 local_irq_disable();
279 clockevents_notify(CLOCK_EVT_NOTIFY_BROADCAST_EXIT, &cpu);
280 local_irq_enable();
281 } else
282 default_idle();
283}
284
7f424a8b
PZ
285void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
286{
7f424a8b
PZ
287#ifdef CONFIG_X86_SMP
288 if (pm_idle == poll_idle && smp_num_siblings > 1) {
289 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
290 " performance may degrade.\n");
291 }
292#endif
6ddd2a27
TG
293 if (pm_idle)
294 return;
295
e9623b35 296 if (cpu_has(c, X86_FEATURE_MWAIT) && mwait_usable(c)) {
7f424a8b 297 /*
7f424a8b
PZ
298 * One CPU supports mwait => All CPUs supports mwait
299 */
6ddd2a27
TG
300 printk(KERN_INFO "using mwait in idle threads.\n");
301 pm_idle = mwait_idle;
aa276e1c
TG
302 } else if (check_c1e_idle(c)) {
303 printk(KERN_INFO "using C1E aware idle routine\n");
304 pm_idle = c1e_idle;
6ddd2a27
TG
305 } else
306 pm_idle = default_idle;
7f424a8b
PZ
307}
308
309static int __init idle_setup(char *str)
310{
311 if (!strcmp(str, "poll")) {
312 printk("using polling idle threads.\n");
313 pm_idle = poll_idle;
314 } else if (!strcmp(str, "mwait"))
315 force_mwait = 1;
316 else
317 return -1;
318
319 boot_option_idle_override = 1;
320 return 0;
321}
322early_param("idle", idle_setup);
323