hotplug cpu: migrate a task within its cpuset
[linux-2.6-block.git] / kernel / cpu.c
CommitLineData
1da177e4
LT
1/* CPU control.
2 * (C) 2001, 2002, 2003, 2004 Rusty Russell
3 *
4 * This code is licenced under the GPL.
5 */
6#include <linux/proc_fs.h>
7#include <linux/smp.h>
8#include <linux/init.h>
9#include <linux/notifier.h>
10#include <linux/sched.h>
11#include <linux/unistd.h>
12#include <linux/cpu.h>
13#include <linux/module.h>
14#include <linux/kthread.h>
15#include <linux/stop_machine.h>
81615b62 16#include <linux/mutex.h>
1da177e4
LT
17
18/* This protects CPUs going up and down... */
aa953877
LT
19static DEFINE_MUTEX(cpu_add_remove_lock);
20static DEFINE_MUTEX(cpu_bitmask_lock);
1da177e4 21
bd5349cf 22static __cpuinitdata RAW_NOTIFIER_HEAD(cpu_chain);
1da177e4 23
e3920fb4
RW
24/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
25 * Should always be manipulated under cpu_add_remove_lock
26 */
27static int cpu_hotplug_disabled;
28
a9d9baa1 29#ifdef CONFIG_HOTPLUG_CPU
90d45d17 30
aa953877
LT
/*
 * Bookkeeping for recursive lock_cpu_hotplug() callers (cpufreq does this):
 * the task that currently owns cpu_bitmask_lock through lock_cpu_hotplug(),
 * and how many extra nested acquisitions it has made.
 */
static struct task_struct *recursive;
static int recursive_depth;
90d45d17 34
a9d9baa1
AR
35void lock_cpu_hotplug(void)
36{
aa953877
LT
37 struct task_struct *tsk = current;
38
39 if (tsk == recursive) {
40 static int warnings = 10;
41 if (warnings) {
42 printk(KERN_ERR "Lukewarm IQ detected in hotplug locking\n");
43 WARN_ON(1);
44 warnings--;
45 }
46 recursive_depth++;
47 return;
48 }
49 mutex_lock(&cpu_bitmask_lock);
50 recursive = tsk;
a9d9baa1
AR
51}
52EXPORT_SYMBOL_GPL(lock_cpu_hotplug);
90d45d17 53
a9d9baa1
AR
54void unlock_cpu_hotplug(void)
55{
aa953877
LT
56 WARN_ON(recursive != current);
57 if (recursive_depth) {
58 recursive_depth--;
59 return;
a9d9baa1 60 }
aa953877 61 recursive = NULL;
4b96b1a1 62 mutex_unlock(&cpu_bitmask_lock);
a9d9baa1
AR
63}
64EXPORT_SYMBOL_GPL(unlock_cpu_hotplug);
65
a9d9baa1 66#endif /* CONFIG_HOTPLUG_CPU */
90d45d17 67
1da177e4 68/* Need to know about CPUs going up/down? */
65edc68c 69int __cpuinit register_cpu_notifier(struct notifier_block *nb)
1da177e4 70{
bd5349cf
NB
71 int ret;
72 mutex_lock(&cpu_add_remove_lock);
73 ret = raw_notifier_chain_register(&cpu_chain, nb);
74 mutex_unlock(&cpu_add_remove_lock);
75 return ret;
1da177e4 76}
65edc68c
CS
77
78#ifdef CONFIG_HOTPLUG_CPU
79
1da177e4
LT
80EXPORT_SYMBOL(register_cpu_notifier);
81
82void unregister_cpu_notifier(struct notifier_block *nb)
83{
bd5349cf
NB
84 mutex_lock(&cpu_add_remove_lock);
85 raw_notifier_chain_unregister(&cpu_chain, nb);
86 mutex_unlock(&cpu_add_remove_lock);
1da177e4
LT
87}
88EXPORT_SYMBOL(unregister_cpu_notifier);
89
1da177e4
LT
90static inline void check_for_tasks(int cpu)
91{
92 struct task_struct *p;
93
94 write_lock_irq(&tasklist_lock);
95 for_each_process(p) {
96 if (task_cpu(p) == cpu &&
97 (!cputime_eq(p->utime, cputime_zero) ||
98 !cputime_eq(p->stime, cputime_zero)))
99 printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
e7407dcc 100 (state = %ld, flags = %x) \n",
ba25f9dc
PE
101 p->comm, task_pid_nr(p), cpu,
102 p->state, p->flags);
1da177e4
LT
103 }
104 write_unlock_irq(&tasklist_lock);
105}
106
db912f96
AK
/* Argument block that _cpu_down() passes to take_cpu_down(). */
struct take_cpu_down_param {
	unsigned long mod;	/* flag bits OR-ed into the CPU_DYING event */
	void *hcpu;		/* notifier argument: the CPU number cast to a pointer */
};
111
1da177e4 112/* Take this CPU down. */
db912f96 113static int take_cpu_down(void *_param)
1da177e4 114{
db912f96 115 struct take_cpu_down_param *param = _param;
1da177e4
LT
116 int err;
117
db912f96
AK
118 raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
119 param->hcpu);
1da177e4
LT
120 /* Ensure this CPU doesn't handle any more interrupts. */
121 err = __cpu_disable();
122 if (err < 0)
f3705136 123 return err;
1da177e4 124
f3705136
ZM
125 /* Force idle task to run as soon as we yield: it should
126 immediately notice cpu is offline and die quickly. */
127 sched_idle_next();
128 return 0;
1da177e4
LT
129}
130
e3920fb4 131/* Requires cpu_add_remove_lock to be held */
8bb78442 132static int _cpu_down(unsigned int cpu, int tasks_frozen)
1da177e4 133{
e7407dcc 134 int err, nr_calls = 0;
1da177e4
LT
135 struct task_struct *p;
136 cpumask_t old_allowed, tmp;
e7407dcc 137 void *hcpu = (void *)(long)cpu;
8bb78442 138 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
db912f96
AK
139 struct take_cpu_down_param tcd_param = {
140 .mod = mod,
141 .hcpu = hcpu,
142 };
1da177e4 143
e3920fb4
RW
144 if (num_online_cpus() == 1)
145 return -EBUSY;
1da177e4 146
e3920fb4
RW
147 if (!cpu_online(cpu))
148 return -EINVAL;
1da177e4 149
e7407dcc 150 raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
8bb78442 151 err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
e7407dcc 152 hcpu, -1, &nr_calls);
1da177e4 153 if (err == NOTIFY_BAD) {
a0d8cdb6 154 nr_calls--;
8bb78442
RW
155 __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
156 hcpu, nr_calls, NULL);
1da177e4
LT
157 printk("%s: attempt to take down CPU %u failed\n",
158 __FUNCTION__, cpu);
baaca49f
GS
159 err = -EINVAL;
160 goto out_release;
1da177e4
LT
161 }
162
163 /* Ensure that we are not runnable on dying cpu */
164 old_allowed = current->cpus_allowed;
165 tmp = CPU_MASK_ALL;
166 cpu_clear(cpu, tmp);
167 set_cpus_allowed(current, tmp);
168
aa953877 169 mutex_lock(&cpu_bitmask_lock);
db912f96 170 p = __stop_machine_run(take_cpu_down, &tcd_param, cpu);
aa953877
LT
171 mutex_unlock(&cpu_bitmask_lock);
172
8fa1d7d3 173 if (IS_ERR(p) || cpu_online(cpu)) {
1da177e4 174 /* CPU didn't die: tell everyone. Can't complain. */
8bb78442 175 if (raw_notifier_call_chain(&cpu_chain, CPU_DOWN_FAILED | mod,
e7407dcc 176 hcpu) == NOTIFY_BAD)
1da177e4
LT
177 BUG();
178
8fa1d7d3
ST
179 if (IS_ERR(p)) {
180 err = PTR_ERR(p);
181 goto out_allowed;
182 }
1da177e4 183 goto out_thread;
8fa1d7d3 184 }
1da177e4
LT
185
186 /* Wait for it to sleep (leaving idle task). */
187 while (!idle_cpu(cpu))
188 yield();
189
190 /* This actually kills the CPU. */
191 __cpu_die(cpu);
192
1da177e4 193 /* CPU is completely dead: tell everyone. Too late to complain. */
8bb78442
RW
194 if (raw_notifier_call_chain(&cpu_chain, CPU_DEAD | mod,
195 hcpu) == NOTIFY_BAD)
1da177e4
LT
196 BUG();
197
198 check_for_tasks(cpu);
199
200out_thread:
201 err = kthread_stop(p);
202out_allowed:
203 set_cpus_allowed(current, old_allowed);
baaca49f 204out_release:
8bb78442 205 raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
e3920fb4
RW
206 return err;
207}
208
209int cpu_down(unsigned int cpu)
210{
211 int err = 0;
212
213 mutex_lock(&cpu_add_remove_lock);
214 if (cpu_hotplug_disabled)
215 err = -EBUSY;
216 else
8bb78442 217 err = _cpu_down(cpu, 0);
e3920fb4 218
aa953877 219 mutex_unlock(&cpu_add_remove_lock);
1da177e4
LT
220 return err;
221}
222#endif /*CONFIG_HOTPLUG_CPU*/
223
e3920fb4 224/* Requires cpu_add_remove_lock to be held */
8bb78442 225static int __cpuinit _cpu_up(unsigned int cpu, int tasks_frozen)
1da177e4 226{
baaca49f 227 int ret, nr_calls = 0;
1da177e4 228 void *hcpu = (void *)(long)cpu;
8bb78442 229 unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
1da177e4 230
e3920fb4
RW
231 if (cpu_online(cpu) || !cpu_present(cpu))
232 return -EINVAL;
90d45d17 233
baaca49f 234 raw_notifier_call_chain(&cpu_chain, CPU_LOCK_ACQUIRE, hcpu);
8bb78442 235 ret = __raw_notifier_call_chain(&cpu_chain, CPU_UP_PREPARE | mod, hcpu,
baaca49f 236 -1, &nr_calls);
1da177e4 237 if (ret == NOTIFY_BAD) {
a0d8cdb6 238 nr_calls--;
1da177e4
LT
239 printk("%s: attempt to bring up CPU %u failed\n",
240 __FUNCTION__, cpu);
241 ret = -EINVAL;
242 goto out_notify;
243 }
244
245 /* Arch-specific enabling code. */
aa953877 246 mutex_lock(&cpu_bitmask_lock);
1da177e4 247 ret = __cpu_up(cpu);
aa953877 248 mutex_unlock(&cpu_bitmask_lock);
1da177e4
LT
249 if (ret != 0)
250 goto out_notify;
6978c705 251 BUG_ON(!cpu_online(cpu));
1da177e4
LT
252
253 /* Now call notifier in preparation. */
8bb78442 254 raw_notifier_call_chain(&cpu_chain, CPU_ONLINE | mod, hcpu);
1da177e4
LT
255
256out_notify:
257 if (ret != 0)
baaca49f 258 __raw_notifier_call_chain(&cpu_chain,
8bb78442 259 CPU_UP_CANCELED | mod, hcpu, nr_calls, NULL);
baaca49f 260 raw_notifier_call_chain(&cpu_chain, CPU_LOCK_RELEASE, hcpu);
e3920fb4
RW
261
262 return ret;
263}
264
b282b6f8 265int __cpuinit cpu_up(unsigned int cpu)
e3920fb4
RW
266{
267 int err = 0;
268
269 mutex_lock(&cpu_add_remove_lock);
270 if (cpu_hotplug_disabled)
271 err = -EBUSY;
272 else
8bb78442 273 err = _cpu_up(cpu, 0);
e3920fb4
RW
274
275 mutex_unlock(&cpu_add_remove_lock);
276 return err;
277}
278
f3de4be9 279#ifdef CONFIG_PM_SLEEP_SMP
e3920fb4
RW
280static cpumask_t frozen_cpus;
281
282int disable_nonboot_cpus(void)
283{
e1d9fd2e 284 int cpu, first_cpu, error = 0;
e3920fb4
RW
285
286 mutex_lock(&cpu_add_remove_lock);
1d64b9cb 287 first_cpu = first_cpu(cpu_online_map);
e3920fb4
RW
288 /* We take down all of the non-boot CPUs in one shot to avoid races
289 * with the userspace trying to use the CPU hotplug at the same time
290 */
291 cpus_clear(frozen_cpus);
292 printk("Disabling non-boot CPUs ...\n");
293 for_each_online_cpu(cpu) {
294 if (cpu == first_cpu)
295 continue;
8bb78442 296 error = _cpu_down(cpu, 1);
e3920fb4
RW
297 if (!error) {
298 cpu_set(cpu, frozen_cpus);
299 printk("CPU%d is down\n", cpu);
300 } else {
301 printk(KERN_ERR "Error taking CPU%d down: %d\n",
302 cpu, error);
303 break;
304 }
305 }
306 if (!error) {
307 BUG_ON(num_online_cpus() > 1);
308 /* Make sure the CPUs won't be enabled by someone else */
309 cpu_hotplug_disabled = 1;
310 } else {
e1d9fd2e 311 printk(KERN_ERR "Non-boot CPUs are not disabled\n");
e3920fb4 312 }
aa953877 313 mutex_unlock(&cpu_add_remove_lock);
e3920fb4
RW
314 return error;
315}
316
317void enable_nonboot_cpus(void)
318{
319 int cpu, error;
320
321 /* Allow everyone to use the CPU hotplug again */
322 mutex_lock(&cpu_add_remove_lock);
323 cpu_hotplug_disabled = 0;
ed746e3b 324 if (cpus_empty(frozen_cpus))
1d64b9cb 325 goto out;
e3920fb4
RW
326
327 printk("Enabling non-boot CPUs ...\n");
328 for_each_cpu_mask(cpu, frozen_cpus) {
8bb78442 329 error = _cpu_up(cpu, 1);
e3920fb4
RW
330 if (!error) {
331 printk("CPU%d is up\n", cpu);
332 continue;
333 }
1d64b9cb 334 printk(KERN_WARNING "Error taking CPU%d up: %d\n", cpu, error);
e3920fb4
RW
335 }
336 cpus_clear(frozen_cpus);
1d64b9cb
RW
337out:
338 mutex_unlock(&cpu_add_remove_lock);
1da177e4 339}
f3de4be9 340#endif /* CONFIG_PM_SLEEP_SMP */