hotplug: Fix UP bug in smpboot hotplug code
[linux-2.6-block.git] / kernel / softirq.c

/*
 * linux/kernel/softirq.c
 *
 * Copyright (C) 1992 Linus Torvalds
 *
 * Distribute under GPLv2.
 *
 * Rewritten. Old one was good in 2.2, but in 2.3 it was immoral. --ANK (990903)
 *
 * Remote softirq infrastructure is by Jens Axboe.
 */

#include <linux/export.h>
#include <linux/kernel_stat.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/notifier.h>
#include <linux/percpu.h>
#include <linux/cpu.h>
#include <linux/freezer.h>
#include <linux/kthread.h>
#include <linux/rcupdate.h>
#include <linux/ftrace.h>
#include <linux/smp.h>
#include <linux/tick.h>

#define CREATE_TRACE_POINTS
#include <trace/events/irq.h>

#include <asm/irq.h>
/*
   - No shared variables, all the data are CPU local.
   - If a softirq needs serialization, let it serialize itself
     by its own spinlocks.
   - Even if softirq is serialized, only local cpu is marked for
     execution. Hence, we get something sort of weak cpu binding.
     Though it is still not clear whether it will result in better
     locality or not.

   Examples:
   - NET RX softirq. It is multithreaded and does not require
     any global serialization.
   - NET TX softirq. It kicks software netdevice queues, hence
     it is logically serialized per device, but this serialization
     is invisible to common code.
   - Tasklets: serialized wrt itself.
 */

#ifndef __ARCH_IRQ_STAT
irq_cpustat_t irq_stat[NR_CPUS] ____cacheline_aligned;
EXPORT_SYMBOL(irq_stat);
#endif

static struct softirq_action softirq_vec[NR_SOFTIRQS] __cacheline_aligned_in_smp;

DEFINE_PER_CPU(struct task_struct *, ksoftirqd);

char *softirq_to_name[NR_SOFTIRQS] = {
	"HI", "TIMER", "NET_TX", "NET_RX", "BLOCK", "BLOCK_IOPOLL",
	"TASKLET", "SCHED", "HRTIMER", "RCU"
};

/*
 * we cannot loop indefinitely here to avoid userspace starvation,
 * but we also don't want to introduce a worst case 1/HZ latency
 * to the pending events, so let the scheduler balance
 * the softirq load for us.
 */
static void wakeup_softirqd(void)
{
	/* Interrupts are disabled: no need to stop preemption */
	struct task_struct *tsk = __this_cpu_read(ksoftirqd);

	if (tsk && tsk->state != TASK_RUNNING)
		wake_up_process(tsk);
}

/*
 * preempt_count and SOFTIRQ_OFFSET usage:
 * - preempt_count is changed by SOFTIRQ_OFFSET on entering or leaving
 *   softirq processing.
 * - preempt_count is changed by SOFTIRQ_DISABLE_OFFSET (= 2 * SOFTIRQ_OFFSET)
 *   on local_bh_disable or local_bh_enable.
 * This lets us distinguish between whether we are currently processing
 * softirq and whether we just have bh disabled.
 */

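/*
 * Illustrative sketch (not part of the original file): how the two
 * offsets described above look through the standard context macros.
 * The function name is hypothetical.
 */
static void __maybe_unused example_softirq_context_check(void)
{
	local_bh_disable();
	/*
	 * Only SOFTIRQ_DISABLE_OFFSET has been added: bottom halves are
	 * disabled, so in_softirq() is true, but we are not inside a
	 * softirq handler, so in_serving_softirq() is false.
	 */
	WARN_ON(!in_softirq());
	WARN_ON(in_serving_softirq());
	local_bh_enable();
}
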
/*
 * This one is for softirq.c-internal use,
 * where hardirqs are disabled legitimately:
 */
#ifdef CONFIG_TRACE_IRQFLAGS
static void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	unsigned long flags;

	WARN_ON_ONCE(in_irq());

	raw_local_irq_save(flags);
	/*
	 * The preempt tracer hooks into add_preempt_count and will break
	 * lockdep because it calls back into lockdep after SOFTIRQ_OFFSET
	 * is set and before current->softirq_enabled is cleared.
	 * We must manually increment preempt_count here and manually
	 * call the trace_preempt_off later.
	 */
	preempt_count() += cnt;
	/*
	 * Were softirqs turned off above:
	 */
	if (softirq_count() == cnt)
		trace_softirqs_off(ip);
	raw_local_irq_restore(flags);

	if (preempt_count() == cnt)
		trace_preempt_off(CALLER_ADDR0, get_parent_ip(CALLER_ADDR1));
}
#else /* !CONFIG_TRACE_IRQFLAGS */
static inline void __local_bh_disable(unsigned long ip, unsigned int cnt)
{
	add_preempt_count(cnt);
	barrier();
}
#endif /* CONFIG_TRACE_IRQFLAGS */

void local_bh_disable(void)
{
	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(local_bh_disable);

static void __local_bh_enable(unsigned int cnt)
{
	WARN_ON_ONCE(in_irq());
	WARN_ON_ONCE(!irqs_disabled());

	if (softirq_count() == cnt)
		trace_softirqs_on((unsigned long)__builtin_return_address(0));
	sub_preempt_count(cnt);
}

/*
 * Special-case - softirqs can safely be enabled in
 * cond_resched_softirq(), or by __do_softirq(),
 * without processing still-pending softirqs:
 */
void _local_bh_enable(void)
{
	__local_bh_enable(SOFTIRQ_DISABLE_OFFSET);
}

EXPORT_SYMBOL(_local_bh_enable);

static inline void _local_bh_enable_ip(unsigned long ip)
{
	WARN_ON_ONCE(in_irq() || irqs_disabled());
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_disable();
#endif
	/*
	 * Are softirqs going to be turned on now:
	 */
	if (softirq_count() == SOFTIRQ_DISABLE_OFFSET)
		trace_softirqs_on(ip);
	/*
	 * Keep preemption disabled until we are done with
	 * softirq processing:
	 */
	sub_preempt_count(SOFTIRQ_DISABLE_OFFSET - 1);

	if (unlikely(!in_interrupt() && local_softirq_pending()))
		do_softirq();

	dec_preempt_count();
#ifdef CONFIG_TRACE_IRQFLAGS
	local_irq_enable();
#endif
	preempt_check_resched();
}

void local_bh_enable(void)
{
	_local_bh_enable_ip((unsigned long)__builtin_return_address(0));
}
EXPORT_SYMBOL(local_bh_enable);

void local_bh_enable_ip(unsigned long ip)
{
	_local_bh_enable_ip(ip);
}
EXPORT_SYMBOL(local_bh_enable_ip);

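/*
 * Illustrative sketch (not part of the original file): the usual way
 * process context uses the pair above to protect per-CPU state that a
 * softirq handler also touches. The variable and function names are
 * hypothetical.
 */
static DEFINE_PER_CPU(unsigned long, example_bh_counter);

static void __maybe_unused example_update_from_process_context(void)
{
	/*
	 * local_bh_disable() bumps preempt_count, so neither preemption
	 * nor a local softirq handler can touch the per-CPU counter
	 * until local_bh_enable(), which also runs any softirqs raised
	 * in the meantime.
	 */
	local_bh_disable();
	__this_cpu_inc(example_bh_counter);
	local_bh_enable();
}
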
/*
 * We restart softirq processing MAX_SOFTIRQ_RESTART times,
 * and we fall back to softirqd after that.
 *
 * This number has been established via experimentation.
 * The two things to balance are latency and fairness -
 * we want to handle softirqs as soon as possible, but they
 * should not be able to lock up the box.
 */
#define MAX_SOFTIRQ_RESTART 10

asmlinkage void __do_softirq(void)
{
	struct softirq_action *h;
	__u32 pending;
	int max_restart = MAX_SOFTIRQ_RESTART;
	int cpu;
	unsigned long old_flags = current->flags;

	/*
	 * Mask out PF_MEMALLOC as the current task context is borrowed for
	 * the softirq. A softirq handler such as network RX might set
	 * PF_MEMALLOC again if the socket is related to swap.
	 */
	current->flags &= ~PF_MEMALLOC;

	pending = local_softirq_pending();
	account_system_vtime(current);

	__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
	lockdep_softirq_enter();

	cpu = smp_processor_id();
restart:
	/* Reset the pending bitmask before enabling irqs */
	set_softirq_pending(0);

	local_irq_enable();

	h = softirq_vec;

	do {
		if (pending & 1) {
			unsigned int vec_nr = h - softirq_vec;
			int prev_count = preempt_count();

			kstat_incr_softirqs_this_cpu(vec_nr);

			trace_softirq_entry(vec_nr);
			h->action(h);
			trace_softirq_exit(vec_nr);
			if (unlikely(prev_count != preempt_count())) {
				printk(KERN_ERR "huh, entered softirq %u %s %p "
				       "with preempt_count %08x, "
				       "exited with %08x?\n", vec_nr,
				       softirq_to_name[vec_nr], h->action,
				       prev_count, preempt_count());
				preempt_count() = prev_count;
			}

			rcu_bh_qs(cpu);
		}
		h++;
		pending >>= 1;
	} while (pending);

	local_irq_disable();

	pending = local_softirq_pending();
	if (pending && --max_restart)
		goto restart;

	if (pending)
		wakeup_softirqd();

	lockdep_softirq_exit();

	account_system_vtime(current);
	__local_bh_enable(SOFTIRQ_OFFSET);
	tsk_restore_flags(current, old_flags, PF_MEMALLOC);
}

#ifndef __ARCH_HAS_DO_SOFTIRQ

asmlinkage void do_softirq(void)
{
	__u32 pending;
	unsigned long flags;

	if (in_interrupt())
		return;

	local_irq_save(flags);

	pending = local_softirq_pending();

	if (pending)
		__do_softirq();

	local_irq_restore(flags);
}

#endif

/*
 * Enter an interrupt context.
 */
void irq_enter(void)
{
	int cpu = smp_processor_id();

	rcu_irq_enter();
	if (is_idle_task(current) && !in_interrupt()) {
		/*
		 * Prevent raise_softirq from needlessly waking up ksoftirqd
		 * here, as softirq will be serviced on return from interrupt.
		 */
		local_bh_disable();
		tick_check_idle(cpu);
		_local_bh_enable();
	}

	__irq_enter();
}

static inline void invoke_softirq(void)
{
	if (!force_irqthreads) {
#ifdef __ARCH_IRQ_EXIT_IRQS_DISABLED
		__do_softirq();
#else
		do_softirq();
#endif
	} else {
		__local_bh_disable((unsigned long)__builtin_return_address(0),
				SOFTIRQ_OFFSET);
		wakeup_softirqd();
		__local_bh_enable(SOFTIRQ_OFFSET);
	}
}

/*
 * Exit an interrupt context. Process softirqs if needed and possible:
 */
void irq_exit(void)
{
	account_system_vtime(current);
	trace_hardirq_exit();
	sub_preempt_count(IRQ_EXIT_OFFSET);
	if (!in_interrupt() && local_softirq_pending())
		invoke_softirq();

#ifdef CONFIG_NO_HZ
	/* Make sure that timer wheel updates are propagated */
	if (idle_cpu(smp_processor_id()) && !in_interrupt() && !need_resched())
		tick_nohz_irq_exit();
#endif
	rcu_irq_exit();
	sched_preempt_enable_no_resched();
}

/*
 * This function must run with irqs disabled!
 */
inline void raise_softirq_irqoff(unsigned int nr)
{
	__raise_softirq_irqoff(nr);

	/*
	 * If we're in an interrupt or softirq, we're done
	 * (this also catches softirq-disabled code). We will
	 * actually run the softirq once we return from
	 * the irq or softirq.
	 *
	 * Otherwise we wake up ksoftirqd to make sure we
	 * schedule the softirq soon.
	 */
	if (!in_interrupt())
		wakeup_softirqd();
}

void raise_softirq(unsigned int nr)
{
	unsigned long flags;

	local_irq_save(flags);
	raise_softirq_irqoff(nr);
	local_irq_restore(flags);
}

void __raise_softirq_irqoff(unsigned int nr)
{
	trace_softirq_raise(nr);
	or_softirq_pending(1UL << nr);
}

void open_softirq(int nr, void (*action)(struct softirq_action *))
{
	softirq_vec[nr].action = action;
}

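/*
 * Illustrative sketch (not part of the original file): the pattern a
 * subsystem uses to hook one of the fixed softirq vectors, mirroring
 * what softirq_init() below does for the tasklet vectors.
 * EXAMPLE_SOFTIRQ is a hypothetical stand-in for an entry that would
 * have to exist in the softirq enum; the handler name is made up too.
 */
static void example_softirq_action(struct softirq_action *h)
{
	/* Runs with interrupts enabled and bottom halves disabled. */
}

static void __maybe_unused example_softirq_setup(void)
{
	open_softirq(EXAMPLE_SOFTIRQ, example_softirq_action);
}

static void __maybe_unused example_softirq_poke(void)
{
	/* raise_softirq() does the local_irq_save/restore itself. */
	raise_softirq(EXAMPLE_SOFTIRQ);
}
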
/*
 * Tasklets
 */
struct tasklet_head
{
	struct tasklet_struct *head;
	struct tasklet_struct **tail;
};

static DEFINE_PER_CPU(struct tasklet_head, tasklet_vec);
static DEFINE_PER_CPU(struct tasklet_head, tasklet_hi_vec);

void __tasklet_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_vec.tail) = t;
	__this_cpu_write(tasklet_vec.tail, &(t->next));
	raise_softirq_irqoff(TASKLET_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_schedule);

void __tasklet_hi_schedule(struct tasklet_struct *t)
{
	unsigned long flags;

	local_irq_save(flags);
	t->next = NULL;
	*__this_cpu_read(tasklet_hi_vec.tail) = t;
	__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
	raise_softirq_irqoff(HI_SOFTIRQ);
	local_irq_restore(flags);
}

EXPORT_SYMBOL(__tasklet_hi_schedule);

void __tasklet_hi_schedule_first(struct tasklet_struct *t)
{
	BUG_ON(!irqs_disabled());

	t->next = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, t);
	__raise_softirq_irqoff(HI_SOFTIRQ);
}

EXPORT_SYMBOL(__tasklet_hi_schedule_first);

static void tasklet_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_vec.head);
	__this_cpu_write(tasklet_vec.head, NULL);
	__this_cpu_write(tasklet_vec.tail, &__get_cpu_var(tasklet_vec).head);
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_vec.tail) = t;
		__this_cpu_write(tasklet_vec.tail, &(t->next));
		__raise_softirq_irqoff(TASKLET_SOFTIRQ);
		local_irq_enable();
	}
}

static void tasklet_hi_action(struct softirq_action *a)
{
	struct tasklet_struct *list;

	local_irq_disable();
	list = __this_cpu_read(tasklet_hi_vec.head);
	__this_cpu_write(tasklet_hi_vec.head, NULL);
	__this_cpu_write(tasklet_hi_vec.tail, &__get_cpu_var(tasklet_hi_vec).head);
	local_irq_enable();

	while (list) {
		struct tasklet_struct *t = list;

		list = list->next;

		if (tasklet_trylock(t)) {
			if (!atomic_read(&t->count)) {
				if (!test_and_clear_bit(TASKLET_STATE_SCHED, &t->state))
					BUG();
				t->func(t->data);
				tasklet_unlock(t);
				continue;
			}
			tasklet_unlock(t);
		}

		local_irq_disable();
		t->next = NULL;
		*__this_cpu_read(tasklet_hi_vec.tail) = t;
		__this_cpu_write(tasklet_hi_vec.tail, &(t->next));
		__raise_softirq_irqoff(HI_SOFTIRQ);
		local_irq_enable();
	}
}


void tasklet_init(struct tasklet_struct *t,
		  void (*func)(unsigned long), unsigned long data)
{
	t->next = NULL;
	t->state = 0;
	atomic_set(&t->count, 0);
	t->func = func;
	t->data = data;
}

EXPORT_SYMBOL(tasklet_init);

void tasklet_kill(struct tasklet_struct *t)
{
	if (in_interrupt())
		printk("Attempt to kill tasklet from interrupt\n");

	while (test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) {
		do {
			yield();
		} while (test_bit(TASKLET_STATE_SCHED, &t->state));
	}
	tasklet_unlock_wait(t);
	clear_bit(TASKLET_STATE_SCHED, &t->state);
}

EXPORT_SYMBOL(tasklet_kill);

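/*
 * Illustrative sketch (not part of the original file): typical driver
 * use of the tasklet API above - init once, schedule from the hardirq
 * handler, kill on teardown. The device structure and names are
 * hypothetical.
 */
struct example_dev {
	struct tasklet_struct rx_tasklet;
};

static void example_rx_tasklet(unsigned long data)
{
	struct example_dev *dev = (struct example_dev *)data;

	/* Deferred work runs here, in softirq context. */
	(void)dev;
}

static void __maybe_unused example_dev_init(struct example_dev *dev)
{
	tasklet_init(&dev->rx_tasklet, example_rx_tasklet, (unsigned long)dev);
}

static irqreturn_t __maybe_unused example_dev_irq(int irq, void *data)
{
	struct example_dev *dev = data;

	/* Acknowledge the hardware, then defer the heavy lifting. */
	tasklet_schedule(&dev->rx_tasklet);
	return IRQ_HANDLED;
}

static void __maybe_unused example_dev_teardown(struct example_dev *dev)
{
	/* Waits until the tasklet is no longer scheduled or running. */
	tasklet_kill(&dev->rx_tasklet);
}
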
/*
 * tasklet_hrtimer
 */

/*
 * The trampoline is called when the hrtimer expires. It schedules a tasklet
 * to run __tasklet_hrtimer_trampoline() which in turn will call the intended
 * hrtimer callback, but from softirq context.
 */
static enum hrtimer_restart __hrtimer_tasklet_trampoline(struct hrtimer *timer)
{
	struct tasklet_hrtimer *ttimer =
		container_of(timer, struct tasklet_hrtimer, timer);

	tasklet_hi_schedule(&ttimer->tasklet);
	return HRTIMER_NORESTART;
}

/*
 * Helper function which calls the hrtimer callback from
 * tasklet/softirq context
 */
static void __tasklet_hrtimer_trampoline(unsigned long data)
{
	struct tasklet_hrtimer *ttimer = (void *)data;
	enum hrtimer_restart restart;

	restart = ttimer->function(&ttimer->timer);
	if (restart != HRTIMER_NORESTART)
		hrtimer_restart(&ttimer->timer);
}

/**
 * tasklet_hrtimer_init - Init a tasklet/hrtimer combo for softirq callbacks
 * @ttimer: tasklet_hrtimer which is initialized
 * @function: hrtimer callback function which gets called from softirq context
 * @which_clock: clock id (CLOCK_MONOTONIC/CLOCK_REALTIME)
 * @mode: hrtimer mode (HRTIMER_MODE_ABS/HRTIMER_MODE_REL)
 */
void tasklet_hrtimer_init(struct tasklet_hrtimer *ttimer,
			  enum hrtimer_restart (*function)(struct hrtimer *),
			  clockid_t which_clock, enum hrtimer_mode mode)
{
	hrtimer_init(&ttimer->timer, which_clock, mode);
	ttimer->timer.function = __hrtimer_tasklet_trampoline;
	tasklet_init(&ttimer->tasklet, __tasklet_hrtimer_trampoline,
		     (unsigned long)ttimer);
	ttimer->function = function;
}
EXPORT_SYMBOL_GPL(tasklet_hrtimer_init);

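/*
 * Illustrative sketch (not part of the original file): arming a
 * tasklet_hrtimer so the callback runs from HI_SOFTIRQ context rather
 * than hardirq context. The names are hypothetical;
 * tasklet_hrtimer_start() is the companion helper declared in
 * <linux/interrupt.h>.
 */
static struct tasklet_hrtimer example_poll_timer;

static enum hrtimer_restart example_poll(struct hrtimer *timer)
{
	/* Invoked via the trampolines above, in softirq context. */
	return HRTIMER_NORESTART;
}

static void __maybe_unused example_poll_start(void)
{
	tasklet_hrtimer_init(&example_poll_timer, example_poll,
			     CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	tasklet_hrtimer_start(&example_poll_timer,
			      ktime_set(0, 10 * NSEC_PER_MSEC),
			      HRTIMER_MODE_REL);
}
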
/*
 * Remote softirq bits
 */

DEFINE_PER_CPU(struct list_head [NR_SOFTIRQS], softirq_work_list);
EXPORT_PER_CPU_SYMBOL(softirq_work_list);

static void __local_trigger(struct call_single_data *cp, int softirq)
{
	struct list_head *head = &__get_cpu_var(softirq_work_list[softirq]);

	list_add_tail(&cp->list, head);

	/* Trigger the softirq only if the list was previously empty. */
	if (head->next == &cp->list)
		raise_softirq_irqoff(softirq);
}

#ifdef CONFIG_USE_GENERIC_SMP_HELPERS
static void remote_softirq_receive(void *data)
{
	struct call_single_data *cp = data;
	unsigned long flags;
	int softirq;

	softirq = cp->priv;

	local_irq_save(flags);
	__local_trigger(cp, softirq);
	local_irq_restore(flags);
}

static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	if (cpu_online(cpu)) {
		cp->func = remote_softirq_receive;
		cp->info = cp;
		cp->flags = 0;
		cp->priv = softirq;

		__smp_call_function_single(cpu, cp, 0);
		return 0;
	}
	return 1;
}
#else /* CONFIG_USE_GENERIC_SMP_HELPERS */
static int __try_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	return 1;
}
#endif

/**
 * __send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @this_cpu: the currently executing cpu
 * @softirq: the softirq for the work
 *
 * Attempt to schedule softirq work on a remote cpu. If this cannot be
 * done, the work is instead queued up on the local cpu.
 *
 * Interrupts must be disabled.
 */
void __send_remote_softirq(struct call_single_data *cp, int cpu, int this_cpu, int softirq)
{
	if (cpu == this_cpu || __try_remote_softirq(cp, cpu, softirq))
		__local_trigger(cp, softirq);
}
EXPORT_SYMBOL(__send_remote_softirq);

/**
 * send_remote_softirq - try to schedule softirq work on a remote cpu
 * @cp: private SMP call function data area
 * @cpu: the remote cpu
 * @softirq: the softirq for the work
 *
 * Like __send_remote_softirq except that disabling interrupts and
 * computing the current cpu is done for the caller.
 */
void send_remote_softirq(struct call_single_data *cp, int cpu, int softirq)
{
	unsigned long flags;
	int this_cpu;

	local_irq_save(flags);
	this_cpu = smp_processor_id();
	__send_remote_softirq(cp, cpu, this_cpu, softirq);
	local_irq_restore(flags);
}
EXPORT_SYMBOL(send_remote_softirq);

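/*
 * Illustrative sketch (not part of the original file): steering a
 * completion to the softirq of the CPU that originated the work. The
 * structure, field names and the choice of BLOCK_SOFTIRQ are
 * hypothetical; the call_single_data must stay alive until the work
 * has been handled.
 */
struct example_remote_work {
	struct call_single_data csd;
	int submit_cpu;
};

static void __maybe_unused example_complete(struct example_remote_work *w)
{
	/*
	 * If w->submit_cpu is offline (or is the local CPU), the work
	 * is queued on the local CPU's softirq_work_list instead; the
	 * softirq handler then picks it up off that per-CPU list.
	 */
	send_remote_softirq(&w->csd, w->submit_cpu, BLOCK_SOFTIRQ);
}
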
static int __cpuinit remote_softirq_cpu_notify(struct notifier_block *self,
					       unsigned long action, void *hcpu)
{
	/*
	 * If a CPU goes away, splice its entries to the current CPU
	 * and trigger a run of the softirq
	 */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) {
		int cpu = (unsigned long) hcpu;
		int i;

		local_irq_disable();
		for (i = 0; i < NR_SOFTIRQS; i++) {
			struct list_head *head = &per_cpu(softirq_work_list[i], cpu);
			struct list_head *local_head;

			if (list_empty(head))
				continue;

			local_head = &__get_cpu_var(softirq_work_list[i]);
			list_splice_init(head, local_head);
			raise_softirq_irqoff(i);
		}
		local_irq_enable();
	}

	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata remote_softirq_cpu_notifier = {
	.notifier_call = remote_softirq_cpu_notify,
};

void __init softirq_init(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int i;

		per_cpu(tasklet_vec, cpu).tail =
			&per_cpu(tasklet_vec, cpu).head;
		per_cpu(tasklet_hi_vec, cpu).tail =
			&per_cpu(tasklet_hi_vec, cpu).head;
		for (i = 0; i < NR_SOFTIRQS; i++)
			INIT_LIST_HEAD(&per_cpu(softirq_work_list[i], cpu));
	}

	register_hotcpu_notifier(&remote_softirq_cpu_notifier);

	open_softirq(TASKLET_SOFTIRQ, tasklet_action);
	open_softirq(HI_SOFTIRQ, tasklet_hi_action);
}

static int run_ksoftirqd(void * __bind_cpu)
{
	set_current_state(TASK_INTERRUPTIBLE);

	while (!kthread_should_stop()) {
		preempt_disable();
		if (!local_softirq_pending()) {
			schedule_preempt_disabled();
		}

		__set_current_state(TASK_RUNNING);

		while (local_softirq_pending()) {
			/* Preempt disable stops cpu going offline.
			   If already offline, we'll be on wrong CPU:
			   don't process */
			if (cpu_is_offline((long)__bind_cpu))
				goto wait_to_die;
			local_irq_disable();
			if (local_softirq_pending())
				__do_softirq();
			local_irq_enable();
			sched_preempt_enable_no_resched();
			cond_resched();
			preempt_disable();
			rcu_note_context_switch((long)__bind_cpu);
		}
		preempt_enable();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;

wait_to_die:
	preempt_enable();
	/* Wait for kthread_stop */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU
/*
 * tasklet_kill_immediate is called to remove a tasklet which can already be
 * scheduled for execution on @cpu.
 *
 * Unlike tasklet_kill, this function removes the tasklet
 * _immediately_, even if the tasklet is in TASKLET_STATE_SCHED state.
 *
 * When this function is called, @cpu must be in the CPU_DEAD state.
 */
void tasklet_kill_immediate(struct tasklet_struct *t, unsigned int cpu)
{
	struct tasklet_struct **i;

	BUG_ON(cpu_online(cpu));
	BUG_ON(test_bit(TASKLET_STATE_RUN, &t->state));

	if (!test_bit(TASKLET_STATE_SCHED, &t->state))
		return;

	/* CPU is dead, so no lock needed. */
	for (i = &per_cpu(tasklet_vec, cpu).head; *i; i = &(*i)->next) {
		if (*i == t) {
			*i = t->next;
			/* If this was the tail element, move the tail ptr */
			if (*i == NULL)
				per_cpu(tasklet_vec, cpu).tail = i;
			return;
		}
	}
	BUG();
}

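/*
 * Illustrative sketch (not part of the original file): a hotplug
 * notifier using tasklet_kill_immediate() to drop a per-CPU tasklet
 * that may still be queued on a CPU that has just gone down. Names
 * are hypothetical.
 */
static DEFINE_PER_CPU(struct tasklet_struct, example_cpu_tasklet);

static int __maybe_unused example_cpu_notify(struct notifier_block *nb,
					     unsigned long action, void *hcpu)
{
	unsigned int cpu = (unsigned long)hcpu;

	/* @cpu is already in the CPU_DEAD state here, as required. */
	if (action == CPU_DEAD || action == CPU_DEAD_FROZEN)
		tasklet_kill_immediate(&per_cpu(example_cpu_tasklet, cpu), cpu);

	return NOTIFY_OK;
}
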
static void takeover_tasklets(unsigned int cpu)
{
	/* CPU is dead, so no lock needed. */
	local_irq_disable();

	/* Find end, append list for that CPU. */
	if (&per_cpu(tasklet_vec, cpu).head != per_cpu(tasklet_vec, cpu).tail) {
		*__this_cpu_read(tasklet_vec.tail) = per_cpu(tasklet_vec, cpu).head;
		this_cpu_write(tasklet_vec.tail, per_cpu(tasklet_vec, cpu).tail);
		per_cpu(tasklet_vec, cpu).head = NULL;
		per_cpu(tasklet_vec, cpu).tail = &per_cpu(tasklet_vec, cpu).head;
	}
	raise_softirq_irqoff(TASKLET_SOFTIRQ);

	if (&per_cpu(tasklet_hi_vec, cpu).head != per_cpu(tasklet_hi_vec, cpu).tail) {
		*__this_cpu_read(tasklet_hi_vec.tail) = per_cpu(tasklet_hi_vec, cpu).head;
		__this_cpu_write(tasklet_hi_vec.tail, per_cpu(tasklet_hi_vec, cpu).tail);
		per_cpu(tasklet_hi_vec, cpu).head = NULL;
		per_cpu(tasklet_hi_vec, cpu).tail = &per_cpu(tasklet_hi_vec, cpu).head;
	}
	raise_softirq_irqoff(HI_SOFTIRQ);

	local_irq_enable();
}
#endif /* CONFIG_HOTPLUG_CPU */

static int __cpuinit cpu_callback(struct notifier_block *nfb,
				  unsigned long action,
				  void *hcpu)
{
	int hotcpu = (unsigned long)hcpu;
	struct task_struct *p;

	switch (action) {
	case CPU_UP_PREPARE:
	case CPU_UP_PREPARE_FROZEN:
		p = kthread_create_on_node(run_ksoftirqd,
					   hcpu,
					   cpu_to_node(hotcpu),
					   "ksoftirqd/%d", hotcpu);
		if (IS_ERR(p)) {
			printk("ksoftirqd for %i failed\n", hotcpu);
			return notifier_from_errno(PTR_ERR(p));
		}
		kthread_bind(p, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = p;
		break;
	case CPU_ONLINE:
	case CPU_ONLINE_FROZEN:
		wake_up_process(per_cpu(ksoftirqd, hotcpu));
		break;
#ifdef CONFIG_HOTPLUG_CPU
	case CPU_UP_CANCELED:
	case CPU_UP_CANCELED_FROZEN:
		if (!per_cpu(ksoftirqd, hotcpu))
			break;
		/* Unbind so it can run. Fall thru. */
		kthread_bind(per_cpu(ksoftirqd, hotcpu),
			     cpumask_any(cpu_online_mask));
	case CPU_DEAD:
	case CPU_DEAD_FROZEN: {
		static const struct sched_param param = {
			.sched_priority = MAX_RT_PRIO-1
		};

		p = per_cpu(ksoftirqd, hotcpu);
		per_cpu(ksoftirqd, hotcpu) = NULL;
		sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
		kthread_stop(p);
		takeover_tasklets(hotcpu);
		break;
	}
#endif /* CONFIG_HOTPLUG_CPU */
	}
	return NOTIFY_OK;
}

static struct notifier_block __cpuinitdata cpu_nfb = {
	.notifier_call = cpu_callback
};

static __init int spawn_ksoftirqd(void)
{
	void *cpu = (void *)(long)smp_processor_id();
	int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

	BUG_ON(err != NOTIFY_OK);
	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
	register_cpu_notifier(&cpu_nfb);
	return 0;
}
early_initcall(spawn_ksoftirqd);

/*
 * [ These __weak aliases are kept in a separate compilation unit, so that
 *   GCC does not inline them incorrectly. ]
 */

int __init __weak early_irq_init(void)
{
	return 0;
}

#ifdef CONFIG_GENERIC_HARDIRQS
int __init __weak arch_probe_nr_irqs(void)
{
	return NR_IRQS_LEGACY;
}

int __init __weak arch_early_irq_init(void)
{
	return 0;
}
#endif