// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2010 Red Hat, Inc., Peter Zijlstra
 *
 * Provides a framework for enqueueing and running callbacks from hardirq
 * context. The enqueueing is NMI-safe.
 */
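
/*
 * Usage sketch (illustrative, not part of this file): a caller embeds a
 * struct irq_work, binds it to a callback with DEFINE_IRQ_WORK() or
 * init_irq_work(), and queues it from NMI or hardirq context; the
 * callback later runs from the irq_work interrupt (or from the timer
 * tick on architectures without a dedicated one).
 *
 *	static void my_func(struct irq_work *w)
 *	{
 *		pr_info("irq_work callback ran\n");
 *	}
 *	static DEFINE_IRQ_WORK(my_work, my_func);
 *
 *	irq_work_queue(&my_work);	(NMI-safe)
 */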

#include <linux/bug.h>
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/irq_work.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <linux/irqflags.h>
#include <linux/sched.h>
#include <linux/tick.h>
#include <linux/cpu.h>
#include <linux/notifier.h>
#include <linux/smp.h>
#include <linux/smpboot.h>
#include <asm/processor.h>
#include <linux/kasan.h>

static DEFINE_PER_CPU(struct llist_head, raised_list);
static DEFINE_PER_CPU(struct llist_head, lazy_list);
static DEFINE_PER_CPU(struct task_struct *, irq_workd);

static void wake_irq_workd(void)
{
	struct task_struct *tsk = __this_cpu_read(irq_workd);

	if (!llist_empty(this_cpu_ptr(&lazy_list)) && tsk)
		wake_up_process(tsk);
}

#ifdef CONFIG_SMP
static void irq_work_wake(struct irq_work *entry)
{
	wake_irq_workd();
}

static DEFINE_PER_CPU(struct irq_work, irq_work_wakeup) =
	IRQ_WORK_INIT_HARD(irq_work_wake);
#endif

static int irq_workd_should_run(unsigned int cpu)
{
	return !llist_empty(this_cpu_ptr(&lazy_list));
}

/*
 * Claim the entry so that no one else will poke at it.
 */
static bool irq_work_claim(struct irq_work *work)
{
	int oflags;

	oflags = atomic_fetch_or(IRQ_WORK_CLAIMED | CSD_TYPE_IRQ_WORK, &work->node.a_flags);
	/*
	 * If the work is already pending, no need to raise the IPI.
	 * The pairing smp_mb() in irq_work_single() makes sure
	 * everything we did before is visible.
	 */
	if (oflags & IRQ_WORK_PENDING)
		return false;
	return true;
}
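
/*
 * Flag lifecycle sketch (assuming the flag definitions used by this file,
 * where IRQ_WORK_CLAIMED == IRQ_WORK_PENDING | IRQ_WORK_BUSY):
 *
 *	free:	 node.a_flags has neither PENDING nor BUSY set
 *	claimed: irq_work_claim() sets PENDING | BUSY atomically
 *	running: irq_work_single() clears PENDING but keeps BUSY, so the
 *		 work may be re-claimed while its callback is running
 *	free:	 the final cmpxchg in irq_work_single() clears BUSY,
 *		 unless the work was re-claimed in the meantime
 */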

void __weak arch_irq_work_raise(void)
{
	/*
	 * Lame architectures will get the timer tick callback
	 */
}
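
/*
 * Illustrative note: architectures with a dedicated irq_work interrupt
 * override this weak stub with a self-IPI; on x86, for instance, the
 * override sends IRQ_WORK_VECTOR to the local CPU
 * (see arch/x86/kernel/irq_work.c).
 */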

/* Enqueue on current CPU, work must already be claimed and preempt disabled */
static void __irq_work_queue_local(struct irq_work *work)
{
	struct llist_head *list;
	bool rt_lazy_work = false;
	bool lazy_work = false;
	int work_flags;

	work_flags = atomic_read(&work->node.a_flags);
	if (work_flags & IRQ_WORK_LAZY)
		lazy_work = true;
	else if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		 !(work_flags & IRQ_WORK_HARD_IRQ))
		rt_lazy_work = true;

	if (lazy_work || rt_lazy_work)
		list = this_cpu_ptr(&lazy_list);
	else
		list = this_cpu_ptr(&raised_list);

	if (!llist_add(&work->node.llist, list))
		return;

	/* If the work is "lazy", handle it from next tick if any */
	if (!lazy_work || tick_nohz_tick_stopped())
		arch_irq_work_raise();
}
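
/*
 * Rough sketch of the queueing classes sorted above (initializers from
 * <linux/irq_work.h>):
 *
 *	IRQ_WORK_INIT(f)	raised_list; run from the irq_work interrupt
 *	IRQ_WORK_INIT_LAZY(f)	lazy_list; run from the next timer tick
 *	IRQ_WORK_INIT_HARD(f)	raised_list even on PREEMPT_RT, where
 *				unmarked items go to the lazy_list and the
 *				irq_workd thread instead
 */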

/* Enqueue the irq work @work on the current CPU */
bool irq_work_queue(struct irq_work *work)
{
	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	/* Queue the entry and raise the IPI if needed. */
	preempt_disable();
	__irq_work_queue_local(work);
	preempt_enable();

	return true;
}
EXPORT_SYMBOL_GPL(irq_work_queue);

/*
 * Enqueue the irq_work @work on @cpu unless it's already pending
 * somewhere.
 *
 * Can be re-enqueued while the callback is still in progress.
 */
bool irq_work_queue_on(struct irq_work *work, int cpu)
{
#ifndef CONFIG_SMP
	return irq_work_queue(work);

#else /* CONFIG_SMP: */
	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(cpu));

	/* Only queue if not already pending */
	if (!irq_work_claim(work))
		return false;

	kasan_record_aux_stack_noalloc(work);

	preempt_disable();
	if (cpu != smp_processor_id()) {
		/* Arch remote IPI send/receive backends aren't NMI safe */
		WARN_ON_ONCE(in_nmi());

		/*
		 * On PREEMPT_RT the items which are not marked as
		 * IRQ_WORK_HARD_IRQ are added to the lazy list and a HARD work
		 * item is used on the remote CPU to wake the thread.
		 */
		if (IS_ENABLED(CONFIG_PREEMPT_RT) &&
		    !(atomic_read(&work->node.a_flags) & IRQ_WORK_HARD_IRQ)) {

			if (!llist_add(&work->node.llist, &per_cpu(lazy_list, cpu)))
				goto out;

			work = &per_cpu(irq_work_wakeup, cpu);
			if (!irq_work_claim(work))
				goto out;
		}

		__smp_call_single_queue(cpu, &work->node.llist);
	} else {
		__irq_work_queue_local(work);
	}
out:
	preempt_enable();

	return true;
#endif /* CONFIG_SMP */
}
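
/*
 * Usage sketch (hypothetical caller and target_cpu variable): defer work
 * to a specific CPU instead of the local one.
 *
 *	static void fold_stats(struct irq_work *w)
 *	{
 *		pr_info("folding stats on CPU %d\n", smp_processor_id());
 *	}
 *	static DEFINE_IRQ_WORK(stats_work, fold_stats);
 *
 *	irq_work_queue_on(&stats_work, target_cpu);
 */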

bool irq_work_needs_cpu(void)
{
	struct llist_head *raised, *lazy;

	raised = this_cpu_ptr(&raised_list);
	lazy = this_cpu_ptr(&lazy_list);

	if (llist_empty(raised) || arch_irq_work_has_interrupt())
		if (llist_empty(lazy))
			return false;

	/* All work should have been flushed before going offline */
	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));

	return true;
}

void irq_work_single(void *arg)
{
	struct irq_work *work = arg;
	int flags;

	/*
	 * Clear the PENDING bit, after this point the @work can be re-used.
	 * The PENDING bit acts as a lock, and we own it, so we can clear it
	 * without atomic ops.
	 */
	flags = atomic_read(&work->node.a_flags);
	flags &= ~IRQ_WORK_PENDING;
	atomic_set(&work->node.a_flags, flags);

	/*
	 * See irq_work_claim().
	 */
	smp_mb();

	lockdep_irq_work_enter(flags);
	work->func(work);
	lockdep_irq_work_exit(flags);

	/*
	 * Clear the BUSY bit, if set, and return to the free state if no-one
	 * else claimed it meanwhile.
	 */
	(void)atomic_cmpxchg(&work->node.a_flags, flags, flags & ~IRQ_WORK_BUSY);

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt())
		rcuwait_wake_up(&work->irqwait);
}

static void irq_work_run_list(struct llist_head *list)
{
	struct irq_work *work, *tmp;
	struct llist_node *llnode;

	/*
	 * On PREEMPT_RT IRQ-work which is not marked as HARD will be processed
	 * in a per-CPU thread in preemptible context. Only the items which are
	 * marked as IRQ_WORK_HARD_IRQ will be processed in hardirq context.
	 */
	BUG_ON(!irqs_disabled() && !IS_ENABLED(CONFIG_PREEMPT_RT));

	if (llist_empty(list))
		return;

	llnode = llist_del_all(list);
	llist_for_each_entry_safe(work, tmp, llnode, node.llist)
		irq_work_single(work);
}

/*
 * hotplug calls this through:
 *  hotplug_cfd() -> flush_smp_call_function_queue()
 */
void irq_work_run(void)
{
	irq_work_run_list(this_cpu_ptr(&raised_list));
	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}
EXPORT_SYMBOL_GPL(irq_work_run);

void irq_work_tick(void)
{
	struct llist_head *raised = this_cpu_ptr(&raised_list);

	if (!llist_empty(raised) && !arch_irq_work_has_interrupt())
		irq_work_run_list(raised);

	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
		irq_work_run_list(this_cpu_ptr(&lazy_list));
	else
		wake_irq_workd();
}

/*
 * Synchronize against the irq_work @work, ensuring it is not
 * currently in use.
 */
void irq_work_sync(struct irq_work *work)
{
	lockdep_assert_irqs_enabled();
	might_sleep();

	if ((IS_ENABLED(CONFIG_PREEMPT_RT) && !irq_work_is_hard(work)) ||
	    !arch_irq_work_has_interrupt()) {
		rcuwait_wait_event(&work->irqwait, !irq_work_is_busy(work),
				   TASK_UNINTERRUPTIBLE);
		return;
	}

	while (irq_work_is_busy(work))
		cpu_relax();
}
EXPORT_SYMBOL_GPL(irq_work_sync);
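
/*
 * Usage sketch (hypothetical @obj): make sure the callback has finished
 * before freeing the object embedding the irq_work.
 *
 *	irq_work_sync(&obj->work);
 *	kfree(obj);
 */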

static void run_irq_workd(unsigned int cpu)
{
	irq_work_run_list(this_cpu_ptr(&lazy_list));
}

static void irq_workd_setup(unsigned int cpu)
{
	sched_set_fifo_low(current);
}

static struct smp_hotplug_thread irqwork_threads = {
	.store			= &irq_workd,
	.setup			= irq_workd_setup,
	.thread_should_run	= irq_workd_should_run,
	.thread_fn		= run_irq_workd,
	.thread_comm		= "irq_work/%u",
};

static __init int irq_work_init_threads(void)
{
	if (IS_ENABLED(CONFIG_PREEMPT_RT))
		BUG_ON(smpboot_register_percpu_thread(&irqwork_threads));
	return 0;
}
early_initcall(irq_work_init_threads);