Commit | Line | Data |
---|---|---|
07fe7cb7 DH |
1 | /* Worker thread pool for slow items, such as filesystem lookups or mkdirs |
2 | * | |
3 | * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. | |
4 | * Written by David Howells (dhowells@redhat.com) | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public Licence | |
8 | * as published by the Free Software Foundation; either version | |
9 | * 2 of the Licence, or (at your option) any later version. | |
8f0aa2f2 DH |
10 | * |
11 | * See Documentation/slow-work.txt | |
07fe7cb7 DH |
12 | */ |
13 | ||
14 | #include <linux/module.h> | |
15 | #include <linux/slow-work.h> | |
16 | #include <linux/kthread.h> | |
17 | #include <linux/freezer.h> | |
18 | #include <linux/wait.h> | |
8fba10a4 DH |
19 | #include <linux/proc_fs.h> |
20 | #include "slow-work.h" | |
3d7a641e | 21 | |
109d9272 DH |
22 | static void slow_work_cull_timeout(unsigned long); |
23 | static void slow_work_oom_timeout(unsigned long); | |
24 | ||
12e22c5e | 25 | #ifdef CONFIG_SYSCTL |
8d65af78 | 26 | static int slow_work_min_threads_sysctl(struct ctl_table *, int, |
12e22c5e DH |
27 | void __user *, size_t *, loff_t *); |
28 | ||
8d65af78 | 29 | static int slow_work_max_threads_sysctl(struct ctl_table *, int, |
12e22c5e DH |
30 | void __user *, size_t *, loff_t *); |
31 | #endif | |
32 | ||
07fe7cb7 DH |
33 | /* |
34 | * The pool of threads has at least min threads in it as long as someone is | |
35 | * using the facility, and may have as many as max. | |
36 | * | |
37 | * A portion of the pool may be processing very slow operations. | |
38 | */ | |
39 | static unsigned slow_work_min_threads = 2; | |
40 | static unsigned slow_work_max_threads = 4; | |
41 | static unsigned vslow_work_proportion = 50; /* % of threads that may process | |
42 | * very slow work */ | |
12e22c5e DH |
43 | |
44 | #ifdef CONFIG_SYSCTL | |
45 | static const int slow_work_min_min_threads = 2; | |
3d7a641e | 46 | static int slow_work_max_max_threads = SLOW_WORK_THREAD_LIMIT; |
12e22c5e DH |
47 | static const int slow_work_min_vslow = 1; |
48 | static const int slow_work_max_vslow = 99; | |
49 | ||
50 | ctl_table slow_work_sysctls[] = { | |
51 | { | |
52 | .ctl_name = CTL_UNNUMBERED, | |
53 | .procname = "min-threads", | |
54 | .data = &slow_work_min_threads, | |
55 | .maxlen = sizeof(unsigned), | |
56 | .mode = 0644, | |
57 | .proc_handler = slow_work_min_threads_sysctl, | |
58 | .extra1 = (void *) &slow_work_min_min_threads, | |
59 | .extra2 = &slow_work_max_threads, | |
60 | }, | |
61 | { | |
62 | .ctl_name = CTL_UNNUMBERED, | |
63 | .procname = "max-threads", | |
64 | .data = &slow_work_max_threads, | |
65 | .maxlen = sizeof(unsigned), | |
66 | .mode = 0644, | |
67 | .proc_handler = slow_work_max_threads_sysctl, | |
68 | .extra1 = &slow_work_min_threads, | |
69 | .extra2 = (void *) &slow_work_max_max_threads, | |
70 | }, | |
71 | { | |
72 | .ctl_name = CTL_UNNUMBERED, | |
73 | .procname = "vslow-percentage", | |
74 | .data = &vslow_work_proportion, | |
75 | .maxlen = sizeof(unsigned), | |
76 | .mode = 0644, | |
77 | .proc_handler = &proc_dointvec_minmax, | |
78 | .extra1 = (void *) &slow_work_min_vslow, | |
79 | .extra2 = (void *) &slow_work_max_vslow, | |
80 | }, | |
81 | { .ctl_name = 0 } | |
82 | }; | |
83 | #endif | |
84 | ||
85 | /* | |
86 | * The active state of the thread pool | |
87 | */ | |
07fe7cb7 DH |
88 | static atomic_t slow_work_thread_count; |
89 | static atomic_t vslow_work_executing_count; | |
90 | ||
109d9272 DH |
91 | static bool slow_work_may_not_start_new_thread; |
92 | static bool slow_work_cull; /* cull a thread due to lack of activity */ | |
93 | static DEFINE_TIMER(slow_work_cull_timer, slow_work_cull_timeout, 0, 0); | |
94 | static DEFINE_TIMER(slow_work_oom_timer, slow_work_oom_timeout, 0, 0); | |
95 | static struct slow_work slow_work_new_thread; /* new thread starter */ | |
96 | ||
3d7a641e DH |
97 | /* |
98 | * slow work ID allocation (use slow_work_queue_lock) | |
99 | */ | |
100 | static DECLARE_BITMAP(slow_work_ids, SLOW_WORK_THREAD_LIMIT); | |
101 | ||
102 | /* | |
103 | * Unregistration tracking to prevent put_ref() from disappearing during module | |
104 | * unload | |
105 | */ | |
106 | #ifdef CONFIG_MODULES | |
107 | static struct module *slow_work_thread_processing[SLOW_WORK_THREAD_LIMIT]; | |
108 | static struct module *slow_work_unreg_module; | |
109 | static struct slow_work *slow_work_unreg_work_item; | |
110 | static DECLARE_WAIT_QUEUE_HEAD(slow_work_unreg_wq); | |
111 | static DEFINE_MUTEX(slow_work_unreg_sync_lock); | |
112 | #endif | |
113 | ||
8fba10a4 DH |
114 | /* |
115 | * Data for tracking currently executing items for indication through /proc | |
116 | */ | |
117 | #ifdef CONFIG_SLOW_WORK_PROC | |
118 | struct slow_work *slow_work_execs[SLOW_WORK_THREAD_LIMIT]; | |
119 | pid_t slow_work_pids[SLOW_WORK_THREAD_LIMIT]; | |
120 | DEFINE_RWLOCK(slow_work_execs_lock); | |
121 | #endif | |
122 | ||
07fe7cb7 DH |
123 | /* |
124 | * The queues of work items and the lock governing access to them. These are | |
125 | * shared between all the CPUs. It doesn't make sense to have per-CPU queues | |
126 | * as the number of threads bears no relation to the number of CPUs. | |
127 | * | |
128 | * There are two queues of work items: one for slow work items, and one for | |
129 | * very slow work items. | |
130 | */ | |
8fba10a4 DH |
131 | LIST_HEAD(slow_work_queue); |
132 | LIST_HEAD(vslow_work_queue); | |
133 | DEFINE_SPINLOCK(slow_work_queue_lock); | |
07fe7cb7 DH |
134 | |
135 | /* | |
136 | * The thread controls. A variable used to signal to the threads that they | |
137 | * should exit when the queue is empty, a waitqueue used by the threads to wait | |
138 | * for signals, and a completion set by the last thread to exit. | |
139 | */ | |
140 | static bool slow_work_threads_should_exit; | |
141 | static DECLARE_WAIT_QUEUE_HEAD(slow_work_thread_wq); | |
142 | static DECLARE_COMPLETION(slow_work_last_thread_exited); | |
143 | ||
144 | /* | |
145 | * The number of users of the thread pool and its lock. Whilst this is zero we | |
146 | * have no threads hanging around, and when it falls back to zero, we wait for | |
147 | * all active or queued work items to complete and kill all the threads we do have. | |
148 | */ | |
149 | static int slow_work_user_count; | |
150 | static DEFINE_MUTEX(slow_work_user_lock); | |
151 | ||
4d8bb2cb JA |
152 | static inline int slow_work_get_ref(struct slow_work *work) |
153 | { | |
154 | if (work->ops->get_ref) | |
155 | return work->ops->get_ref(work); | |
156 | ||
157 | return 0; | |
158 | } | |
159 | ||
160 | static inline void slow_work_put_ref(struct slow_work *work) | |
161 | { | |
162 | if (work->ops->put_ref) | |
163 | work->ops->put_ref(work); | |
164 | } | |
165 | ||
07fe7cb7 DH |
166 | /* |
167 | * Calculate the maximum number of active threads in the pool that are | |
168 | * permitted to process very slow work items. | |
169 | * | |
170 | * The answer is rounded up to at least 1, but may not equal or exceed the | |
171 | * maximum number of the threads in the pool. This means we always have at | |
172 | * least one thread that can process slow work items, and we always have at | |
173 | * least one thread that won't get tied up doing so. | |
174 | */ | |
175 | static unsigned slow_work_calc_vsmax(void) | |
176 | { | |
177 | unsigned vsmax; | |
178 | ||
179 | vsmax = atomic_read(&slow_work_thread_count) * vslow_work_proportion; | |
180 | vsmax /= 100; | |
181 | vsmax = max(vsmax, 1U); | |
182 | return min(vsmax, slow_work_max_threads - 1); | |
183 | } | |
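
A quick worked example of the clamping above may help; the numbers are illustrative only and assume the default tunables:

/*
 * Example: slow_work_max_threads = 4, vslow_work_proportion = 50 and four
 * threads currently running:
 *
 *	vsmax = 4 * 50 / 100	= 2
 *	vsmax = max(2, 1)	= 2
 *	vsmax = min(2, 4 - 1)	= 2
 *
 * With a single thread running, 1 * 50 / 100 = 0 is rounded up to 1, and the
 * final min() against slow_work_max_threads - 1 means the pool's full
 * complement of threads can never all be tied up in very slow work at once.
 */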
184 | ||
185 | /* | |
186 | * Attempt to execute stuff queued on a slow thread. Return true if we managed | |
187 | * it, false if there was nothing to do. | |
188 | */ | |
8fba10a4 | 189 | static noinline bool slow_work_execute(int id) |
07fe7cb7 | 190 | { |
3d7a641e DH |
191 | #ifdef CONFIG_MODULES |
192 | struct module *module; | |
193 | #endif | |
07fe7cb7 DH |
194 | struct slow_work *work = NULL; |
195 | unsigned vsmax; | |
196 | bool very_slow; | |
197 | ||
198 | vsmax = slow_work_calc_vsmax(); | |
199 | ||
109d9272 DH |
200 | /* see if we can schedule a new thread to be started if we're not |
201 | * keeping up with the work */ | |
202 | if (!waitqueue_active(&slow_work_thread_wq) && | |
203 | (!list_empty(&slow_work_queue) || !list_empty(&vslow_work_queue)) && | |
204 | atomic_read(&slow_work_thread_count) < slow_work_max_threads && | |
205 | !slow_work_may_not_start_new_thread) | |
206 | slow_work_enqueue(&slow_work_new_thread); | |
207 | ||
07fe7cb7 DH |
208 | /* find something to execute */ |
209 | spin_lock_irq(&slow_work_queue_lock); | |
210 | if (!list_empty(&vslow_work_queue) && | |
211 | atomic_read(&vslow_work_executing_count) < vsmax) { | |
212 | work = list_entry(vslow_work_queue.next, | |
213 | struct slow_work, link); | |
214 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | |
215 | BUG(); | |
216 | list_del_init(&work->link); | |
217 | atomic_inc(&vslow_work_executing_count); | |
218 | very_slow = true; | |
219 | } else if (!list_empty(&slow_work_queue)) { | |
220 | work = list_entry(slow_work_queue.next, | |
221 | struct slow_work, link); | |
222 | if (test_and_set_bit_lock(SLOW_WORK_EXECUTING, &work->flags)) | |
223 | BUG(); | |
224 | list_del_init(&work->link); | |
225 | very_slow = false; | |
226 | } else { | |
227 | very_slow = false; /* avoid the compiler warning */ | |
228 | } | |
3d7a641e DH |
229 | |
230 | #ifdef CONFIG_MODULES | |
231 | if (work) | |
232 | slow_work_thread_processing[id] = work->owner; | |
233 | #endif | |
8fba10a4 DH |
234 | if (work) { |
235 | slow_work_mark_time(work); | |
236 | slow_work_begin_exec(id, work); | |
237 | } | |
3d7a641e | 238 | |
07fe7cb7 DH |
239 | spin_unlock_irq(&slow_work_queue_lock); |
240 | ||
241 | if (!work) | |
242 | return false; | |
243 | ||
244 | if (!test_and_clear_bit(SLOW_WORK_PENDING, &work->flags)) | |
245 | BUG(); | |
246 | ||
01609502 JA |
247 | /* don't execute if the work is in the process of being cancelled */ |
248 | if (!test_bit(SLOW_WORK_CANCELLING, &work->flags)) | |
249 | work->ops->execute(work); | |
07fe7cb7 DH |
250 | |
251 | if (very_slow) | |
252 | atomic_dec(&vslow_work_executing_count); | |
253 | clear_bit_unlock(SLOW_WORK_EXECUTING, &work->flags); | |
254 | ||
01609502 JA |
255 | /* wake up anyone waiting for this work to be complete */ |
256 | wake_up_bit(&work->flags, SLOW_WORK_EXECUTING); | |
257 | ||
8fba10a4 DH |
258 | slow_work_end_exec(id, work); |
259 | ||
07fe7cb7 DH |
260 | /* if someone tried to enqueue the item whilst we were executing it, |
261 | * then it'll be left unenqueued to avoid multiple threads trying to | |
262 | * execute it simultaneously | |
263 | * | |
264 | * there is, however, a race between us testing the pending flag and | |
265 | * getting the spinlock, and between the enqueuer setting the pending | |
266 | * flag and getting the spinlock, so we use a deferral bit to tell us | |
267 | * if the enqueuer got there first | |
268 | */ | |
269 | if (test_bit(SLOW_WORK_PENDING, &work->flags)) { | |
270 | spin_lock_irq(&slow_work_queue_lock); | |
271 | ||
272 | if (!test_bit(SLOW_WORK_EXECUTING, &work->flags) && | |
273 | test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) | |
274 | goto auto_requeue; | |
275 | ||
276 | spin_unlock_irq(&slow_work_queue_lock); | |
277 | } | |
278 | ||
3d7a641e | 279 | /* sort out the race between module unloading and put_ref() */ |
4d8bb2cb | 280 | slow_work_put_ref(work); |
3d7a641e DH |
281 | |
282 | #ifdef CONFIG_MODULES | |
283 | module = slow_work_thread_processing[id]; | |
284 | slow_work_thread_processing[id] = NULL; | |
285 | smp_mb(); | |
286 | if (slow_work_unreg_work_item == work || | |
287 | slow_work_unreg_module == module) | |
288 | wake_up_all(&slow_work_unreg_wq); | |
289 | #endif | |
290 | ||
07fe7cb7 DH |
291 | return true; |
292 | ||
293 | auto_requeue: | |
294 | /* we must complete the enqueue operation | |
295 | * - we transfer our ref on the item back to the appropriate queue | |
296 | * - don't wake another thread up as we're awake already | |
297 | */ | |
8fba10a4 | 298 | slow_work_mark_time(work); |
07fe7cb7 DH |
299 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) |
300 | list_add_tail(&work->link, &vslow_work_queue); | |
301 | else | |
302 | list_add_tail(&work->link, &slow_work_queue); | |
303 | spin_unlock_irq(&slow_work_queue_lock); | |
3d7a641e | 304 | slow_work_thread_processing[id] = NULL; |
07fe7cb7 DH |
305 | return true; |
306 | } | |
307 | ||
308 | /** | |
309 | * slow_work_enqueue - Schedule a slow work item for processing | |
310 | * @work: The work item to queue | |
311 | * | |
312 | * Schedule a slow work item for processing. If the item is already undergoing | |
313 | * execution, this guarantees not to re-enter the execution routine until the | |
314 | * first execution finishes. | |
315 | * | |
316 | * The item is pinned by this function as it retains a reference to it, managed | |
317 | * through the item operations. The item is unpinned once it has been | |
318 | * executed. | |
319 | * | |
320 | * An item may hog the thread that is running it for a relatively large amount | |
321 | * of time, sufficient, for example, to perform several lookup, mkdir, create | |
322 | * and setxattr operations. It may sleep on I/O and may sleep to obtain locks. | |
323 | * | |
324 | * Conversely, if a number of items are awaiting processing, it may take some | |
325 | * time before any given item is given attention. The number of threads in the | |
326 | * pool may be increased to deal with demand, but only up to a limit. | |
327 | * | |
328 | * If SLOW_WORK_VERY_SLOW is set on the work item, then it will be placed in | |
329 | * the very slow queue, from which only a portion of the threads will be | |
330 | * allowed to pick items to execute. This ensures that very slow items won't | |
331 | * overly block ones that are just ordinarily slow. | |
332 | * | |
01609502 JA |
333 | * Returns 0 if successful, -EAGAIN if not (or -ECANCELED if queueing of |
334 | * cancelled work is attempted) |
07fe7cb7 DH |
335 | */ |
336 | int slow_work_enqueue(struct slow_work *work) | |
337 | { | |
338 | unsigned long flags; | |
01609502 JA |
339 | int ret; |
340 | ||
341 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | |
342 | return -ECANCELED; | |
07fe7cb7 DH |
343 | |
344 | BUG_ON(slow_work_user_count <= 0); | |
345 | BUG_ON(!work); | |
346 | BUG_ON(!work->ops); | |
07fe7cb7 DH |
347 | |
348 | /* when honouring an enqueue request, we only promise that we will run | |
349 | * the work function in the future; we do not promise to run it once | |
350 | * per enqueue request | |
351 | * | |
352 | * we use the PENDING bit to merge together repeat requests without | |
353 | * having to disable IRQs and take the spinlock, whilst still | |
354 | * maintaining our promise | |
355 | */ | |
356 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | |
357 | spin_lock_irqsave(&slow_work_queue_lock, flags); | |
358 | ||
01609502 JA |
359 | if (unlikely(test_bit(SLOW_WORK_CANCELLING, &work->flags))) |
360 | goto cancelled; | |
361 | ||
07fe7cb7 DH |
362 | /* we promise that we will not attempt to execute the work |
363 | * function in more than one thread simultaneously | |
364 | * | |
365 | * this, however, leaves us with a problem if we're asked to | |
366 | * enqueue the work whilst someone is executing the work | |
367 | * function as simply queueing the work immediately means that | |
368 | * another thread may try executing it whilst it is already | |
369 | * under execution | |
370 | * | |
371 | * to deal with this, we set the ENQ_DEFERRED bit instead of | |
372 | * enqueueing, and the thread currently executing the work | |
373 | * function will enqueue the work item when the work function | |
374 | * returns and it has cleared the EXECUTING bit | |
375 | */ | |
376 | if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { | |
377 | set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); | |
378 | } else { | |
01609502 JA |
379 | ret = slow_work_get_ref(work); |
380 | if (ret < 0) | |
381 | goto failed; | |
8fba10a4 | 382 | slow_work_mark_time(work); |
07fe7cb7 DH |
383 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) |
384 | list_add_tail(&work->link, &vslow_work_queue); | |
385 | else | |
386 | list_add_tail(&work->link, &slow_work_queue); | |
387 | wake_up(&slow_work_thread_wq); | |
388 | } | |
389 | ||
390 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | |
391 | } | |
392 | return 0; | |
393 | ||
01609502 JA |
394 | cancelled: |
395 | ret = -ECANCELED; | |
396 | failed: | |
07fe7cb7 | 397 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); |
01609502 | 398 | return ret; |
07fe7cb7 DH |
399 | } |
400 | EXPORT_SYMBOL(slow_work_enqueue); | |
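
For reference, a minimal sketch of the caller's side of this interface follows; it is not part of this file, and the my_* names are hypothetical:

#include <linux/module.h>
#include <linux/slow-work.h>

struct my_object {
	struct slow_work work;
	/* ... the client's own data ... */
};

static int my_get_ref(struct slow_work *work)
{
	/* pin the enclosing object; return 0 on success */
	return 0;
}

static void my_put_ref(struct slow_work *work)
{
	/* drop the reference taken in my_get_ref() */
}

static void my_execute(struct slow_work *work)
{
	struct my_object *obj = container_of(work, struct my_object, work);

	/* the slow part goes here; sleeping and blocking on I/O are fine */
	(void) obj;
}

static const struct slow_work_ops my_slow_work_ops = {
	.owner		= THIS_MODULE,
	.get_ref	= my_get_ref,
	.put_ref	= my_put_ref,
	.execute	= my_execute,
};

/* called after slow_work_register_user(THIS_MODULE) has succeeded */
static int my_queue(struct my_object *obj)
{
	slow_work_init(&obj->work, &my_slow_work_ops);

	/* returns -ECANCELED if the item is being cancelled, or the error
	 * from my_get_ref() if taking a reference failed */
	return slow_work_enqueue(&obj->work);
}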
401 | ||
01609502 JA |
402 | static int slow_work_wait(void *word) |
403 | { | |
404 | schedule(); | |
405 | return 0; | |
406 | } | |
407 | ||
408 | /** | |
409 | * slow_work_cancel - Cancel a slow work item | |
410 | * @work: The work item to cancel | |
411 | * | |
412 | * This function will cancel a previously enqueued work item. If we cannot | |
413 | * cancel the work item, it is guaranteed to have run when this function | |
414 | * returns. | |
415 | */ | |
416 | void slow_work_cancel(struct slow_work *work) | |
417 | { | |
418 | bool wait = true, put = false; | |
419 | ||
420 | set_bit(SLOW_WORK_CANCELLING, &work->flags); | |
6b8268b1 JA |
421 | smp_mb(); |
422 | ||
423 | /* if the work item is a delayed work item with an active timer, we | |
424 | * need to wait for the timer to finish _before_ getting the spinlock, | |
425 | * lest we deadlock against the timer routine | |
426 | * | |
427 | * the timer routine will leave DELAYED set if it notices the | |
428 | * CANCELLING flag in time | |
429 | */ | |
430 | if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { | |
431 | struct delayed_slow_work *dwork = | |
432 | container_of(work, struct delayed_slow_work, work); | |
433 | del_timer_sync(&dwork->timer); | |
434 | } | |
01609502 JA |
435 | |
436 | spin_lock_irq(&slow_work_queue_lock); | |
437 | ||
6b8268b1 JA |
438 | if (test_bit(SLOW_WORK_DELAYED, &work->flags)) { |
439 | /* the timer routine aborted or never happened, so we are left | |
440 | * holding the timer's reference on the item and should just | |
441 | * drop the pending flag and wait for any ongoing execution to | |
442 | * finish */ | |
443 | struct delayed_slow_work *dwork = | |
444 | container_of(work, struct delayed_slow_work, work); | |
445 | ||
446 | BUG_ON(timer_pending(&dwork->timer)); | |
447 | BUG_ON(!list_empty(&work->link)); | |
448 | ||
449 | clear_bit(SLOW_WORK_DELAYED, &work->flags); | |
450 | put = true; | |
451 | clear_bit(SLOW_WORK_PENDING, &work->flags); | |
452 | ||
453 | } else if (test_bit(SLOW_WORK_PENDING, &work->flags) && | |
454 | !list_empty(&work->link)) { | |
01609502 JA |
455 | /* the link in the pending queue holds a reference on the item |
456 | * that we will need to release */ | |
457 | list_del_init(&work->link); | |
458 | wait = false; | |
459 | put = true; | |
460 | clear_bit(SLOW_WORK_PENDING, &work->flags); | |
461 | ||
462 | } else if (test_and_clear_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags)) { | |
463 | /* the executor is holding our only reference on the item, so | |
464 | * we merely need to wait for it to finish executing */ | |
465 | clear_bit(SLOW_WORK_PENDING, &work->flags); | |
466 | } | |
467 | ||
468 | spin_unlock_irq(&slow_work_queue_lock); | |
469 | ||
470 | /* the EXECUTING flag is set by the executor whilst the spinlock is set | |
471 | * and before the item is dequeued - so assuming the above doesn't | |
472 | * actually dequeue it, simply waiting for the EXECUTING flag to be | |
473 | * released here should be sufficient */ | |
474 | if (wait) | |
475 | wait_on_bit(&work->flags, SLOW_WORK_EXECUTING, slow_work_wait, | |
476 | TASK_UNINTERRUPTIBLE); | |
477 | ||
478 | clear_bit(SLOW_WORK_CANCELLING, &work->flags); | |
479 | if (put) | |
480 | slow_work_put_ref(work); | |
481 | } | |
482 | EXPORT_SYMBOL(slow_work_cancel); | |
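
Continuing the hypothetical my_object sketch from above, teardown might look like this; kfree() here comes from <linux/slab.h>:

static void my_object_destroy(struct my_object *obj)
{
	/* after this returns, my_execute() has either completed or will
	 * never be entered, so the object can be freed safely */
	slow_work_cancel(&obj->work);
	kfree(obj);
}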
483 | ||
6b8268b1 JA |
484 | /* |
485 | * Handle expiry of the delay timer, indicating that a delayed slow work item | |
486 | * should now be queued if not cancelled | |
487 | */ | |
488 | static void delayed_slow_work_timer(unsigned long data) | |
489 | { | |
490 | struct slow_work *work = (struct slow_work *) data; | |
491 | unsigned long flags; | |
492 | bool queued = false, put = false; | |
493 | ||
494 | spin_lock_irqsave(&slow_work_queue_lock, flags); | |
495 | if (likely(!test_bit(SLOW_WORK_CANCELLING, &work->flags))) { | |
496 | clear_bit(SLOW_WORK_DELAYED, &work->flags); | |
497 | ||
498 | if (test_bit(SLOW_WORK_EXECUTING, &work->flags)) { | |
499 | /* we discard the reference the timer was holding in | |
500 | * favour of the one the executor holds */ | |
501 | set_bit(SLOW_WORK_ENQ_DEFERRED, &work->flags); | |
502 | put = true; | |
503 | } else { | |
8fba10a4 | 504 | slow_work_mark_time(work); |
6b8268b1 JA |
505 | if (test_bit(SLOW_WORK_VERY_SLOW, &work->flags)) |
506 | list_add_tail(&work->link, &vslow_work_queue); | |
507 | else | |
508 | list_add_tail(&work->link, &slow_work_queue); | |
509 | queued = true; | |
510 | } | |
511 | } | |
512 | ||
513 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | |
514 | if (put) | |
515 | slow_work_put_ref(work); | |
516 | if (queued) | |
517 | wake_up(&slow_work_thread_wq); | |
518 | } | |
519 | ||
520 | /** | |
521 | * delayed_slow_work_enqueue - Schedule a delayed slow work item for processing | |
522 | * @dwork: The delayed work item to queue | |
523 | * @delay: When to start executing the work, in jiffies from now | |
524 | * | |
525 | * This is similar to slow_work_enqueue(), but it adds a delay before the work | |
526 | * is actually queued for processing. | |
527 | * | |
528 | * The item can have delayed processing requested on it whilst it is being | |
529 | * executed. The delay will begin immediately, and if it expires before the | |
530 | * item finishes executing, the item will be placed back on the queue when it | |
531 | * has finished executing. | |
532 | */ | |
533 | int delayed_slow_work_enqueue(struct delayed_slow_work *dwork, | |
534 | unsigned long delay) | |
535 | { | |
536 | struct slow_work *work = &dwork->work; | |
537 | unsigned long flags; | |
538 | int ret; | |
539 | ||
540 | if (delay == 0) | |
541 | return slow_work_enqueue(&dwork->work); | |
542 | ||
543 | BUG_ON(slow_work_user_count <= 0); | |
544 | BUG_ON(!work); | |
545 | BUG_ON(!work->ops); | |
546 | ||
547 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | |
548 | return -ECANCELED; | |
549 | ||
550 | if (!test_and_set_bit_lock(SLOW_WORK_PENDING, &work->flags)) { | |
551 | spin_lock_irqsave(&slow_work_queue_lock, flags); | |
552 | ||
553 | if (test_bit(SLOW_WORK_CANCELLING, &work->flags)) | |
554 | goto cancelled; | |
555 | ||
556 | /* the timer holds a reference whilst it is pending */ | |
557 | ret = work->ops->get_ref(work); | |
558 | if (ret < 0) | |
559 | goto cant_get_ref; | |
560 | ||
561 | if (test_and_set_bit(SLOW_WORK_DELAYED, &work->flags)) | |
562 | BUG(); | |
563 | dwork->timer.expires = jiffies + delay; | |
564 | dwork->timer.data = (unsigned long) work; | |
565 | dwork->timer.function = delayed_slow_work_timer; | |
566 | add_timer(&dwork->timer); | |
567 | ||
568 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | |
569 | } | |
570 | ||
571 | return 0; | |
572 | ||
573 | cancelled: | |
574 | ret = -ECANCELED; | |
575 | cant_get_ref: | |
576 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | |
577 | return ret; | |
578 | } | |
579 | EXPORT_SYMBOL(delayed_slow_work_enqueue); | |
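
A sketch of the delayed variant, again illustrative only; it assumes the delayed_slow_work_init() initialiser declared alongside this API in <linux/slow-work.h> and reuses the hypothetical ops from the earlier example:

static struct delayed_slow_work my_dwork;

static int my_queue_later(void)
{
	delayed_slow_work_init(&my_dwork, &my_slow_work_ops);

	/* start roughly five seconds from now; returns -ECANCELED if the
	 * item is being cancelled, or the error from get_ref() */
	return delayed_slow_work_enqueue(&my_dwork, 5 * HZ);
}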
580 | ||
009789f0 CP |
581 | /* |
582 | * Schedule a cull of the thread pool at some time in the near future | |
583 | */ | |
584 | static void slow_work_schedule_cull(void) | |
585 | { | |
586 | mod_timer(&slow_work_cull_timer, | |
587 | round_jiffies(jiffies + SLOW_WORK_CULL_TIMEOUT)); | |
588 | } | |
589 | ||
109d9272 DH |
590 | /* |
591 | * Worker thread culling algorithm | |
592 | */ | |
593 | static bool slow_work_cull_thread(void) | |
594 | { | |
595 | unsigned long flags; | |
596 | bool do_cull = false; | |
597 | ||
598 | spin_lock_irqsave(&slow_work_queue_lock, flags); | |
599 | ||
600 | if (slow_work_cull) { | |
601 | slow_work_cull = false; | |
602 | ||
603 | if (list_empty(&slow_work_queue) && | |
604 | list_empty(&vslow_work_queue) && | |
605 | atomic_read(&slow_work_thread_count) > | |
606 | slow_work_min_threads) { | |
009789f0 | 607 | slow_work_schedule_cull(); |
109d9272 DH |
608 | do_cull = true; |
609 | } | |
610 | } | |
611 | ||
612 | spin_unlock_irqrestore(&slow_work_queue_lock, flags); | |
613 | return do_cull; | |
614 | } | |
615 | ||
07fe7cb7 DH |
616 | /* |
617 | * Determine if there is slow work available for dispatch | |
618 | */ | |
619 | static inline bool slow_work_available(int vsmax) | |
620 | { | |
621 | return !list_empty(&slow_work_queue) || | |
622 | (!list_empty(&vslow_work_queue) && | |
623 | atomic_read(&vslow_work_executing_count) < vsmax); | |
624 | } | |
625 | ||
626 | /* | |
627 | * Worker thread dispatcher | |
628 | */ | |
629 | static int slow_work_thread(void *_data) | |
630 | { | |
3d7a641e | 631 | int vsmax, id; |
07fe7cb7 DH |
632 | |
633 | DEFINE_WAIT(wait); | |
634 | ||
635 | set_freezable(); | |
636 | set_user_nice(current, -5); | |
637 | ||
3d7a641e DH |
638 | /* allocate ourselves an ID */ |
639 | spin_lock_irq(&slow_work_queue_lock); | |
640 | id = find_first_zero_bit(slow_work_ids, SLOW_WORK_THREAD_LIMIT); | |
641 | BUG_ON(id < 0 || id >= SLOW_WORK_THREAD_LIMIT); | |
642 | __set_bit(id, slow_work_ids); | |
8fba10a4 | 643 | slow_work_set_thread_pid(id, current->pid); |
3d7a641e DH |
644 | spin_unlock_irq(&slow_work_queue_lock); |
645 | ||
646 | sprintf(current->comm, "kslowd%03u", id); | |
647 | ||
07fe7cb7 DH |
648 | for (;;) { |
649 | vsmax = vslow_work_proportion; | |
650 | vsmax *= atomic_read(&slow_work_thread_count); | |
651 | vsmax /= 100; | |
652 | ||
b415c49a ON |
653 | prepare_to_wait_exclusive(&slow_work_thread_wq, &wait, |
654 | TASK_INTERRUPTIBLE); | |
07fe7cb7 DH |
655 | if (!freezing(current) && |
656 | !slow_work_threads_should_exit && | |
109d9272 DH |
657 | !slow_work_available(vsmax) && |
658 | !slow_work_cull) | |
07fe7cb7 DH |
659 | schedule(); |
660 | finish_wait(&slow_work_thread_wq, &wait); | |
661 | ||
662 | try_to_freeze(); | |
663 | ||
664 | vsmax = vslow_work_proportion; | |
665 | vsmax *= atomic_read(&slow_work_thread_count); | |
666 | vsmax /= 100; | |
667 | ||
3d7a641e | 668 | if (slow_work_available(vsmax) && slow_work_execute(id)) { |
07fe7cb7 | 669 | cond_resched(); |
109d9272 DH |
670 | if (list_empty(&slow_work_queue) && |
671 | list_empty(&vslow_work_queue) && | |
672 | atomic_read(&slow_work_thread_count) > | |
673 | slow_work_min_threads) | |
009789f0 | 674 | slow_work_schedule_cull(); |
07fe7cb7 DH |
675 | continue; |
676 | } | |
677 | ||
678 | if (slow_work_threads_should_exit) | |
679 | break; | |
109d9272 DH |
680 | |
681 | if (slow_work_cull && slow_work_cull_thread()) | |
682 | break; | |
07fe7cb7 DH |
683 | } |
684 | ||
3d7a641e | 685 | spin_lock_irq(&slow_work_queue_lock); |
8fba10a4 | 686 | slow_work_set_thread_pid(id, 0); |
3d7a641e DH |
687 | __clear_bit(id, slow_work_ids); |
688 | spin_unlock_irq(&slow_work_queue_lock); | |
689 | ||
07fe7cb7 DH |
690 | if (atomic_dec_and_test(&slow_work_thread_count)) |
691 | complete_and_exit(&slow_work_last_thread_exited, 0); | |
692 | return 0; | |
693 | } | |
694 | ||
109d9272 DH |
695 | /* |
696 | * Handle thread cull timer expiration | |
697 | */ | |
698 | static void slow_work_cull_timeout(unsigned long data) | |
699 | { | |
700 | slow_work_cull = true; | |
701 | wake_up(&slow_work_thread_wq); | |
702 | } | |
703 | ||
109d9272 DH |
704 | /* |
705 | * Start a new slow work thread | |
706 | */ | |
707 | static void slow_work_new_thread_execute(struct slow_work *work) | |
708 | { | |
709 | struct task_struct *p; | |
710 | ||
711 | if (slow_work_threads_should_exit) | |
712 | return; | |
713 | ||
714 | if (atomic_read(&slow_work_thread_count) >= slow_work_max_threads) | |
715 | return; | |
716 | ||
717 | if (!mutex_trylock(&slow_work_user_lock)) | |
718 | return; | |
719 | ||
720 | slow_work_may_not_start_new_thread = true; | |
721 | atomic_inc(&slow_work_thread_count); | |
722 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | |
723 | if (IS_ERR(p)) { | |
724 | printk(KERN_DEBUG "Slow work thread pool: OOM\n"); | |
725 | if (atomic_dec_and_test(&slow_work_thread_count)) | |
726 | BUG(); /* we're running on a slow work thread... */ | |
727 | mod_timer(&slow_work_oom_timer, | |
009789f0 | 728 | round_jiffies(jiffies + SLOW_WORK_OOM_TIMEOUT)); |
109d9272 DH |
729 | } else { |
730 | /* ratelimit the starting of new threads */ | |
731 | mod_timer(&slow_work_oom_timer, jiffies + 1); | |
732 | } | |
733 | ||
734 | mutex_unlock(&slow_work_user_lock); | |
735 | } | |
736 | ||
737 | static const struct slow_work_ops slow_work_new_thread_ops = { | |
3d7a641e | 738 | .owner = THIS_MODULE, |
109d9272 | 739 | .execute = slow_work_new_thread_execute, |
8fba10a4 DH |
740 | #ifdef CONFIG_SLOW_WORK_PROC |
741 | .desc = slow_work_new_thread_desc, | |
742 | #endif | |
109d9272 DH |
743 | }; |
744 | ||
745 | /* | |
746 | * post-OOM new thread start suppression expiration | |
747 | */ | |
748 | static void slow_work_oom_timeout(unsigned long data) | |
749 | { | |
750 | slow_work_may_not_start_new_thread = false; | |
751 | } | |
752 | ||
12e22c5e DH |
753 | #ifdef CONFIG_SYSCTL |
754 | /* | |
755 | * Handle adjustment of the minimum number of threads | |
756 | */ | |
757 | static int slow_work_min_threads_sysctl(struct ctl_table *table, int write, | |
8d65af78 | 758 | void __user *buffer, |
12e22c5e DH |
759 | size_t *lenp, loff_t *ppos) |
760 | { | |
8d65af78 | 761 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
12e22c5e DH |
762 | int n; |
763 | ||
764 | if (ret == 0) { | |
765 | mutex_lock(&slow_work_user_lock); | |
766 | if (slow_work_user_count > 0) { | |
767 | /* see if we need to start or stop threads */ | |
768 | n = atomic_read(&slow_work_thread_count) - | |
769 | slow_work_min_threads; | |
770 | ||
771 | if (n < 0 && !slow_work_may_not_start_new_thread) | |
772 | slow_work_enqueue(&slow_work_new_thread); | |
773 | else if (n > 0) | |
009789f0 | 774 | slow_work_schedule_cull(); |
12e22c5e DH |
775 | } |
776 | mutex_unlock(&slow_work_user_lock); | |
777 | } | |
778 | ||
779 | return ret; | |
780 | } | |
781 | ||
782 | /* | |
783 | * Handle adjustment of the maximum number of threads | |
784 | */ | |
785 | static int slow_work_max_threads_sysctl(struct ctl_table *table, int write, | |
8d65af78 | 786 | void __user *buffer, |
12e22c5e DH |
787 | size_t *lenp, loff_t *ppos) |
788 | { | |
8d65af78 | 789 | int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); |
12e22c5e DH |
790 | int n; |
791 | ||
792 | if (ret == 0) { | |
793 | mutex_lock(&slow_work_user_lock); | |
794 | if (slow_work_user_count > 0) { | |
795 | /* see if we need to stop threads */ | |
796 | n = slow_work_max_threads - | |
797 | atomic_read(&slow_work_thread_count); | |
798 | ||
799 | if (n < 0) | |
009789f0 | 800 | slow_work_schedule_cull(); |
12e22c5e DH |
801 | } |
802 | mutex_unlock(&slow_work_user_lock); | |
803 | } | |
804 | ||
805 | return ret; | |
806 | } | |
807 | #endif /* CONFIG_SYSCTL */ | |
808 | ||
07fe7cb7 DH |
809 | /** |
810 | * slow_work_register_user - Register a user of the facility | |
3d7a641e | 811 | * @module: The module about to make use of the facility |
07fe7cb7 DH |
812 | * |
813 | * Register a user of the facility, starting up the initial threads if there | |
814 | * aren't any other users at this point. This will return 0 if successful, or | |
815 | * an error if not. | |
816 | */ | |
3d7a641e | 817 | int slow_work_register_user(struct module *module) |
07fe7cb7 DH |
818 | { |
819 | struct task_struct *p; | |
820 | int loop; | |
821 | ||
822 | mutex_lock(&slow_work_user_lock); | |
823 | ||
824 | if (slow_work_user_count == 0) { | |
825 | printk(KERN_NOTICE "Slow work thread pool: Starting up\n"); | |
826 | init_completion(&slow_work_last_thread_exited); | |
827 | ||
828 | slow_work_threads_should_exit = false; | |
109d9272 DH |
829 | slow_work_init(&slow_work_new_thread, |
830 | &slow_work_new_thread_ops); | |
831 | slow_work_may_not_start_new_thread = false; | |
832 | slow_work_cull = false; | |
07fe7cb7 DH |
833 | |
834 | /* start the minimum number of threads */ | |
835 | for (loop = 0; loop < slow_work_min_threads; loop++) { | |
836 | atomic_inc(&slow_work_thread_count); | |
837 | p = kthread_run(slow_work_thread, NULL, "kslowd"); | |
838 | if (IS_ERR(p)) | |
839 | goto error; | |
840 | } | |
841 | printk(KERN_NOTICE "Slow work thread pool: Ready\n"); | |
842 | } | |
843 | ||
844 | slow_work_user_count++; | |
845 | mutex_unlock(&slow_work_user_lock); | |
846 | return 0; | |
847 | ||
848 | error: | |
849 | if (atomic_dec_and_test(&slow_work_thread_count)) | |
850 | complete(&slow_work_last_thread_exited); | |
851 | if (loop > 0) { | |
852 | printk(KERN_ERR "Slow work thread pool:" | |
853 | " Aborting startup on ENOMEM\n"); | |
854 | slow_work_threads_should_exit = true; | |
855 | wake_up_all(&slow_work_thread_wq); | |
856 | wait_for_completion(&slow_work_last_thread_exited); | |
857 | printk(KERN_ERR "Slow work thread pool: Aborted\n"); | |
858 | } | |
859 | mutex_unlock(&slow_work_user_lock); | |
860 | return PTR_ERR(p); | |
861 | } | |
862 | EXPORT_SYMBOL(slow_work_register_user); | |
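
A module would typically bracket its use of the facility as sketched below (illustrative only), so that the pool exists for as long as it might queue work and all of its items are flushed before it unloads:

static int __init my_module_init(void)
{
	int ret;

	/* bring the pool up, or join the existing users */
	ret = slow_work_register_user(THIS_MODULE);
	if (ret < 0)
		return ret;

	/* ... slow_work_enqueue() may now be called ... */
	return 0;
}

static void __exit my_module_exit(void)
{
	/* waits for this module's outstanding items, then drops the user count */
	slow_work_unregister_user(THIS_MODULE);
}

module_init(my_module_init);
module_exit(my_module_exit);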
863 | ||
3d7a641e DH |
864 | /* |
865 | * wait for all outstanding items from the calling module to complete | |
866 | * - note that more items may be queued whilst we're waiting | |
867 | */ | |
868 | static void slow_work_wait_for_items(struct module *module) | |
869 | { | |
870 | DECLARE_WAITQUEUE(myself, current); | |
871 | struct slow_work *work; | |
872 | int loop; | |
873 | ||
874 | mutex_lock(&slow_work_unreg_sync_lock); | |
875 | add_wait_queue(&slow_work_unreg_wq, &myself); | |
876 | ||
877 | for (;;) { | |
878 | spin_lock_irq(&slow_work_queue_lock); | |
879 | ||
880 | /* first of all, we wait for the last queued item in each list | |
881 | * to be processed */ | |
882 | list_for_each_entry_reverse(work, &vslow_work_queue, link) { | |
883 | if (work->owner == module) { | |
884 | set_current_state(TASK_UNINTERRUPTIBLE); | |
885 | slow_work_unreg_work_item = work; | |
886 | goto do_wait; | |
887 | } | |
888 | } | |
889 | list_for_each_entry_reverse(work, &slow_work_queue, link) { | |
890 | if (work->owner == module) { | |
891 | set_current_state(TASK_UNINTERRUPTIBLE); | |
892 | slow_work_unreg_work_item = work; | |
893 | goto do_wait; | |
894 | } | |
895 | } | |
896 | ||
897 | /* then we wait for the items being processed to finish */ | |
898 | slow_work_unreg_module = module; | |
899 | smp_mb(); | |
900 | for (loop = 0; loop < SLOW_WORK_THREAD_LIMIT; loop++) { | |
901 | if (slow_work_thread_processing[loop] == module) | |
902 | goto do_wait; | |
903 | } | |
904 | spin_unlock_irq(&slow_work_queue_lock); | |
905 | break; /* okay, we're done */ | |
906 | ||
907 | do_wait: | |
908 | spin_unlock_irq(&slow_work_queue_lock); | |
909 | schedule(); | |
910 | slow_work_unreg_work_item = NULL; | |
911 | slow_work_unreg_module = NULL; | |
912 | } | |
913 | ||
914 | remove_wait_queue(&slow_work_unreg_wq, &myself); | |
915 | mutex_unlock(&slow_work_unreg_sync_lock); | |
916 | } | |
917 | ||
07fe7cb7 DH |
918 | /** |
919 | * slow_work_unregister_user - Unregister a user of the facility | |
3d7a641e | 920 | * @module: The module whose items should be cleared |
07fe7cb7 DH |
921 | * |
922 | * Unregister a user of the facility, killing all the threads if this was the | |
923 | * last one. | |
3d7a641e DH |
924 | * |
925 | * This waits for all the work items belonging to the nominated module to go | |
926 | * away before proceeding. | |
07fe7cb7 | 927 | */ |
3d7a641e | 928 | void slow_work_unregister_user(struct module *module) |
07fe7cb7 | 929 | { |
3d7a641e DH |
930 | /* first of all, wait for all outstanding items from the calling module |
931 | * to complete */ | |
932 | if (module) | |
933 | slow_work_wait_for_items(module); | |
934 | ||
935 | /* then we can actually go about shutting down the facility if need | |
936 | * be */ | |
07fe7cb7 DH |
937 | mutex_lock(&slow_work_user_lock); |
938 | ||
939 | BUG_ON(slow_work_user_count <= 0); | |
940 | ||
941 | slow_work_user_count--; | |
942 | if (slow_work_user_count == 0) { | |
943 | printk(KERN_NOTICE "Slow work thread pool: Shutting down\n"); | |
944 | slow_work_threads_should_exit = true; | |
418df63c JC |
945 | del_timer_sync(&slow_work_cull_timer); |
946 | del_timer_sync(&slow_work_oom_timer); | |
07fe7cb7 DH |
947 | wake_up_all(&slow_work_thread_wq); |
948 | wait_for_completion(&slow_work_last_thread_exited); | |
949 | printk(KERN_NOTICE "Slow work thread pool:" | |
950 | " Shut down complete\n"); | |
951 | } | |
952 | ||
953 | mutex_unlock(&slow_work_user_lock); | |
954 | } | |
955 | EXPORT_SYMBOL(slow_work_unregister_user); | |
956 | ||
957 | /* | |
958 | * Initialise the slow work facility | |
959 | */ | |
960 | static int __init init_slow_work(void) | |
961 | { | |
962 | unsigned nr_cpus = num_possible_cpus(); | |
963 | ||
12e22c5e | 964 | if (slow_work_max_threads < nr_cpus) |
07fe7cb7 | 965 | slow_work_max_threads = nr_cpus; |
12e22c5e DH |
966 | #ifdef CONFIG_SYSCTL |
967 | if (slow_work_max_max_threads < nr_cpus * 2) | |
968 | slow_work_max_max_threads = nr_cpus * 2; | |
8fba10a4 DH |
969 | #endif |
970 | #ifdef CONFIG_SLOW_WORK_PROC | |
971 | proc_create("slow_work_rq", S_IFREG | 0400, NULL, | |
972 | &slow_work_runqueue_fops); | |
12e22c5e | 973 | #endif |
07fe7cb7 DH |
974 | return 0; |
975 | } | |
976 | ||
977 | subsys_initcall(init_slow_work); |