padata: ensure the reorder timer callback runs on the correct CPU
[linux-2.6-block.git] / kernel / padata.c
CommitLineData
16295bec
SK
1/*
2 * padata.c - generic interface to process data streams in parallel
3 *
107f8bda
SK
4 * See Documentation/padata.txt for an api documentation.
5 *
16295bec
SK
6 * Copyright (C) 2008, 2009 secunet Security Networks AG
7 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms and conditions of the GNU General Public License,
11 * version 2, as published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope it will be useful, but WITHOUT
14 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
16 * more details.
17 *
18 * You should have received a copy of the GNU General Public License along with
19 * this program; if not, write to the Free Software Foundation, Inc.,
20 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
21 */
22
9984de1a 23#include <linux/export.h>
16295bec
SK
24#include <linux/cpumask.h>
25#include <linux/err.h>
26#include <linux/cpu.h>
27#include <linux/padata.h>
28#include <linux/mutex.h>
29#include <linux/sched.h>
5a0e3ad6 30#include <linux/slab.h>
5e017dc3 31#include <linux/sysfs.h>
16295bec 32#include <linux/rcupdate.h>
30e92153 33#include <linux/module.h>
16295bec 34
/*
 * Upper bound on pd->refcnt: padata_do_parallel() returns -EBUSY once
 * this many objects are in flight on one parallel_data.
 */
#define MAX_OBJ_NUM	1000
16295bec
SK
36
37static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
38{
39 int cpu, target_cpu;
40
e15bacbe 41 target_cpu = cpumask_first(pd->cpumask.pcpu);
16295bec 42 for (cpu = 0; cpu < cpu_index; cpu++)
e15bacbe 43 target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);
16295bec
SK
44
45 return target_cpu;
46}
47
2dc9b5db 48static int padata_cpu_hash(struct parallel_data *pd)
16295bec 49{
0b6b098e 50 unsigned int seq_nr;
16295bec 51 int cpu_index;
16295bec
SK
52
53 /*
54 * Hash the sequence numbers to the cpus by taking
55 * seq_nr mod. number of cpus in use.
56 */
2dc9b5db 57
0b6b098e
MK
58 seq_nr = atomic_inc_return(&pd->seq_nr);
59 cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
16295bec
SK
60
61 return padata_index_to_cpu(pd, cpu_index);
62}
63
e15bacbe 64static void padata_parallel_worker(struct work_struct *parallel_work)
16295bec 65{
e15bacbe 66 struct padata_parallel_queue *pqueue;
16295bec
SK
67 LIST_HEAD(local_list);
68
69 local_bh_disable();
e15bacbe
DK
70 pqueue = container_of(parallel_work,
71 struct padata_parallel_queue, work);
16295bec 72
e15bacbe
DK
73 spin_lock(&pqueue->parallel.lock);
74 list_replace_init(&pqueue->parallel.list, &local_list);
75 spin_unlock(&pqueue->parallel.lock);
16295bec
SK
76
77 while (!list_empty(&local_list)) {
78 struct padata_priv *padata;
79
80 padata = list_entry(local_list.next,
81 struct padata_priv, list);
82
83 list_del_init(&padata->list);
84
85 padata->parallel(padata);
86 }
87
88 local_bh_enable();
89}
90
0198ffd1 91/**
16295bec
SK
92 * padata_do_parallel - padata parallelization function
93 *
94 * @pinst: padata instance
95 * @padata: object to be parallelized
96 * @cb_cpu: cpu the serialization callback function will run on,
e15bacbe 97 * must be in the serial cpumask of padata(i.e. cpumask.cbcpu).
16295bec
SK
98 *
99 * The parallelization callback function will run with BHs off.
100 * Note: Every object which is parallelized by padata_do_parallel
101 * must be seen by padata_do_serial.
102 */
103int padata_do_parallel(struct padata_instance *pinst,
104 struct padata_priv *padata, int cb_cpu)
105{
106 int target_cpu, err;
e15bacbe 107 struct padata_parallel_queue *queue;
16295bec
SK
108 struct parallel_data *pd;
109
110 rcu_read_lock_bh();
111
c0e656b7 112 pd = rcu_dereference_bh(pinst->pd);
16295bec 113
83f619f3 114 err = -EINVAL;
7424713b 115 if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
16295bec
SK
116 goto out;
117
e15bacbe 118 if (!cpumask_test_cpu(cb_cpu, pd->cpumask.cbcpu))
16295bec
SK
119 goto out;
120
121 err = -EBUSY;
122 if ((pinst->flags & PADATA_RESET))
123 goto out;
124
125 if (atomic_read(&pd->refcnt) >= MAX_OBJ_NUM)
126 goto out;
127
83f619f3 128 err = 0;
16295bec
SK
129 atomic_inc(&pd->refcnt);
130 padata->pd = pd;
131 padata->cb_cpu = cb_cpu;
132
2dc9b5db 133 target_cpu = padata_cpu_hash(pd);
e15bacbe 134 queue = per_cpu_ptr(pd->pqueue, target_cpu);
16295bec
SK
135
136 spin_lock(&queue->parallel.lock);
137 list_add_tail(&padata->list, &queue->parallel.list);
138 spin_unlock(&queue->parallel.lock);
139
e15bacbe 140 queue_work_on(target_cpu, pinst->wq, &queue->work);
16295bec
SK
141
142out:
143 rcu_read_unlock_bh();
144
145 return err;
146}
147EXPORT_SYMBOL(padata_do_parallel);
148
0198ffd1
SK
149/*
150 * padata_get_next - Get the next object that needs serialization.
151 *
152 * Return values are:
153 *
154 * A pointer to the control struct of the next object that needs
155 * serialization, if present in one of the percpu reorder queues.
156 *
0198ffd1
SK
157 * -EINPROGRESS, if the next object that needs serialization will
158 * be parallel processed by another cpu and is not yet present in
159 * the cpu's reorder queue.
160 *
161 * -ENODATA, if this cpu has to do the parallel processing for
162 * the next object.
163 */
16295bec
SK
164static struct padata_priv *padata_get_next(struct parallel_data *pd)
165{
5f1a8c1b 166 int cpu, num_cpus;
2dc9b5db 167 unsigned int next_nr, next_index;
f0fcf200 168 struct padata_parallel_queue *next_queue;
16295bec
SK
169 struct padata_priv *padata;
170 struct padata_list *reorder;
171
e15bacbe 172 num_cpus = cpumask_weight(pd->cpumask.pcpu);
16295bec 173
5f1a8c1b
SK
174 /*
175 * Calculate the percpu reorder queue and the sequence
176 * number of the next object.
177 */
178 next_nr = pd->processed;
179 next_index = next_nr % num_cpus;
180 cpu = padata_index_to_cpu(pd, next_index);
e15bacbe 181 next_queue = per_cpu_ptr(pd->pqueue, cpu);
5f1a8c1b 182
16295bec
SK
183 reorder = &next_queue->reorder;
184
de5540d0 185 spin_lock(&reorder->lock);
16295bec
SK
186 if (!list_empty(&reorder->list)) {
187 padata = list_entry(reorder->list.next,
188 struct padata_priv, list);
189
16295bec
SK
190 list_del_init(&padata->list);
191 atomic_dec(&pd->reorder_objects);
16295bec 192
5f1a8c1b 193 pd->processed++;
16295bec 194
de5540d0 195 spin_unlock(&reorder->lock);
16295bec
SK
196 goto out;
197 }
de5540d0 198 spin_unlock(&reorder->lock);
16295bec 199
f0fcf200 200 if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
16295bec
SK
201 padata = ERR_PTR(-ENODATA);
202 goto out;
203 }
204
205 padata = ERR_PTR(-EINPROGRESS);
206out:
207 return padata;
208}
209
210static void padata_reorder(struct parallel_data *pd)
211{
3047817b 212 int cb_cpu;
16295bec 213 struct padata_priv *padata;
e15bacbe 214 struct padata_serial_queue *squeue;
16295bec
SK
215 struct padata_instance *pinst = pd->pinst;
216
0198ffd1
SK
217 /*
218 * We need to ensure that only one cpu can work on dequeueing of
219 * the reorder queue the time. Calculating in which percpu reorder
220 * queue the next object will arrive takes some time. A spinlock
221 * would be highly contended. Also it is not clear in which order
222 * the objects arrive to the reorder queues. So a cpu could wait to
223 * get the lock just to notice that there is nothing to do at the
224 * moment. Therefore we use a trylock and let the holder of the lock
225 * care for all the objects enqueued during the holdtime of the lock.
226 */
16295bec 227 if (!spin_trylock_bh(&pd->lock))
d46a5ac7 228 return;
16295bec
SK
229
230 while (1) {
231 padata = padata_get_next(pd);
232
0198ffd1 233 /*
69b34844
JD
234 * If the next object that needs serialization is parallel
235 * processed by another cpu and is still on it's way to the
236 * cpu's reorder queue, nothing to do for now.
0198ffd1 237 */
69b34844 238 if (PTR_ERR(padata) == -EINPROGRESS)
16295bec
SK
239 break;
240
0198ffd1
SK
241 /*
242 * This cpu has to do the parallel processing of the next
243 * object. It's waiting in the cpu's parallelization queue,
25985edc 244 * so exit immediately.
0198ffd1 245 */
16295bec 246 if (PTR_ERR(padata) == -ENODATA) {
d46a5ac7 247 del_timer(&pd->timer);
16295bec 248 spin_unlock_bh(&pd->lock);
d46a5ac7 249 return;
16295bec
SK
250 }
251
3047817b
SK
252 cb_cpu = padata->cb_cpu;
253 squeue = per_cpu_ptr(pd->squeue, cb_cpu);
16295bec 254
e15bacbe
DK
255 spin_lock(&squeue->serial.lock);
256 list_add_tail(&padata->list, &squeue->serial.list);
257 spin_unlock(&squeue->serial.lock);
16295bec 258
3047817b 259 queue_work_on(cb_cpu, pinst->wq, &squeue->work);
16295bec
SK
260 }
261
262 spin_unlock_bh(&pd->lock);
263
0198ffd1
SK
264 /*
265 * The next object that needs serialization might have arrived to
266 * the reorder queues in the meantime, we will be called again
25985edc 267 * from the timer function if no one else cares for it.
0198ffd1 268 */
d46a5ac7
SK
269 if (atomic_read(&pd->reorder_objects)
270 && !(pinst->flags & PADATA_RESET))
271 mod_timer(&pd->timer, jiffies + HZ);
272 else
273 del_timer(&pd->timer);
16295bec 274
16295bec
SK
275 return;
276}
277
cf5868c8
MK
278static void invoke_padata_reorder(struct work_struct *work)
279{
280 struct padata_parallel_queue *pqueue;
281 struct parallel_data *pd;
282
283 local_bh_disable();
284 pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
285 pd = pqueue->pd;
286 padata_reorder(pd);
287 local_bh_enable();
288}
289
d46a5ac7
SK
290static void padata_reorder_timer(unsigned long arg)
291{
292 struct parallel_data *pd = (struct parallel_data *)arg;
cf5868c8
MK
293 unsigned int weight;
294 int target_cpu, cpu;
d46a5ac7 295
cf5868c8
MK
296 cpu = get_cpu();
297
298 /* We don't lock pd here to not interfere with parallel processing
299 * padata_reorder() calls on other CPUs. We just need any CPU out of
300 * the cpumask.pcpu set. It would be nice if it's the right one but
301 * it doesn't matter if we're off to the next one by using an outdated
302 * pd->processed value.
303 */
304 weight = cpumask_weight(pd->cpumask.pcpu);
305 target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
306
307 /* ensure to call the reorder callback on the correct CPU */
308 if (cpu != target_cpu) {
309 struct padata_parallel_queue *pqueue;
310 struct padata_instance *pinst;
311
312 /* The timer function is serialized wrt itself -- no locking
313 * needed.
314 */
315 pinst = pd->pinst;
316 pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
317 queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
318 } else {
319 padata_reorder(pd);
320 }
321
322 put_cpu();
d46a5ac7
SK
323}
324
e15bacbe 325static void padata_serial_worker(struct work_struct *serial_work)
16295bec 326{
e15bacbe 327 struct padata_serial_queue *squeue;
16295bec
SK
328 struct parallel_data *pd;
329 LIST_HEAD(local_list);
330
331 local_bh_disable();
e15bacbe
DK
332 squeue = container_of(serial_work, struct padata_serial_queue, work);
333 pd = squeue->pd;
16295bec 334
e15bacbe
DK
335 spin_lock(&squeue->serial.lock);
336 list_replace_init(&squeue->serial.list, &local_list);
337 spin_unlock(&squeue->serial.lock);
16295bec
SK
338
339 while (!list_empty(&local_list)) {
340 struct padata_priv *padata;
341
342 padata = list_entry(local_list.next,
343 struct padata_priv, list);
344
345 list_del_init(&padata->list);
346
347 padata->serial(padata);
348 atomic_dec(&pd->refcnt);
349 }
350 local_bh_enable();
351}
352
0198ffd1 353/**
16295bec
SK
354 * padata_do_serial - padata serialization function
355 *
356 * @padata: object to be serialized.
357 *
358 * padata_do_serial must be called for every parallelized object.
359 * The serialization callback function will run with BHs off.
360 */
361void padata_do_serial(struct padata_priv *padata)
362{
363 int cpu;
e15bacbe 364 struct padata_parallel_queue *pqueue;
16295bec
SK
365 struct parallel_data *pd;
366
367 pd = padata->pd;
368
369 cpu = get_cpu();
e15bacbe 370 pqueue = per_cpu_ptr(pd->pqueue, cpu);
16295bec 371
e15bacbe 372 spin_lock(&pqueue->reorder.lock);
16295bec 373 atomic_inc(&pd->reorder_objects);
e15bacbe
DK
374 list_add_tail(&padata->list, &pqueue->reorder.list);
375 spin_unlock(&pqueue->reorder.lock);
16295bec
SK
376
377 put_cpu();
378
379 padata_reorder(pd);
380}
381EXPORT_SYMBOL(padata_do_serial);
382
e15bacbe
DK
383static int padata_setup_cpumasks(struct parallel_data *pd,
384 const struct cpumask *pcpumask,
385 const struct cpumask *cbcpumask)
16295bec 386{
e15bacbe
DK
387 if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
388 return -ENOMEM;
16295bec 389
13614e0f 390 cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
e15bacbe 391 if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
07a77929 392 free_cpumask_var(pd->cpumask.pcpu);
e15bacbe
DK
393 return -ENOMEM;
394 }
16295bec 395
13614e0f 396 cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
e15bacbe
DK
397 return 0;
398}
16295bec 399
e15bacbe
DK
400static void __padata_list_init(struct padata_list *pd_list)
401{
402 INIT_LIST_HEAD(&pd_list->list);
403 spin_lock_init(&pd_list->lock);
404}
16295bec 405
e15bacbe
DK
406/* Initialize all percpu queues used by serial workers */
407static void padata_init_squeues(struct parallel_data *pd)
408{
409 int cpu;
410 struct padata_serial_queue *squeue;
7b389b2c 411
e15bacbe
DK
412 for_each_cpu(cpu, pd->cpumask.cbcpu) {
413 squeue = per_cpu_ptr(pd->squeue, cpu);
414 squeue->pd = pd;
415 __padata_list_init(&squeue->serial);
416 INIT_WORK(&squeue->work, padata_serial_worker);
417 }
418}
16295bec 419
e15bacbe
DK
420/* Initialize all percpu queues used by parallel workers */
421static void padata_init_pqueues(struct parallel_data *pd)
422{
2dc9b5db 423 int cpu_index, cpu;
e15bacbe 424 struct padata_parallel_queue *pqueue;
16295bec 425
e15bacbe 426 cpu_index = 0;
1bd845bc 427 for_each_possible_cpu(cpu) {
e15bacbe 428 pqueue = per_cpu_ptr(pd->pqueue, cpu);
1bd845bc
MK
429
430 if (!cpumask_test_cpu(cpu, pd->cpumask.pcpu)) {
431 pqueue->cpu_index = -1;
432 continue;
433 }
434
e15bacbe
DK
435 pqueue->pd = pd;
436 pqueue->cpu_index = cpu_index;
7b389b2c 437 cpu_index++;
16295bec 438
e15bacbe
DK
439 __padata_list_init(&pqueue->reorder);
440 __padata_list_init(&pqueue->parallel);
441 INIT_WORK(&pqueue->work, padata_parallel_worker);
cf5868c8 442 INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
e15bacbe 443 atomic_set(&pqueue->num_obj, 0);
16295bec 444 }
e15bacbe 445}
16295bec 446
e15bacbe
DK
447/* Allocate and initialize the internal cpumask dependend resources. */
448static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
449 const struct cpumask *pcpumask,
450 const struct cpumask *cbcpumask)
451{
452 struct parallel_data *pd;
16295bec 453
e15bacbe
DK
454 pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
455 if (!pd)
456 goto err;
16295bec 457
e15bacbe
DK
458 pd->pqueue = alloc_percpu(struct padata_parallel_queue);
459 if (!pd->pqueue)
460 goto err_free_pd;
461
462 pd->squeue = alloc_percpu(struct padata_serial_queue);
463 if (!pd->squeue)
464 goto err_free_pqueue;
465 if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
466 goto err_free_squeue;
16295bec 467
e15bacbe
DK
468 padata_init_pqueues(pd);
469 padata_init_squeues(pd);
d46a5ac7 470 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
0b6b098e 471 atomic_set(&pd->seq_nr, -1);
16295bec
SK
472 atomic_set(&pd->reorder_objects, 0);
473 atomic_set(&pd->refcnt, 0);
474 pd->pinst = pinst;
475 spin_lock_init(&pd->lock);
476
477 return pd;
478
e15bacbe
DK
479err_free_squeue:
480 free_percpu(pd->squeue);
481err_free_pqueue:
482 free_percpu(pd->pqueue);
16295bec
SK
483err_free_pd:
484 kfree(pd);
485err:
486 return NULL;
487}
488
489static void padata_free_pd(struct parallel_data *pd)
490{
e15bacbe
DK
491 free_cpumask_var(pd->cpumask.pcpu);
492 free_cpumask_var(pd->cpumask.cbcpu);
493 free_percpu(pd->pqueue);
494 free_percpu(pd->squeue);
16295bec
SK
495 kfree(pd);
496}
497
0198ffd1 498/* Flush all objects out of the padata queues. */
2b73b07a
SK
499static void padata_flush_queues(struct parallel_data *pd)
500{
501 int cpu;
e15bacbe
DK
502 struct padata_parallel_queue *pqueue;
503 struct padata_serial_queue *squeue;
2b73b07a 504
e15bacbe
DK
505 for_each_cpu(cpu, pd->cpumask.pcpu) {
506 pqueue = per_cpu_ptr(pd->pqueue, cpu);
507 flush_work(&pqueue->work);
2b73b07a
SK
508 }
509
510 del_timer_sync(&pd->timer);
511
512 if (atomic_read(&pd->reorder_objects))
513 padata_reorder(pd);
514
e15bacbe
DK
515 for_each_cpu(cpu, pd->cpumask.cbcpu) {
516 squeue = per_cpu_ptr(pd->squeue, cpu);
517 flush_work(&squeue->work);
2b73b07a
SK
518 }
519
520 BUG_ON(atomic_read(&pd->refcnt) != 0);
521}
522
4c879170
SK
523static void __padata_start(struct padata_instance *pinst)
524{
525 pinst->flags |= PADATA_INIT;
526}
527
ee836555
SK
528static void __padata_stop(struct padata_instance *pinst)
529{
530 if (!(pinst->flags & PADATA_INIT))
531 return;
532
533 pinst->flags &= ~PADATA_INIT;
534
535 synchronize_rcu();
536
537 get_online_cpus();
538 padata_flush_queues(pinst->pd);
539 put_online_cpus();
540}
541
25985edc 542/* Replace the internal control structure with a new one. */
16295bec
SK
543static void padata_replace(struct padata_instance *pinst,
544 struct parallel_data *pd_new)
545{
546 struct parallel_data *pd_old = pinst->pd;
e15bacbe 547 int notification_mask = 0;
16295bec
SK
548
549 pinst->flags |= PADATA_RESET;
550
551 rcu_assign_pointer(pinst->pd, pd_new);
552
553 synchronize_rcu();
554
e15bacbe
DK
555 if (!cpumask_equal(pd_old->cpumask.pcpu, pd_new->cpumask.pcpu))
556 notification_mask |= PADATA_CPU_PARALLEL;
557 if (!cpumask_equal(pd_old->cpumask.cbcpu, pd_new->cpumask.cbcpu))
558 notification_mask |= PADATA_CPU_SERIAL;
559
2b73b07a 560 padata_flush_queues(pd_old);
16295bec
SK
561 padata_free_pd(pd_old);
562
e15bacbe
DK
563 if (notification_mask)
564 blocking_notifier_call_chain(&pinst->cpumask_change_notifier,
c635696c
SK
565 notification_mask,
566 &pd_new->cpumask);
16295bec
SK
567
568 pinst->flags &= ~PADATA_RESET;
569}
570
0198ffd1 571/**
e15bacbe
DK
572 * padata_register_cpumask_notifier - Registers a notifier that will be called
573 * if either pcpu or cbcpu or both cpumasks change.
16295bec 574 *
e15bacbe
DK
575 * @pinst: A poineter to padata instance
576 * @nblock: A pointer to notifier block.
16295bec 577 */
e15bacbe
DK
578int padata_register_cpumask_notifier(struct padata_instance *pinst,
579 struct notifier_block *nblock)
16295bec 580{
e15bacbe
DK
581 return blocking_notifier_chain_register(&pinst->cpumask_change_notifier,
582 nblock);
583}
584EXPORT_SYMBOL(padata_register_cpumask_notifier);
585
586/**
587 * padata_unregister_cpumask_notifier - Unregisters cpumask notifier
588 * registered earlier using padata_register_cpumask_notifier
589 *
590 * @pinst: A pointer to data instance.
591 * @nlock: A pointer to notifier block.
592 */
593int padata_unregister_cpumask_notifier(struct padata_instance *pinst,
594 struct notifier_block *nblock)
595{
596 return blocking_notifier_chain_unregister(
597 &pinst->cpumask_change_notifier,
598 nblock);
599}
600EXPORT_SYMBOL(padata_unregister_cpumask_notifier);
601
602
33e54450
SK
603/* If cpumask contains no active cpu, we mark the instance as invalid. */
604static bool padata_validate_cpumask(struct padata_instance *pinst,
605 const struct cpumask *cpumask)
606{
13614e0f 607 if (!cpumask_intersects(cpumask, cpu_online_mask)) {
33e54450
SK
608 pinst->flags |= PADATA_INVALID;
609 return false;
610 }
611
612 pinst->flags &= ~PADATA_INVALID;
613 return true;
614}
615
65ff577e
SK
616static int __padata_set_cpumasks(struct padata_instance *pinst,
617 cpumask_var_t pcpumask,
618 cpumask_var_t cbcpumask)
619{
620 int valid;
16295bec 621 struct parallel_data *pd;
65ff577e
SK
622
623 valid = padata_validate_cpumask(pinst, pcpumask);
624 if (!valid) {
625 __padata_stop(pinst);
626 goto out_replace;
627 }
628
629 valid = padata_validate_cpumask(pinst, cbcpumask);
630 if (!valid)
631 __padata_stop(pinst);
632
633out_replace:
634 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
635 if (!pd)
636 return -ENOMEM;
637
638 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
639 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
640
641 padata_replace(pinst, pd);
642
643 if (valid)
644 __padata_start(pinst);
645
646 return 0;
647}
648
e15bacbe
DK
649/**
650 * padata_set_cpumask: Sets specified by @cpumask_type cpumask to the value
651 * equivalent to @cpumask.
16295bec
SK
652 *
653 * @pinst: padata instance
e15bacbe
DK
654 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL corresponding
655 * to parallel and serial cpumasks respectively.
16295bec
SK
656 * @cpumask: the cpumask to use
657 */
e15bacbe
DK
658int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
659 cpumask_var_t cpumask)
660{
661 struct cpumask *serial_mask, *parallel_mask;
65ff577e
SK
662 int err = -EINVAL;
663
664 mutex_lock(&pinst->lock);
6751fb3c
SK
665 get_online_cpus();
666
e15bacbe
DK
667 switch (cpumask_type) {
668 case PADATA_CPU_PARALLEL:
669 serial_mask = pinst->cpumask.cbcpu;
670 parallel_mask = cpumask;
671 break;
672 case PADATA_CPU_SERIAL:
673 parallel_mask = pinst->cpumask.pcpu;
674 serial_mask = cpumask;
675 break;
676 default:
65ff577e 677 goto out;
16295bec
SK
678 }
679
65ff577e 680 err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);
16295bec
SK
681
682out:
6751fb3c 683 put_online_cpus();
16295bec
SK
684 mutex_unlock(&pinst->lock);
685
686 return err;
687}
688EXPORT_SYMBOL(padata_set_cpumask);
689
19d795b6
AB
690/**
691 * padata_start - start the parallel processing
692 *
693 * @pinst: padata instance to start
694 */
695int padata_start(struct padata_instance *pinst)
696{
697 int err = 0;
698
699 mutex_lock(&pinst->lock);
700
701 if (pinst->flags & PADATA_INVALID)
702 err = -EINVAL;
703
704 __padata_start(pinst);
705
706 mutex_unlock(&pinst->lock);
707
708 return err;
709}
710EXPORT_SYMBOL(padata_start);
711
712/**
713 * padata_stop - stop the parallel processing
714 *
715 * @pinst: padata instance to stop
716 */
717void padata_stop(struct padata_instance *pinst)
718{
719 mutex_lock(&pinst->lock);
720 __padata_stop(pinst);
721 mutex_unlock(&pinst->lock);
722}
723EXPORT_SYMBOL(padata_stop);
724
725#ifdef CONFIG_HOTPLUG_CPU
726
16295bec
SK
727static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
728{
729 struct parallel_data *pd;
730
13614e0f 731 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
e15bacbe
DK
732 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
733 pinst->cpumask.cbcpu);
16295bec
SK
734 if (!pd)
735 return -ENOMEM;
736
737 padata_replace(pinst, pd);
33e54450 738
e15bacbe
DK
739 if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
740 padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
33e54450 741 __padata_start(pinst);
16295bec
SK
742 }
743
744 return 0;
745}
746
16295bec
SK
747static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
748{
33e54450 749 struct parallel_data *pd = NULL;
16295bec
SK
750
751 if (cpumask_test_cpu(cpu, cpu_online_mask)) {
33e54450 752
e15bacbe 753 if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
b89661df 754 !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
33e54450 755 __padata_stop(pinst);
33e54450 756
e15bacbe
DK
757 pd = padata_alloc_pd(pinst, pinst->cpumask.pcpu,
758 pinst->cpumask.cbcpu);
16295bec
SK
759 if (!pd)
760 return -ENOMEM;
761
762 padata_replace(pinst, pd);
96120905
SK
763
764 cpumask_clear_cpu(cpu, pd->cpumask.cbcpu);
765 cpumask_clear_cpu(cpu, pd->cpumask.pcpu);
16295bec
SK
766 }
767
768 return 0;
769}
770
e15bacbe 771 /**
25985edc 772 * padata_remove_cpu - remove a cpu from the one or both(serial and parallel)
e15bacbe 773 * padata cpumasks.
16295bec
SK
774 *
775 * @pinst: padata instance
776 * @cpu: cpu to remove
e15bacbe
DK
777 * @mask: bitmask specifying from which cpumask @cpu should be removed
778 * The @mask may be any combination of the following flags:
779 * PADATA_CPU_SERIAL - serial cpumask
780 * PADATA_CPU_PARALLEL - parallel cpumask
16295bec 781 */
e15bacbe 782int padata_remove_cpu(struct padata_instance *pinst, int cpu, int mask)
16295bec
SK
783{
784 int err;
785
e15bacbe
DK
786 if (!(mask & (PADATA_CPU_SERIAL | PADATA_CPU_PARALLEL)))
787 return -EINVAL;
788
16295bec
SK
789 mutex_lock(&pinst->lock);
790
6751fb3c 791 get_online_cpus();
e15bacbe
DK
792 if (mask & PADATA_CPU_SERIAL)
793 cpumask_clear_cpu(cpu, pinst->cpumask.cbcpu);
794 if (mask & PADATA_CPU_PARALLEL)
795 cpumask_clear_cpu(cpu, pinst->cpumask.pcpu);
796
16295bec 797 err = __padata_remove_cpu(pinst, cpu);
6751fb3c 798 put_online_cpus();
16295bec
SK
799
800 mutex_unlock(&pinst->lock);
801
802 return err;
803}
804EXPORT_SYMBOL(padata_remove_cpu);
805
e15bacbe
DK
806static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
807{
808 return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
809 cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
810}
811
30e92153 812static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
16295bec 813{
16295bec 814 struct padata_instance *pinst;
30e92153 815 int ret;
16295bec 816
30e92153
SAS
817 pinst = hlist_entry_safe(node, struct padata_instance, node);
818 if (!pinst_has_cpu(pinst, cpu))
819 return 0;
16295bec 820
30e92153
SAS
821 mutex_lock(&pinst->lock);
822 ret = __padata_add_cpu(pinst, cpu);
823 mutex_unlock(&pinst->lock);
824 return ret;
825}
16295bec 826
30e92153
SAS
827static int padata_cpu_prep_down(unsigned int cpu, struct hlist_node *node)
828{
829 struct padata_instance *pinst;
830 int ret;
831
832 pinst = hlist_entry_safe(node, struct padata_instance, node);
833 if (!pinst_has_cpu(pinst, cpu))
834 return 0;
16295bec 835
30e92153
SAS
836 mutex_lock(&pinst->lock);
837 ret = __padata_remove_cpu(pinst, cpu);
838 mutex_unlock(&pinst->lock);
839 return ret;
16295bec 840}
30e92153
SAS
841
842static enum cpuhp_state hp_online;
e2cb2f1c 843#endif
16295bec 844
5e017dc3
DK
845static void __padata_free(struct padata_instance *pinst)
846{
847#ifdef CONFIG_HOTPLUG_CPU
30e92153 848 cpuhp_state_remove_instance_nocalls(hp_online, &pinst->node);
5e017dc3
DK
849#endif
850
851 padata_stop(pinst);
852 padata_free_pd(pinst->pd);
853 free_cpumask_var(pinst->cpumask.pcpu);
854 free_cpumask_var(pinst->cpumask.cbcpu);
855 kfree(pinst);
856}
857
/* Map an embedded kobject back to its padata instance. */
#define kobj2pinst(_kobj)					\
	container_of(_kobj, struct padata_instance, kobj)

/* Map an embedded attribute back to its padata sysfs entry. */
#define attr2pentry(_attr)					\
	container_of(_attr, struct padata_sysfs_entry, attr)
862
/* kobject release callback: frees the padata instance embedding @kobj. */
static void padata_sysfs_release(struct kobject *kobj)
{
	__padata_free(kobj2pinst(kobj));
}
868
869struct padata_sysfs_entry {
870 struct attribute attr;
871 ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
872 ssize_t (*store)(struct padata_instance *, struct attribute *,
873 const char *, size_t);
874};
875
876static ssize_t show_cpumask(struct padata_instance *pinst,
877 struct attribute *attr, char *buf)
878{
879 struct cpumask *cpumask;
880 ssize_t len;
881
882 mutex_lock(&pinst->lock);
883 if (!strcmp(attr->name, "serial_cpumask"))
884 cpumask = pinst->cpumask.cbcpu;
885 else
886 cpumask = pinst->cpumask.pcpu;
887
4497da6f
TH
888 len = snprintf(buf, PAGE_SIZE, "%*pb\n",
889 nr_cpu_ids, cpumask_bits(cpumask));
5e017dc3 890 mutex_unlock(&pinst->lock);
4497da6f 891 return len < PAGE_SIZE ? len : -EINVAL;
5e017dc3
DK
892}
893
894static ssize_t store_cpumask(struct padata_instance *pinst,
895 struct attribute *attr,
896 const char *buf, size_t count)
897{
898 cpumask_var_t new_cpumask;
899 ssize_t ret;
900 int mask_type;
901
902 if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
903 return -ENOMEM;
904
905 ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
906 nr_cpumask_bits);
907 if (ret < 0)
908 goto out;
909
910 mask_type = !strcmp(attr->name, "serial_cpumask") ?
911 PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
912 ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
913 if (!ret)
914 ret = count;
915
916out:
917 free_cpumask_var(new_cpumask);
918 return ret;
919}
920
921#define PADATA_ATTR_RW(_name, _show_name, _store_name) \
922 static struct padata_sysfs_entry _name##_attr = \
923 __ATTR(_name, 0644, _show_name, _store_name)
924#define PADATA_ATTR_RO(_name, _show_name) \
925 static struct padata_sysfs_entry _name##_attr = \
926 __ATTR(_name, 0400, _show_name, NULL)
927
928PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
929PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);
930
931/*
932 * Padata sysfs provides the following objects:
933 * serial_cpumask [RW] - cpumask for serial workers
934 * parallel_cpumask [RW] - cpumask for parallel workers
935 */
936static struct attribute *padata_default_attrs[] = {
937 &serial_cpumask_attr.attr,
938 &parallel_cpumask_attr.attr,
939 NULL,
940};
941
942static ssize_t padata_sysfs_show(struct kobject *kobj,
943 struct attribute *attr, char *buf)
944{
945 struct padata_instance *pinst;
946 struct padata_sysfs_entry *pentry;
947 ssize_t ret = -EIO;
948
949 pinst = kobj2pinst(kobj);
950 pentry = attr2pentry(attr);
951 if (pentry->show)
952 ret = pentry->show(pinst, attr, buf);
953
954 return ret;
955}
956
957static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
958 const char *buf, size_t count)
959{
960 struct padata_instance *pinst;
961 struct padata_sysfs_entry *pentry;
962 ssize_t ret = -EIO;
963
964 pinst = kobj2pinst(kobj);
965 pentry = attr2pentry(attr);
966 if (pentry->show)
967 ret = pentry->store(pinst, attr, buf, count);
968
969 return ret;
970}
971
972static const struct sysfs_ops padata_sysfs_ops = {
973 .show = padata_sysfs_show,
974 .store = padata_sysfs_store,
975};
976
977static struct kobj_type padata_attr_type = {
978 .sysfs_ops = &padata_sysfs_ops,
979 .default_attrs = padata_default_attrs,
980 .release = padata_sysfs_release,
981};
982
e15bacbe 983/**
e6cc1170
SK
984 * padata_alloc - allocate and initialize a padata instance and specify
985 * cpumasks for serial and parallel workers.
16295bec 986 *
16295bec 987 * @wq: workqueue to use for the allocated padata instance
e15bacbe
DK
988 * @pcpumask: cpumask that will be used for padata parallelization
989 * @cbcpumask: cpumask that will be used for padata serialization
c5a81c8f
SAS
990 *
991 * Must be called from a cpus_read_lock() protected region
16295bec 992 */
9596695e
TG
993static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
994 const struct cpumask *pcpumask,
995 const struct cpumask *cbcpumask)
16295bec 996{
16295bec 997 struct padata_instance *pinst;
33e54450 998 struct parallel_data *pd = NULL;
16295bec
SK
999
1000 pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
1001 if (!pinst)
1002 goto err;
1003
e15bacbe 1004 if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
16295bec 1005 goto err_free_inst;
e15bacbe
DK
1006 if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
1007 free_cpumask_var(pinst->cpumask.pcpu);
16295bec 1008 goto err_free_inst;
33e54450 1009 }
e15bacbe
DK
1010 if (!padata_validate_cpumask(pinst, pcpumask) ||
1011 !padata_validate_cpumask(pinst, cbcpumask))
1012 goto err_free_masks;
16295bec 1013
e15bacbe
DK
1014 pd = padata_alloc_pd(pinst, pcpumask, cbcpumask);
1015 if (!pd)
1016 goto err_free_masks;
74781387 1017
16295bec
SK
1018 rcu_assign_pointer(pinst->pd, pd);
1019
1020 pinst->wq = wq;
1021
e15bacbe
DK
1022 cpumask_copy(pinst->cpumask.pcpu, pcpumask);
1023 cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);
16295bec
SK
1024
1025 pinst->flags = 0;
1026
e15bacbe 1027 BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
5e017dc3 1028 kobject_init(&pinst->kobj, &padata_attr_type);
16295bec
SK
1029 mutex_init(&pinst->lock);
1030
b8b4a416 1031#ifdef CONFIG_HOTPLUG_CPU
c5a81c8f 1032 cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
b8b4a416 1033#endif
16295bec
SK
1034 return pinst;
1035
e15bacbe
DK
1036err_free_masks:
1037 free_cpumask_var(pinst->cpumask.pcpu);
1038 free_cpumask_var(pinst->cpumask.cbcpu);
16295bec
SK
1039err_free_inst:
1040 kfree(pinst);
1041err:
1042 return NULL;
1043}
16295bec 1044
9596695e
TG
1045/**
1046 * padata_alloc_possible - Allocate and initialize padata instance.
1047 * Use the cpu_possible_mask for serial and
1048 * parallel workers.
1049 *
1050 * @wq: workqueue to use for the allocated padata instance
c5a81c8f
SAS
1051 *
1052 * Must be called from a cpus_read_lock() protected region
9596695e
TG
1053 */
1054struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
1055{
c5a81c8f 1056 lockdep_assert_cpus_held();
9596695e
TG
1057 return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
1058}
1059EXPORT_SYMBOL(padata_alloc_possible);
1060
0198ffd1 1061/**
16295bec
SK
1062 * padata_free - free a padata instance
1063 *
0198ffd1 1064 * @padata_inst: padata instance to free
16295bec
SK
1065 */
1066void padata_free(struct padata_instance *pinst)
1067{
5e017dc3 1068 kobject_put(&pinst->kobj);
16295bec
SK
1069}
1070EXPORT_SYMBOL(padata_free);
30e92153
SAS
1071
1072#ifdef CONFIG_HOTPLUG_CPU
1073
1074static __init int padata_driver_init(void)
1075{
1076 int ret;
1077
1078 ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
1079 padata_cpu_online,
1080 padata_cpu_prep_down);
1081 if (ret < 0)
1082 return ret;
1083 hp_online = ret;
1084 return 0;
1085}
1086module_init(padata_driver_init);
1087
1088static __exit void padata_driver_exit(void)
1089{
1090 cpuhp_remove_multi_state(hp_online);
1091}
1092module_exit(padata_driver_exit);
1093#endif