// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2016 Thomas Gleixner.
 * Copyright (C) 2016-2017 Christoph Hellwig.
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/cpu.h>

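/*
 * Assign cpus_per_vec CPUs from @nmsk to @irqmsk, preferring to pick the
 * SMT siblings of a CPU that has already been chosen so that the CPUs of a
 * vector share a core where possible.
 */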
static void irq_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk,
                                unsigned int cpus_per_vec)
{
        const struct cpumask *siblmsk;
        int cpu, sibl;

        for ( ; cpus_per_vec > 0; ) {
                cpu = cpumask_first(nmsk);

                /* Should not happen, but I'm too lazy to think about it */
                if (cpu >= nr_cpu_ids)
                        return;

                cpumask_clear_cpu(cpu, nmsk);
                cpumask_set_cpu(cpu, irqmsk);
                cpus_per_vec--;

                /* If the cpu has siblings, use them first */
                siblmsk = topology_sibling_cpumask(cpu);
                for (sibl = -1; cpus_per_vec > 0; ) {
                        sibl = cpumask_next(sibl, siblmsk);
                        if (sibl >= nr_cpu_ids)
                                break;
                        if (!cpumask_test_and_clear_cpu(sibl, nmsk))
                                continue;
                        cpumask_set_cpu(sibl, irqmsk);
                        cpus_per_vec--;
                }
        }
}

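/*
 * Helpers for the node_to_cpumask table: one cpumask per possible NUMA node,
 * recording which possible CPUs belong to that node.
 */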
static cpumask_var_t *alloc_node_to_cpumask(void)
{
        cpumask_var_t *masks;
        int node;

        masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL);
        if (!masks)
                return NULL;

        for (node = 0; node < nr_node_ids; node++) {
                if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL))
                        goto out_unwind;
        }

        return masks;

out_unwind:
        while (--node >= 0)
                free_cpumask_var(masks[node]);
        kfree(masks);
        return NULL;
}

static void free_node_to_cpumask(cpumask_var_t *masks)
{
        int node;

        for (node = 0; node < nr_node_ids; node++)
                free_cpumask_var(masks[node]);
        kfree(masks);
}

static void build_node_to_cpumask(cpumask_var_t *masks)
{
        int cpu;

        for_each_possible_cpu(cpu)
                cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]);
}

static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask,
                                const struct cpumask *mask, nodemask_t *nodemsk)
{
        int n, nodes = 0;

        /* Calculate the number of nodes in the supplied affinity mask */
        for_each_node(n) {
                if (cpumask_intersects(mask, node_to_cpumask[n])) {
                        node_set(n, *nodemsk);
                        nodes++;
                }
        }
        return nodes;
}

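/*
 * Spread up to @numvecs vectors, starting at @startvec and wrapping around to
 * @firstvec, across the CPUs in @cpu_mask. The vectors are distributed over
 * the NUMA nodes covered by the mask first, then over the CPUs within each
 * node. Returns the number of vectors that received CPUs.
 */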
static int __irq_build_affinity_masks(unsigned int startvec,
                                      unsigned int numvecs,
                                      unsigned int firstvec,
                                      cpumask_var_t *node_to_cpumask,
                                      const struct cpumask *cpu_mask,
                                      struct cpumask *nmsk,
                                      struct irq_affinity_desc *masks)
{
        unsigned int n, nodes, cpus_per_vec, extra_vecs, done = 0;
        unsigned int last_affv = firstvec + numvecs;
        unsigned int curvec = startvec;
        nodemask_t nodemsk = NODE_MASK_NONE;

        if (!cpumask_weight(cpu_mask))
                return 0;

        nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk);

        /*
         * If the number of nodes in the mask is greater than or equal to the
         * number of vectors, just spread the vectors across the nodes.
         */
        if (numvecs <= nodes) {
                for_each_node_mask(n, nodemsk) {
                        cpumask_or(&masks[curvec].mask, &masks[curvec].mask,
                                   node_to_cpumask[n]);
                        if (++curvec == last_affv)
                                curvec = firstvec;
                }
                return numvecs;
        }

        for_each_node_mask(n, nodemsk) {
                unsigned int ncpus, v, vecs_to_assign, vecs_per_node;

                /* Spread the vectors per node */
                vecs_per_node = (numvecs - (curvec - firstvec)) / nodes;

                /* Get the cpus on this node which are in the mask */
                cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]);

                /* Calculate the number of cpus per vector */
                ncpus = cpumask_weight(nmsk);
                vecs_to_assign = min(vecs_per_node, ncpus);

                /* Account for rounding errors */
                extra_vecs = ncpus - vecs_to_assign * (ncpus / vecs_to_assign);

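                /*
                 * Worked example with made-up numbers: ncpus = 10 and
                 * vecs_to_assign = 4 gives ncpus / vecs_to_assign = 2 and
                 * extra_vecs = 10 - 4 * 2 = 2, so the first two vectors of
                 * this node get 3 CPUs each and the last two get 2 each.
                 */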
                for (v = 0; curvec < last_affv && v < vecs_to_assign;
                     curvec++, v++) {
                        cpus_per_vec = ncpus / vecs_to_assign;

                        /* Account for extra vectors to compensate rounding errors */
                        if (extra_vecs) {
                                cpus_per_vec++;
                                --extra_vecs;
                        }
                        irq_spread_init_one(&masks[curvec].mask, nmsk,
                                            cpus_per_vec);
                }

                done += v;
                if (done >= numvecs)
                        break;
                if (curvec >= last_affv)
                        curvec = firstvec;
                --nodes;
        }
        return done;
}

/*
 * Build the affinity masks in two stages:
 *  1) spread the present CPUs on these vectors
 *  2) spread the other possible CPUs on these vectors
 */
static int irq_build_affinity_masks(unsigned int startvec, unsigned int numvecs,
                                    unsigned int firstvec,
                                    struct irq_affinity_desc *masks)
{
        unsigned int curvec = startvec, nr_present, nr_others;
        cpumask_var_t *node_to_cpumask;
        cpumask_var_t nmsk, npresmsk;
        int ret = -ENOMEM;

        if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL))
                return ret;

        if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL))
                goto fail_nmsk;

        node_to_cpumask = alloc_node_to_cpumask();
        if (!node_to_cpumask)
                goto fail_npresmsk;

        ret = 0;
        /* Stabilize the cpumasks */
        get_online_cpus();
        build_node_to_cpumask(node_to_cpumask);

        /* Spread on present CPUs starting from affd->pre_vectors */
        nr_present = __irq_build_affinity_masks(curvec, numvecs,
                                                firstvec, node_to_cpumask,
                                                cpu_present_mask, nmsk, masks);

        /*
         * Spread on non-present CPUs starting from the next vector to be
         * handled. If the spreading of present CPUs already exhausted the
         * vector space, assign the non-present CPUs to the already spread
         * out vectors.
         */
        if (nr_present >= numvecs)
                curvec = firstvec;
        else
                curvec = firstvec + nr_present;
        cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask);
        nr_others = __irq_build_affinity_masks(curvec, numvecs,
                                               firstvec, node_to_cpumask,
                                               npresmsk, nmsk, masks);
        put_online_cpus();

        if (nr_present < numvecs)
                WARN_ON(nr_present + nr_others < numvecs);

        free_node_to_cpumask(node_to_cpumask);

fail_npresmsk:
        free_cpumask_var(npresmsk);

fail_nmsk:
        free_cpumask_var(nmsk);
        return ret;
}

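/*
 * Used when the driver does not supply a calc_sets() callback: treat all
 * spreadable vectors as a single set.
 */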
static void default_calc_sets(struct irq_affinity *affd, unsigned int affvecs)
{
        affd->nr_sets = 1;
        affd->set_size[0] = affvecs;
}

/**
 * irq_create_affinity_masks - Create affinity masks for multiqueue spreading
 * @nvecs:	The total number of vectors
 * @affd:	Description of the affinity requirements
 *
 * Returns the irq_affinity_desc pointer or NULL if allocation failed.
 */
struct irq_affinity_desc *
irq_create_affinity_masks(unsigned int nvecs, struct irq_affinity *affd)
{
        unsigned int affvecs, curvec, usedvecs, i;
        struct irq_affinity_desc *masks = NULL;

        /*
         * Determine the number of vectors which need interrupt affinities
         * assigned. If the pre/post request exhausts the available vectors
         * then nothing to do here except for invoking the calc_sets()
         * callback so the device driver can adjust to the situation. If there
         * is only a single vector, then managing the queue is pointless as
         * well.
         */
        if (nvecs > 1 && nvecs > affd->pre_vectors + affd->post_vectors)
                affvecs = nvecs - affd->pre_vectors - affd->post_vectors;
        else
                affvecs = 0;

        /*
         * Simple invocations do not provide a calc_sets() callback. Install
         * the generic one.
         */
        if (!affd->calc_sets)
                affd->calc_sets = default_calc_sets;

        /* Recalculate the sets */
        affd->calc_sets(affd, affvecs);

        if (WARN_ON_ONCE(affd->nr_sets > IRQ_AFFINITY_MAX_SETS))
                return NULL;

        /* Nothing to assign? */
        if (!affvecs)
                return NULL;

        masks = kcalloc(nvecs, sizeof(*masks), GFP_KERNEL);
        if (!masks)
                return NULL;

        /* Fill out vectors at the beginning that don't need affinity */
        for (curvec = 0; curvec < affd->pre_vectors; curvec++)
                cpumask_copy(&masks[curvec].mask, irq_default_affinity);

        /*
         * Spread on present CPUs starting from affd->pre_vectors. If we
         * have multiple sets, build each set's affinity mask separately.
         */
        for (i = 0, usedvecs = 0; i < affd->nr_sets; i++) {
                unsigned int this_vecs = affd->set_size[i];
                int ret;

                ret = irq_build_affinity_masks(curvec, this_vecs,
                                               curvec, masks);
                if (ret) {
                        kfree(masks);
                        return NULL;
                }
                curvec += this_vecs;
                usedvecs += this_vecs;
        }

        /* Fill out vectors at the end that don't need affinity */
        if (usedvecs >= affvecs)
                curvec = affd->pre_vectors + affvecs;
        else
                curvec = affd->pre_vectors + usedvecs;
        for (; curvec < nvecs; curvec++)
                cpumask_copy(&masks[curvec].mask, irq_default_affinity);

        /* Mark the managed interrupts */
        for (i = affd->pre_vectors; i < nvecs - affd->post_vectors; i++)
                masks[i].is_managed = 1;

        return masks;
}

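/*
 * Illustrative sketch (not taken from an in-tree caller): a user of this
 * interface that wants one vector before and one after the spread range
 * excluded from the spreading could do roughly:
 *
 *	struct irq_affinity affd = {
 *		.pre_vectors	= 1,
 *		.post_vectors	= 1,
 *	};
 *	struct irq_affinity_desc *masks = irq_create_affinity_masks(nvecs, &affd);
 *
 * On success masks[0] and masks[nvecs - 1] carry irq_default_affinity, the
 * vectors in between are spread over the possible CPUs and marked as
 * managed, and the array is freed with kfree() when no longer needed.
 */
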
/**
 * irq_calc_affinity_vectors - Calculate the optimal number of vectors
 * @minvec:	The minimum number of vectors available
 * @maxvec:	The maximum number of vectors available
 * @affd:	Description of the affinity requirements
 */
unsigned int irq_calc_affinity_vectors(unsigned int minvec, unsigned int maxvec,
                                       const struct irq_affinity *affd)
{
        unsigned int resv = affd->pre_vectors + affd->post_vectors;
        unsigned int set_vecs;

        if (resv > minvec)
                return 0;

        if (affd->calc_sets) {
                set_vecs = maxvec - resv;
        } else {
                get_online_cpus();
                set_vecs = cpumask_weight(cpu_possible_mask);
                put_online_cpus();
        }

        return resv + min(set_vecs, maxvec - resv);
}