Commit | Line | Data |
---|---|---|
957558c9 | 1 | /* |
05d6ac1d | 2 | * Copyright(c) 2015, 2016 Intel Corporation. |
957558c9 MH |
3 | * |
4 | * This file is provided under a dual BSD/GPLv2 license. When using or | |
5 | * redistributing this file, you may do so under either license. | |
6 | * | |
7 | * GPL LICENSE SUMMARY | |
8 | * | |
957558c9 MH |
9 | * This program is free software; you can redistribute it and/or modify |
10 | * it under the terms of version 2 of the GNU General Public License as | |
11 | * published by the Free Software Foundation. | |
12 | * | |
13 | * This program is distributed in the hope that it will be useful, but | |
14 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 | * General Public License for more details. | |
17 | * | |
18 | * BSD LICENSE | |
19 | * | |
957558c9 MH |
20 | * Redistribution and use in source and binary forms, with or without |
21 | * modification, are permitted provided that the following conditions | |
22 | * are met: | |
23 | * | |
24 | * - Redistributions of source code must retain the above copyright | |
25 | * notice, this list of conditions and the following disclaimer. | |
26 | * - Redistributions in binary form must reproduce the above copyright | |
27 | * notice, this list of conditions and the following disclaimer in | |
28 | * the documentation and/or other materials provided with the | |
29 | * distribution. | |
30 | * - Neither the name of Intel Corporation nor the names of its | |
31 | * contributors may be used to endorse or promote products derived | |
32 | * from this software without specific prior written permission. | |
33 | * | |
34 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
35 | * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
36 | * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
37 | * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
38 | * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
39 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
40 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
41 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
42 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
43 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
44 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
45 | * | |
46 | */ | |
47 | #include <linux/topology.h> | |
48 | #include <linux/cpumask.h> | |
49 | #include <linux/module.h> | |
50 | ||
51 | #include "hfi.h" | |
52 | #include "affinity.h" | |
53 | #include "sdma.h" | |
54 | #include "trace.h" | |
55 | ||
957558c9 MH |
/*
 * Printable label for each interrupt type.  The array is indexed by
 * enum irq_type, so the order of the strings must follow that enum.
 */
static const char * const irq_type_names[] = {
	"SDMA", "RCVCTXT", "GENERAL", "OTHER",
};
63 | ||
64 | static inline void init_cpu_mask_set(struct cpu_mask_set *set) | |
65 | { | |
66 | cpumask_clear(&set->mask); | |
67 | cpumask_clear(&set->used); | |
68 | set->gen = 0; | |
69 | } | |
70 | ||
0852d241 JJ |
71 | /* Initialize non-HT cpu cores mask */ |
72 | int init_real_cpu_mask(struct hfi1_devdata *dd) | |
73 | { | |
74 | struct hfi1_affinity *info; | |
75 | int possible, curr_cpu, i, ht; | |
76 | ||
77 | info = kzalloc(sizeof(*info), GFP_KERNEL); | |
78 | if (!info) | |
79 | return -ENOMEM; | |
80 | ||
81 | cpumask_clear(&info->real_cpu_mask); | |
82 | ||
83 | /* Start with cpu online mask as the real cpu mask */ | |
84 | cpumask_copy(&info->real_cpu_mask, cpu_online_mask); | |
85 | ||
86 | /* | |
87 | * Remove HT cores from the real cpu mask. Do this in two steps below. | |
88 | */ | |
89 | possible = cpumask_weight(&info->real_cpu_mask); | |
90 | ht = cpumask_weight(topology_sibling_cpumask( | |
91 | cpumask_first(&info->real_cpu_mask))); | |
92 | /* | |
93 | * Step 1. Skip over the first N HT siblings and use them as the | |
94 | * "real" cores. Assumes that HT cores are not enumerated in | |
95 | * succession (except in the single core case). | |
96 | */ | |
97 | curr_cpu = cpumask_first(&info->real_cpu_mask); | |
98 | for (i = 0; i < possible / ht; i++) | |
99 | curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); | |
100 | /* | |
101 | * Step 2. Remove the remaining HT siblings. Use cpumask_next() to | |
102 | * skip any gaps. | |
103 | */ | |
104 | for (; i < possible; i++) { | |
105 | cpumask_clear_cpu(curr_cpu, &info->real_cpu_mask); | |
106 | curr_cpu = cpumask_next(curr_cpu, &info->real_cpu_mask); | |
107 | } | |
108 | ||
109 | dd->affinity = info; | |
110 | return 0; | |
111 | } | |
112 | ||
957558c9 MH |
113 | /* |
114 | * Interrupt affinity. | |
115 | * | |
116 | * non-rcv avail gets a default mask that | |
117 | * starts as possible cpus with threads reset | |
118 | * and each rcv avail reset. | |
119 | * | |
120 | * rcv avail gets node relative 1 wrapping back | |
121 | * to the node relative 1 as necessary. | |
122 | * | |
123 | */ | |
0852d241 | 124 | void hfi1_dev_affinity_init(struct hfi1_devdata *dd) |
957558c9 MH |
125 | { |
126 | int node = pcibus_to_node(dd->pcidev->bus); | |
0852d241 | 127 | struct hfi1_affinity *info = dd->affinity; |
957558c9 | 128 | const struct cpumask *local_mask; |
0852d241 | 129 | int curr_cpu, possible, i; |
957558c9 MH |
130 | |
131 | if (node < 0) | |
132 | node = numa_node_id(); | |
133 | dd->node = node; | |
134 | ||
957558c9 MH |
135 | spin_lock_init(&info->lock); |
136 | ||
137 | init_cpu_mask_set(&info->def_intr); | |
138 | init_cpu_mask_set(&info->rcv_intr); | |
139 | init_cpu_mask_set(&info->proc); | |
140 | ||
141 | local_mask = cpumask_of_node(dd->node); | |
142 | if (cpumask_first(local_mask) >= nr_cpu_ids) | |
143 | local_mask = topology_core_cpumask(0); | |
0852d241 JJ |
144 | /* Use the "real" cpu mask of this node as the default */ |
145 | cpumask_and(&info->def_intr.mask, &info->real_cpu_mask, local_mask); | |
957558c9 MH |
146 | |
147 | /* fill in the receive list */ | |
148 | possible = cpumask_weight(&info->def_intr.mask); | |
149 | curr_cpu = cpumask_first(&info->def_intr.mask); | |
150 | if (possible == 1) { | |
151 | /* only one CPU, everyone will use it */ | |
152 | cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); | |
153 | } else { | |
154 | /* | |
155 | * Retain the first CPU in the default list for the control | |
156 | * context. | |
157 | */ | |
158 | curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); | |
159 | /* | |
160 | * Remove the remaining kernel receive queues from | |
161 | * the default list and add them to the receive list. | |
162 | */ | |
163 | for (i = 0; i < dd->n_krcv_queues - 1; i++) { | |
164 | cpumask_clear_cpu(curr_cpu, &info->def_intr.mask); | |
165 | cpumask_set_cpu(curr_cpu, &info->rcv_intr.mask); | |
166 | curr_cpu = cpumask_next(curr_cpu, &info->def_intr.mask); | |
167 | if (curr_cpu >= nr_cpu_ids) | |
168 | break; | |
169 | } | |
170 | } | |
171 | ||
172 | cpumask_copy(&info->proc.mask, cpu_online_mask); | |
957558c9 MH |
173 | } |
174 | ||
175 | void hfi1_dev_affinity_free(struct hfi1_devdata *dd) | |
176 | { | |
177 | kfree(dd->affinity); | |
178 | } | |
179 | ||
180 | int hfi1_get_irq_affinity(struct hfi1_devdata *dd, struct hfi1_msix_entry *msix) | |
181 | { | |
182 | int ret; | |
183 | cpumask_var_t diff; | |
184 | struct cpu_mask_set *set; | |
185 | struct sdma_engine *sde = NULL; | |
186 | struct hfi1_ctxtdata *rcd = NULL; | |
187 | char extra[64]; | |
188 | int cpu = -1; | |
189 | ||
190 | extra[0] = '\0'; | |
191 | cpumask_clear(&msix->mask); | |
192 | ||
193 | ret = zalloc_cpumask_var(&diff, GFP_KERNEL); | |
194 | if (!ret) | |
195 | return -ENOMEM; | |
196 | ||
197 | switch (msix->type) { | |
198 | case IRQ_SDMA: | |
199 | sde = (struct sdma_engine *)msix->arg; | |
200 | scnprintf(extra, 64, "engine %u", sde->this_idx); | |
201 | /* fall through */ | |
202 | case IRQ_GENERAL: | |
203 | set = &dd->affinity->def_intr; | |
204 | break; | |
205 | case IRQ_RCVCTXT: | |
206 | rcd = (struct hfi1_ctxtdata *)msix->arg; | |
207 | if (rcd->ctxt == HFI1_CTRL_CTXT) { | |
208 | set = &dd->affinity->def_intr; | |
209 | cpu = cpumask_first(&set->mask); | |
210 | } else { | |
211 | set = &dd->affinity->rcv_intr; | |
212 | } | |
213 | scnprintf(extra, 64, "ctxt %u", rcd->ctxt); | |
214 | break; | |
215 | default: | |
216 | dd_dev_err(dd, "Invalid IRQ type %d\n", msix->type); | |
217 | return -EINVAL; | |
218 | } | |
219 | ||
220 | /* | |
221 | * The control receive context is placed on a particular CPU, which | |
222 | * is set above. Skip accounting for it. Everything else finds its | |
223 | * CPU here. | |
224 | */ | |
225 | if (cpu == -1) { | |
226 | spin_lock(&dd->affinity->lock); | |
227 | if (cpumask_equal(&set->mask, &set->used)) { | |
228 | /* | |
229 | * We've used up all the CPUs, bump up the generation | |
230 | * and reset the 'used' map | |
231 | */ | |
232 | set->gen++; | |
233 | cpumask_clear(&set->used); | |
234 | } | |
235 | cpumask_andnot(diff, &set->mask, &set->used); | |
236 | cpu = cpumask_first(diff); | |
237 | cpumask_set_cpu(cpu, &set->used); | |
238 | spin_unlock(&dd->affinity->lock); | |
239 | } | |
240 | ||
241 | switch (msix->type) { | |
242 | case IRQ_SDMA: | |
243 | sde->cpu = cpu; | |
244 | break; | |
245 | case IRQ_GENERAL: | |
246 | case IRQ_RCVCTXT: | |
247 | case IRQ_OTHER: | |
248 | break; | |
249 | } | |
250 | ||
251 | cpumask_set_cpu(cpu, &msix->mask); | |
252 | dd_dev_info(dd, "IRQ vector: %u, type %s %s -> cpu: %d\n", | |
253 | msix->msix.vector, irq_type_names[msix->type], | |
254 | extra, cpu); | |
255 | irq_set_affinity_hint(msix->msix.vector, &msix->mask); | |
256 | ||
257 | free_cpumask_var(diff); | |
258 | return 0; | |
259 | } | |
260 | ||
261 | void hfi1_put_irq_affinity(struct hfi1_devdata *dd, | |
262 | struct hfi1_msix_entry *msix) | |
263 | { | |
264 | struct cpu_mask_set *set = NULL; | |
265 | struct hfi1_ctxtdata *rcd; | |
266 | ||
267 | switch (msix->type) { | |
268 | case IRQ_SDMA: | |
269 | case IRQ_GENERAL: | |
270 | set = &dd->affinity->def_intr; | |
271 | break; | |
272 | case IRQ_RCVCTXT: | |
273 | rcd = (struct hfi1_ctxtdata *)msix->arg; | |
274 | /* only do accounting for non control contexts */ | |
275 | if (rcd->ctxt != HFI1_CTRL_CTXT) | |
276 | set = &dd->affinity->rcv_intr; | |
277 | break; | |
278 | default: | |
279 | return; | |
280 | } | |
281 | ||
282 | if (set) { | |
283 | spin_lock(&dd->affinity->lock); | |
284 | cpumask_andnot(&set->used, &set->used, &msix->mask); | |
285 | if (cpumask_empty(&set->used) && set->gen) { | |
286 | set->gen--; | |
287 | cpumask_copy(&set->used, &set->mask); | |
288 | } | |
289 | spin_unlock(&dd->affinity->lock); | |
290 | } | |
291 | ||
292 | irq_set_affinity_hint(msix->msix.vector, NULL); | |
293 | cpumask_clear(&msix->mask); | |
294 | } | |
295 | ||
296 | int hfi1_get_proc_affinity(struct hfi1_devdata *dd, int node) | |
297 | { | |
298 | int cpu = -1, ret; | |
299 | cpumask_var_t diff, mask, intrs; | |
300 | const struct cpumask *node_mask, | |
301 | *proc_mask = tsk_cpus_allowed(current); | |
302 | struct cpu_mask_set *set = &dd->affinity->proc; | |
957558c9 MH |
303 | |
304 | /* | |
305 | * check whether process/context affinity has already | |
306 | * been set | |
307 | */ | |
308 | if (cpumask_weight(proc_mask) == 1) { | |
f242d93a LR |
309 | hfi1_cdbg(PROC, "PID %u %s affinity set to CPU %*pbl", |
310 | current->pid, current->comm, | |
311 | cpumask_pr_args(proc_mask)); | |
957558c9 MH |
312 | /* |
313 | * Mark the pre-set CPU as used. This is atomic so we don't | |
314 | * need the lock | |
315 | */ | |
316 | cpu = cpumask_first(proc_mask); | |
317 | cpumask_set_cpu(cpu, &set->used); | |
318 | goto done; | |
319 | } else if (cpumask_weight(proc_mask) < cpumask_weight(&set->mask)) { | |
f242d93a LR |
320 | hfi1_cdbg(PROC, "PID %u %s affinity set to CPU set(s) %*pbl", |
321 | current->pid, current->comm, | |
322 | cpumask_pr_args(proc_mask)); | |
957558c9 MH |
323 | goto done; |
324 | } | |
325 | ||
326 | /* | |
327 | * The process does not have a preset CPU affinity so find one to | |
328 | * recommend. We prefer CPUs on the same NUMA as the device. | |
329 | */ | |
330 | ||
331 | ret = zalloc_cpumask_var(&diff, GFP_KERNEL); | |
332 | if (!ret) | |
333 | goto done; | |
334 | ret = zalloc_cpumask_var(&mask, GFP_KERNEL); | |
335 | if (!ret) | |
336 | goto free_diff; | |
337 | ret = zalloc_cpumask_var(&intrs, GFP_KERNEL); | |
338 | if (!ret) | |
339 | goto free_mask; | |
340 | ||
341 | spin_lock(&dd->affinity->lock); | |
342 | /* | |
343 | * If we've used all available CPUs, clear the mask and start | |
344 | * overloading. | |
345 | */ | |
346 | if (cpumask_equal(&set->mask, &set->used)) { | |
347 | set->gen++; | |
348 | cpumask_clear(&set->used); | |
349 | } | |
350 | ||
351 | /* CPUs used by interrupt handlers */ | |
352 | cpumask_copy(intrs, (dd->affinity->def_intr.gen ? | |
353 | &dd->affinity->def_intr.mask : | |
354 | &dd->affinity->def_intr.used)); | |
355 | cpumask_or(intrs, intrs, (dd->affinity->rcv_intr.gen ? | |
356 | &dd->affinity->rcv_intr.mask : | |
357 | &dd->affinity->rcv_intr.used)); | |
f242d93a LR |
358 | hfi1_cdbg(PROC, "CPUs used by interrupts: %*pbl", |
359 | cpumask_pr_args(intrs)); | |
957558c9 MH |
360 | |
361 | /* | |
362 | * If we don't have a NUMA node requested, preference is towards | |
363 | * device NUMA node | |
364 | */ | |
365 | if (node == -1) | |
366 | node = dd->node; | |
367 | node_mask = cpumask_of_node(node); | |
f242d93a LR |
368 | hfi1_cdbg(PROC, "device on NUMA %u, CPUs %*pbl", node, |
369 | cpumask_pr_args(node_mask)); | |
957558c9 MH |
370 | |
371 | /* diff will hold all unused cpus */ | |
372 | cpumask_andnot(diff, &set->mask, &set->used); | |
f242d93a | 373 | hfi1_cdbg(PROC, "unused CPUs (all) %*pbl", cpumask_pr_args(diff)); |
957558c9 MH |
374 | |
375 | /* get cpumask of available CPUs on preferred NUMA */ | |
376 | cpumask_and(mask, diff, node_mask); | |
f242d93a | 377 | hfi1_cdbg(PROC, "available cpus on NUMA %*pbl", cpumask_pr_args(mask)); |
957558c9 MH |
378 | |
379 | /* | |
380 | * At first, we don't want to place processes on the same | |
381 | * CPUs as interrupt handlers. | |
382 | */ | |
383 | cpumask_andnot(diff, mask, intrs); | |
384 | if (!cpumask_empty(diff)) | |
385 | cpumask_copy(mask, diff); | |
386 | ||
387 | /* | |
388 | * if we don't have a cpu on the preferred NUMA, get | |
389 | * the list of the remaining available CPUs | |
390 | */ | |
391 | if (cpumask_empty(mask)) { | |
392 | cpumask_andnot(diff, &set->mask, &set->used); | |
393 | cpumask_andnot(mask, diff, node_mask); | |
394 | } | |
f242d93a LR |
395 | hfi1_cdbg(PROC, "possible CPUs for process %*pbl", |
396 | cpumask_pr_args(mask)); | |
957558c9 MH |
397 | |
398 | cpu = cpumask_first(mask); | |
399 | if (cpu >= nr_cpu_ids) /* empty */ | |
400 | cpu = -1; | |
401 | else | |
402 | cpumask_set_cpu(cpu, &set->used); | |
403 | spin_unlock(&dd->affinity->lock); | |
404 | ||
405 | free_cpumask_var(intrs); | |
406 | free_mask: | |
407 | free_cpumask_var(mask); | |
408 | free_diff: | |
409 | free_cpumask_var(diff); | |
410 | done: | |
411 | return cpu; | |
412 | } | |
413 | ||
414 | void hfi1_put_proc_affinity(struct hfi1_devdata *dd, int cpu) | |
415 | { | |
416 | struct cpu_mask_set *set = &dd->affinity->proc; | |
417 | ||
418 | if (cpu < 0) | |
419 | return; | |
420 | spin_lock(&dd->affinity->lock); | |
421 | cpumask_clear_cpu(cpu, &set->used); | |
422 | if (cpumask_empty(&set->used) && set->gen) { | |
423 | set->gen--; | |
424 | cpumask_copy(&set->used, &set->mask); | |
425 | } | |
426 | spin_unlock(&dd->affinity->lock); | |
427 | } | |
428 |