// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <linux/sched/mm.h>
#include <linux/mutex.h>
#include <linux/mm_types.h>
#include <linux/mmu_context.h>
#include <asm/copro.h>
#include <asm/pnv-ocxl.h>
#include <misc/ocxl.h>
#include "ocxl_internal.h"
#include "trace.h"


#define SPA_PASID_BITS 15
#define SPA_PASID_MAX ((1 << SPA_PASID_BITS) - 1)
#define SPA_PE_MASK SPA_PASID_MAX
#define SPA_SPA_SIZE_LOG 22 /* Each SPA is 4 MB */

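/*
 * The configuration bits below use IBM (MSB 0) bit numbering, like
 * PPC_BIT(): in "(1ull << (63-n))", n is counted from the most
 * significant bit of the 64-bit word.
 */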
#define SPA_CFG_SF (1ull << (63-0))
#define SPA_CFG_TA (1ull << (63-1))
#define SPA_CFG_HV (1ull << (63-3))
#define SPA_CFG_UV (1ull << (63-4))
#define SPA_CFG_XLAT_hpt (0ull << (63-6)) /* Hashed page table (HPT) mode */
#define SPA_CFG_XLAT_roh (2ull << (63-6)) /* Radix on HPT mode */
#define SPA_CFG_XLAT_ror (3ull << (63-6)) /* Radix on Radix mode */
#define SPA_CFG_PR (1ull << (63-49))
#define SPA_CFG_TC (1ull << (63-54))
#define SPA_CFG_DR (1ull << (63-59))

#define SPA_XSL_TF (1ull << (63-3)) /* Translation fault */
#define SPA_XSL_S (1ull << (63-38)) /* Store operation */

#define SPA_PE_VALID 0x80000000


struct pe_data {
	struct mm_struct *mm;
	/* callback to trigger when a translation fault occurs */
	void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr);
	/* opaque pointer to be passed to the above callback */
	void *xsl_err_data;
	struct rcu_head rcu;
};

struct spa {
	struct ocxl_process_element *spa_mem;
	int spa_order;
	struct mutex spa_lock;
	struct radix_tree_root pe_tree; /* Maps PE handles to pe_data */
	char *irq_name;
	int virq;
	void __iomem *reg_dsisr;
	void __iomem *reg_dar;
	void __iomem *reg_tfc;
	void __iomem *reg_pe_handle;
	/*
	 * The following fields are used by the memory fault
	 * interrupt handler. We can only have one interrupt at a
	 * time. The NPU won't raise another interrupt until the
	 * previous one has been ack'd by writing to the TFC register
	 */
	struct xsl_fault {
		struct work_struct fault_work;
		u64 pe;
		u64 dsisr;
		u64 dar;
		struct pe_data pe_data;
	} xsl_fault;
};

/*
 * An opencapi link can be used by several PCI functions. We have
 * one link per device slot.
 *
 * A linked list of opencapi links should suffice, as there's a
 * limited number of opencapi slots on a system and lookup is only
 * done when the device is probed
 */
struct link {
	struct list_head list;
	struct kref ref;
	int domain;
	int bus;
	int dev;
	atomic_t irq_available;
	struct spa *spa;
	void *platform_data;
};
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);

enum xsl_response {
	CONTINUE,
	ADDRESS_ERROR,
	RESTART,
};


static void read_irq(struct spa *spa, u64 *dsisr, u64 *dar, u64 *pe)
{
	u64 reg;

	*dsisr = in_be64(spa->reg_dsisr);
	*dar = in_be64(spa->reg_dar);
	reg = in_be64(spa->reg_pe_handle);
	*pe = reg & SPA_PE_MASK;
}

static void ack_irq(struct spa *spa, enum xsl_response r)
{
	u64 reg = 0;

	/* continue is not supported */
	if (r == RESTART)
		reg = PPC_BIT(31);
	else if (r == ADDRESS_ERROR)
		reg = PPC_BIT(30);
	else
		WARN(1, "Invalid irq response %d\n", r);

	if (reg) {
		trace_ocxl_fault_ack(spa->spa_mem, spa->xsl_fault.pe,
				spa->xsl_fault.dsisr, spa->xsl_fault.dar, reg);
		out_be64(spa->reg_tfc, reg);
	}
}

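/*
 * Bottom half of the translation fault handler. It runs from a
 * workqueue, in process context, so copro_handle_mm_fault() is free to
 * sleep while faulting the page in on behalf of the AFU.
 */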
static void xsl_fault_handler_bh(struct work_struct *fault_work)
{
	vm_fault_t flt = 0;
	unsigned long access, flags, inv_flags = 0;
	enum xsl_response r;
	struct xsl_fault *fault = container_of(fault_work, struct xsl_fault,
					fault_work);
	struct spa *spa = container_of(fault, struct spa, xsl_fault);

	int rc;

	/*
	 * We must release a reference on mm_users whenever exiting this
	 * function (taken in the memory fault interrupt handler)
	 */
	rc = copro_handle_mm_fault(fault->pe_data.mm, fault->dar, fault->dsisr,
				&flt);
	if (rc) {
		pr_debug("copro_handle_mm_fault failed: %d\n", rc);
		if (fault->pe_data.xsl_err_cb) {
			fault->pe_data.xsl_err_cb(
				fault->pe_data.xsl_err_data,
				fault->dar, fault->dsisr);
		}
		r = ADDRESS_ERROR;
		goto ack;
	}

	if (!radix_enabled()) {
		/*
		 * update_mmu_cache() will not have loaded the hash
		 * since current->trap is not a 0x400 or 0x300, so
		 * just call hash_page_mm() here.
		 */
		access = _PAGE_PRESENT | _PAGE_READ;
		if (fault->dsisr & SPA_XSL_S)
			access |= _PAGE_WRITE;

		if (get_region_id(fault->dar) != USER_REGION_ID)
			access |= _PAGE_PRIVILEGED;

		local_irq_save(flags);
		hash_page_mm(fault->pe_data.mm, fault->dar, access, 0x300,
			inv_flags);
		local_irq_restore(flags);
	}
	r = RESTART;
ack:
	mmput(fault->pe_data.mm);
	ack_irq(spa, r);
}

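/*
 * Top half of the translation fault handler, running in interrupt
 * context: read the fault registers, look up the PE context under RCU,
 * take a reference on the mm and defer the actual fault resolution to
 * the bottom half.
 */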
static irqreturn_t xsl_fault_handler(int irq, void *data)
{
	struct link *link = (struct link *) data;
	struct spa *spa = link->spa;
	u64 dsisr, dar, pe_handle;
	struct pe_data *pe_data;
	struct ocxl_process_element *pe;
	int lpid, pid, tid;
	bool schedule = false;

	read_irq(spa, &dsisr, &dar, &pe_handle);
	trace_ocxl_fault(spa->spa_mem, pe_handle, dsisr, dar, -1);

	WARN_ON(pe_handle > SPA_PE_MASK);
	pe = spa->spa_mem + pe_handle;
	lpid = be32_to_cpu(pe->lpid);
	pid = be32_to_cpu(pe->pid);
	tid = be32_to_cpu(pe->tid);
	/* We could be reading all null values here if the PE is being
	 * removed while an interrupt kicks in. It's not supposed to
	 * happen if the driver notified the AFU to terminate the
	 * PASID, and the AFU waited for pending operations before
	 * acknowledging. But even if it happens, we won't find a
	 * memory context below and fail silently, so it should be ok.
	 */
	if (!(dsisr & SPA_XSL_TF)) {
		WARN(1, "Invalid xsl interrupt fault register %#llx\n", dsisr);
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}

	rcu_read_lock();
	pe_data = radix_tree_lookup(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		/*
		 * Could only happen if the driver didn't notify the
		 * AFU about PASID termination before removing the PE,
		 * or the AFU didn't wait for all memory access to
		 * have completed.
		 *
		 * Either way, we fail early, but we shouldn't log an
		 * error message, as it is a valid (if unexpected)
		 * scenario
		 */
		rcu_read_unlock();
		pr_debug("Unknown mm context for xsl interrupt\n");
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}
	WARN_ON(pe_data->mm->context.id != pid);

	if (mmget_not_zero(pe_data->mm)) {
		spa->xsl_fault.pe = pe_handle;
		spa->xsl_fault.dar = dar;
		spa->xsl_fault.dsisr = dsisr;
		spa->xsl_fault.pe_data = *pe_data;
		schedule = true;
		/* mm_users count released by bottom half */
	}
	rcu_read_unlock();
	if (schedule)
		schedule_work(&spa->xsl_fault.fault_work);
	else
		ack_irq(spa, ADDRESS_ERROR);
	return IRQ_HANDLED;
}

static void unmap_irq_registers(struct spa *spa)
{
	pnv_ocxl_unmap_xsl_regs(spa->reg_dsisr, spa->reg_dar, spa->reg_tfc,
				spa->reg_pe_handle);
}

static int map_irq_registers(struct pci_dev *dev, struct spa *spa)
{
	return pnv_ocxl_map_xsl_regs(dev, &spa->reg_dsisr, &spa->reg_dar,
				&spa->reg_tfc, &spa->reg_pe_handle);
}

static int setup_xsl_irq(struct pci_dev *dev, struct link *link)
{
	struct spa *spa = link->spa;
	int rc;
	int hwirq;

	rc = pnv_ocxl_get_xsl_irq(dev, &hwirq);
	if (rc)
		return rc;

	rc = map_irq_registers(dev, spa);
	if (rc)
		return rc;

	spa->irq_name = kasprintf(GFP_KERNEL, "ocxl-xsl-%x-%x-%x",
				link->domain, link->bus, link->dev);
	if (!spa->irq_name) {
		dev_err(&dev->dev, "Can't allocate name for xsl interrupt\n");
		rc = -ENOMEM;
		goto err_xsl;
	}
	/*
	 * At some point, we'll need to look into allowing a higher
	 * number of interrupts. Could we have an IRQ domain per link?
	 */
	spa->virq = irq_create_mapping(NULL, hwirq);
	if (!spa->virq) {
		dev_err(&dev->dev,
			"irq_create_mapping failed for translation interrupt\n");
		rc = -EINVAL;
		goto err_name;
	}

	dev_dbg(&dev->dev, "hwirq %d mapped to virq %d\n", hwirq, spa->virq);

	rc = request_irq(spa->virq, xsl_fault_handler, 0, spa->irq_name,
			link);
	if (rc) {
		dev_err(&dev->dev,
			"request_irq failed for translation interrupt: %d\n",
			rc);
		rc = -EINVAL;
		goto err_mapping;
	}
	return 0;

err_mapping:
	irq_dispose_mapping(spa->virq);
err_name:
	kfree(spa->irq_name);
err_xsl:
	unmap_irq_registers(spa);
	return rc;
}

static void release_xsl_irq(struct link *link)
{
	struct spa *spa = link->spa;

	if (spa->virq) {
		free_irq(spa->virq, link);
		irq_dispose_mapping(spa->virq);
	}
	kfree(spa->irq_name);
	unmap_irq_registers(spa);
}

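/*
 * Allocate the Shared Process Area for a link: a physically
 * contiguous, zeroed, 2^SPA_SPA_SIZE_LOG-byte (4 MB) block holding the
 * process element entries shared with the NPU.
 */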
static int alloc_spa(struct pci_dev *dev, struct link *link)
{
	struct spa *spa;

	spa = kzalloc(sizeof(struct spa), GFP_KERNEL);
	if (!spa)
		return -ENOMEM;

	mutex_init(&spa->spa_lock);
	INIT_RADIX_TREE(&spa->pe_tree, GFP_KERNEL);
	INIT_WORK(&spa->xsl_fault.fault_work, xsl_fault_handler_bh);

	spa->spa_order = SPA_SPA_SIZE_LOG - PAGE_SHIFT;
	spa->spa_mem = (struct ocxl_process_element *)
		__get_free_pages(GFP_KERNEL | __GFP_ZERO, spa->spa_order);
	if (!spa->spa_mem) {
		dev_err(&dev->dev, "Can't allocate Shared Process Area\n");
		kfree(spa);
		return -ENOMEM;
	}
	pr_debug("Allocated SPA for %x:%x:%x at %p\n", link->domain, link->bus,
		link->dev, spa->spa_mem);

	link->spa = spa;
	return 0;
}

static void free_spa(struct link *link)
{
	struct spa *spa = link->spa;

	pr_debug("Freeing SPA for %x:%x:%x\n", link->domain, link->bus,
		link->dev);

	if (spa && spa->spa_mem) {
		free_pages((unsigned long) spa->spa_mem, spa->spa_order);
		kfree(spa);
		link->spa = NULL;
	}
}

static int alloc_link(struct pci_dev *dev, int PE_mask, struct link **out_link)
{
	struct link *link;
	int rc;

	link = kzalloc(sizeof(struct link), GFP_KERNEL);
	if (!link)
		return -ENOMEM;

	kref_init(&link->ref);
	link->domain = pci_domain_nr(dev->bus);
	link->bus = dev->bus->number;
	link->dev = PCI_SLOT(dev->devfn);
	atomic_set(&link->irq_available, MAX_IRQ_PER_LINK);

	rc = alloc_spa(dev, link);
	if (rc)
		goto err_free;

	rc = setup_xsl_irq(dev, link);
	if (rc)
		goto err_spa;

	/* platform specific hook */
	rc = pnv_ocxl_spa_setup(dev, link->spa->spa_mem, PE_mask,
				&link->platform_data);
	if (rc)
		goto err_xsl_irq;

	*out_link = link;
	return 0;

err_xsl_irq:
	release_xsl_irq(link);
err_spa:
	free_spa(link);
err_free:
	kfree(link);
	return rc;
}

static void free_link(struct link *link)
{
	release_xsl_irq(link);
	free_spa(link);
	kfree(link);
}

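/*
 * Find the link used by the given device slot, creating it on first
 * use, and take a reference on it. The opaque handle returned in
 * *link_handle is what the other ocxl_link_* functions expect.
 */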
int ocxl_link_setup(struct pci_dev *dev, int PE_mask, void **link_handle)
{
	int rc = 0;
	struct link *link;

	mutex_lock(&links_list_lock);
	list_for_each_entry(link, &links_list, list) {
		/* The functions of a device all share the same link */
		if (link->domain == pci_domain_nr(dev->bus) &&
			link->bus == dev->bus->number &&
			link->dev == PCI_SLOT(dev->devfn)) {
			kref_get(&link->ref);
			*link_handle = link;
			goto unlock;
		}
	}
	rc = alloc_link(dev, PE_mask, &link);
	if (rc)
		goto unlock;

	list_add(&link->list, &links_list);
	*link_handle = link;
unlock:
	mutex_unlock(&links_list_lock);
	return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_setup);
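
/*
 * Illustrative lifecycle for a function driver using this API (a
 * minimal sketch under assumed driver context, not taken from an
 * in-tree user; error handling omitted):
 *
 *	void *link_handle;
 *
 *	ocxl_link_setup(dev, pe_mask, &link_handle);
 *	ocxl_link_add_pe(link_handle, pasid, pidr, tidr, amr, mm,
 *			xsl_err_cb, xsl_err_data);
 *	... the AFU issues translated memory accesses for the PASID ...
 *	ocxl_link_remove_pe(link_handle, pasid);
 *	ocxl_link_release(dev, link_handle);
 */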

static void release_xsl(struct kref *ref)
{
	struct link *link = container_of(ref, struct link, ref);

	list_del(&link->list);
	/* call platform code before releasing data */
	pnv_ocxl_spa_release(link->platform_data);
	free_link(link);
}

void ocxl_link_release(struct pci_dev *dev, void *link_handle)
{
	struct link *link = (struct link *) link_handle;

	mutex_lock(&links_list_lock);
	kref_put(&link->ref, release_xsl);
	mutex_unlock(&links_list_lock);
}
EXPORT_SYMBOL_GPL(ocxl_link_release);

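/*
 * Build the config_state word of a process element. The translation
 * mode (hash vs radix) follows the host MMU, and the SF/PR bits depend
 * on whether the PE describes a kernel context (kernel == true) or a
 * user process.
 */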
static u64 calculate_cfg_state(bool kernel)
{
	u64 state;

	state = SPA_CFG_DR;
	if (mfspr(SPRN_LPCR) & LPCR_TC)
		state |= SPA_CFG_TC;
	if (radix_enabled())
		state |= SPA_CFG_XLAT_ror;
	else
		state |= SPA_CFG_XLAT_hpt;
	state |= SPA_CFG_HV;
	if (kernel) {
		if (mfmsr() & MSR_SF)
			state |= SPA_CFG_SF;
	} else {
		state |= SPA_CFG_PR;
		if (!test_tsk_thread_flag(current, TIF_32BIT))
			state |= SPA_CFG_SF;
	}
	return state;
}

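/*
 * Add a process element to the SPA for the given PASID, allowing the
 * AFU to issue memory accesses translated in the context of 'mm'. The
 * optional xsl_err_cb callback is invoked with xsl_err_data when a
 * translation fault for the context cannot be resolved.
 */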
int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
		u64 amr, struct mm_struct *mm,
		void (*xsl_err_cb)(void *data, u64 addr, u64 dsisr),
		void *xsl_err_data)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	int pe_handle, rc = 0;
	struct pe_data *pe_data;

	BUILD_BUG_ON(sizeof(struct ocxl_process_element) != 128);
	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	mutex_lock(&spa->spa_lock);
	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	if (pe->software_state) {
		rc = -EBUSY;
		goto unlock;
	}

	pe_data = kmalloc(sizeof(*pe_data), GFP_KERNEL);
	if (!pe_data) {
		rc = -ENOMEM;
		goto unlock;
	}

	pe_data->mm = mm;
	pe_data->xsl_err_cb = xsl_err_cb;
	pe_data->xsl_err_data = xsl_err_data;

	memset(pe, 0, sizeof(struct ocxl_process_element));
	pe->config_state = cpu_to_be64(calculate_cfg_state(pidr == 0));
	pe->lpid = cpu_to_be32(mfspr(SPRN_LPID));
	pe->pid = cpu_to_be32(pidr);
	pe->tid = cpu_to_be32(tidr);
	pe->amr = cpu_to_be64(amr);
	pe->software_state = cpu_to_be32(SPA_PE_VALID);

	mm_context_add_copro(mm);
	/*
	 * Barrier is to make sure PE is visible in the SPA before it
	 * is used by the device. It also helps with the global TLBI
	 * invalidation
	 */
	mb();
	radix_tree_insert(&spa->pe_tree, pe_handle, pe_data);

	/*
	 * The mm must stay valid for as long as the device uses it. We
	 * lower the count when the context is removed from the SPA.
	 *
	 * We grab mm_count (and not mm_users), as we don't want to
	 * end up in a circular dependency if a process mmaps its
	 * mmio, therefore incrementing the file ref count when
	 * calling mmap(), and forgets to unmap before exiting. In
	 * that scenario, when the kernel handles the death of the
	 * process, the file is not cleaned because unmap was not
	 * called, and the mm wouldn't be freed because we would still
	 * have a reference on mm_users. Incrementing mm_count solves
	 * the problem.
	 */
	mmgrab(mm);
	trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
unlock:
	mutex_unlock(&spa->spa_lock);
	return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_add_pe);

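/*
 * Update the thread id of an existing process element without
 * invalidating the rest of the PE, then clear the NPU context cache
 * entry so a stale copy of the PE cannot be reloaded.
 */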
int ocxl_link_update_pe(void *link_handle, int pasid, __u16 tid)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	int pe_handle, rc;

	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	mutex_lock(&spa->spa_lock);

	pe->tid = cpu_to_be32(tid);

	/*
	 * The barrier makes sure the PE is updated before we clear the
	 * NPU context cache below, so that the old PE cannot be
	 * reloaded erroneously.
	 */
	mb();

	/*
	 * hook to platform code
	 * On powerpc, the entry needs to be cleared from the context
	 * cache of the NPU.
	 */
	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
	WARN_ON(rc);

	mutex_unlock(&spa->spa_lock);
	return rc;
}

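/*
 * Remove the process element for the given PASID: invalidate it in the
 * SPA and in the NPU context cache, and drop the references taken on
 * the mm in ocxl_link_add_pe().
 */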
int ocxl_link_remove_pe(void *link_handle, int pasid)
{
	struct link *link = (struct link *) link_handle;
	struct spa *spa = link->spa;
	struct ocxl_process_element *pe;
	struct pe_data *pe_data;
	int pe_handle, rc;

	if (pasid > SPA_PASID_MAX)
		return -EINVAL;

	/*
	 * About synchronization with our memory fault handler:
	 *
	 * Before removing the PE, the driver is supposed to have
	 * notified the AFU, which should have cleaned up and made
	 * sure the PASID is no longer in use, including pending
	 * interrupts. However, there's no way to be sure...
	 *
	 * We clear the PE and remove the context from our radix
	 * tree. From that point on, any new interrupt for that
	 * context will fail silently, which is ok. As mentioned
	 * above, that's not expected, but it could happen if the
	 * driver or AFU didn't do the right thing.
	 *
	 * There could still be a bottom half running, but we don't
	 * need to wait/flush, as it is managing a reference count on
	 * the mm it reads from the radix tree.
	 */
	pe_handle = pasid & SPA_PE_MASK;
	pe = spa->spa_mem + pe_handle;

	mutex_lock(&spa->spa_lock);

	if (!(be32_to_cpu(pe->software_state) & SPA_PE_VALID)) {
		rc = -EINVAL;
		goto unlock;
	}

	trace_ocxl_context_remove(current->pid, spa->spa_mem, pasid,
				be32_to_cpu(pe->pid), be32_to_cpu(pe->tid));

	memset(pe, 0, sizeof(struct ocxl_process_element));
	/*
	 * The barrier makes sure the PE is removed from the SPA
	 * before we clear the NPU context cache below, so that the
	 * old PE cannot be reloaded erroneously.
	 */
	mb();

	/*
	 * hook to platform code
	 * On powerpc, the entry needs to be cleared from the context
	 * cache of the NPU.
	 */
	rc = pnv_ocxl_spa_remove_pe_from_cache(link->platform_data, pe_handle);
	WARN_ON(rc);

	pe_data = radix_tree_delete(&spa->pe_tree, pe_handle);
	if (!pe_data) {
		WARN(1, "Couldn't find pe data when removing PE\n");
	} else {
		mm_context_remove_copro(pe_data->mm);
		mmdrop(pe_data->mm);
		kfree_rcu(pe_data, rcu);
	}
unlock:
	mutex_unlock(&spa->spa_lock);
	return rc;
}
EXPORT_SYMBOL_GPL(ocxl_link_remove_pe);

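/*
 * Allocate an AFU interrupt on the link. Each link has a fixed budget
 * of MAX_IRQ_PER_LINK interrupts; atomic_dec_if_positive() enforces it,
 * so allocation fails with -ENOSPC once the budget is exhausted.
 */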
int ocxl_link_irq_alloc(void *link_handle, int *hw_irq, u64 *trigger_addr)
{
	struct link *link = (struct link *) link_handle;
	int rc, irq;
	u64 addr;

	if (atomic_dec_if_positive(&link->irq_available) < 0)
		return -ENOSPC;

	rc = pnv_ocxl_alloc_xive_irq(&irq, &addr);
	if (rc) {
		atomic_inc(&link->irq_available);
		return rc;
	}

	*hw_irq = irq;
	*trigger_addr = addr;
	return 0;
}
EXPORT_SYMBOL_GPL(ocxl_link_irq_alloc);

void ocxl_link_free_irq(void *link_handle, int hw_irq)
{
	struct link *link = (struct link *) link_handle;

	pnv_ocxl_free_xive_irq(hw_irq);
	atomic_inc(&link->irq_available);
}
EXPORT_SYMBOL_GPL(ocxl_link_free_irq);