Merge tag 'csky-for-linus-6.0-rc1' of https://github.com/c-sky/csky-linux
[linux-block.git] / arch / s390 / kvm / pci.c
CommitLineData
6438e307
MR
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * s390 kvm PCI passthrough support
4 *
5 * Copyright IBM Corp. 2022
6 *
7 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com>
8 */
9
10#include <linux/kvm_host.h>
11#include <linux/pci.h>
98b1d33d
MR
12#include <asm/pci.h>
13#include <asm/pci_insn.h>
3c5a1b6f 14#include <asm/pci_io.h>
09340b2f 15#include <asm/sclp.h>
6438e307 16#include "pci.h"
09340b2f 17#include "kvm-s390.h"
6438e307 18
98b1d33d
MR
19struct zpci_aift *aift;
20
21static inline int __set_irq_noiib(u16 ctl, u8 isc)
22{
23 union zpci_sic_iib iib = {{0}};
24
25 return zpci_set_irq_ctrl(ctl, isc, &iib);
26}
27
28void kvm_s390_pci_aen_exit(void)
29{
30 unsigned long flags;
31 struct kvm_zdev **gait_kzdev;
32
33 lockdep_assert_held(&aift->aift_lock);
34
35 /*
36 * Contents of the aipb remain registered for the life of the host
37 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv
38 * in case we insert the KVM module again later. Clear the AIFT
39 * information and free anything not registered with underlying
40 * firmware.
41 */
42 spin_lock_irqsave(&aift->gait_lock, flags);
43 gait_kzdev = aift->kzdev;
44 aift->gait = NULL;
45 aift->sbv = NULL;
46 aift->kzdev = NULL;
47 spin_unlock_irqrestore(&aift->gait_lock, flags);
48
49 kfree(gait_kzdev);
50}
51
52static int zpci_setup_aipb(u8 nisc)
53{
54 struct page *page;
55 int size, rc;
56
57 zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL);
58 if (!zpci_aipb)
59 return -ENOMEM;
60
61 aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0);
62 if (!aift->sbv) {
63 rc = -ENOMEM;
64 goto free_aipb;
65 }
66 zpci_aif_sbv = aift->sbv;
67 size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES *
68 sizeof(struct zpci_gaite)));
69 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size);
70 if (!page) {
71 rc = -ENOMEM;
72 goto free_sbv;
73 }
74 aift->gait = (struct zpci_gaite *)page_to_phys(page);
75
76 zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector);
77 zpci_aipb->aipb.gait = virt_to_phys(aift->gait);
78 zpci_aipb->aipb.afi = nisc;
79 zpci_aipb->aipb.faal = ZPCI_NR_DEVICES;
80
81 /* Setup Adapter Event Notification Interpretation */
82 if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) {
83 rc = -EIO;
84 goto free_gait;
85 }
86
87 return 0;
88
89free_gait:
90 free_pages((unsigned long)aift->gait, size);
91free_sbv:
92 airq_iv_release(aift->sbv);
93 zpci_aif_sbv = NULL;
94free_aipb:
95 kfree(zpci_aipb);
96 zpci_aipb = NULL;
97
98 return rc;
99}
100
101static int zpci_reset_aipb(u8 nisc)
102{
103 /*
104 * AEN registration can only happen once per system boot. If
105 * an aipb already exists then AEN was already registered and
106 * we can re-use the aipb contents. This can only happen if
107 * the KVM module was removed and re-inserted. However, we must
108 * ensure that the same forwarding ISC is used as this is assigned
109 * during KVM module load.
110 */
111 if (zpci_aipb->aipb.afi != nisc)
112 return -EINVAL;
113
114 aift->sbv = zpci_aif_sbv;
115 aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait;
116
117 return 0;
118}
119
120int kvm_s390_pci_aen_init(u8 nisc)
121{
122 int rc = 0;
123
124 /* If already enabled for AEN, bail out now */
125 if (aift->gait || aift->sbv)
126 return -EPERM;
127
128 mutex_lock(&aift->aift_lock);
129 aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev),
130 GFP_KERNEL);
131 if (!aift->kzdev) {
132 rc = -ENOMEM;
133 goto unlock;
134 }
135
136 if (!zpci_aipb)
137 rc = zpci_setup_aipb(nisc);
138 else
139 rc = zpci_reset_aipb(nisc);
140 if (rc)
141 goto free_zdev;
142
143 /* Enable floating IRQs */
144 if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) {
145 rc = -EIO;
146 kvm_s390_pci_aen_exit();
147 }
148
149 goto unlock;
150
151free_zdev:
152 kfree(aift->kzdev);
153unlock:
154 mutex_unlock(&aift->aift_lock);
155 return rc;
156}
157
3c5a1b6f
MR
158/* Modify PCI: Register floating adapter interruption forwarding */
159static int kvm_zpci_set_airq(struct zpci_dev *zdev)
160{
161 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT);
162 struct zpci_fib fib = {};
163 u8 status;
164
165 fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc;
166 fib.fmt0.sum = 1; /* enable summary notifications */
167 fib.fmt0.noi = airq_iv_end(zdev->aibv);
168 fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector);
169 fib.fmt0.aibvo = 0;
170 fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
171 fib.fmt0.aisbo = zdev->aisb & 63;
172 fib.gd = zdev->gisa;
173
174 return zpci_mod_fc(req, &fib, &status) ? -EIO : 0;
175}
176
177/* Modify PCI: Unregister floating adapter interruption forwarding */
178static int kvm_zpci_clear_airq(struct zpci_dev *zdev)
179{
180 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT);
181 struct zpci_fib fib = {};
182 u8 cc, status;
183
184 fib.gd = zdev->gisa;
185
186 cc = zpci_mod_fc(req, &fib, &status);
187 if (cc == 3 || (cc == 1 && status == 24))
188 /* Function already gone or IRQs already deregistered. */
189 cc = 0;
190
191 return cc ? -EIO : 0;
192}
193
194static inline void unaccount_mem(unsigned long nr_pages)
195{
196 struct user_struct *user = get_uid(current_user());
197
198 if (user)
199 atomic_long_sub(nr_pages, &user->locked_vm);
200 if (current->mm)
201 atomic64_sub(nr_pages, &current->mm->pinned_vm);
202}
203
204static inline int account_mem(unsigned long nr_pages)
205{
206 struct user_struct *user = get_uid(current_user());
207 unsigned long page_limit, cur_pages, new_pages;
208
209 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
210
211 do {
212 cur_pages = atomic_long_read(&user->locked_vm);
213 new_pages = cur_pages + nr_pages;
214 if (new_pages > page_limit)
215 return -ENOMEM;
216 } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages,
217 new_pages) != cur_pages);
218
219 atomic64_add(nr_pages, &current->mm->pinned_vm);
220
221 return 0;
222}
223
224static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib,
225 bool assist)
226{
227 struct page *pages[1], *aibv_page, *aisb_page = NULL;
228 unsigned int msi_vecs, idx;
229 struct zpci_gaite *gaite;
230 unsigned long hva, bit;
231 struct kvm *kvm;
232 phys_addr_t gaddr;
233 int rc = 0, gisc, npages, pcount = 0;
234
235 /*
236 * Interrupt forwarding is only applicable if the device is already
237 * enabled for interpretation
238 */
239 if (zdev->gisa == 0)
240 return -EINVAL;
241
242 kvm = zdev->kzdev->kvm;
243 msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi);
244
245 /* Get the associated forwarding ISC - if invalid, return the error */
246 gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc);
247 if (gisc < 0)
248 return gisc;
249
250 /* Replace AIBV address */
251 idx = srcu_read_lock(&kvm->srcu);
252 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv));
253 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages);
254 srcu_read_unlock(&kvm->srcu, idx);
255 if (npages < 1) {
256 rc = -EIO;
257 goto out;
258 }
259 aibv_page = pages[0];
260 pcount++;
261 gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK);
262 fib->fmt0.aibv = gaddr;
263
264 /* Pin the guest AISB if one was specified */
265 if (fib->fmt0.sum == 1) {
266 idx = srcu_read_lock(&kvm->srcu);
267 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb));
268 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM,
269 pages);
270 srcu_read_unlock(&kvm->srcu, idx);
271 if (npages < 1) {
272 rc = -EIO;
273 goto unpin1;
274 }
275 aisb_page = pages[0];
276 pcount++;
277 }
278
279 /* Account for pinned pages, roll back on failure */
280 if (account_mem(pcount))
281 goto unpin2;
282
283 /* AISB must be allocated before we can fill in GAITE */
284 mutex_lock(&aift->aift_lock);
285 bit = airq_iv_alloc_bit(aift->sbv);
286 if (bit == -1UL)
287 goto unlock;
288 zdev->aisb = bit; /* store the summary bit number */
289 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA |
290 AIRQ_IV_BITLOCK |
291 AIRQ_IV_GUESTVEC,
292 phys_to_virt(fib->fmt0.aibv));
293
294 spin_lock_irq(&aift->gait_lock);
295 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
296 sizeof(struct zpci_gaite));
297
298 /* If assist not requested, host will get all alerts */
299 if (assist)
300 gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
301 else
302 gaite->gisa = 0;
303
304 gaite->gisc = fib->fmt0.isc;
305 gaite->count++;
306 gaite->aisbo = fib->fmt0.aisbo;
307 gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb &
308 ~PAGE_MASK));
309 aift->kzdev[zdev->aisb] = zdev->kzdev;
310 spin_unlock_irq(&aift->gait_lock);
311
312 /* Update guest FIB for re-issue */
313 fib->fmt0.aisbo = zdev->aisb & 63;
314 fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8);
315 fib->fmt0.isc = gisc;
316
317 /* Save some guest fib values in the host for later use */
318 zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc;
319 zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv;
320 mutex_unlock(&aift->aift_lock);
321
322 /* Issue the clp to setup the irq now */
323 rc = kvm_zpci_set_airq(zdev);
324 return rc;
325
326unlock:
327 mutex_unlock(&aift->aift_lock);
328unpin2:
329 if (fib->fmt0.sum == 1)
330 unpin_user_page(aisb_page);
331unpin1:
332 unpin_user_page(aibv_page);
333out:
334 return rc;
335}
336
337static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force)
338{
339 struct kvm_zdev *kzdev = zdev->kzdev;
340 struct zpci_gaite *gaite;
341 struct page *vpage = NULL, *spage = NULL;
342 int rc, pcount = 0;
343 u8 isc;
344
345 if (zdev->gisa == 0)
346 return -EINVAL;
347
348 mutex_lock(&aift->aift_lock);
349
350 /*
351 * If the clear fails due to an error, leave now unless we know this
352 * device is about to go away (force) -- In that case clear the GAITE
353 * regardless.
354 */
355 rc = kvm_zpci_clear_airq(zdev);
356 if (rc && !force)
357 goto out;
358
359 if (zdev->kzdev->fib.fmt0.aibv == 0)
360 goto out;
361 spin_lock_irq(&aift->gait_lock);
362 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb *
363 sizeof(struct zpci_gaite));
364 isc = gaite->gisc;
365 gaite->count--;
366 if (gaite->count == 0) {
367 /* Release guest AIBV and AISB */
368 vpage = phys_to_page(kzdev->fib.fmt0.aibv);
369 if (gaite->aisb != 0)
370 spage = phys_to_page(gaite->aisb);
371 /* Clear the GAIT entry */
372 gaite->aisb = 0;
373 gaite->gisc = 0;
374 gaite->aisbo = 0;
375 gaite->gisa = 0;
376 aift->kzdev[zdev->aisb] = 0;
377 /* Clear zdev info */
378 airq_iv_free_bit(aift->sbv, zdev->aisb);
379 airq_iv_release(zdev->aibv);
380 zdev->aisb = 0;
381 zdev->aibv = NULL;
382 }
383 spin_unlock_irq(&aift->gait_lock);
384 kvm_s390_gisc_unregister(kzdev->kvm, isc);
385 kzdev->fib.fmt0.isc = 0;
386 kzdev->fib.fmt0.aibv = 0;
387
388 if (vpage) {
389 unpin_user_page(vpage);
390 pcount++;
391 }
392 if (spage) {
393 unpin_user_page(spage);
394 pcount++;
395 }
396 if (pcount > 0)
397 unaccount_mem(pcount);
398out:
399 mutex_unlock(&aift->aift_lock);
400
401 return rc;
402}
403
6438e307
MR
404static int kvm_s390_pci_dev_open(struct zpci_dev *zdev)
405{
406 struct kvm_zdev *kzdev;
407
408 kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL);
409 if (!kzdev)
410 return -ENOMEM;
411
412 kzdev->zdev = zdev;
413 zdev->kzdev = kzdev;
414
415 return 0;
416}
417
418static void kvm_s390_pci_dev_release(struct zpci_dev *zdev)
419{
420 struct kvm_zdev *kzdev;
421
422 kzdev = zdev->kzdev;
423 WARN_ON(kzdev->zdev != zdev);
424 zdev->kzdev = NULL;
425 kfree(kzdev);
426}
98b1d33d 427
09340b2f
MR
428
429/*
430 * Register device with the specified KVM. If interpetation facilities are
431 * available, enable them and let userspace indicate whether or not they will
432 * be used (specify SHM bit to disable).
433 */
434int kvm_s390_pci_register_kvm(struct zpci_dev *zdev, struct kvm *kvm)
435{
436 int rc;
437
438 if (!zdev)
439 return -EINVAL;
440
441 mutex_lock(&zdev->kzdev_lock);
442
443 if (zdev->kzdev || zdev->gisa != 0 || !kvm) {
444 mutex_unlock(&zdev->kzdev_lock);
445 return -EINVAL;
446 }
447
448 kvm_get_kvm(kvm);
449
450 mutex_lock(&kvm->lock);
451
452 rc = kvm_s390_pci_dev_open(zdev);
453 if (rc)
454 goto err;
455
456 /*
457 * If interpretation facilities aren't available, add the device to
458 * the kzdev list but don't enable for interpretation.
459 */
460 if (!kvm_s390_pci_interp_allowed())
461 goto out;
462
463 /*
464 * If this is the first request to use an interpreted device, make the
465 * necessary vcpu changes
466 */
467 if (!kvm->arch.use_zpci_interp)
468 kvm_s390_vcpu_pci_enable_interp(kvm);
469
470 if (zdev_enabled(zdev)) {
471 rc = zpci_disable_device(zdev);
472 if (rc)
473 goto err;
474 }
475
476 /*
477 * Store information about the identity of the kvm guest allowed to
478 * access this device via interpretation to be used by host CLP
479 */
480 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa);
481
482 rc = zpci_enable_device(zdev);
483 if (rc)
484 goto clear_gisa;
485
486 /* Re-register the IOMMU that was already created */
487 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
488 virt_to_phys(zdev->dma_table));
489 if (rc)
490 goto clear_gisa;
491
492out:
493 zdev->kzdev->kvm = kvm;
494
495 spin_lock(&kvm->arch.kzdev_list_lock);
496 list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list);
497 spin_unlock(&kvm->arch.kzdev_list_lock);
498
499 mutex_unlock(&kvm->lock);
500 mutex_unlock(&zdev->kzdev_lock);
501 return 0;
502
503clear_gisa:
504 zdev->gisa = 0;
505err:
506 if (zdev->kzdev)
507 kvm_s390_pci_dev_release(zdev);
508 mutex_unlock(&kvm->lock);
509 mutex_unlock(&zdev->kzdev_lock);
510 kvm_put_kvm(kvm);
511 return rc;
512}
513EXPORT_SYMBOL_GPL(kvm_s390_pci_register_kvm);
514
515void kvm_s390_pci_unregister_kvm(struct zpci_dev *zdev)
516{
517 struct kvm *kvm;
518
519 if (!zdev)
520 return;
521
522 mutex_lock(&zdev->kzdev_lock);
523
524 if (WARN_ON(!zdev->kzdev)) {
525 mutex_unlock(&zdev->kzdev_lock);
526 return;
527 }
528
529 kvm = zdev->kzdev->kvm;
530 mutex_lock(&kvm->lock);
531
532 /*
533 * A 0 gisa means interpretation was never enabled, just remove the
534 * device from the list.
535 */
536 if (zdev->gisa == 0)
537 goto out;
538
539 /* Forwarding must be turned off before interpretation */
540 if (zdev->kzdev->fib.fmt0.aibv != 0)
541 kvm_s390_pci_aif_disable(zdev, true);
542
543 /* Remove the host CLP guest designation */
544 zdev->gisa = 0;
545
546 if (zdev_enabled(zdev)) {
547 if (zpci_disable_device(zdev))
548 goto out;
549 }
550
551 if (zpci_enable_device(zdev))
552 goto out;
553
554 /* Re-register the IOMMU that was already created */
555 zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
556 virt_to_phys(zdev->dma_table));
557
558out:
559 spin_lock(&kvm->arch.kzdev_list_lock);
560 list_del(&zdev->kzdev->entry);
561 spin_unlock(&kvm->arch.kzdev_list_lock);
562 kvm_s390_pci_dev_release(zdev);
563
564 mutex_unlock(&kvm->lock);
565 mutex_unlock(&zdev->kzdev_lock);
566
567 kvm_put_kvm(kvm);
568}
569EXPORT_SYMBOL_GPL(kvm_s390_pci_unregister_kvm);
570
571void kvm_s390_pci_init_list(struct kvm *kvm)
572{
573 spin_lock_init(&kvm->arch.kzdev_list_lock);
574 INIT_LIST_HEAD(&kvm->arch.kzdev_list);
575}
576
577void kvm_s390_pci_clear_list(struct kvm *kvm)
578{
579 /*
580 * This list should already be empty, either via vfio device closures
581 * or kvm fd cleanup.
582 */
583 spin_lock(&kvm->arch.kzdev_list_lock);
584 WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list));
585 spin_unlock(&kvm->arch.kzdev_list_lock);
586}
587
db1c875e
MR
588static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh)
589{
590 struct zpci_dev *zdev = NULL;
591 struct kvm_zdev *kzdev;
592
593 spin_lock(&kvm->arch.kzdev_list_lock);
594 list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) {
595 if (kzdev->zdev->fh == fh) {
596 zdev = kzdev->zdev;
597 break;
598 }
599 }
600 spin_unlock(&kvm->arch.kzdev_list_lock);
601
602 return zdev;
603}
604
605static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev,
606 struct kvm_s390_zpci_op *args)
607{
608 struct zpci_fib fib = {};
609 bool hostflag;
610
611 fib.fmt0.aibv = args->u.reg_aen.ibv;
612 fib.fmt0.isc = args->u.reg_aen.isc;
613 fib.fmt0.noi = args->u.reg_aen.noi;
614 if (args->u.reg_aen.sb != 0) {
615 fib.fmt0.aisb = args->u.reg_aen.sb;
616 fib.fmt0.aisbo = args->u.reg_aen.sbo;
617 fib.fmt0.sum = 1;
618 } else {
619 fib.fmt0.aisb = 0;
620 fib.fmt0.aisbo = 0;
621 fib.fmt0.sum = 0;
622 }
623
624 hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST);
625 return kvm_s390_pci_aif_enable(zdev, &fib, hostflag);
626}
627
628int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args)
629{
630 struct kvm_zdev *kzdev;
631 struct zpci_dev *zdev;
632 int r;
633
634 zdev = get_zdev_from_kvm_by_fh(kvm, args->fh);
635 if (!zdev)
636 return -ENODEV;
637
638 mutex_lock(&zdev->kzdev_lock);
639 mutex_lock(&kvm->lock);
640
641 kzdev = zdev->kzdev;
642 if (!kzdev) {
643 r = -ENODEV;
644 goto out;
645 }
646 if (kzdev->kvm != kvm) {
647 r = -EPERM;
648 goto out;
649 }
650
651 switch (args->op) {
652 case KVM_S390_ZPCIOP_REG_AEN:
653 /* Fail on unknown flags */
654 if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) {
655 r = -EINVAL;
656 break;
657 }
658 r = kvm_s390_pci_zpci_reg_aen(zdev, args);
659 break;
660 case KVM_S390_ZPCIOP_DEREG_AEN:
661 r = kvm_s390_pci_aif_disable(zdev, false);
662 break;
663 default:
664 r = -EINVAL;
665 }
666
667out:
668 mutex_unlock(&kvm->lock);
669 mutex_unlock(&zdev->kzdev_lock);
670 return r;
671}
672
98b1d33d
MR
673int kvm_s390_pci_init(void)
674{
675 aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL);
676 if (!aift)
677 return -ENOMEM;
678
679 spin_lock_init(&aift->gait_lock);
680 mutex_init(&aift->aift_lock);
681
682 return 0;
683}
684
685void kvm_s390_pci_exit(void)
686{
687 mutex_destroy(&aift->aift_lock);
688
689 kfree(aift);
690}