KVM: PPC: Book3S HV: XIVE: Take the srcu read lock when accessing memslots
arch/powerpc/kvm/book3s_xive_native.c (linux-2.6-block.git)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

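/*
 * ESB MMIO load from an interrupt source. The page offset selects the
 * side effect to perform (here XIVE_ESB_SET_PQ_01, used to mask the
 * source) and the returned byte carries the previous PQ bits.
 */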
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

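/*
 * Disable the queue at the OPAL level and release the page backing it.
 */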
static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Free the queues & associated interrupts */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}

		/* Free the queue */
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	if (kvmppc_xive_find_server(vcpu->kvm, server_num)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = kvmppc_xive_vp(xive, server_num);
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the numbering of the escalation interrupts
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    irq * (2ull << PAGE_SHIFT),
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

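/*
 * Fault handler for the ESB pages of the guest interrupt sources:
 * insert the trigger page (even page) or the EOI/management page
 * (odd page) of the source backing the faulting offset.
 */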
static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

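/*
 * Fault handler for the TIMA pages. Only the OS page of the thread
 * interrupt management area is exposed to the guest; the HW, HV and
 * USER pages are not mapped.
 */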
static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

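/*
 * KVM_DEV_XIVE_GRP_SOURCE: create (or restore after migration) a guest
 * interrupt source. The source is backed by an IPI allocated from the
 * XIVE IC and starts off masked.
 */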
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel(" LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n) flag" for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

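	/*
	 * gfn_to_page() and kvm_host_page_size() below look up the
	 * memslots and must therefore be called under the kvm->srcu
	 * read lock.
	 */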
	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

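/*
 * KVM_DEV_XIVE_GRP_EQ_CONFIG (read side): return the EQ configuration
 * and its current state (toggle bit and index) so that userspace can
 * save it for migration.
 */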
static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

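/*
 * Mask all valid sources of a source block and reset their EAS
 * configuration, for both the IPI and the passed-through HW interrupt
 * when one is attached.
 */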
static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

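/*
 * Mark the EQ pages of a vCPU dirty so that they are transferred in a
 * migration sequence. The memslot lookup done by mark_page_dirty()
 * requires the srcu read lock.
 */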
static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore none of the device attribute set/get,
	 * mmap, or page fault functions can be executing concurrently,
	 * and similarly, the connect_vcpu and set/clr_mapped functions
	 * cannot be executing either.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be in progress.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	if (ret)
		kfree(xive);

	return ret;
}

1096
e4945b9d
CLG
1097/*
1098 * Interrupt Pending Buffer (IPB) offset
1099 */
1100#define TM_IPB_SHIFT 40
1101#define TM_IPB_MASK (((u64) 0xFF) << TM_IPB_SHIFT)
1102
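/*
 * Capture the thread interrupt management context of a vCPU for
 * migration: the w01 doubleword saved at VM exit, merged with the IPB
 * backup that OPAL keeps in the NVT structure.
 */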
int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}