// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2017-2019, IBM Corporation.
 */

#define pr_fmt(fmt) "xive-kvm: " fmt

#include <linux/kernel.h>
#include <linux/kvm_host.h>
#include <linux/err.h>
#include <linux/gfp.h>
#include <linux/spinlock.h>
#include <linux/delay.h>
#include <linux/file.h>
#include <asm/uaccess.h>
#include <asm/kvm_book3s.h>
#include <asm/kvm_ppc.h>
#include <asm/hvcall.h>
#include <asm/xive.h>
#include <asm/xive-regs.h>
#include <asm/debug.h>
#include <asm/debugfs.h>
#include <asm/opal.h>

#include <linux/debugfs.h>
#include <linux/seq_file.h>

#include "book3s_xive.h"

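/*
 * ESB MMIO load helper: an MMIO load from an ESB page returns the
 * previous PQ state of the source in the low byte. The offset is
 * duplicated in the next nibble when the source has the
 * XIVE_IRQ_FLAG_SHIFT_BUG HW erratum flag.
 */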
static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset)
{
	u64 val;

	if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG)
		offset |= offset << 4;

	val = in_be64(xd->eoi_mmio + offset);
	return (u8)val;
}

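/*
 * Disable the EQ at the OPAL level and drop the reference taken on
 * the guest page backing it, if any.
 */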
static void kvmppc_xive_native_cleanup_queue(struct kvm_vcpu *vcpu, int prio)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct xive_q *q = &xc->queues[prio];

	xive_native_disable_queue(xc->vp_id, q, prio);
	if (q->qpage) {
		put_page(virt_to_page(q->qpage));
		q->qpage = NULL;
	}
}

void kvmppc_xive_native_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	int i;

	if (!kvmppc_xive_enabled(vcpu))
		return;

	if (!xc)
		return;

	pr_devel("native_cleanup_vcpu(cpu=%d)\n", xc->server_num);

	/* Ensure no interrupt is still routed to that VP */
	xc->valid = false;
	kvmppc_xive_disable_vcpu_interrupts(vcpu);

	/* Free escalations */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		/* Free the escalation irq */
		if (xc->esc_virq[i]) {
			if (xc->xive->single_escalation)
				xive_cleanup_single_escalation(vcpu, xc,
							xc->esc_virq[i]);
			free_irq(xc->esc_virq[i], vcpu);
			irq_dispose_mapping(xc->esc_virq[i]);
			kfree(xc->esc_virq_names[i]);
			xc->esc_virq[i] = 0;
		}
	}

	/* Disable the VP */
	xive_native_disable_vp(xc->vp_id);

	/* Clear the cam word so guest entry won't try to push context */
	vcpu->arch.xive_cam_word = 0;

	/* Free the queues */
	for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
		kvmppc_xive_native_cleanup_queue(vcpu, i);
	}

	/* Free the VP */
	kfree(xc);

	/* Cleanup the vcpu */
	vcpu->arch.irq_type = KVMPPC_IRQ_DEFAULT;
	vcpu->arch.xive_vcpu = NULL;
}

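/*
 * Connect a vCPU to the device: reserve the VP identifier derived
 * from the server number, enable the VP at the OPAL level and set up
 * the fields used by the assembly push/pull of the thread context.
 */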
int kvmppc_xive_native_connect_vcpu(struct kvm_device *dev,
				    struct kvm_vcpu *vcpu, u32 server_num)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_vcpu *xc = NULL;
	int rc;
	u32 vp_id;

	pr_devel("native_connect_vcpu(server=%d)\n", server_num);

	if (dev->ops != &kvm_xive_native_ops) {
		pr_devel("Wrong ops !\n");
		return -EPERM;
	}
	if (xive->kvm != vcpu->kvm)
		return -EPERM;
	if (vcpu->arch.irq_type != KVMPPC_IRQ_DEFAULT)
		return -EBUSY;
	if (server_num >= (KVM_MAX_VCPUS * vcpu->kvm->arch.emul_smt_mode)) {
		pr_devel("Out of bounds !\n");
		return -EINVAL;
	}

	mutex_lock(&xive->lock);

	vp_id = kvmppc_xive_vp(xive, server_num);
	if (kvmppc_xive_vp_in_use(xive->kvm, vp_id)) {
		pr_devel("Duplicate !\n");
		rc = -EEXIST;
		goto bail;
	}

	xc = kzalloc(sizeof(*xc), GFP_KERNEL);
	if (!xc) {
		rc = -ENOMEM;
		goto bail;
	}

	vcpu->arch.xive_vcpu = xc;
	xc->xive = xive;
	xc->vcpu = vcpu;
	xc->server_num = server_num;

	xc->vp_id = vp_id;
	xc->valid = true;
	vcpu->arch.irq_type = KVMPPC_IRQ_XIVE;

	rc = xive_native_get_vp_info(xc->vp_id, &xc->vp_cam, &xc->vp_chip_id);
	if (rc) {
		pr_err("Failed to get VP info from OPAL: %d\n", rc);
		goto bail;
	}

	/*
	 * Enable the VP first as the single escalation mode will
	 * affect the numbering of the escalation interrupts
	 */
	rc = xive_native_enable_vp(xc->vp_id, xive->single_escalation);
	if (rc) {
		pr_err("Failed to enable VP in OPAL: %d\n", rc);
		goto bail;
	}

	/* Configure VCPU fields for use by assembly push/pull */
	vcpu->arch.xive_saved_state.w01 = cpu_to_be64(0xff000000);
	vcpu->arch.xive_cam_word = cpu_to_be32(xc->vp_cam | TM_QW1W2_VO);

	/* TODO: reset all queues to a clean state ? */
bail:
	mutex_unlock(&xive->lock);
	if (rc)
		kvmppc_xive_native_cleanup_vcpu(vcpu);

	return rc;
}

/*
 * Device passthrough support
 */
static int kvmppc_xive_native_reset_mapped(struct kvm *kvm, unsigned long irq)
{
	struct kvmppc_xive *xive = kvm->arch.xive;
	pgoff_t esb_pgoff = KVM_XIVE_ESB_PAGE_OFFSET + irq * 2;

	if (irq >= KVMPPC_XIVE_NR_IRQS)
		return -EINVAL;

	/*
	 * Clear the ESB pages of the IRQ number being mapped (or
	 * unmapped) into the guest and let the VM fault handler
	 * repopulate with the appropriate ESB pages (device or IC)
	 */
	pr_debug("clearing esb pages for girq 0x%lx\n", irq);
	mutex_lock(&xive->mapping_lock);
	if (xive->mapping)
		unmap_mapping_range(xive->mapping,
				    esb_pgoff << PAGE_SHIFT,
				    2ull << PAGE_SHIFT, 1);
	mutex_unlock(&xive->mapping_lock);
	return 0;
}

static struct kvmppc_xive_ops kvmppc_xive_native_ops = {
	.reset_mapped = kvmppc_xive_native_reset_mapped,
};

static vm_fault_t xive_native_esb_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct kvm_device *dev = vma->vm_file->private_data;
	struct kvmppc_xive *xive = dev->private;
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	u64 page;
	unsigned long irq;
	u64 page_offset;

	/*
	 * Linux/KVM uses a two-page ESB setting, one for trigger and
	 * one for EOI
	 */
	page_offset = vmf->pgoff - vma->vm_pgoff;
	irq = page_offset / 2;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb) {
		pr_devel("%s: source %lx not found !\n", __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	state = &sb->irq_state[src];
	kvmppc_xive_select_irq(state, &hw_num, &xd);

	arch_spin_lock(&sb->lock);

	/*
	 * first/even page is for trigger
	 * second/odd page is for EOI and management.
	 */
	page = page_offset % 2 ? xd->eoi_page : xd->trig_page;
	arch_spin_unlock(&sb->lock);

	if (WARN_ON(!page)) {
		pr_err("%s: accessing invalid ESB page for source %lx !\n",
		       __func__, irq);
		return VM_FAULT_SIGBUS;
	}

	vmf_insert_pfn(vma, vmf->address, page >> PAGE_SHIFT);
	return VM_FAULT_NOPAGE;
}

static const struct vm_operations_struct xive_native_esb_vmops = {
	.fault = xive_native_esb_fault,
};

static vm_fault_t xive_native_tima_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;

	switch (vmf->pgoff - vma->vm_pgoff) {
	case 0: /* HW - forbid access */
	case 1: /* HV - forbid access */
		return VM_FAULT_SIGBUS;
	case 2: /* OS */
		vmf_insert_pfn(vma, vmf->address, xive_tima_os >> PAGE_SHIFT);
		return VM_FAULT_NOPAGE;
	case 3: /* USER - TODO */
	default:
		return VM_FAULT_SIGBUS;
	}
}

static const struct vm_operations_struct xive_native_tima_vmops = {
	.fault = xive_native_tima_fault,
};

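/*
 * The device fd supports two fixed mmap ranges:
 *
 *  - KVM_XIVE_TIMA_PAGE_OFFSET: up to 4 pages of thread interrupt
 *    management area, of which only the OS page is populated.
 *
 *  - KVM_XIVE_ESB_PAGE_OFFSET: two ESB pages (even page for trigger,
 *    odd page for EOI) per interrupt number.
 */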
static int kvmppc_xive_native_mmap(struct kvm_device *dev,
				   struct vm_area_struct *vma)
{
	struct kvmppc_xive *xive = dev->private;

	/* We only allow mappings at fixed offset for now */
	if (vma->vm_pgoff == KVM_XIVE_TIMA_PAGE_OFFSET) {
		if (vma_pages(vma) > 4)
			return -EINVAL;
		vma->vm_ops = &xive_native_tima_vmops;
	} else if (vma->vm_pgoff == KVM_XIVE_ESB_PAGE_OFFSET) {
		if (vma_pages(vma) > KVMPPC_XIVE_NR_IRQS * 2)
			return -EINVAL;
		vma->vm_ops = &xive_native_esb_vmops;
	} else {
		return -EINVAL;
	}

	vma->vm_flags |= VM_IO | VM_PFNMAP;
	vma->vm_page_prot = pgprot_noncached_wc(vma->vm_page_prot);

	/*
	 * Grab the KVM device file address_space to be able to clear
	 * the ESB pages mapping when a device is passed-through into
	 * the guest.
	 */
	xive->mapping = vma->vm_file->f_mapping;
	return 0;
}

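/*
 * KVM_DEV_XIVE_GRP_SOURCE: create or update a source. An IPI is
 * allocated the first time the source is seen and the source starts
 * out masked (PQ bits set to 01).
 */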
static int kvmppc_xive_native_set_source(struct kvmppc_xive *xive, long irq,
					 u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u64 val;
	u16 idx;
	int rc;

	pr_devel("%s irq=0x%lx\n", __func__, irq);

	if (irq < KVMPPC_XIVE_FIRST_IRQ || irq >= KVMPPC_XIVE_NR_IRQS)
		return -E2BIG;

	sb = kvmppc_xive_find_source(xive, irq, &idx);
	if (!sb) {
		pr_debug("No source, creating source block...\n");
		sb = kvmppc_xive_create_src_block(xive, irq);
		if (!sb) {
			pr_err("Failed to create block...\n");
			return -ENOMEM;
		}
	}
	state = &sb->irq_state[idx];

	if (get_user(val, ubufp)) {
		pr_err("fault getting user info !\n");
		return -EFAULT;
	}

	arch_spin_lock(&sb->lock);

	/*
	 * If the source doesn't already have an IPI, allocate
	 * one and get the corresponding data
	 */
	if (!state->ipi_number) {
		state->ipi_number = xive_native_alloc_irq();
		if (state->ipi_number == 0) {
			pr_err("Failed to allocate IRQ !\n");
			rc = -ENXIO;
			goto unlock;
		}
		xive_native_populate_irq_data(state->ipi_number,
					      &state->ipi_data);
		pr_debug("%s allocated hw_irq=0x%x for irq=0x%lx\n", __func__,
			 state->ipi_number, irq);
	}

	/* Restore LSI state */
	if (val & KVM_XIVE_LEVEL_SENSITIVE) {
		state->lsi = true;
		if (val & KVM_XIVE_LEVEL_ASSERTED)
			state->asserted = true;
		pr_devel("  LSI ! Asserted=%d\n", state->asserted);
	}

	/* Mask IRQ to start with */
	state->act_server = 0;
	state->act_priority = MASKED;
	xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
	xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);

	/* Increment the number of valid sources and mark this one valid */
	if (!state->valid)
		xive->src_count++;
	state->valid = true;

	rc = 0;

unlock:
	arch_spin_unlock(&sb->lock);

	return rc;
}

static int kvmppc_xive_native_update_source_config(struct kvmppc_xive *xive,
					struct kvmppc_xive_src_block *sb,
					struct kvmppc_xive_irq_state *state,
					u32 server, u8 priority, bool masked,
					u32 eisn)
{
	struct kvm *kvm = xive->kvm;
	u32 hw_num;
	int rc = 0;

	arch_spin_lock(&sb->lock);

	if (state->act_server == server && state->act_priority == priority &&
	    state->eisn == eisn)
		goto unlock;

	pr_devel("new_act_prio=%d new_act_server=%d mask=%d act_server=%d act_prio=%d\n",
		 priority, server, masked, state->act_server,
		 state->act_priority);

	kvmppc_xive_select_irq(state, &hw_num, NULL);

	if (priority != MASKED && !masked) {
		rc = kvmppc_xive_select_target(kvm, &server, priority);
		if (rc)
			goto unlock;

		state->act_priority = priority;
		state->act_server = server;
		state->eisn = eisn;

		rc = xive_native_configure_irq(hw_num,
					       kvmppc_xive_vp(xive, server),
					       priority, eisn);
	} else {
		state->act_priority = MASKED;
		state->act_server = 0;
		state->eisn = 0;

		rc = xive_native_configure_irq(hw_num, 0, MASKED, 0);
	}

unlock:
	arch_spin_unlock(&sb->lock);
	return rc;
}

static int kvmppc_xive_native_set_source_config(struct kvmppc_xive *xive,
						long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	u64 __user *ubufp = (u64 __user *) addr;
	u16 src;
	u64 kvm_cfg;
	u32 server;
	u8 priority;
	bool masked;
	u32 eisn;

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	if (!state->valid)
		return -EINVAL;

	if (get_user(kvm_cfg, ubufp))
		return -EFAULT;

	pr_devel("%s irq=0x%lx cfg=%016llx\n", __func__, irq, kvm_cfg);

	priority = (kvm_cfg & KVM_XIVE_SOURCE_PRIORITY_MASK) >>
		KVM_XIVE_SOURCE_PRIORITY_SHIFT;
	server = (kvm_cfg & KVM_XIVE_SOURCE_SERVER_MASK) >>
		KVM_XIVE_SOURCE_SERVER_SHIFT;
	masked = (kvm_cfg & KVM_XIVE_SOURCE_MASKED_MASK) >>
		KVM_XIVE_SOURCE_MASKED_SHIFT;
	eisn = (kvm_cfg & KVM_XIVE_SOURCE_EISN_MASK) >>
		KVM_XIVE_SOURCE_EISN_SHIFT;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}

	return kvmppc_xive_native_update_source_config(xive, sb, state, server,
						       priority, masked, eisn);
}

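/*
 * KVM_DEV_XIVE_GRP_SOURCE_SYNC: sync a single source at the interrupt
 * controller level so that in-flight notifications are flushed.
 */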
static int kvmppc_xive_native_sync_source(struct kvmppc_xive *xive,
					  long irq, u64 addr)
{
	struct kvmppc_xive_src_block *sb;
	struct kvmppc_xive_irq_state *state;
	struct xive_irq_data *xd;
	u32 hw_num;
	u16 src;
	int rc = 0;

	pr_devel("%s irq=0x%lx", __func__, irq);

	sb = kvmppc_xive_find_source(xive, irq, &src);
	if (!sb)
		return -ENOENT;

	state = &sb->irq_state[src];

	rc = -EINVAL;

	arch_spin_lock(&sb->lock);

	if (state->valid) {
		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		rc = 0;
	}

	arch_spin_unlock(&sb->lock);
	return rc;
}

static int xive_native_validate_queue_size(u32 qshift)
{
	/*
	 * We only support 64K pages for the moment. This is also
	 * advertised in the DT property "ibm,xive-eq-sizes"
	 */
	switch (qshift) {
	case 0: /* EQ reset */
	case 16:
		return 0;
	case 12:
	case 21:
	case 24:
	default:
		return -EINVAL;
	}
}

static int kvmppc_xive_native_set_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	void __user *ubufp = (void __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	int rc;
	__be32 *qaddr = 0;
	struct page *page;
	struct xive_q *q;
	gfn_t gfn;
	unsigned long page_size;
	int srcu_idx;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	if (copy_from_user(&kvm_eq, ubufp, sizeof(kvm_eq)))
		return -EFAULT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("Trying to restore invalid queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	/* reset queue and disable queueing */
	if (!kvm_eq.qshift) {
		q->guest_qaddr = 0;
		q->guest_qshift = 0;

		rc = xive_native_configure_queue(xc->vp_id, q, priority,
						 NULL, 0, true);
		if (rc) {
			pr_err("Failed to reset queue %d for VCPU %d: %d\n",
			       priority, xc->server_num, rc);
			return rc;
		}

		if (q->qpage) {
			put_page(virt_to_page(q->qpage));
			q->qpage = NULL;
		}

		return 0;
	}

	/*
	 * sPAPR specifies an "Unconditional Notify (n)" flag for the
	 * H_INT_SET_QUEUE_CONFIG hcall which forces notification
	 * without using the coalescing mechanisms provided by the
	 * XIVE END ESBs. This is required on KVM as notification
	 * using the END ESBs is not supported.
	 */
	if (kvm_eq.flags != KVM_XIVE_EQ_ALWAYS_NOTIFY) {
		pr_err("invalid flags %d\n", kvm_eq.flags);
		return -EINVAL;
	}

	rc = xive_native_validate_queue_size(kvm_eq.qshift);
	if (rc) {
		pr_err("invalid queue size %d\n", kvm_eq.qshift);
		return rc;
	}

	if (kvm_eq.qaddr & ((1ull << kvm_eq.qshift) - 1)) {
		pr_err("queue page is not aligned %llx/%llx\n", kvm_eq.qaddr,
		       1ull << kvm_eq.qshift);
		return -EINVAL;
	}

	srcu_idx = srcu_read_lock(&kvm->srcu);
	gfn = gpa_to_gfn(kvm_eq.qaddr);
	page = gfn_to_page(kvm, gfn);
	if (is_error_page(page)) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_err("Couldn't get queue page %llx!\n", kvm_eq.qaddr);
		return -EINVAL;
	}

	page_size = kvm_host_page_size(kvm, gfn);
	if (1ull << kvm_eq.qshift > page_size) {
		srcu_read_unlock(&kvm->srcu, srcu_idx);
		pr_warn("Incompatible host page size %lx!\n", page_size);
		return -EINVAL;
	}

	qaddr = page_to_virt(page) + (kvm_eq.qaddr & ~PAGE_MASK);
	srcu_read_unlock(&kvm->srcu, srcu_idx);

	/*
	 * Back up the queue page guest address so that the EQ page
	 * can be marked dirty for migration.
	 */
	q->guest_qaddr = kvm_eq.qaddr;
	q->guest_qshift = kvm_eq.qshift;

	/*
	 * Unconditional Notification is forced by default at the
	 * OPAL level because the use of END ESBs is not supported by
	 * Linux.
	 */
	rc = xive_native_configure_queue(xc->vp_id, q, priority,
					 (__be32 *) qaddr, kvm_eq.qshift, true);
	if (rc) {
		pr_err("Failed to configure queue %d for VCPU %d: %d\n",
		       priority, xc->server_num, rc);
		put_page(page);
		return rc;
	}

	/*
	 * Only restore the queue state when needed. When doing the
	 * H_INT_SET_SOURCE_CONFIG hcall, it should not.
	 */
	if (kvm_eq.qtoggle != 1 || kvm_eq.qindex != 0) {
		rc = xive_native_set_queue_state(xc->vp_id, priority,
						 kvm_eq.qtoggle,
						 kvm_eq.qindex);
		if (rc)
			goto error;
	}

	rc = kvmppc_xive_attach_escalation(vcpu, priority,
					   xive->single_escalation);
error:
	if (rc)
		kvmppc_xive_native_cleanup_queue(vcpu, priority);
	return rc;
}

static int kvmppc_xive_native_get_queue_config(struct kvmppc_xive *xive,
					       long eq_idx, u64 addr)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	struct kvmppc_xive_vcpu *xc;
	struct xive_q *q;
	void __user *ubufp = (u64 __user *) addr;
	u32 server;
	u8 priority;
	struct kvm_ppc_xive_eq kvm_eq;
	u64 qaddr;
	u64 qshift;
	u64 qeoi_page;
	u32 escalate_irq;
	u64 qflags;
	int rc;

	/*
	 * Demangle priority/server tuple from the EQ identifier
	 */
	priority = (eq_idx & KVM_XIVE_EQ_PRIORITY_MASK) >>
		KVM_XIVE_EQ_PRIORITY_SHIFT;
	server = (eq_idx & KVM_XIVE_EQ_SERVER_MASK) >>
		KVM_XIVE_EQ_SERVER_SHIFT;

	vcpu = kvmppc_xive_find_server(kvm, server);
	if (!vcpu) {
		pr_err("Can't find server %d\n", server);
		return -ENOENT;
	}
	xc = vcpu->arch.xive_vcpu;

	if (priority != xive_prio_from_guest(priority)) {
		pr_err("invalid priority for queue %d for VCPU %d\n",
		       priority, server);
		return -EINVAL;
	}
	q = &xc->queues[priority];

	memset(&kvm_eq, 0, sizeof(kvm_eq));

	if (!q->qpage)
		return 0;

	rc = xive_native_get_queue_info(xc->vp_id, priority, &qaddr, &qshift,
					&qeoi_page, &escalate_irq, &qflags);
	if (rc)
		return rc;

	kvm_eq.flags = 0;
	if (qflags & OPAL_XIVE_EQ_ALWAYS_NOTIFY)
		kvm_eq.flags |= KVM_XIVE_EQ_ALWAYS_NOTIFY;

	kvm_eq.qshift = q->guest_qshift;
	kvm_eq.qaddr = q->guest_qaddr;

	rc = xive_native_get_queue_state(xc->vp_id, priority, &kvm_eq.qtoggle,
					 &kvm_eq.qindex);
	if (rc)
		return rc;

	pr_devel("%s VCPU %d priority %d fl:%x shift:%d addr:%llx g:%d idx:%d\n",
		 __func__, server, priority, kvm_eq.flags,
		 kvm_eq.qshift, kvm_eq.qaddr, kvm_eq.qtoggle, kvm_eq.qindex);

	if (copy_to_user(ubufp, &kvm_eq, sizeof(kvm_eq)))
		return -EFAULT;

	return 0;
}

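/*
 * Mask all valid sources of a block and reset their EAS
 * configuration, for both the IPI and, when an interrupt is
 * passed through, the HW source.
 */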
static void kvmppc_xive_reset_sources(struct kvmppc_xive_src_block *sb)
{
	int i;

	for (i = 0; i < KVMPPC_XICS_IRQ_PER_ICS; i++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[i];

		if (!state->valid)
			continue;

		if (state->act_priority == MASKED)
			continue;

		state->eisn = 0;
		state->act_server = 0;
		state->act_priority = MASKED;
		xive_vm_esb_load(&state->ipi_data, XIVE_ESB_SET_PQ_01);
		xive_native_configure_irq(state->ipi_number, 0, MASKED, 0);
		if (state->pt_number) {
			xive_vm_esb_load(state->pt_data, XIVE_ESB_SET_PQ_01);
			xive_native_configure_irq(state->pt_number,
						  0, MASKED, 0);
		}
	}
}

static int kvmppc_xive_reset(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
		unsigned int prio;

		if (!xc)
			continue;

		kvmppc_xive_disable_vcpu_interrupts(vcpu);

		for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {

			/* Single escalation, no queue 7 */
			if (prio == 7 && xive->single_escalation)
				break;

			if (xc->esc_virq[prio]) {
				free_irq(xc->esc_virq[prio], vcpu);
				irq_dispose_mapping(xc->esc_virq[prio]);
				kfree(xc->esc_virq_names[prio]);
				xc->esc_virq[prio] = 0;
			}

			kvmppc_xive_native_cleanup_queue(vcpu, prio);
		}
	}

	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_reset_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	mutex_unlock(&xive->lock);

	return 0;
}

static void kvmppc_xive_native_sync_sources(struct kvmppc_xive_src_block *sb)
{
	int j;

	for (j = 0; j < KVMPPC_XICS_IRQ_PER_ICS; j++) {
		struct kvmppc_xive_irq_state *state = &sb->irq_state[j];
		struct xive_irq_data *xd;
		u32 hw_num;

		if (!state->valid)
			continue;

		/*
		 * The struct kvmppc_xive_irq_state reflects the state
		 * of the EAS configuration and not the state of the
		 * source. The source is masked by setting the PQ bits
		 * to '-Q', which is what is being done before calling
		 * the KVM_DEV_XIVE_EQ_SYNC control.
		 *
		 * If a source EAS is configured, OPAL syncs the XIVE
		 * IC of the source and the XIVE IC of the previous
		 * target if any.
		 *
		 * So it should be fine ignoring MASKED sources as
		 * they have been synced already.
		 */
		if (state->act_priority == MASKED)
			continue;

		kvmppc_xive_select_irq(state, &hw_num, &xd);
		xive_native_sync_source(hw_num);
		xive_native_sync_queue(hw_num);
	}
}

static int kvmppc_xive_native_vcpu_eq_sync(struct kvm_vcpu *vcpu)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	unsigned int prio;
	int srcu_idx;

	if (!xc)
		return -ENOENT;

	for (prio = 0; prio < KVMPPC_XIVE_Q_COUNT; prio++) {
		struct xive_q *q = &xc->queues[prio];

		if (!q->qpage)
			continue;

		/* Mark EQ page dirty for migration */
		srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
		mark_page_dirty(vcpu->kvm, gpa_to_gfn(q->guest_qaddr));
		srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx);
	}
	return 0;
}

static int kvmppc_xive_native_eq_sync(struct kvmppc_xive *xive)
{
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	pr_devel("%s\n", __func__);

	mutex_lock(&xive->lock);
	for (i = 0; i <= xive->max_sbid; i++) {
		struct kvmppc_xive_src_block *sb = xive->src_blocks[i];

		if (sb) {
			arch_spin_lock(&sb->lock);
			kvmppc_xive_native_sync_sources(sb);
			arch_spin_unlock(&sb->lock);
		}
	}

	kvm_for_each_vcpu(i, vcpu, kvm) {
		kvmppc_xive_native_vcpu_eq_sync(vcpu);
	}
	mutex_unlock(&xive->lock);

	return 0;
}

static int kvmppc_xive_native_set_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
			return kvmppc_xive_reset(xive);
		case KVM_DEV_XIVE_EQ_SYNC:
			return kvmppc_xive_native_eq_sync(xive);
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
		return kvmppc_xive_native_set_source(xive, attr->attr,
						     attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
		return kvmppc_xive_native_set_source_config(xive, attr->attr,
							    attr->addr);
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_set_queue_config(xive, attr->attr,
							   attr->addr);
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		return kvmppc_xive_native_sync_source(xive, attr->attr,
						      attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_get_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	struct kvmppc_xive *xive = dev->private;

	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return kvmppc_xive_native_get_queue_config(xive, attr->attr,
							   attr->addr);
	}
	return -ENXIO;
}

static int kvmppc_xive_native_has_attr(struct kvm_device *dev,
				       struct kvm_device_attr *attr)
{
	switch (attr->group) {
	case KVM_DEV_XIVE_GRP_CTRL:
		switch (attr->attr) {
		case KVM_DEV_XIVE_RESET:
		case KVM_DEV_XIVE_EQ_SYNC:
			return 0;
		}
		break;
	case KVM_DEV_XIVE_GRP_SOURCE:
	case KVM_DEV_XIVE_GRP_SOURCE_CONFIG:
	case KVM_DEV_XIVE_GRP_SOURCE_SYNC:
		if (attr->attr >= KVMPPC_XIVE_FIRST_IRQ &&
		    attr->attr < KVMPPC_XIVE_NR_IRQS)
			return 0;
		break;
	case KVM_DEV_XIVE_GRP_EQ_CONFIG:
		return 0;
	}
	return -ENXIO;
}

/*
 * Called when device fd is closed. kvm->lock is held.
 */
static void kvmppc_xive_native_release(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = dev->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	int i;

	pr_devel("Releasing xive native device\n");

	/*
	 * Clear the KVM device file address_space which is used to
	 * unmap the ESB pages when a device is passed-through.
	 */
	mutex_lock(&xive->mapping_lock);
	xive->mapping = NULL;
	mutex_unlock(&xive->mapping_lock);

	/*
	 * Since this is the device release function, we know that
	 * userspace does not have any open fd or mmap referring to
	 * the device. Therefore there can not be any of the
	 * device attribute set/get, mmap, or page fault functions
	 * being executed concurrently, and similarly, the
	 * connect_vcpu and set/clr_mapped functions also cannot
	 * be being executed.
	 */

	debugfs_remove(xive->dentry);

	/*
	 * We should clean up the vCPU interrupt presenters first.
	 */
	kvm_for_each_vcpu(i, vcpu, kvm) {
		/*
		 * Take vcpu->mutex to ensure that no one_reg get/set ioctl
		 * (i.e. kvmppc_xive_native_[gs]et_vp) can be being done.
		 * Holding the vcpu->mutex also means that the vcpu cannot
		 * be executing the KVM_RUN ioctl, and therefore it cannot
		 * be executing the XIVE push or pull code or accessing
		 * the XIVE MMIO regions.
		 */
		mutex_lock(&vcpu->mutex);
		kvmppc_xive_native_cleanup_vcpu(vcpu);
		mutex_unlock(&vcpu->mutex);
	}

	/*
	 * Now that we have cleared vcpu->arch.xive_vcpu, vcpu->arch.irq_type
	 * and vcpu->arch.xive_esc_[vr]addr on each vcpu, we are safe
	 * against xive code getting called during vcpu execution or
	 * set/get one_reg operations.
	 */
	kvm->arch.xive = NULL;

	for (i = 0; i <= xive->max_sbid; i++) {
		if (xive->src_blocks[i])
			kvmppc_xive_free_sources(xive->src_blocks[i]);
		kfree(xive->src_blocks[i]);
		xive->src_blocks[i] = NULL;
	}

	if (xive->vp_base != XIVE_INVALID_VP)
		xive_native_free_vp_block(xive->vp_base);

	/*
	 * A reference of the kvmppc_xive pointer is now kept under
	 * the xive_devices struct of the machine for reuse. It is
	 * freed when the VM is destroyed for now until we fix all the
	 * execution paths.
	 */

	kfree(dev);
}

/*
 * Create a XIVE device. kvm->lock is held.
 */
static int kvmppc_xive_native_create(struct kvm_device *dev, u32 type)
{
	struct kvmppc_xive *xive;
	struct kvm *kvm = dev->kvm;
	int ret = 0;

	pr_devel("Creating xive native device\n");

	if (kvm->arch.xive)
		return -EEXIST;

	xive = kvmppc_xive_get_device(kvm, type);
	if (!xive)
		return -ENOMEM;

	dev->private = xive;
	xive->dev = dev;
	xive->kvm = kvm;
	kvm->arch.xive = xive;
	mutex_init(&xive->mapping_lock);
	mutex_init(&xive->lock);

	/*
	 * Allocate a bunch of VPs. KVM_MAX_VCPUS is a large value for
	 * a default. Getting the max number of CPUs the VM was
	 * configured with would improve our usage of the XIVE VP space.
	 */
	xive->vp_base = xive_native_alloc_vp_block(KVM_MAX_VCPUS);
	pr_devel("VP_Base=%x\n", xive->vp_base);

	if (xive->vp_base == XIVE_INVALID_VP)
		ret = -ENXIO;

	xive->single_escalation = xive_native_has_single_escalation();
	xive->ops = &kvmppc_xive_native_ops;

	if (ret)
		return ret;

	return 0;
}

/*
 * Interrupt Pending Buffer (IPB) offset
 */
#define TM_IPB_SHIFT 40
#define TM_IPB_MASK  (((u64) 0xFF) << TM_IPB_SHIFT)

int kvmppc_xive_native_get_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	u64 opal_state;
	int rc;

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc)
		return -ENOENT;

	/* Thread context registers. We only care about IPB and CPPR */
	val->xive_timaval[0] = vcpu->arch.xive_saved_state.w01;

	/* Get the VP state from OPAL */
	rc = xive_native_get_vp_state(xc->vp_id, &opal_state);
	if (rc)
		return rc;

	/*
	 * Capture the backup of IPB register in the NVT structure and
	 * merge it in our KVM VP state.
	 */
	val->xive_timaval[0] |= cpu_to_be64(opal_state & TM_IPB_MASK);

	pr_devel("%s NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x opal=%016llx\n",
		 __func__,
		 vcpu->arch.xive_saved_state.nsr,
		 vcpu->arch.xive_saved_state.cppr,
		 vcpu->arch.xive_saved_state.ipb,
		 vcpu->arch.xive_saved_state.pipr,
		 vcpu->arch.xive_saved_state.w01,
		 (u32) vcpu->arch.xive_cam_word, opal_state);

	return 0;
}

int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu, union kvmppc_one_reg *val)
{
	struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
	struct kvmppc_xive *xive = vcpu->kvm->arch.xive;

	pr_devel("%s w01=%016llx vp=%016llx\n", __func__,
		 val->xive_timaval[0], val->xive_timaval[1]);

	if (!kvmppc_xive_enabled(vcpu))
		return -EPERM;

	if (!xc || !xive)
		return -ENOENT;

	/* We can't update the state of a "pushed" VCPU */
	if (WARN_ON(vcpu->arch.xive_pushed))
		return -EBUSY;

	/*
	 * Restore the thread context registers. IPB and CPPR should
	 * be the only ones that matter.
	 */
	vcpu->arch.xive_saved_state.w01 = val->xive_timaval[0];

	/*
	 * There is no need to restore the XIVE internal state (IPB
	 * stored in the NVT) as the IPB register was merged in KVM VP
	 * state when captured.
	 */
	return 0;
}

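/*
 * The XIVE native device is only advertised when the firmware (OPAL)
 * can save and restore the EQ state, which the queue get/set controls
 * above rely on for migration.
 */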
bool kvmppc_xive_native_supported(void)
{
	return xive_native_has_queue_state_support();
}

static int xive_native_debug_show(struct seq_file *m, void *private)
{
	struct kvmppc_xive *xive = m->private;
	struct kvm *kvm = xive->kvm;
	struct kvm_vcpu *vcpu;
	unsigned int i;

	if (!kvm)
		return 0;

	seq_puts(m, "=========\nVCPU state\n=========\n");

	kvm_for_each_vcpu(i, vcpu, kvm) {
		struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;

		if (!xc)
			continue;

		seq_printf(m, "cpu server %#x NSR=%02x CPPR=%02x IPB=%02x PIPR=%02x w01=%016llx w2=%08x\n",
			   xc->server_num,
			   vcpu->arch.xive_saved_state.nsr,
			   vcpu->arch.xive_saved_state.cppr,
			   vcpu->arch.xive_saved_state.ipb,
			   vcpu->arch.xive_saved_state.pipr,
			   vcpu->arch.xive_saved_state.w01,
			   (u32) vcpu->arch.xive_cam_word);

		kvmppc_xive_debug_show_queues(m, vcpu);
	}

	return 0;
}

static int xive_native_debug_open(struct inode *inode, struct file *file)
{
	return single_open(file, xive_native_debug_show, inode->i_private);
}

static const struct file_operations xive_native_debug_fops = {
	.open = xive_native_debug_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

static void xive_native_debugfs_init(struct kvmppc_xive *xive)
{
	char *name;

	name = kasprintf(GFP_KERNEL, "kvm-xive-%p", xive);
	if (!name) {
		pr_err("%s: no memory for name\n", __func__);
		return;
	}

	xive->dentry = debugfs_create_file(name, 0444, powerpc_debugfs_root,
					   xive, &xive_native_debug_fops);

	pr_debug("%s: created %s\n", __func__, name);
	kfree(name);
}

static void kvmppc_xive_native_init(struct kvm_device *dev)
{
	struct kvmppc_xive *xive = (struct kvmppc_xive *)dev->private;

	/* Register some debug interfaces */
	xive_native_debugfs_init(xive);
}

struct kvm_device_ops kvm_xive_native_ops = {
	.name = "kvm-xive-native",
	.create = kvmppc_xive_native_create,
	.init = kvmppc_xive_native_init,
	.release = kvmppc_xive_native_release,
	.set_attr = kvmppc_xive_native_set_attr,
	.get_attr = kvmppc_xive_native_get_attr,
	.has_attr = kvmppc_xive_native_has_attr,
	.mmap = kvmppc_xive_native_mmap,
};

void kvmppc_xive_native_init_module(void)
{
	;
}

void kvmppc_xive_native_exit_module(void)
{
	;
}