arch/x86/kvm/lapic.c
1 // SPDX-License-Identifier: GPL-2.0-only
2
3 /*
4  * Local APIC virtualization
5  *
6  * Copyright (C) 2006 Qumranet, Inc.
7  * Copyright (C) 2007 Novell
8  * Copyright (C) 2007 Intel
9  * Copyright 2009 Red Hat, Inc. and/or its affiliates.
10  *
11  * Authors:
12  *   Dor Laor <dor.laor@qumranet.com>
13  *   Gregory Haskins <ghaskins@novell.com>
14  *   Yaozu (Eddie) Dong <eddie.dong@intel.com>
15  *
16  * Based on Xen 3.1 code, Copyright (c) 2004, Intel Corporation.
17  */
18
19 #include <linux/kvm_host.h>
20 #include <linux/kvm.h>
21 #include <linux/mm.h>
22 #include <linux/highmem.h>
23 #include <linux/smp.h>
24 #include <linux/hrtimer.h>
25 #include <linux/io.h>
26 #include <linux/export.h>
27 #include <linux/math64.h>
28 #include <linux/slab.h>
29 #include <asm/processor.h>
30 #include <asm/msr.h>
31 #include <asm/page.h>
32 #include <asm/current.h>
33 #include <asm/apicdef.h>
34 #include <asm/delay.h>
35 #include <linux/atomic.h>
36 #include <linux/jump_label.h>
37 #include "kvm_cache_regs.h"
38 #include "irq.h"
39 #include "ioapic.h"
40 #include "trace.h"
41 #include "x86.h"
42 #include "cpuid.h"
43 #include "hyperv.h"
44
45 #ifndef CONFIG_X86_64
46 #define mod_64(x, y) ((x) - (y) * div64_u64(x, y))
47 #else
48 #define mod_64(x, y) ((x) % (y))
49 #endif
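
/*
 * On 32-bit builds the compiler cannot emit a native 64-bit modulo (that
 * would need a libgcc helper), so mod_64() is open-coded in terms of
 * div64_u64(); 64-bit builds can simply use the '%' operator.
 */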
50
51 #define PRId64 "d"
52 #define PRIx64 "llx"
53 #define PRIu64 "u"
54 #define PRIo64 "o"
55
 56 /* 0x14 is the APIC version on Xeon and Pentium 4 (SDM section 8.4.8) */
57 #define APIC_VERSION                    (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
58 #define LAPIC_MMIO_LENGTH               (1 << 12)
 59 /* The following defines are not in apicdef.h */
60 #define MAX_APIC_VECTOR                 256
61 #define APIC_VECTORS_PER_REG            32
62
63 static bool lapic_timer_advance_dynamic __read_mostly;
64 #define LAPIC_TIMER_ADVANCE_ADJUST_MIN  100     /* clock cycles */
65 #define LAPIC_TIMER_ADVANCE_ADJUST_MAX  10000   /* clock cycles */
66 #define LAPIC_TIMER_ADVANCE_NS_INIT     1000
 67 #define LAPIC_TIMER_ADVANCE_NS_MAX      5000
68 /* step-by-step approximation to mitigate fluctuation */
69 #define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
70
71 static inline void __kvm_lapic_set_reg(char *regs, int reg_off, u32 val)
72 {
73         *((u32 *) (regs + reg_off)) = val;
74 }
75
76 static inline void kvm_lapic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
77 {
78         __kvm_lapic_set_reg(apic->regs, reg_off, val);
79 }
80
81 static __always_inline u64 __kvm_lapic_get_reg64(char *regs, int reg)
82 {
83         BUILD_BUG_ON(reg != APIC_ICR);
84         return *((u64 *) (regs + reg));
85 }
86
87 static __always_inline u64 kvm_lapic_get_reg64(struct kvm_lapic *apic, int reg)
88 {
89         return __kvm_lapic_get_reg64(apic->regs, reg);
90 }
91
92 static __always_inline void __kvm_lapic_set_reg64(char *regs, int reg, u64 val)
93 {
94         BUILD_BUG_ON(reg != APIC_ICR);
95         *((u64 *) (regs + reg)) = val;
96 }
97
98 static __always_inline void kvm_lapic_set_reg64(struct kvm_lapic *apic,
99                                                 int reg, u64 val)
100 {
101         __kvm_lapic_set_reg64(apic->regs, reg, val);
102 }
103
104 static inline int apic_test_vector(int vec, void *bitmap)
105 {
106         return test_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
107 }
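
/*
 * IRR, ISR and TMR are each banks of eight 32-bit registers spaced 0x10
 * bytes apart.  VEC_POS() and REG_POS() (from lapic.h) split a vector into
 * a bit position and a register offset, roughly:
 *
 *   REG_POS(v) = (v >> 5) << 4     which 16-byte register holds the vector
 *   VEC_POS(v) = v & 31            bit within that register
 *
 * e.g. vector 0x61 (97) is bit 1 of the register at offset 0x30.
 */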
108
109 bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
110 {
111         struct kvm_lapic *apic = vcpu->arch.apic;
112
113         return apic_test_vector(vector, apic->regs + APIC_ISR) ||
114                 apic_test_vector(vector, apic->regs + APIC_IRR);
115 }
116
117 static inline int __apic_test_and_set_vector(int vec, void *bitmap)
118 {
119         return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
120 }
121
122 static inline int __apic_test_and_clear_vector(int vec, void *bitmap)
123 {
124         return __test_and_clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
125 }
126
127 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_hw_disabled, HZ);
128 __read_mostly DEFINE_STATIC_KEY_DEFERRED_FALSE(apic_sw_disabled, HZ);
129
130 static inline int apic_enabled(struct kvm_lapic *apic)
131 {
132         return kvm_apic_sw_enabled(apic) && kvm_apic_hw_enabled(apic);
133 }
134
135 #define LVT_MASK        \
136         (APIC_LVT_MASKED | APIC_SEND_PENDING | APIC_VECTOR_MASK)
137
138 #define LINT_MASK       \
139         (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \
140          APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER)
141
142 static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
143 {
144         return apic->vcpu->vcpu_id;
145 }
146
147 static bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
148 {
149         return pi_inject_timer && kvm_vcpu_apicv_active(vcpu) &&
150                 (kvm_mwait_in_guest(vcpu->kvm) || kvm_hlt_in_guest(vcpu->kvm));
151 }
152
153 bool kvm_can_use_hv_timer(struct kvm_vcpu *vcpu)
154 {
155         return kvm_x86_ops.set_hv_timer
156                && !(kvm_mwait_in_guest(vcpu->kvm) ||
157                     kvm_can_post_timer_interrupt(vcpu));
158 }
159 EXPORT_SYMBOL_GPL(kvm_can_use_hv_timer);
160
161 static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
162 {
163         return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
164 }
165
166 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
167                 u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
168         switch (map->mode) {
169         case KVM_APIC_MODE_X2APIC: {
170                 u32 offset = (dest_id >> 16) * 16;
171                 u32 max_apic_id = map->max_apic_id;
172
173                 if (offset <= max_apic_id) {
174                         u8 cluster_size = min(max_apic_id - offset + 1, 16U);
175
176                         offset = array_index_nospec(offset, map->max_apic_id + 1);
177                         *cluster = &map->phys_map[offset];
178                         *mask = dest_id & (0xffff >> (16 - cluster_size));
179                 } else {
180                         *mask = 0;
181                 }
182
183                 return true;
184                 }
185         case KVM_APIC_MODE_XAPIC_FLAT:
186                 *cluster = map->xapic_flat_map;
187                 *mask = dest_id & 0xff;
188                 return true;
189         case KVM_APIC_MODE_XAPIC_CLUSTER:
190                 *cluster = map->xapic_cluster_map[(dest_id >> 4) & 0xf];
191                 *mask = dest_id & 0xf;
192                 return true;
193         default:
194                 /* Not optimized. */
195                 return false;
196         }
197 }
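
/*
 * Worked example for the x2APIC case above: dest_id 0x00010005 selects
 * logical cluster 1, so the lookup starts at phys_map[16]; the low bits
 * (0x5) then pick offsets 0 and 2 within that cluster, i.e. the vCPUs
 * with x2APIC IDs 16 and 18.
 */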
198
199 static void kvm_apic_map_free(struct rcu_head *rcu)
200 {
201         struct kvm_apic_map *map = container_of(rcu, struct kvm_apic_map, rcu);
202
203         kvfree(map);
204 }
205
206 /*
207  * CLEAN -> DIRTY and UPDATE_IN_PROGRESS -> DIRTY changes happen without a lock.
208  *
209  * DIRTY -> UPDATE_IN_PROGRESS and UPDATE_IN_PROGRESS -> CLEAN happen with
210  * apic_map_lock_held.
211  */
212 enum {
213         CLEAN,
214         UPDATE_IN_PROGRESS,
215         DIRTY
216 };
217
218 void kvm_recalculate_apic_map(struct kvm *kvm)
219 {
220         struct kvm_apic_map *new, *old = NULL;
221         struct kvm_vcpu *vcpu;
222         unsigned long i;
223         u32 max_id = 255; /* enough space for any xAPIC ID */
224
225         /* Read kvm->arch.apic_map_dirty before kvm->arch.apic_map.  */
226         if (atomic_read_acquire(&kvm->arch.apic_map_dirty) == CLEAN)
227                 return;
228
229         WARN_ONCE(!irqchip_in_kernel(kvm),
230                   "Dirty APIC map without an in-kernel local APIC");
231
232         mutex_lock(&kvm->arch.apic_map_lock);
233         /*
234          * Read kvm->arch.apic_map_dirty before kvm->arch.apic_map
235          * (if clean) or the APIC registers (if dirty).
236          */
237         if (atomic_cmpxchg_acquire(&kvm->arch.apic_map_dirty,
238                                    DIRTY, UPDATE_IN_PROGRESS) == CLEAN) {
239                 /* Someone else has updated the map. */
240                 mutex_unlock(&kvm->arch.apic_map_lock);
241                 return;
242         }
243
244         kvm_for_each_vcpu(i, vcpu, kvm)
245                 if (kvm_apic_present(vcpu))
246                         max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic));
247
248         new = kvzalloc(sizeof(struct kvm_apic_map) +
249                            sizeof(struct kvm_lapic *) * ((u64)max_id + 1),
250                            GFP_KERNEL_ACCOUNT);
251
252         if (!new)
253                 goto out;
254
255         new->max_apic_id = max_id;
256
257         kvm_for_each_vcpu(i, vcpu, kvm) {
258                 struct kvm_lapic *apic = vcpu->arch.apic;
259                 struct kvm_lapic **cluster;
260                 u16 mask;
261                 u32 ldr;
262                 u8 xapic_id;
263                 u32 x2apic_id;
264
265                 if (!kvm_apic_present(vcpu))
266                         continue;
267
268                 xapic_id = kvm_xapic_id(apic);
269                 x2apic_id = kvm_x2apic_id(apic);
270
271                 /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */
272                 if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) &&
273                                 x2apic_id <= new->max_apic_id)
274                         new->phys_map[x2apic_id] = apic;
275                 /*
276                  * ... the xAPIC ID of VCPUs with APIC ID > 0xff wraps around, so
277                  * prevent such VCPUs from masking VCPUs with APIC ID <= 0xff.
278                  */
279                 if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id])
280                         new->phys_map[xapic_id] = apic;
281
282                 if (!kvm_apic_sw_enabled(apic))
283                         continue;
284
285                 ldr = kvm_lapic_get_reg(apic, APIC_LDR);
286
287                 if (apic_x2apic_mode(apic)) {
288                         new->mode |= KVM_APIC_MODE_X2APIC;
289                 } else if (ldr) {
290                         ldr = GET_APIC_LOGICAL_ID(ldr);
291                         if (kvm_lapic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT)
292                                 new->mode |= KVM_APIC_MODE_XAPIC_FLAT;
293                         else
294                                 new->mode |= KVM_APIC_MODE_XAPIC_CLUSTER;
295                 }
296
297                 if (!kvm_apic_map_get_logical_dest(new, ldr, &cluster, &mask))
298                         continue;
299
300                 if (mask)
301                         cluster[ffs(mask) - 1] = apic;
302         }
303 out:
304         old = rcu_dereference_protected(kvm->arch.apic_map,
305                         lockdep_is_held(&kvm->arch.apic_map_lock));
306         rcu_assign_pointer(kvm->arch.apic_map, new);
307         /*
308          * Write kvm->arch.apic_map before clearing apic->apic_map_dirty.
309          * If another update has come in, leave it DIRTY.
310          */
311         atomic_cmpxchg_release(&kvm->arch.apic_map_dirty,
312                                UPDATE_IN_PROGRESS, CLEAN);
313         mutex_unlock(&kvm->arch.apic_map_lock);
314
315         if (old)
316                 call_rcu(&old->rcu, kvm_apic_map_free);
317
318         kvm_make_scan_ioapic_request(kvm);
319 }
320
321 static inline void apic_set_spiv(struct kvm_lapic *apic, u32 val)
322 {
323         bool enabled = val & APIC_SPIV_APIC_ENABLED;
324
325         kvm_lapic_set_reg(apic, APIC_SPIV, val);
326
327         if (enabled != apic->sw_enabled) {
328                 apic->sw_enabled = enabled;
329                 if (enabled)
330                         static_branch_slow_dec_deferred(&apic_sw_disabled);
331                 else
332                         static_branch_inc(&apic_sw_disabled.key);
333
334                 atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
335         }
336
337         /* Check if there are APF page ready requests pending */
338         if (enabled)
339                 kvm_make_request(KVM_REQ_APF_READY, apic->vcpu);
340 }
341
342 static inline void kvm_apic_set_xapic_id(struct kvm_lapic *apic, u8 id)
343 {
344         kvm_lapic_set_reg(apic, APIC_ID, id << 24);
345         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
346 }
347
348 static inline void kvm_apic_set_ldr(struct kvm_lapic *apic, u32 id)
349 {
350         kvm_lapic_set_reg(apic, APIC_LDR, id);
351         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
352 }
353
354 static inline void kvm_apic_set_dfr(struct kvm_lapic *apic, u32 val)
355 {
356         kvm_lapic_set_reg(apic, APIC_DFR, val);
357         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
358 }
359
360 static inline u32 kvm_apic_calc_x2apic_ldr(u32 id)
361 {
362         return ((id >> 4) << 16) | (1 << (id & 0xf));
363 }
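
/*
 * In x2APIC mode the logical ID is derived from the APIC ID: the upper 16
 * bits hold the cluster (id / 16) and the lower 16 bits are a one-hot
 * position within the cluster.  e.g. id 0x23 (35) maps to cluster 2, bit 3,
 * i.e. an LDR of 0x00020008.
 */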
364
365 static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id)
366 {
367         u32 ldr = kvm_apic_calc_x2apic_ldr(id);
368
369         WARN_ON_ONCE(id != apic->vcpu->vcpu_id);
370
371         kvm_lapic_set_reg(apic, APIC_ID, id);
372         kvm_lapic_set_reg(apic, APIC_LDR, ldr);
373         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
374 }
375
376 static inline int apic_lvt_enabled(struct kvm_lapic *apic, int lvt_type)
377 {
378         return !(kvm_lapic_get_reg(apic, lvt_type) & APIC_LVT_MASKED);
379 }
380
381 static inline int apic_lvtt_oneshot(struct kvm_lapic *apic)
382 {
383         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_ONESHOT;
384 }
385
386 static inline int apic_lvtt_period(struct kvm_lapic *apic)
387 {
388         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_PERIODIC;
389 }
390
391 static inline int apic_lvtt_tscdeadline(struct kvm_lapic *apic)
392 {
393         return apic->lapic_timer.timer_mode == APIC_LVT_TIMER_TSCDEADLINE;
394 }
395
396 static inline int apic_lvt_nmi_mode(u32 lvt_val)
397 {
398         return (lvt_val & (APIC_MODE_MASK | APIC_LVT_MASKED)) == APIC_DM_NMI;
399 }
400
401 void kvm_apic_set_version(struct kvm_vcpu *vcpu)
402 {
403         struct kvm_lapic *apic = vcpu->arch.apic;
404         u32 v = APIC_VERSION;
405
406         if (!lapic_in_kernel(vcpu))
407                 return;
408
409         /*
410          * KVM's in-kernel IOAPIC emulates the 82093AA datasheet, which has
411          * no EOI register.  Some buggy OSes (e.g. Windows with the Hyper-V
412          * role) disable EOI broadcast in the LAPIC without first checking
413          * the IOAPIC version, so level-triggered interrupts would never be
414          * EOIed in the IOAPIC.
415          */
416         if (guest_cpuid_has(vcpu, X86_FEATURE_X2APIC) &&
417             !ioapic_in_kernel(vcpu->kvm))
418                 v |= APIC_LVR_DIRECTED_EOI;
419         kvm_lapic_set_reg(apic, APIC_LVR, v);
420 }
421
422 static const unsigned int apic_lvt_mask[KVM_APIC_LVT_NUM] = {
423         LVT_MASK,       /* part LVTT mask, timer mode mask added at runtime */
424         LVT_MASK | APIC_MODE_MASK,      /* LVTTHMR */
425         LVT_MASK | APIC_MODE_MASK,      /* LVTPC */
426         LINT_MASK, LINT_MASK,   /* LVT0-1 */
427         LVT_MASK                /* LVTERR */
428 };
429
430 static int find_highest_vector(void *bitmap)
431 {
432         int vec;
433         u32 *reg;
434
435         for (vec = MAX_APIC_VECTOR - APIC_VECTORS_PER_REG;
436              vec >= 0; vec -= APIC_VECTORS_PER_REG) {
437                 reg = bitmap + REG_POS(vec);
438                 if (*reg)
439                         return __fls(*reg) + vec;
440         }
441
442         return -1;
443 }
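
/*
 * The scan above walks the eight 32-bit vector registers from the highest
 * vectors downwards and stops at the first non-zero word.  e.g. with
 * vectors 0x22 and 0x31 pending, the word covering vectors 32-63 has bits
 * 2 and 17 set, and __fls(reg) + 32 yields 0x31.
 */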
444
445 static u8 count_vectors(void *bitmap)
446 {
447         int vec;
448         u32 *reg;
449         u8 count = 0;
450
451         for (vec = 0; vec < MAX_APIC_VECTOR; vec += APIC_VECTORS_PER_REG) {
452                 reg = bitmap + REG_POS(vec);
453                 count += hweight32(*reg);
454         }
455
456         return count;
457 }
458
459 bool __kvm_apic_update_irr(u32 *pir, void *regs, int *max_irr)
460 {
461         u32 i, vec;
462         u32 pir_val, irr_val, prev_irr_val;
463         int max_updated_irr;
464
465         max_updated_irr = -1;
466         *max_irr = -1;
467
468         for (i = vec = 0; i <= 7; i++, vec += 32) {
469                 pir_val = READ_ONCE(pir[i]);
470                 irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10));
471                 if (pir_val) {
472                         prev_irr_val = irr_val;
473                         irr_val |= xchg(&pir[i], 0);
474                         *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val;
475                         if (prev_irr_val != irr_val) {
476                                 max_updated_irr =
477                                         __fls(irr_val ^ prev_irr_val) + vec;
478                         }
479                 }
480                 if (irr_val)
481                         *max_irr = __fls(irr_val) + vec;
482         }
483
484         return ((max_updated_irr != -1) &&
485                 (max_updated_irr == *max_irr));
486 }
487 EXPORT_SYMBOL_GPL(__kvm_apic_update_irr);
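
/*
 * Roughly: each non-zero PIR word is atomically drained (xchg with 0) into
 * the matching IRR word.  The return value is true only when the highest
 * newly-posted vector is also the overall highest vector in the IRR, i.e.
 * a freshly posted interrupt became the new top pending vector.
 */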
488
489 bool kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir, int *max_irr)
490 {
491         struct kvm_lapic *apic = vcpu->arch.apic;
492
493         return __kvm_apic_update_irr(pir, apic->regs, max_irr);
494 }
495 EXPORT_SYMBOL_GPL(kvm_apic_update_irr);
496
497 static inline int apic_search_irr(struct kvm_lapic *apic)
498 {
499         return find_highest_vector(apic->regs + APIC_IRR);
500 }
501
502 static inline int apic_find_highest_irr(struct kvm_lapic *apic)
503 {
504         int result;
505
506         /*
507          * Note that irr_pending is just a hint. It will always be
508          * true with virtual interrupt delivery enabled.
509          */
510         if (!apic->irr_pending)
511                 return -1;
512
513         result = apic_search_irr(apic);
514         ASSERT(result == -1 || result >= 16);
515
516         return result;
517 }
518
519 static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
520 {
521         struct kvm_vcpu *vcpu;
522
523         vcpu = apic->vcpu;
524
525         if (unlikely(vcpu->arch.apicv_active)) {
526                 /* need to update RVI */
527                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
528                 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
529         } else {
530                 apic->irr_pending = false;
531                 kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
532                 if (apic_search_irr(apic) != -1)
533                         apic->irr_pending = true;
534         }
535 }
536
537 void kvm_apic_clear_irr(struct kvm_vcpu *vcpu, int vec)
538 {
539         apic_clear_irr(vec, vcpu->arch.apic);
540 }
541 EXPORT_SYMBOL_GPL(kvm_apic_clear_irr);
542
543 static inline void apic_set_isr(int vec, struct kvm_lapic *apic)
544 {
545         struct kvm_vcpu *vcpu;
546
547         if (__apic_test_and_set_vector(vec, apic->regs + APIC_ISR))
548                 return;
549
550         vcpu = apic->vcpu;
551
552         /*
553          * With APIC virtualization enabled, all caching is disabled
554          * because the processor can modify ISR under the hood.  Instead
555          * just set SVI.
556          */
557         if (unlikely(vcpu->arch.apicv_active))
558                 static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, vec);
559         else {
560                 ++apic->isr_count;
561                 BUG_ON(apic->isr_count > MAX_APIC_VECTOR);
562                 /*
563                  * An ISR (in-service register) bit is set when an interrupt is
564                  * injected.  Since the highest-priority vector is the one injected,
565                  * the most recently set bit also matches the highest bit in the ISR.
566                  */
567                 apic->highest_isr_cache = vec;
568         }
569 }
570
571 static inline int apic_find_highest_isr(struct kvm_lapic *apic)
572 {
573         int result;
574
575         /*
576          * Note that isr_count is always 1, and highest_isr_cache
577          * is always -1, with APIC virtualization enabled.
578          */
579         if (!apic->isr_count)
580                 return -1;
581         if (likely(apic->highest_isr_cache != -1))
582                 return apic->highest_isr_cache;
583
584         result = find_highest_vector(apic->regs + APIC_ISR);
585         ASSERT(result == -1 || result >= 16);
586
587         return result;
588 }
589
590 static inline void apic_clear_isr(int vec, struct kvm_lapic *apic)
591 {
592         struct kvm_vcpu *vcpu;
593         if (!__apic_test_and_clear_vector(vec, apic->regs + APIC_ISR))
594                 return;
595
596         vcpu = apic->vcpu;
597
598         /*
599          * We do get here with APIC virtualization enabled if the guest
600          * uses the Hyper-V APIC enlightenment.  In this case we may need
601          * to trigger a new interrupt delivery by writing the SVI field;
602          * on the other hand isr_count and highest_isr_cache are unused
603          * and must be left alone.
604          */
605         if (unlikely(vcpu->arch.apicv_active))
606                 static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
607         else {
608                 --apic->isr_count;
609                 BUG_ON(apic->isr_count < 0);
610                 apic->highest_isr_cache = -1;
611         }
612 }
613
614 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
615 {
616         /* This may race with setting of irr in __apic_accept_irq() and
617          * the value returned may be wrong, but kvm_vcpu_kick() in __apic_accept_irq
618          * will cause vmexit immediately and the value will be recalculated
619          * on the next vmentry.
620          */
621         return apic_find_highest_irr(vcpu->arch.apic);
622 }
623 EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr);
624
625 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
626                              int vector, int level, int trig_mode,
627                              struct dest_map *dest_map);
628
629 int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq,
630                      struct dest_map *dest_map)
631 {
632         struct kvm_lapic *apic = vcpu->arch.apic;
633
634         return __apic_accept_irq(apic, irq->delivery_mode, irq->vector,
635                         irq->level, irq->trig_mode, dest_map);
636 }
637
638 static int __pv_send_ipi(unsigned long *ipi_bitmap, struct kvm_apic_map *map,
639                          struct kvm_lapic_irq *irq, u32 min)
640 {
641         int i, count = 0;
642         struct kvm_vcpu *vcpu;
643
644         if (min > map->max_apic_id)
645                 return 0;
646
647         for_each_set_bit(i, ipi_bitmap,
648                 min((u32)BITS_PER_LONG, (map->max_apic_id - min + 1))) {
649                 if (map->phys_map[min + i]) {
650                         vcpu = map->phys_map[min + i]->vcpu;
651                         count += kvm_apic_set_irq(vcpu, irq, NULL);
652                 }
653         }
654
655         return count;
656 }
657
658 int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
659                     unsigned long ipi_bitmap_high, u32 min,
660                     unsigned long icr, int op_64_bit)
661 {
662         struct kvm_apic_map *map;
663         struct kvm_lapic_irq irq = {0};
664         int cluster_size = op_64_bit ? 64 : 32;
665         int count;
666
667         if (icr & (APIC_DEST_MASK | APIC_SHORT_MASK))
668                 return -KVM_EINVAL;
669
670         irq.vector = icr & APIC_VECTOR_MASK;
671         irq.delivery_mode = icr & APIC_MODE_MASK;
672         irq.level = (icr & APIC_INT_ASSERT) != 0;
673         irq.trig_mode = icr & APIC_INT_LEVELTRIG;
674
675         rcu_read_lock();
676         map = rcu_dereference(kvm->arch.apic_map);
677
678         count = -EOPNOTSUPP;
679         if (likely(map)) {
680                 count = __pv_send_ipi(&ipi_bitmap_low, map, &irq, min);
681                 min += cluster_size;
682                 count += __pv_send_ipi(&ipi_bitmap_high, map, &irq, min);
683         }
684
685         rcu_read_unlock();
686         return count;
687 }
688
689 static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val)
690 {
691
692         return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val,
693                                       sizeof(val));
694 }
695
696 static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val)
697 {
698
699         return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val,
700                                       sizeof(*val));
701 }
702
703 static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu)
704 {
705         return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
706 }
707
708 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
709 {
710         if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0)
711                 return;
712
713         __set_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
714 }
715
716 static bool pv_eoi_test_and_clr_pending(struct kvm_vcpu *vcpu)
717 {
718         u8 val;
719
720         if (pv_eoi_get_user(vcpu, &val) < 0)
721                 return false;
722
723         val &= KVM_PV_EOI_ENABLED;
724
725         if (val && pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0)
726                 return false;
727
728         /*
729          * Clear pending bit in any case: it will be set again on vmentry.
730          * While this might not be ideal from a performance point of view,
731          * this makes sure pv eoi is only enabled when we know it's safe.
732          */
733         __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
734
735         return val;
736 }
737
738 static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr)
739 {
740         int highest_irr;
741         if (kvm_x86_ops.sync_pir_to_irr)
742                 highest_irr = static_call(kvm_x86_sync_pir_to_irr)(apic->vcpu);
743         else
744                 highest_irr = apic_find_highest_irr(apic);
745         if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr)
746                 return -1;
747         return highest_irr;
748 }
749
750 static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
751 {
752         u32 tpr, isrv, ppr, old_ppr;
753         int isr;
754
755         old_ppr = kvm_lapic_get_reg(apic, APIC_PROCPRI);
756         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI);
757         isr = apic_find_highest_isr(apic);
758         isrv = (isr != -1) ? isr : 0;
759
760         if ((tpr & 0xf0) >= (isrv & 0xf0))
761                 ppr = tpr & 0xff;
762         else
763                 ppr = isrv & 0xf0;
764
765         *new_ppr = ppr;
766         if (old_ppr != ppr)
767                 kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
768
769         return ppr < old_ppr;
770 }
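
/*
 * PPR is effectively max(TPR, ISRV), compared on the high nibble (the
 * priority class).  e.g. with TPR = 0x45 and highest in-service vector
 * 0x62, class 0x4 < class 0x6 so PPR becomes 0x60; with TPR = 0x75 it
 * would stay 0x75.
 */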
771
772 static void apic_update_ppr(struct kvm_lapic *apic)
773 {
774         u32 ppr;
775
776         if (__apic_update_ppr(apic, &ppr) &&
777             apic_has_interrupt_for_ppr(apic, ppr) != -1)
778                 kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
779 }
780
781 void kvm_apic_update_ppr(struct kvm_vcpu *vcpu)
782 {
783         apic_update_ppr(vcpu->arch.apic);
784 }
785 EXPORT_SYMBOL_GPL(kvm_apic_update_ppr);
786
787 static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr)
788 {
789         kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr);
790         apic_update_ppr(apic);
791 }
792
793 static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda)
794 {
795         return mda == (apic_x2apic_mode(apic) ?
796                         X2APIC_BROADCAST : APIC_BROADCAST);
797 }
798
799 static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda)
800 {
801         if (kvm_apic_broadcast(apic, mda))
802                 return true;
803
804         if (apic_x2apic_mode(apic))
805                 return mda == kvm_x2apic_id(apic);
806
807         /*
808          * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if
809          * it were in x2APIC mode.  Hotplugged VCPUs start in xAPIC mode and
810          * this allows unique addressing of VCPUs with APIC ID over 0xff.
811          * The 0xff condition is needed because the xAPIC ID is writeable.
812          */
813         if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic))
814                 return true;
815
816         return mda == kvm_xapic_id(apic);
817 }
818
819 static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
820 {
821         u32 logical_id;
822
823         if (kvm_apic_broadcast(apic, mda))
824                 return true;
825
826         logical_id = kvm_lapic_get_reg(apic, APIC_LDR);
827
828         if (apic_x2apic_mode(apic))
829                 return ((logical_id >> 16) == (mda >> 16))
830                        && (logical_id & mda & 0xffff) != 0;
831
832         logical_id = GET_APIC_LOGICAL_ID(logical_id);
833
834         switch (kvm_lapic_get_reg(apic, APIC_DFR)) {
835         case APIC_DFR_FLAT:
836                 return (logical_id & mda) != 0;
837         case APIC_DFR_CLUSTER:
838                 return ((logical_id >> 4) == (mda >> 4))
839                        && (logical_id & mda & 0xf) != 0;
840         default:
841                 return false;
842         }
843 }
844
845 /* The KVM local APIC implementation has two quirks:
846  *
847  *  - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs
848  *    in xAPIC mode if the "destination & 0xff" matches its xAPIC ID.
849  *    KVM doesn't do that aliasing.
850  *
851  *  - in-kernel IOAPIC messages have to be delivered directly to
852  *    x2APIC, because the kernel does not support interrupt remapping.
853  *    In order to support broadcast without interrupt remapping, x2APIC
854  *    rewrites the destination of non-IPI messages from APIC_BROADCAST
855  *    to X2APIC_BROADCAST.
856  *
857  * The broadcast quirk can be disabled with KVM_CAP_X2APIC_API.  This is
858  * important when userspace wants to use x2APIC-format MSIs, because
859  * APIC_BROADCAST (0xff) is a legal route for "cluster 0, CPUs 0-7".
860  */
861 static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id,
862                 struct kvm_lapic *source, struct kvm_lapic *target)
863 {
864         bool ipi = source != NULL;
865
866         if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled &&
867             !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target))
868                 return X2APIC_BROADCAST;
869
870         return dest_id;
871 }
872
873 bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
874                            int shorthand, unsigned int dest, int dest_mode)
875 {
876         struct kvm_lapic *target = vcpu->arch.apic;
877         u32 mda = kvm_apic_mda(vcpu, dest, source, target);
878
879         ASSERT(target);
880         switch (shorthand) {
881         case APIC_DEST_NOSHORT:
882                 if (dest_mode == APIC_DEST_PHYSICAL)
883                         return kvm_apic_match_physical_addr(target, mda);
884                 else
885                         return kvm_apic_match_logical_addr(target, mda);
886         case APIC_DEST_SELF:
887                 return target == source;
888         case APIC_DEST_ALLINC:
889                 return true;
890         case APIC_DEST_ALLBUT:
891                 return target != source;
892         default:
893                 return false;
894         }
895 }
896 EXPORT_SYMBOL_GPL(kvm_apic_match_dest);
897
898 int kvm_vector_to_index(u32 vector, u32 dest_vcpus,
899                        const unsigned long *bitmap, u32 bitmap_size)
900 {
901         u32 mod;
902         int i, idx = -1;
903
904         mod = vector % dest_vcpus;
905
906         for (i = 0; i <= mod; i++) {
907                 idx = find_next_bit(bitmap, bitmap_size, idx + 1);
908                 BUG_ON(idx == bitmap_size);
909         }
910
911         return idx;
912 }
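
/*
 * Vector hashing example: for vector 0x23 (35) and three candidate
 * destinations, mod = 35 % 3 = 2 and the loop selects the third set bit in
 * the bitmap, so a given vector always hashes to the same vCPU for a given
 * destination set.
 */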
913
914 static void kvm_apic_disabled_lapic_found(struct kvm *kvm)
915 {
916         if (!kvm->arch.disabled_lapic_found) {
917                 kvm->arch.disabled_lapic_found = true;
918                 printk(KERN_INFO
919                        "Disabled LAPIC found during irq injection\n");
920         }
921 }
922
923 static bool kvm_apic_is_broadcast_dest(struct kvm *kvm, struct kvm_lapic **src,
924                 struct kvm_lapic_irq *irq, struct kvm_apic_map *map)
925 {
926         if (kvm->arch.x2apic_broadcast_quirk_disabled) {
927                 if ((irq->dest_id == APIC_BROADCAST &&
928                                 map->mode != KVM_APIC_MODE_X2APIC))
929                         return true;
930                 if (irq->dest_id == X2APIC_BROADCAST)
931                         return true;
932         } else {
933                 bool x2apic_ipi = src && *src && apic_x2apic_mode(*src);
934                 if (irq->dest_id == (x2apic_ipi ?
935                                      X2APIC_BROADCAST : APIC_BROADCAST))
936                         return true;
937         }
938
939         return false;
940 }
941
942 /* Return true if the interrupt can be handled by using *bitmap as index mask
943  * for valid destinations in *dst array.
944  * Return false if kvm_apic_map_get_dest_lapic did nothing useful.
945  * Note: we may have zero kvm_lapic destinations when we return true, which
946  * means that the interrupt should be dropped.  In this case, *bitmap would be
947  * zero and *dst undefined.
948  */
949 static inline bool kvm_apic_map_get_dest_lapic(struct kvm *kvm,
950                 struct kvm_lapic **src, struct kvm_lapic_irq *irq,
951                 struct kvm_apic_map *map, struct kvm_lapic ***dst,
952                 unsigned long *bitmap)
953 {
954         int i, lowest;
955
956         if (irq->shorthand == APIC_DEST_SELF && src) {
957                 *dst = src;
958                 *bitmap = 1;
959                 return true;
960         } else if (irq->shorthand)
961                 return false;
962
963         if (!map || kvm_apic_is_broadcast_dest(kvm, src, irq, map))
964                 return false;
965
966         if (irq->dest_mode == APIC_DEST_PHYSICAL) {
967                 if (irq->dest_id > map->max_apic_id) {
968                         *bitmap = 0;
969                 } else {
970                         u32 dest_id = array_index_nospec(irq->dest_id, map->max_apic_id + 1);
971                         *dst = &map->phys_map[dest_id];
972                         *bitmap = 1;
973                 }
974                 return true;
975         }
976
977         *bitmap = 0;
978         if (!kvm_apic_map_get_logical_dest(map, irq->dest_id, dst,
979                                 (u16 *)bitmap))
980                 return false;
981
982         if (!kvm_lowest_prio_delivery(irq))
983                 return true;
984
985         if (!kvm_vector_hashing_enabled()) {
986                 lowest = -1;
987                 for_each_set_bit(i, bitmap, 16) {
988                         if (!(*dst)[i])
989                                 continue;
990                         if (lowest < 0)
991                                 lowest = i;
992                         else if (kvm_apic_compare_prio((*dst)[i]->vcpu,
993                                                 (*dst)[lowest]->vcpu) < 0)
994                                 lowest = i;
995                 }
996         } else {
997                 if (!*bitmap)
998                         return true;
999
1000                 lowest = kvm_vector_to_index(irq->vector, hweight16(*bitmap),
1001                                 bitmap, 16);
1002
1003                 if (!(*dst)[lowest]) {
1004                         kvm_apic_disabled_lapic_found(kvm);
1005                         *bitmap = 0;
1006                         return true;
1007                 }
1008         }
1009
1010         *bitmap = (lowest >= 0) ? 1 << lowest : 0;
1011
1012         return true;
1013 }
1014
1015 bool kvm_irq_delivery_to_apic_fast(struct kvm *kvm, struct kvm_lapic *src,
1016                 struct kvm_lapic_irq *irq, int *r, struct dest_map *dest_map)
1017 {
1018         struct kvm_apic_map *map;
1019         unsigned long bitmap;
1020         struct kvm_lapic **dst = NULL;
1021         int i;
1022         bool ret;
1023
1024         *r = -1;
1025
1026         if (irq->shorthand == APIC_DEST_SELF) {
1027                 if (KVM_BUG_ON(!src, kvm)) {
1028                         *r = 0;
1029                         return true;
1030                 }
1031                 *r = kvm_apic_set_irq(src->vcpu, irq, dest_map);
1032                 return true;
1033         }
1034
1035         rcu_read_lock();
1036         map = rcu_dereference(kvm->arch.apic_map);
1037
1038         ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dst, &bitmap);
1039         if (ret) {
1040                 *r = 0;
1041                 for_each_set_bit(i, &bitmap, 16) {
1042                         if (!dst[i])
1043                                 continue;
1044                         *r += kvm_apic_set_irq(dst[i]->vcpu, irq, dest_map);
1045                 }
1046         }
1047
1048         rcu_read_unlock();
1049         return ret;
1050 }
1051
1052 /*
1053  * This routine tries to handle interrupts in posted mode, here is how
1054  * it deals with different cases:
1055  * - For single-destination interrupts, handle it in posted mode
1056  * - Else if vector hashing is enabled and it is a lowest-priority
1057  *   interrupt, handle it in posted mode and use the following mechanism
1058  *   to find the destination vCPU.
1059  *      1. For lowest-priority interrupts, store all the possible
1060  *         destination vCPUs in an array.
1061  *      2. Use "guest vector % max number of destination vCPUs" to find
1062  *         the right destination vCPU in the array for the lowest-priority
1063  *         interrupt.
1064  * - Otherwise, use remapped mode to inject the interrupt.
1065  */
1066 bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
1067                         struct kvm_vcpu **dest_vcpu)
1068 {
1069         struct kvm_apic_map *map;
1070         unsigned long bitmap;
1071         struct kvm_lapic **dst = NULL;
1072         bool ret = false;
1073
1074         if (irq->shorthand)
1075                 return false;
1076
1077         rcu_read_lock();
1078         map = rcu_dereference(kvm->arch.apic_map);
1079
1080         if (kvm_apic_map_get_dest_lapic(kvm, NULL, irq, map, &dst, &bitmap) &&
1081                         hweight16(bitmap) == 1) {
1082                 unsigned long i = find_first_bit(&bitmap, 16);
1083
1084                 if (dst[i]) {
1085                         *dest_vcpu = dst[i]->vcpu;
1086                         ret = true;
1087                 }
1088         }
1089
1090         rcu_read_unlock();
1091         return ret;
1092 }
1093
1094 /*
1095  * Add a pending IRQ into lapic.
1096  * Return 1 if successfully added and 0 if discarded.
1097  */
1098 static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
1099                              int vector, int level, int trig_mode,
1100                              struct dest_map *dest_map)
1101 {
1102         int result = 0;
1103         struct kvm_vcpu *vcpu = apic->vcpu;
1104
1105         trace_kvm_apic_accept_irq(vcpu->vcpu_id, delivery_mode,
1106                                   trig_mode, vector);
1107         switch (delivery_mode) {
1108         case APIC_DM_LOWEST:
1109                 vcpu->arch.apic_arb_prio++;
1110                 fallthrough;
1111         case APIC_DM_FIXED:
1112                 if (unlikely(trig_mode && !level))
1113                         break;
1114
1115                 /* FIXME add logic for vcpu on reset */
1116                 if (unlikely(!apic_enabled(apic)))
1117                         break;
1118
1119                 result = 1;
1120
1121                 if (dest_map) {
1122                         __set_bit(vcpu->vcpu_id, dest_map->map);
1123                         dest_map->vectors[vcpu->vcpu_id] = vector;
1124                 }
1125
1126                 if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
1127                         if (trig_mode)
1128                                 kvm_lapic_set_vector(vector,
1129                                                      apic->regs + APIC_TMR);
1130                         else
1131                                 kvm_lapic_clear_vector(vector,
1132                                                        apic->regs + APIC_TMR);
1133                 }
1134
1135                 static_call(kvm_x86_deliver_interrupt)(apic, delivery_mode,
1136                                                        trig_mode, vector);
1137                 break;
1138
1139         case APIC_DM_REMRD:
1140                 result = 1;
1141                 vcpu->arch.pv.pv_unhalted = 1;
1142                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1143                 kvm_vcpu_kick(vcpu);
1144                 break;
1145
1146         case APIC_DM_SMI:
1147                 result = 1;
1148                 kvm_make_request(KVM_REQ_SMI, vcpu);
1149                 kvm_vcpu_kick(vcpu);
1150                 break;
1151
1152         case APIC_DM_NMI:
1153                 result = 1;
1154                 kvm_inject_nmi(vcpu);
1155                 kvm_vcpu_kick(vcpu);
1156                 break;
1157
1158         case APIC_DM_INIT:
1159                 if (!trig_mode || level) {
1160                         result = 1;
1161                         /* assumes that there are only KVM_APIC_INIT/SIPI */
1162                         apic->pending_events = (1UL << KVM_APIC_INIT);
1163                         kvm_make_request(KVM_REQ_EVENT, vcpu);
1164                         kvm_vcpu_kick(vcpu);
1165                 }
1166                 break;
1167
1168         case APIC_DM_STARTUP:
1169                 result = 1;
1170                 apic->sipi_vector = vector;
1171                 /* make sure sipi_vector is visible for the receiver */
1172                 smp_wmb();
1173                 set_bit(KVM_APIC_SIPI, &apic->pending_events);
1174                 kvm_make_request(KVM_REQ_EVENT, vcpu);
1175                 kvm_vcpu_kick(vcpu);
1176                 break;
1177
1178         case APIC_DM_EXTINT:
1179                 /*
1180                  * Should only be called by kvm_apic_local_deliver() with LVT0,
1181                  * before NMI watchdog was enabled. Already handled by
1182                  * kvm_apic_accept_pic_intr().
1183                  */
1184                 break;
1185
1186         default:
1187                 printk(KERN_ERR "TODO: unsupported delivery mode %x\n",
1188                        delivery_mode);
1189                 break;
1190         }
1191         return result;
1192 }
1193
1194 /*
1195  * This routine identifies the mask of destination vcpus meant to receive an
1196  * IOAPIC interrupt. It either uses kvm_apic_map_get_dest_lapic() to find the
1197  * destination vcpus array and set the bitmap, or it traverses each available
1198  * vcpu and matches the destination individually.
1199  */
1200 void kvm_bitmap_or_dest_vcpus(struct kvm *kvm, struct kvm_lapic_irq *irq,
1201                               unsigned long *vcpu_bitmap)
1202 {
1203         struct kvm_lapic **dest_vcpu = NULL;
1204         struct kvm_lapic *src = NULL;
1205         struct kvm_apic_map *map;
1206         struct kvm_vcpu *vcpu;
1207         unsigned long bitmap, i;
1208         int vcpu_idx;
1209         bool ret;
1210
1211         rcu_read_lock();
1212         map = rcu_dereference(kvm->arch.apic_map);
1213
1214         ret = kvm_apic_map_get_dest_lapic(kvm, &src, irq, map, &dest_vcpu,
1215                                           &bitmap);
1216         if (ret) {
1217                 for_each_set_bit(i, &bitmap, 16) {
1218                         if (!dest_vcpu[i])
1219                                 continue;
1220                         vcpu_idx = dest_vcpu[i]->vcpu->vcpu_idx;
1221                         __set_bit(vcpu_idx, vcpu_bitmap);
1222                 }
1223         } else {
1224                 kvm_for_each_vcpu(i, vcpu, kvm) {
1225                         if (!kvm_apic_present(vcpu))
1226                                 continue;
1227                         if (!kvm_apic_match_dest(vcpu, NULL,
1228                                                  irq->shorthand,
1229                                                  irq->dest_id,
1230                                                  irq->dest_mode))
1231                                 continue;
1232                         __set_bit(i, vcpu_bitmap);
1233                 }
1234         }
1235         rcu_read_unlock();
1236 }
1237
1238 int kvm_apic_compare_prio(struct kvm_vcpu *vcpu1, struct kvm_vcpu *vcpu2)
1239 {
1240         return vcpu1->arch.apic_arb_prio - vcpu2->arch.apic_arb_prio;
1241 }
1242
1243 static bool kvm_ioapic_handles_vector(struct kvm_lapic *apic, int vector)
1244 {
1245         return test_bit(vector, apic->vcpu->arch.ioapic_handled_vectors);
1246 }
1247
1248 static void kvm_ioapic_send_eoi(struct kvm_lapic *apic, int vector)
1249 {
1250         int trigger_mode;
1251
1252         /* Forward the EOI to the IOAPIC only if the IOAPIC handles this vector. */
1253         if (!kvm_ioapic_handles_vector(apic, vector))
1254                 return;
1255
1256         /* Request a KVM exit to inform the userspace IOAPIC. */
1257         if (irqchip_split(apic->vcpu->kvm)) {
1258                 apic->vcpu->arch.pending_ioapic_eoi = vector;
1259                 kvm_make_request(KVM_REQ_IOAPIC_EOI_EXIT, apic->vcpu);
1260                 return;
1261         }
1262
1263         if (apic_test_vector(vector, apic->regs + APIC_TMR))
1264                 trigger_mode = IOAPIC_LEVEL_TRIG;
1265         else
1266                 trigger_mode = IOAPIC_EDGE_TRIG;
1267
1268         kvm_ioapic_update_eoi(apic->vcpu, vector, trigger_mode);
1269 }
1270
1271 static int apic_set_eoi(struct kvm_lapic *apic)
1272 {
1273         int vector = apic_find_highest_isr(apic);
1274
1275         trace_kvm_eoi(apic, vector);
1276
1277         /*
1278          * Not every EOI write has a corresponding ISR bit set; one example
1279          * is when the kernel checks the timer in setup_IO_APIC().
1280          */
1281         if (vector == -1)
1282                 return vector;
1283
1284         apic_clear_isr(vector, apic);
1285         apic_update_ppr(apic);
1286
1287         if (to_hv_vcpu(apic->vcpu) &&
1288             test_bit(vector, to_hv_synic(apic->vcpu)->vec_bitmap))
1289                 kvm_hv_synic_send_eoi(apic->vcpu, vector);
1290
1291         kvm_ioapic_send_eoi(apic, vector);
1292         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1293         return vector;
1294 }
1295
1296 /*
1297  * this interface assumes a trap-like exit, which has already finished
1298  * desired side effect including vISR and vPPR update.
1299  */
1300 void kvm_apic_set_eoi_accelerated(struct kvm_vcpu *vcpu, int vector)
1301 {
1302         struct kvm_lapic *apic = vcpu->arch.apic;
1303
1304         trace_kvm_eoi(apic, vector);
1305
1306         kvm_ioapic_send_eoi(apic, vector);
1307         kvm_make_request(KVM_REQ_EVENT, apic->vcpu);
1308 }
1309 EXPORT_SYMBOL_GPL(kvm_apic_set_eoi_accelerated);
1310
1311 void kvm_apic_send_ipi(struct kvm_lapic *apic, u32 icr_low, u32 icr_high)
1312 {
1313         struct kvm_lapic_irq irq;
1314
1315         /* KVM has no delay and should always clear the BUSY/PENDING flag. */
1316         WARN_ON_ONCE(icr_low & APIC_ICR_BUSY);
1317
1318         irq.vector = icr_low & APIC_VECTOR_MASK;
1319         irq.delivery_mode = icr_low & APIC_MODE_MASK;
1320         irq.dest_mode = icr_low & APIC_DEST_MASK;
1321         irq.level = (icr_low & APIC_INT_ASSERT) != 0;
1322         irq.trig_mode = icr_low & APIC_INT_LEVELTRIG;
1323         irq.shorthand = icr_low & APIC_SHORT_MASK;
1324         irq.msi_redir_hint = false;
1325         if (apic_x2apic_mode(apic))
1326                 irq.dest_id = icr_high;
1327         else
1328                 irq.dest_id = GET_APIC_DEST_FIELD(icr_high);
1329
1330         trace_kvm_apic_ipi(icr_low, irq.dest_id);
1331
1332         kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
1333 }
1334 EXPORT_SYMBOL_GPL(kvm_apic_send_ipi);
1335
1336 static u32 apic_get_tmcct(struct kvm_lapic *apic)
1337 {
1338         ktime_t remaining, now;
1339         s64 ns;
1340         u32 tmcct;
1341
1342         ASSERT(apic != NULL);
1343
1344         /* if initial count is 0, current count should also be 0 */
1345         if (kvm_lapic_get_reg(apic, APIC_TMICT) == 0 ||
1346                 apic->lapic_timer.period == 0)
1347                 return 0;
1348
1349         now = ktime_get();
1350         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1351         if (ktime_to_ns(remaining) < 0)
1352                 remaining = 0;
1353
1354         ns = mod_64(ktime_to_ns(remaining), apic->lapic_timer.period);
1355         tmcct = div64_u64(ns,
1356                          (APIC_BUS_CYCLE_NS * apic->divide_count));
1357
1358         return tmcct;
1359 }
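
/*
 * The current count is reconstructed from the remaining wall-clock time:
 * tmcct = remaining_ns / (APIC_BUS_CYCLE_NS * divide_count), which mirrors
 * the scaling applied to TMICT when the timer was armed.
 */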
1360
1361 static void __report_tpr_access(struct kvm_lapic *apic, bool write)
1362 {
1363         struct kvm_vcpu *vcpu = apic->vcpu;
1364         struct kvm_run *run = vcpu->run;
1365
1366         kvm_make_request(KVM_REQ_REPORT_TPR_ACCESS, vcpu);
1367         run->tpr_access.rip = kvm_rip_read(vcpu);
1368         run->tpr_access.is_write = write;
1369 }
1370
1371 static inline void report_tpr_access(struct kvm_lapic *apic, bool write)
1372 {
1373         if (apic->vcpu->arch.tpr_access_reporting)
1374                 __report_tpr_access(apic, write);
1375 }
1376
1377 static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
1378 {
1379         u32 val = 0;
1380
1381         if (offset >= LAPIC_MMIO_LENGTH)
1382                 return 0;
1383
1384         switch (offset) {
1385         case APIC_ARBPRI:
1386                 break;
1387
1388         case APIC_TMCCT:        /* Timer CCR */
1389                 if (apic_lvtt_tscdeadline(apic))
1390                         return 0;
1391
1392                 val = apic_get_tmcct(apic);
1393                 break;
1394         case APIC_PROCPRI:
1395                 apic_update_ppr(apic);
1396                 val = kvm_lapic_get_reg(apic, offset);
1397                 break;
1398         case APIC_TASKPRI:
1399                 report_tpr_access(apic, false);
1400                 fallthrough;
1401         default:
1402                 val = kvm_lapic_get_reg(apic, offset);
1403                 break;
1404         }
1405
1406         return val;
1407 }
1408
1409 static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
1410 {
1411         return container_of(dev, struct kvm_lapic, dev);
1412 }
1413
1414 #define APIC_REG_MASK(reg)      (1ull << ((reg) >> 4))
1415 #define APIC_REGS_MASK(first, count) \
1416         (APIC_REG_MASK(first) * ((1ull << (count)) - 1))
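
/*
 * xAPIC registers are 16 bytes apart, so (reg >> 4) maps each register to
 * a unique bit in a 64-bit mask.  APIC_REGS_MASK() sets 'count' consecutive
 * bits starting from 'first', e.g. APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR)
 * covers all eight ISR registers at once.
 */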
1417
1418 static int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
1419                               void *data)
1420 {
1421         unsigned char alignment = offset & 0xf;
1422         u32 result;
1423         /* this bitmask has a bit cleared for each reserved register */
1424         u64 valid_reg_mask =
1425                 APIC_REG_MASK(APIC_ID) |
1426                 APIC_REG_MASK(APIC_LVR) |
1427                 APIC_REG_MASK(APIC_TASKPRI) |
1428                 APIC_REG_MASK(APIC_PROCPRI) |
1429                 APIC_REG_MASK(APIC_LDR) |
1430                 APIC_REG_MASK(APIC_DFR) |
1431                 APIC_REG_MASK(APIC_SPIV) |
1432                 APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
1433                 APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
1434                 APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
1435                 APIC_REG_MASK(APIC_ESR) |
1436                 APIC_REG_MASK(APIC_ICR) |
1437                 APIC_REG_MASK(APIC_LVTT) |
1438                 APIC_REG_MASK(APIC_LVTTHMR) |
1439                 APIC_REG_MASK(APIC_LVTPC) |
1440                 APIC_REG_MASK(APIC_LVT0) |
1441                 APIC_REG_MASK(APIC_LVT1) |
1442                 APIC_REG_MASK(APIC_LVTERR) |
1443                 APIC_REG_MASK(APIC_TMICT) |
1444                 APIC_REG_MASK(APIC_TMCCT) |
1445                 APIC_REG_MASK(APIC_TDCR);
1446
1447         /*
1448          * ARBPRI and ICR2 are not valid in x2APIC mode.  WARN if KVM reads ICR
1449          * in x2APIC mode as it's an 8-byte register in x2APIC and needs to be
1450          * manually handled by the caller.
1451          */
1452         if (!apic_x2apic_mode(apic))
1453                 valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI) |
1454                                   APIC_REG_MASK(APIC_ICR2);
1455         else
1456                 WARN_ON_ONCE(offset == APIC_ICR);
1457
1458         if (alignment + len > 4)
1459                 return 1;
1460
1461         if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
1462                 return 1;
1463
1464         result = __apic_read(apic, offset & ~0xf);
1465
1466         trace_kvm_apic_read(offset, result);
1467
1468         switch (len) {
1469         case 1:
1470         case 2:
1471         case 4:
1472                 memcpy(data, (char *)&result + alignment, len);
1473                 break;
1474         default:
1475                 printk(KERN_ERR "Local APIC read with len = %x, "
1476                        "should be 1,2, or 4 instead\n", len);
1477                 break;
1478         }
1479         return 0;
1480 }
1481
1482 static int apic_mmio_in_range(struct kvm_lapic *apic, gpa_t addr)
1483 {
1484         return addr >= apic->base_address &&
1485                 addr < apic->base_address + LAPIC_MMIO_LENGTH;
1486 }
1487
1488 static int apic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
1489                            gpa_t address, int len, void *data)
1490 {
1491         struct kvm_lapic *apic = to_lapic(this);
1492         u32 offset = address - apic->base_address;
1493
1494         if (!apic_mmio_in_range(apic, address))
1495                 return -EOPNOTSUPP;
1496
1497         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
1498                 if (!kvm_check_has_quirk(vcpu->kvm,
1499                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
1500                         return -EOPNOTSUPP;
1501
1502                 memset(data, 0xff, len);
1503                 return 0;
1504         }
1505
1506         kvm_lapic_reg_read(apic, offset, len, data);
1507
1508         return 0;
1509 }
1510
1511 static void update_divide_count(struct kvm_lapic *apic)
1512 {
1513         u32 tmp1, tmp2, tdcr;
1514
1515         tdcr = kvm_lapic_get_reg(apic, APIC_TDCR);
1516         tmp1 = tdcr & 0xf;
1517         tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
1518         apic->divide_count = 0x1 << (tmp2 & 0x7);
1519 }
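
/*
 * TDCR decode example: the divide value is encoded in bits 0, 1 and 3 of
 * the register.  tdcr = 0x3 gives tmp2 = (3 | 0) + 1 = 4, i.e. divide by 16;
 * tdcr = 0xb gives tmp2 = (3 | 4) + 1 = 8, which wraps to divide_count = 1,
 * matching the architectural "divide by 1" encoding of 0b1011.
 */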
1520
1521 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
1522 {
1523         /*
1524          * Do not allow the guest to program periodic timers with small
1525          * interval, since the hrtimers are not throttled by the host
1526          * scheduler.
1527          */
1528         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1529                 s64 min_period = min_timer_period_us * 1000LL;
1530
1531                 if (apic->lapic_timer.period < min_period) {
1532                         pr_info_ratelimited(
1533                             "kvm: vcpu %i: requested %lld ns "
1534                             "lapic timer period limited to %lld ns\n",
1535                             apic->vcpu->vcpu_id,
1536                             apic->lapic_timer.period, min_period);
1537                         apic->lapic_timer.period = min_period;
1538                 }
1539         }
1540 }
1541
1542 static void cancel_hv_timer(struct kvm_lapic *apic);
1543
1544 static void cancel_apic_timer(struct kvm_lapic *apic)
1545 {
1546         hrtimer_cancel(&apic->lapic_timer.timer);
1547         preempt_disable();
1548         if (apic->lapic_timer.hv_timer_in_use)
1549                 cancel_hv_timer(apic);
1550         preempt_enable();
1551         atomic_set(&apic->lapic_timer.pending, 0);
1552 }
1553
1554 static void apic_update_lvtt(struct kvm_lapic *apic)
1555 {
1556         u32 timer_mode = kvm_lapic_get_reg(apic, APIC_LVTT) &
1557                         apic->lapic_timer.timer_mode_mask;
1558
1559         if (apic->lapic_timer.timer_mode != timer_mode) {
1560                 if (apic_lvtt_tscdeadline(apic) != (timer_mode ==
1561                                 APIC_LVT_TIMER_TSCDEADLINE)) {
1562                         cancel_apic_timer(apic);
1563                         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
1564                         apic->lapic_timer.period = 0;
1565                         apic->lapic_timer.tscdeadline = 0;
1566                 }
1567                 apic->lapic_timer.timer_mode = timer_mode;
1568                 limit_periodic_timer_frequency(apic);
1569         }
1570 }
1571
1572 /*
1573  * On APICv, this test will cause a busy wait
1574  * during a higher-priority task.
1575  */
1576
1577 static bool lapic_timer_int_injected(struct kvm_vcpu *vcpu)
1578 {
1579         struct kvm_lapic *apic = vcpu->arch.apic;
1580         u32 reg = kvm_lapic_get_reg(apic, APIC_LVTT);
1581
1582         if (kvm_apic_hw_enabled(apic)) {
1583                 int vec = reg & APIC_VECTOR_MASK;
1584                 void *bitmap = apic->regs + APIC_ISR;
1585
1586                 if (vcpu->arch.apicv_active)
1587                         bitmap = apic->regs + APIC_IRR;
1588
1589                 if (apic_test_vector(vec, bitmap))
1590                         return true;
1591         }
1592         return false;
1593 }
1594
1595 static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
1596 {
1597         u64 timer_advance_ns = vcpu->arch.apic->lapic_timer.timer_advance_ns;
1598
1599         /*
1600          * If the guest TSC is running at a different ratio than the host, then
1601          * convert the delay to nanoseconds to achieve an accurate delay.  Note
1602          * that __delay() uses delay_tsc whenever the hardware has TSC, thus
1603          * always for VMX enabled hardware.
1604          */
1605         if (vcpu->arch.tsc_scaling_ratio == kvm_default_tsc_scaling_ratio) {
1606                 __delay(min(guest_cycles,
1607                         nsec_to_cycles(vcpu, timer_advance_ns)));
1608         } else {
1609                 u64 delay_ns = guest_cycles * 1000000ULL;
1610                 do_div(delay_ns, vcpu->arch.virtual_tsc_khz);
1611                 ndelay(min_t(u32, delay_ns, timer_advance_ns));
1612         }
1613 }
1614
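/*
 * Nudge timer_advance_ns toward the observed expiration error: convert the
 * guest-TSC delta to nanoseconds, apply 1/LAPIC_TIMER_ADVANCE_ADJUST_STEP of
 * it, and fall back to the initial value if the result grows implausibly large.
 */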
1615 static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
1616                                               s64 advance_expire_delta)
1617 {
1618         struct kvm_lapic *apic = vcpu->arch.apic;
1619         u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
1620         u64 ns;
1621
1622         /* Do not adjust for tiny fluctuations or large random spikes. */
1623         if (abs(advance_expire_delta) > LAPIC_TIMER_ADVANCE_ADJUST_MAX ||
1624             abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_MIN)
1625                 return;
1626
1627         /* too early */
1628         if (advance_expire_delta < 0) {
1629                 ns = -advance_expire_delta * 1000000ULL;
1630                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1631                 timer_advance_ns -= ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1632         } else {
1633                 /* too late */
1634                 ns = advance_expire_delta * 1000000ULL;
1635                 do_div(ns, vcpu->arch.virtual_tsc_khz);
1636                 timer_advance_ns += ns/LAPIC_TIMER_ADVANCE_ADJUST_STEP;
1637         }
1638
1639         if (unlikely(timer_advance_ns > LAPIC_TIMER_ADVANCE_NS_MAX))
1640                 timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
1641         apic->lapic_timer.timer_advance_ns = timer_advance_ns;
1642 }
1643
1644 static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1645 {
1646         struct kvm_lapic *apic = vcpu->arch.apic;
1647         u64 guest_tsc, tsc_deadline;
1648
1649         tsc_deadline = apic->lapic_timer.expired_tscdeadline;
1650         apic->lapic_timer.expired_tscdeadline = 0;
1651         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1652         trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
1653
1654         if (lapic_timer_advance_dynamic) {
1655                 adjust_lapic_timer_advance(vcpu, guest_tsc - tsc_deadline);
1656                 /*
1657                  * If the timer fired early, reread the TSC to account for the
1658                  * overhead of the above adjustment to avoid waiting longer
1659                  * than is necessary.
1660                  */
1661                 if (guest_tsc < tsc_deadline)
1662                         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1663         }
1664
1665         if (guest_tsc < tsc_deadline)
1666                 __wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
1667 }
1668
1669 void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
1670 {
1671         if (lapic_in_kernel(vcpu) &&
1672             vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1673             vcpu->arch.apic->lapic_timer.timer_advance_ns &&
1674             lapic_timer_int_injected(vcpu))
1675                 __kvm_wait_lapic_expire(vcpu);
1676 }
1677 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
1678
1679 static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
1680 {
1681         struct kvm_timer *ktimer = &apic->lapic_timer;
1682
1683         kvm_apic_local_deliver(apic, APIC_LVTT);
1684         if (apic_lvtt_tscdeadline(apic)) {
1685                 ktimer->tscdeadline = 0;
1686         } else if (apic_lvtt_oneshot(apic)) {
1687                 ktimer->tscdeadline = 0;
1688                 ktimer->target_expiration = 0;
1689         }
1690 }
1691
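/*
 * Handle an expired timer: inject the LVTT interrupt directly when it is safe
 * to do so (APICv on the current vCPU, or the posted-interrupt path), otherwise
 * record it as pending and kick the vCPU so it is injected at the next entry.
 */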
1692 static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
1693 {
1694         struct kvm_vcpu *vcpu = apic->vcpu;
1695         struct kvm_timer *ktimer = &apic->lapic_timer;
1696
1697         if (atomic_read(&apic->lapic_timer.pending))
1698                 return;
1699
1700         if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
1701                 ktimer->expired_tscdeadline = ktimer->tscdeadline;
1702
1703         if (!from_timer_fn && vcpu->arch.apicv_active) {
1704                 WARN_ON(kvm_get_running_vcpu() != vcpu);
1705                 kvm_apic_inject_pending_timer_irqs(apic);
1706                 return;
1707         }
1708
1709         if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
1710                 /*
1711                  * Ensure the guest's timer has truly expired before posting an
1712                  * interrupt.  Open code the relevant checks to avoid querying
1713                  * lapic_timer_int_injected(), which will be false since the
1714                  * interrupt isn't yet injected.  Waiting until after injecting
1715                  * is not an option since that won't help a posted interrupt.
1716                  */
1717                 if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
1718                     vcpu->arch.apic->lapic_timer.timer_advance_ns)
1719                         __kvm_wait_lapic_expire(vcpu);
1720                 kvm_apic_inject_pending_timer_irqs(apic);
1721                 return;
1722         }
1723
1724         atomic_inc(&apic->lapic_timer.pending);
1725         kvm_make_request(KVM_REQ_UNBLOCK, vcpu);
1726         if (from_timer_fn)
1727                 kvm_vcpu_kick(vcpu);
1728 }
1729
1730 static void start_sw_tscdeadline(struct kvm_lapic *apic)
1731 {
1732         struct kvm_timer *ktimer = &apic->lapic_timer;
1733         u64 guest_tsc, tscdeadline = ktimer->tscdeadline;
1734         u64 ns = 0;
1735         ktime_t expire;
1736         struct kvm_vcpu *vcpu = apic->vcpu;
1737         unsigned long this_tsc_khz = vcpu->arch.virtual_tsc_khz;
1738         unsigned long flags;
1739         ktime_t now;
1740
1741         if (unlikely(!tscdeadline || !this_tsc_khz))
1742                 return;
1743
1744         local_irq_save(flags);
1745
1746         now = ktime_get();
1747         guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
1748
1749         ns = (tscdeadline - guest_tsc) * 1000000ULL;
1750         do_div(ns, this_tsc_khz);
1751
1752         if (likely(tscdeadline > guest_tsc) &&
1753             likely(ns > apic->lapic_timer.timer_advance_ns)) {
1754                 expire = ktime_add_ns(now, ns);
1755                 expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
1756                 hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_HARD);
1757         } else
1758                 apic_timer_expired(apic, false);
1759
1760         local_irq_restore(flags);
1761 }
1762
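/* Convert an initial-count value to nanoseconds at the current divide count. */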
1763 static inline u64 tmict_to_ns(struct kvm_lapic *apic, u32 tmict)
1764 {
1765         return (u64)tmict * APIC_BUS_CYCLE_NS * (u64)apic->divide_count;
1766 }
1767
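/*
 * Rescale the time remaining on a running timer after the guest changes the
 * divide configuration, keeping the hrtimer target and the TSC deadline
 * consistent with the new divisor.
 */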
1768 static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor)
1769 {
1770         ktime_t now, remaining;
1771         u64 ns_remaining_old, ns_remaining_new;
1772
1773         apic->lapic_timer.period =
1774                         tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1775         limit_periodic_timer_frequency(apic);
1776
1777         now = ktime_get();
1778         remaining = ktime_sub(apic->lapic_timer.target_expiration, now);
1779         if (ktime_to_ns(remaining) < 0)
1780                 remaining = 0;
1781
1782         ns_remaining_old = ktime_to_ns(remaining);
1783         ns_remaining_new = mul_u64_u32_div(ns_remaining_old,
1784                                            apic->divide_count, old_divisor);
1785
1786         apic->lapic_timer.tscdeadline +=
1787                 nsec_to_cycles(apic->vcpu, ns_remaining_new) -
1788                 nsec_to_cycles(apic->vcpu, ns_remaining_old);
1789         apic->lapic_timer.target_expiration = ktime_add_ns(now, ns_remaining_new);
1790 }
1791
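/*
 * Program the one-shot/periodic deadline from an initial count.  count_reg is
 * normally APIC_TMICT; state restore passes APIC_TMCCT so the timer resumes
 * from the saved current count rather than a full period.
 */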
1792 static bool set_target_expiration(struct kvm_lapic *apic, u32 count_reg)
1793 {
1794         ktime_t now;
1795         u64 tscl = rdtsc();
1796         s64 deadline;
1797
1798         now = ktime_get();
1799         apic->lapic_timer.period =
1800                         tmict_to_ns(apic, kvm_lapic_get_reg(apic, APIC_TMICT));
1801
1802         if (!apic->lapic_timer.period) {
1803                 apic->lapic_timer.tscdeadline = 0;
1804                 return false;
1805         }
1806
1807         limit_periodic_timer_frequency(apic);
1808         deadline = apic->lapic_timer.period;
1809
1810         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic)) {
1811                 if (unlikely(count_reg != APIC_TMICT)) {
1812                         deadline = tmict_to_ns(apic,
1813                                      kvm_lapic_get_reg(apic, count_reg));
1814                         if (unlikely(deadline <= 0))
1815                                 deadline = apic->lapic_timer.period;
1816                         else if (unlikely(deadline > apic->lapic_timer.period)) {
1817                                 pr_info_ratelimited(
1818                                     "kvm: vcpu %i: requested lapic timer restore with "
1819                                     "starting count register %#x=%u (%lld ns) > initial count (%lld ns). "
1820                                     "Using initial count to start timer.\n",
1821                                     apic->vcpu->vcpu_id,
1822                                     count_reg,
1823                                     kvm_lapic_get_reg(apic, count_reg),
1824                                     deadline, apic->lapic_timer.period);
1825                                 kvm_lapic_set_reg(apic, count_reg, 0);
1826                                 deadline = apic->lapic_timer.period;
1827                         }
1828                 }
1829         }
1830
1831         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1832                 nsec_to_cycles(apic->vcpu, deadline);
1833         apic->lapic_timer.target_expiration = ktime_add_ns(now, deadline);
1834
1835         return true;
1836 }
1837
1838 static void advance_periodic_target_expiration(struct kvm_lapic *apic)
1839 {
1840         ktime_t now = ktime_get();
1841         u64 tscl = rdtsc();
1842         ktime_t delta;
1843
1844         /*
1845          * Synchronize both deadlines to the same time source or
1846          * differences in the periods (caused by differences in the
1847          * underlying clocks or numerical approximation errors) will
1848          * cause the two to drift apart over time as the errors
1849          * accumulate.
1850          */
1851         apic->lapic_timer.target_expiration =
1852                 ktime_add_ns(apic->lapic_timer.target_expiration,
1853                                 apic->lapic_timer.period);
1854         delta = ktime_sub(apic->lapic_timer.target_expiration, now);
1855         apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
1856                 nsec_to_cycles(apic->vcpu, delta);
1857 }
1858
1859 static void start_sw_period(struct kvm_lapic *apic)
1860 {
1861         if (!apic->lapic_timer.period)
1862                 return;
1863
1864         if (ktime_after(ktime_get(),
1865                         apic->lapic_timer.target_expiration)) {
1866                 apic_timer_expired(apic, false);
1867
1868                 if (apic_lvtt_oneshot(apic))
1869                         return;
1870
1871                 advance_periodic_target_expiration(apic);
1872         }
1873
1874         hrtimer_start(&apic->lapic_timer.timer,
1875                 apic->lapic_timer.target_expiration,
1876                 HRTIMER_MODE_ABS_HARD);
1877 }
1878
1879 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
1880 {
1881         if (!lapic_in_kernel(vcpu))
1882                 return false;
1883
1884         return vcpu->arch.apic->lapic_timer.hv_timer_in_use;
1885 }
1886 EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use);
1887
1888 static void cancel_hv_timer(struct kvm_lapic *apic)
1889 {
1890         WARN_ON(preemptible());
1891         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
1892         static_call(kvm_x86_cancel_hv_timer)(apic->vcpu);
1893         apic->lapic_timer.hv_timer_in_use = false;
1894 }
1895
1896 static bool start_hv_timer(struct kvm_lapic *apic)
1897 {
1898         struct kvm_timer *ktimer = &apic->lapic_timer;
1899         struct kvm_vcpu *vcpu = apic->vcpu;
1900         bool expired;
1901
1902         WARN_ON(preemptible());
1903         if (!kvm_can_use_hv_timer(vcpu))
1904                 return false;
1905
1906         if (!ktimer->tscdeadline)
1907                 return false;
1908
1909         if (static_call(kvm_x86_set_hv_timer)(vcpu, ktimer->tscdeadline, &expired))
1910                 return false;
1911
1912         ktimer->hv_timer_in_use = true;
1913         hrtimer_cancel(&ktimer->timer);
1914
1915         /*
1916          * To simplify handling the periodic timer, leave the hv timer running
1917          * even if the deadline timer has expired, i.e. rely on the resulting
1918          * VM-Exit to recompute the periodic timer's target expiration.
1919          */
1920         if (!apic_lvtt_period(apic)) {
1921                 /*
1922                  * Cancel the hv timer if the sw timer fired while the hv timer
1923                  * was being programmed, or if the hv timer itself expired.
1924                  */
1925                 if (atomic_read(&ktimer->pending)) {
1926                         cancel_hv_timer(apic);
1927                 } else if (expired) {
1928                         apic_timer_expired(apic, false);
1929                         cancel_hv_timer(apic);
1930                 }
1931         }
1932
1933         trace_kvm_hv_timer_state(vcpu->vcpu_id, ktimer->hv_timer_in_use);
1934
1935         return true;
1936 }
1937
1938 static void start_sw_timer(struct kvm_lapic *apic)
1939 {
1940         struct kvm_timer *ktimer = &apic->lapic_timer;
1941
1942         WARN_ON(preemptible());
1943         if (apic->lapic_timer.hv_timer_in_use)
1944                 cancel_hv_timer(apic);
1945         if (!apic_lvtt_period(apic) && atomic_read(&ktimer->pending))
1946                 return;
1947
1948         if (apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
1949                 start_sw_period(apic);
1950         else if (apic_lvtt_tscdeadline(apic))
1951                 start_sw_tscdeadline(apic);
1952         trace_kvm_hv_timer_state(apic->vcpu->vcpu_id, false);
1953 }
1954
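/*
 * (Re)arm the timer, preferring the hardware-assisted timer (e.g. the VMX
 * preemption timer) and falling back to the hrtimer-based software timer.
 */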
1955 static void restart_apic_timer(struct kvm_lapic *apic)
1956 {
1957         preempt_disable();
1958
1959         if (!apic_lvtt_period(apic) && atomic_read(&apic->lapic_timer.pending))
1960                 goto out;
1961
1962         if (!start_hv_timer(apic))
1963                 start_sw_timer(apic);
1964 out:
1965         preempt_enable();
1966 }
1967
1968 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
1969 {
1970         struct kvm_lapic *apic = vcpu->arch.apic;
1971
1972         preempt_disable();
1973         /* If the preempt notifier has already run, it also called apic_timer_expired */
1974         if (!apic->lapic_timer.hv_timer_in_use)
1975                 goto out;
1976         WARN_ON(kvm_vcpu_is_blocking(vcpu));
1977         apic_timer_expired(apic, false);
1978         cancel_hv_timer(apic);
1979
1980         if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
1981                 advance_periodic_target_expiration(apic);
1982                 restart_apic_timer(apic);
1983         }
1984 out:
1985         preempt_enable();
1986 }
1987 EXPORT_SYMBOL_GPL(kvm_lapic_expired_hv_timer);
1988
1989 void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu)
1990 {
1991         restart_apic_timer(vcpu->arch.apic);
1992 }
1993
1994 void kvm_lapic_switch_to_sw_timer(struct kvm_vcpu *vcpu)
1995 {
1996         struct kvm_lapic *apic = vcpu->arch.apic;
1997
1998         preempt_disable();
1999         /* Possibly the TSC deadline timer is not enabled yet */
2000         if (apic->lapic_timer.hv_timer_in_use)
2001                 start_sw_timer(apic);
2002         preempt_enable();
2003 }
2004
2005 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu)
2006 {
2007         struct kvm_lapic *apic = vcpu->arch.apic;
2008
2009         WARN_ON(!apic->lapic_timer.hv_timer_in_use);
2010         restart_apic_timer(apic);
2011 }
2012
2013 static void __start_apic_timer(struct kvm_lapic *apic, u32 count_reg)
2014 {
2015         atomic_set(&apic->lapic_timer.pending, 0);
2016
2017         if ((apic_lvtt_period(apic) || apic_lvtt_oneshot(apic))
2018             && !set_target_expiration(apic, count_reg))
2019                 return;
2020
2021         restart_apic_timer(apic);
2022 }
2023
2024 static void start_apic_timer(struct kvm_lapic *apic)
2025 {
2026         __start_apic_timer(apic, APIC_TMICT);
2027 }
2028
2029 static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
2030 {
2031         bool lvt0_in_nmi_mode = apic_lvt_nmi_mode(lvt0_val);
2032
2033         if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
2034                 apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
2035                 if (lvt0_in_nmi_mode) {
2036                         atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2037                 } else
2038                         atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
2039         }
2040 }
2041
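/*
 * Moving the xAPIC ID away from the vCPU ID inhibits APICv
 * (APICV_INHIBIT_REASON_APIC_ID_MODIFIED), as the accelerated delivery paths
 * assume the default ID.
 */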
2042 static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
2043 {
2044         struct kvm *kvm = apic->vcpu->kvm;
2045
2046         if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
2047                 return;
2048
2049         if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
2050                 return;
2051
2052         kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
2053 }
2054
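/*
 * Emulate a 32-bit write to a local APIC register.  Returns 0 on success, or
 * 1 if the register is reserved, read-only, or invalid for the current
 * (x2)APIC mode.
 */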
2055 static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
2056 {
2057         int ret = 0;
2058
2059         trace_kvm_apic_write(reg, val);
2060
2061         switch (reg) {
2062         case APIC_ID:           /* Local APIC ID */
2063                 if (!apic_x2apic_mode(apic)) {
2064                         kvm_apic_set_xapic_id(apic, val >> 24);
2065                         kvm_lapic_xapic_id_updated(apic);
2066                 } else {
2067                         ret = 1;
2068                 }
2069                 break;
2070
2071         case APIC_TASKPRI:
2072                 report_tpr_access(apic, true);
2073                 apic_set_tpr(apic, val & 0xff);
2074                 break;
2075
2076         case APIC_EOI:
2077                 apic_set_eoi(apic);
2078                 break;
2079
2080         case APIC_LDR:
2081                 if (!apic_x2apic_mode(apic))
2082                         kvm_apic_set_ldr(apic, val & APIC_LDR_MASK);
2083                 else
2084                         ret = 1;
2085                 break;
2086
2087         case APIC_DFR:
2088                 if (!apic_x2apic_mode(apic))
2089                         kvm_apic_set_dfr(apic, val | 0x0FFFFFFF);
2090                 else
2091                         ret = 1;
2092                 break;
2093
2094         case APIC_SPIV: {
2095                 u32 mask = 0x3ff;
2096                 if (kvm_lapic_get_reg(apic, APIC_LVR) & APIC_LVR_DIRECTED_EOI)
2097                         mask |= APIC_SPIV_DIRECTED_EOI;
2098                 apic_set_spiv(apic, val & mask);
2099                 if (!(val & APIC_SPIV_APIC_ENABLED)) {
2100                         int i;
2101                         u32 lvt_val;
2102
2103                         for (i = 0; i < KVM_APIC_LVT_NUM; i++) {
2104                                 lvt_val = kvm_lapic_get_reg(apic,
2105                                                        APIC_LVTT + 0x10 * i);
2106                                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i,
2107                                              lvt_val | APIC_LVT_MASKED);
2108                         }
2109                         apic_update_lvtt(apic);
2110                         atomic_set(&apic->lapic_timer.pending, 0);
2111
2112                 }
2113                 break;
2114         }
2115         case APIC_ICR:
2116                 WARN_ON_ONCE(apic_x2apic_mode(apic));
2117
2118                 /* No delay here, so we always clear the pending bit */
2119                 val &= ~APIC_ICR_BUSY;
2120                 kvm_apic_send_ipi(apic, val, kvm_lapic_get_reg(apic, APIC_ICR2));
2121                 kvm_lapic_set_reg(apic, APIC_ICR, val);
2122                 break;
2123         case APIC_ICR2:
2124                 if (apic_x2apic_mode(apic))
2125                         ret = 1;
2126                 else
2127                         kvm_lapic_set_reg(apic, APIC_ICR2, val & 0xff000000);
2128                 break;
2129
2130         case APIC_LVT0:
2131                 apic_manage_nmi_watchdog(apic, val);
2132                 fallthrough;
2133         case APIC_LVTTHMR:
2134         case APIC_LVTPC:
2135         case APIC_LVT1:
2136         case APIC_LVTERR: {
2137                 /* TODO: Check vector */
2138                 size_t size;
2139                 u32 index;
2140
2141                 if (!kvm_apic_sw_enabled(apic))
2142                         val |= APIC_LVT_MASKED;
2143                 size = ARRAY_SIZE(apic_lvt_mask);
2144                 index = array_index_nospec(
2145                                 (reg - APIC_LVTT) >> 4, size);
2146                 val &= apic_lvt_mask[index];
2147                 kvm_lapic_set_reg(apic, reg, val);
2148                 break;
2149         }
2150
2151         case APIC_LVTT:
2152                 if (!kvm_apic_sw_enabled(apic))
2153                         val |= APIC_LVT_MASKED;
2154                 val &= (apic_lvt_mask[0] | apic->lapic_timer.timer_mode_mask);
2155                 kvm_lapic_set_reg(apic, APIC_LVTT, val);
2156                 apic_update_lvtt(apic);
2157                 break;
2158
2159         case APIC_TMICT:
2160                 if (apic_lvtt_tscdeadline(apic))
2161                         break;
2162
2163                 cancel_apic_timer(apic);
2164                 kvm_lapic_set_reg(apic, APIC_TMICT, val);
2165                 start_apic_timer(apic);
2166                 break;
2167
2168         case APIC_TDCR: {
2169                 uint32_t old_divisor = apic->divide_count;
2170
2171                 kvm_lapic_set_reg(apic, APIC_TDCR, val & 0xb);
2172                 update_divide_count(apic);
2173                 if (apic->divide_count != old_divisor &&
2174                                 apic->lapic_timer.period) {
2175                         hrtimer_cancel(&apic->lapic_timer.timer);
2176                         update_target_expiration(apic, old_divisor);
2177                         restart_apic_timer(apic);
2178                 }
2179                 break;
2180         }
2181         case APIC_ESR:
2182                 if (apic_x2apic_mode(apic) && val != 0)
2183                         ret = 1;
2184                 break;
2185
2186         case APIC_SELF_IPI:
2187                 if (apic_x2apic_mode(apic))
2188                         kvm_apic_send_ipi(apic, APIC_DEST_SELF | (val & APIC_VECTOR_MASK), 0);
2189                 else
2190                         ret = 1;
2191                 break;
2192         default:
2193                 ret = 1;
2194                 break;
2195         }
2196
2197         /*
2198          * Recalculate APIC maps if necessary, e.g. if the software enable bit
2199          * was toggled, the APIC ID changed, etc...   The maps are marked dirty
2200          * on relevant changes, i.e. this is a nop for most writes.
2201          */
2202         kvm_recalculate_apic_map(apic->vcpu->kvm);
2203
2204         return ret;
2205 }
2206
2207 static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
2208                             gpa_t address, int len, const void *data)
2209 {
2210         struct kvm_lapic *apic = to_lapic(this);
2211         unsigned int offset = address - apic->base_address;
2212         u32 val;
2213
2214         if (!apic_mmio_in_range(apic, address))
2215                 return -EOPNOTSUPP;
2216
2217         if (!kvm_apic_hw_enabled(apic) || apic_x2apic_mode(apic)) {
2218                 if (!kvm_check_has_quirk(vcpu->kvm,
2219                                          KVM_X86_QUIRK_LAPIC_MMIO_HOLE))
2220                         return -EOPNOTSUPP;
2221
2222                 return 0;
2223         }
2224
2225         /*
2226          * APIC registers must be aligned on a 128-bit boundary.
2227          * 32/64/128-bit registers must be accessed through 32-bit reads/writes.
2228          * Refer to SDM 8.4.1.
2229          */
2230         if (len != 4 || (offset & 0xf))
2231                 return 0;
2232
2233         val = *(u32*)data;
2234
2235         kvm_lapic_reg_write(apic, offset & 0xff0, val);
2236
2237         return 0;
2238 }
2239
2240 void kvm_lapic_set_eoi(struct kvm_vcpu *vcpu)
2241 {
2242         kvm_lapic_reg_write(vcpu->arch.apic, APIC_EOI, 0);
2243 }
2244 EXPORT_SYMBOL_GPL(kvm_lapic_set_eoi);
2245
2246 /* emulate APIC access in a trap manner */
2247 void kvm_apic_write_nodecode(struct kvm_vcpu *vcpu, u32 offset)
2248 {
2249         u32 val = kvm_lapic_get_reg(vcpu->arch.apic, offset);
2250
2251         /* TODO: optimize to just emulate side effect w/o one more write */
2252         kvm_lapic_reg_write(vcpu->arch.apic, offset, val);
2253 }
2254 EXPORT_SYMBOL_GPL(kvm_apic_write_nodecode);
2255
2256 void kvm_free_lapic(struct kvm_vcpu *vcpu)
2257 {
2258         struct kvm_lapic *apic = vcpu->arch.apic;
2259
2260         if (!vcpu->arch.apic)
2261                 return;
2262
2263         hrtimer_cancel(&apic->lapic_timer.timer);
2264
2265         if (!(vcpu->arch.apic_base & MSR_IA32_APICBASE_ENABLE))
2266                 static_branch_slow_dec_deferred(&apic_hw_disabled);
2267
2268         if (!apic->sw_enabled)
2269                 static_branch_slow_dec_deferred(&apic_sw_disabled);
2270
2271         if (apic->regs)
2272                 free_page((unsigned long)apic->regs);
2273
2274         kfree(apic);
2275 }
2276
2277 /*
2278  *----------------------------------------------------------------------
2279  * LAPIC interface
2280  *----------------------------------------------------------------------
2281  */
2282 u64 kvm_get_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu)
2283 {
2284         struct kvm_lapic *apic = vcpu->arch.apic;
2285
2286         if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2287                 return 0;
2288
2289         return apic->lapic_timer.tscdeadline;
2290 }
2291
2292 void kvm_set_lapic_tscdeadline_msr(struct kvm_vcpu *vcpu, u64 data)
2293 {
2294         struct kvm_lapic *apic = vcpu->arch.apic;
2295
2296         if (!kvm_apic_present(vcpu) || !apic_lvtt_tscdeadline(apic))
2297                 return;
2298
2299         hrtimer_cancel(&apic->lapic_timer.timer);
2300         apic->lapic_timer.tscdeadline = data;
2301         start_apic_timer(apic);
2302 }
2303
2304 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
2305 {
2306         apic_set_tpr(vcpu->arch.apic, (cr8 & 0x0f) << 4);
2307 }
2308
2309 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
2310 {
2311         u64 tpr;
2312
2313         tpr = (u64) kvm_lapic_get_reg(vcpu->arch.apic, APIC_TASKPRI);
2314
2315         return (tpr & 0xf0) >> 4;
2316 }
2317
2318 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
2319 {
2320         u64 old_value = vcpu->arch.apic_base;
2321         struct kvm_lapic *apic = vcpu->arch.apic;
2322
2323         vcpu->arch.apic_base = value;
2324
2325         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE)
2326                 kvm_update_cpuid_runtime(vcpu);
2327
2328         if (!apic)
2329                 return;
2330
2331         /* update jump label if enable bit changes */
2332         if ((old_value ^ value) & MSR_IA32_APICBASE_ENABLE) {
2333                 if (value & MSR_IA32_APICBASE_ENABLE) {
2334                         kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2335                         static_branch_slow_dec_deferred(&apic_hw_disabled);
2336                         /* Check if there are APF page ready requests pending */
2337                         kvm_make_request(KVM_REQ_APF_READY, vcpu);
2338                 } else {
2339                         static_branch_inc(&apic_hw_disabled.key);
2340                         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2341                 }
2342         }
2343
2344         if (((old_value ^ value) & X2APIC_ENABLE) && (value & X2APIC_ENABLE))
2345                 kvm_apic_set_x2apic_id(apic, vcpu->vcpu_id);
2346
2347         if ((old_value ^ value) & (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE))
2348                 static_call_cond(kvm_x86_set_virtual_apic_mode)(vcpu);
2349
2350         apic->base_address = apic->vcpu->arch.apic_base &
2351                              MSR_IA32_APICBASE_BASE;
2352
2353         if ((value & MSR_IA32_APICBASE_ENABLE) &&
2354              apic->base_address != APIC_DEFAULT_PHYS_BASE) {
2355                 kvm_set_apicv_inhibit(apic->vcpu->kvm,
2356                                       APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
2357         }
2358 }
2359
2360 void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
2361 {
2362         struct kvm_lapic *apic = vcpu->arch.apic;
2363
2364         if (vcpu->arch.apicv_active) {
2365                 /* irr_pending is always true when apicv is activated. */
2366                 apic->irr_pending = true;
2367                 apic->isr_count = 1;
2368         } else {
2369                 /*
2370                  * Don't clear irr_pending, searching the IRR can race with
2371                  * updates from the CPU as APICv is still active from hardware's
2372                  * perspective.  The flag will be cleared as appropriate when
2373                  * KVM injects the interrupt.
2374                  */
2375                 apic->isr_count = count_vectors(apic->regs + APIC_ISR);
2376         }
2377 }
2378 EXPORT_SYMBOL_GPL(kvm_apic_update_apicv);
2379
2380 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
2381 {
2382         struct kvm_lapic *apic = vcpu->arch.apic;
2383         u64 msr_val;
2384         int i;
2385
2386         if (!init_event) {
2387                 msr_val = APIC_DEFAULT_PHYS_BASE | MSR_IA32_APICBASE_ENABLE;
2388                 if (kvm_vcpu_is_reset_bsp(vcpu))
2389                         msr_val |= MSR_IA32_APICBASE_BSP;
2390                 kvm_lapic_set_base(vcpu, msr_val);
2391         }
2392
2393         if (!apic)
2394                 return;
2395
2396         /* Stop the timer in case it's a reset to an active apic */
2397         hrtimer_cancel(&apic->lapic_timer.timer);
2398
2399         /* The xAPIC ID is set at RESET even if the APIC was already enabled. */
2400         if (!init_event)
2401                 kvm_apic_set_xapic_id(apic, vcpu->vcpu_id);
2402         kvm_apic_set_version(apic->vcpu);
2403
2404         for (i = 0; i < KVM_APIC_LVT_NUM; i++)
2405                 kvm_lapic_set_reg(apic, APIC_LVTT + 0x10 * i, APIC_LVT_MASKED);
2406         apic_update_lvtt(apic);
2407         if (kvm_vcpu_is_reset_bsp(vcpu) &&
2408             kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_LINT0_REENABLED))
2409                 kvm_lapic_set_reg(apic, APIC_LVT0,
2410                              SET_APIC_DELIVERY_MODE(0, APIC_MODE_EXTINT));
2411         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2412
2413         kvm_apic_set_dfr(apic, 0xffffffffU);
2414         apic_set_spiv(apic, 0xff);
2415         kvm_lapic_set_reg(apic, APIC_TASKPRI, 0);
2416         if (!apic_x2apic_mode(apic))
2417                 kvm_apic_set_ldr(apic, 0);
2418         kvm_lapic_set_reg(apic, APIC_ESR, 0);
2419         if (!apic_x2apic_mode(apic)) {
2420                 kvm_lapic_set_reg(apic, APIC_ICR, 0);
2421                 kvm_lapic_set_reg(apic, APIC_ICR2, 0);
2422         } else {
2423                 kvm_lapic_set_reg64(apic, APIC_ICR, 0);
2424         }
2425         kvm_lapic_set_reg(apic, APIC_TDCR, 0);
2426         kvm_lapic_set_reg(apic, APIC_TMICT, 0);
2427         for (i = 0; i < 8; i++) {
2428                 kvm_lapic_set_reg(apic, APIC_IRR + 0x10 * i, 0);
2429                 kvm_lapic_set_reg(apic, APIC_ISR + 0x10 * i, 0);
2430                 kvm_lapic_set_reg(apic, APIC_TMR + 0x10 * i, 0);
2431         }
2432         kvm_apic_update_apicv(vcpu);
2433         apic->highest_isr_cache = -1;
2434         update_divide_count(apic);
2435         atomic_set(&apic->lapic_timer.pending, 0);
2436
2437         vcpu->arch.pv_eoi.msr_val = 0;
2438         apic_update_ppr(apic);
2439         if (vcpu->arch.apicv_active) {
2440                 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2441                 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, -1);
2442                 static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, -1);
2443         }
2444
2445         vcpu->arch.apic_arb_prio = 0;
2446         vcpu->arch.apic_attention = 0;
2447
2448         kvm_recalculate_apic_map(vcpu->kvm);
2449 }
2450
2451 /*
2452  *----------------------------------------------------------------------
2453  * timer interface
2454  *----------------------------------------------------------------------
2455  */
2456
2457 static bool lapic_is_periodic(struct kvm_lapic *apic)
2458 {
2459         return apic_lvtt_period(apic);
2460 }
2461
2462 int apic_has_pending_timer(struct kvm_vcpu *vcpu)
2463 {
2464         struct kvm_lapic *apic = vcpu->arch.apic;
2465
2466         if (apic_enabled(apic) && apic_lvt_enabled(apic, APIC_LVTT))
2467                 return atomic_read(&apic->lapic_timer.pending);
2468
2469         return 0;
2470 }
2471
2472 int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type)
2473 {
2474         u32 reg = kvm_lapic_get_reg(apic, lvt_type);
2475         int vector, mode, trig_mode;
2476
2477         if (kvm_apic_hw_enabled(apic) && !(reg & APIC_LVT_MASKED)) {
2478                 vector = reg & APIC_VECTOR_MASK;
2479                 mode = reg & APIC_MODE_MASK;
2480                 trig_mode = reg & APIC_LVT_LEVEL_TRIGGER;
2481                 return __apic_accept_irq(apic, mode, vector, 1, trig_mode,
2482                                         NULL);
2483         }
2484         return 0;
2485 }
2486
2487 void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu)
2488 {
2489         struct kvm_lapic *apic = vcpu->arch.apic;
2490
2491         if (apic)
2492                 kvm_apic_local_deliver(apic, APIC_LVT0);
2493 }
2494
2495 static const struct kvm_io_device_ops apic_mmio_ops = {
2496         .read     = apic_mmio_read,
2497         .write    = apic_mmio_write,
2498 };
2499
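/*
 * hrtimer callback: record/deliver the expiration and, for a periodic timer,
 * advance the target expiration and re-arm by returning HRTIMER_RESTART.
 */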
2500 static enum hrtimer_restart apic_timer_fn(struct hrtimer *data)
2501 {
2502         struct kvm_timer *ktimer = container_of(data, struct kvm_timer, timer);
2503         struct kvm_lapic *apic = container_of(ktimer, struct kvm_lapic, lapic_timer);
2504
2505         apic_timer_expired(apic, true);
2506
2507         if (lapic_is_periodic(apic)) {
2508                 advance_periodic_target_expiration(apic);
2509                 hrtimer_add_expires_ns(&ktimer->timer, ktimer->period);
2510                 return HRTIMER_RESTART;
2511         } else
2512                 return HRTIMER_NORESTART;
2513 }
2514
2515 int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
2516 {
2517         struct kvm_lapic *apic;
2518
2519         ASSERT(vcpu != NULL);
2520
2521         apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
2522         if (!apic)
2523                 goto nomem;
2524
2525         vcpu->arch.apic = apic;
2526
2527         apic->regs = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
2528         if (!apic->regs) {
2529                 printk(KERN_ERR "Failed to allocate APIC register page for vcpu %x\n",
2530                        vcpu->vcpu_id);
2531                 goto nomem_free_apic;
2532         }
2533         apic->vcpu = vcpu;
2534
2535         hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
2536                      HRTIMER_MODE_ABS_HARD);
2537         apic->lapic_timer.timer.function = apic_timer_fn;
2538         if (timer_advance_ns == -1) {
2539                 apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_NS_INIT;
2540                 lapic_timer_advance_dynamic = true;
2541         } else {
2542                 apic->lapic_timer.timer_advance_ns = timer_advance_ns;
2543                 lapic_timer_advance_dynamic = false;
2544         }
2545
2546         /*
2547          * Stuff the APIC ENABLE bit in lieu of temporarily incrementing
2548          * apic_hw_disabled; the full RESET value is set by kvm_lapic_reset().
2549          */
2550         vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
2551         static_branch_inc(&apic_sw_disabled.key); /* sw disabled at reset */
2552         kvm_iodevice_init(&apic->dev, &apic_mmio_ops);
2553
2554         return 0;
2555 nomem_free_apic:
2556         kfree(apic);
2557         vcpu->arch.apic = NULL;
2558 nomem:
2559         return -ENOMEM;
2560 }
2561
2562 int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu)
2563 {
2564         struct kvm_lapic *apic = vcpu->arch.apic;
2565         u32 ppr;
2566
2567         if (!kvm_apic_present(vcpu))
2568                 return -1;
2569
2570         __apic_update_ppr(apic, &ppr);
2571         return apic_has_interrupt_for_ppr(apic, ppr);
2572 }
2573 EXPORT_SYMBOL_GPL(kvm_apic_has_interrupt);
2574
2575 int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu)
2576 {
2577         u32 lvt0 = kvm_lapic_get_reg(vcpu->arch.apic, APIC_LVT0);
2578
2579         if (!kvm_apic_hw_enabled(vcpu->arch.apic))
2580                 return 1;
2581         if ((lvt0 & APIC_LVT_MASKED) == 0 &&
2582             GET_APIC_DELIVERY_MODE(lvt0) == APIC_MODE_EXTINT)
2583                 return 1;
2584         return 0;
2585 }
2586
2587 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
2588 {
2589         struct kvm_lapic *apic = vcpu->arch.apic;
2590
2591         if (atomic_read(&apic->lapic_timer.pending) > 0) {
2592                 kvm_apic_inject_pending_timer_irqs(apic);
2593                 atomic_set(&apic->lapic_timer.pending, 0);
2594         }
2595 }
2596
2597 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu)
2598 {
2599         int vector = kvm_apic_has_interrupt(vcpu);
2600         struct kvm_lapic *apic = vcpu->arch.apic;
2601         u32 ppr;
2602
2603         if (vector == -1)
2604                 return -1;
2605
2606         /*
2607          * We get here even with APIC virtualization enabled, if doing
2608          * nested virtualization and L1 runs with the "acknowledge interrupt
2609          * on exit" mode.  Then we cannot inject the interrupt via RVI,
2610          * because the process would deliver it through the IDT.
2611          */
2612
2613         apic_clear_irr(vector, apic);
2614         if (to_hv_vcpu(vcpu) && test_bit(vector, to_hv_synic(vcpu)->auto_eoi_bitmap)) {
2615                 /*
2616                  * For auto-EOI interrupts, there might be another pending
2617                  * interrupt above PPR, so check whether to raise another
2618                  * KVM_REQ_EVENT.
2619                  */
2620                 apic_update_ppr(apic);
2621         } else {
2622                 /*
2623                  * For normal interrupts, PPR has been raised and there cannot
2624                  * be a higher-priority pending interrupt---except if there was
2625                  * a concurrent interrupt injection, but that would have
2626                  * triggered KVM_REQ_EVENT already.
2627                  */
2628                 apic_set_isr(vector, apic);
2629                 __apic_update_ppr(apic, &ppr);
2630         }
2631
2632         return vector;
2633 }
2634
2635 static int kvm_apic_state_fixup(struct kvm_vcpu *vcpu,
2636                 struct kvm_lapic_state *s, bool set)
2637 {
2638         if (apic_x2apic_mode(vcpu->arch.apic)) {
2639                 u32 *id = (u32 *)(s->regs + APIC_ID);
2640                 u32 *ldr = (u32 *)(s->regs + APIC_LDR);
2641                 u64 icr;
2642
2643                 if (vcpu->kvm->arch.x2apic_format) {
2644                         if (*id != vcpu->vcpu_id)
2645                                 return -EINVAL;
2646                 } else {
2647                         if (set)
2648                                 *id >>= 24;
2649                         else
2650                                 *id <<= 24;
2651                 }
2652
2653                 /*
2654                  * In x2APIC mode, the LDR is fixed and based on the id.  And
2655                  * ICR is internally a single 64-bit register, but needs to be
2656                  * split to ICR+ICR2 in userspace for backwards compatibility.
2657                  */
2658                 if (set) {
2659                         *ldr = kvm_apic_calc_x2apic_ldr(*id);
2660
2661                         icr = __kvm_lapic_get_reg(s->regs, APIC_ICR) |
2662                               (u64)__kvm_lapic_get_reg(s->regs, APIC_ICR2) << 32;
2663                         __kvm_lapic_set_reg64(s->regs, APIC_ICR, icr);
2664                 } else {
2665                         icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
2666                         __kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
2667                 }
2668         } else {
2669                 kvm_lapic_xapic_id_updated(vcpu->arch.apic);
2670         }
2671
2672         return 0;
2673 }
2674
2675 int kvm_apic_get_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2676 {
2677         memcpy(s->regs, vcpu->arch.apic->regs, sizeof(*s));
2678
2679         /*
2680          * Get calculated timer current count for remaining timer period (if
2681          * any) and store it in the returned register set.
2682          */
2683         __kvm_lapic_set_reg(s->regs, APIC_TMCCT,
2684                             __apic_read(vcpu->arch.apic, APIC_TMCCT));
2685
2686         return kvm_apic_state_fixup(vcpu, s, false);
2687 }
2688
2689 int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
2690 {
2691         struct kvm_lapic *apic = vcpu->arch.apic;
2692         int r;
2693
2694         kvm_lapic_set_base(vcpu, vcpu->arch.apic_base);
2695         /* set SPIV separately to get count of SW disabled APICs right */
2696         apic_set_spiv(apic, *((u32 *)(s->regs + APIC_SPIV)));
2697
2698         r = kvm_apic_state_fixup(vcpu, s, true);
2699         if (r) {
2700                 kvm_recalculate_apic_map(vcpu->kvm);
2701                 return r;
2702         }
2703         memcpy(vcpu->arch.apic->regs, s->regs, sizeof(*s));
2704
2705         atomic_set_release(&apic->vcpu->kvm->arch.apic_map_dirty, DIRTY);
2706         kvm_recalculate_apic_map(vcpu->kvm);
2707         kvm_apic_set_version(vcpu);
2708
2709         apic_update_ppr(apic);
2710         cancel_apic_timer(apic);
2711         apic->lapic_timer.expired_tscdeadline = 0;
2712         apic_update_lvtt(apic);
2713         apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
2714         update_divide_count(apic);
2715         __start_apic_timer(apic, APIC_TMCCT);
2716         kvm_lapic_set_reg(apic, APIC_TMCCT, 0);
2717         kvm_apic_update_apicv(vcpu);
2718         apic->highest_isr_cache = -1;
2719         if (vcpu->arch.apicv_active) {
2720                 static_call_cond(kvm_x86_apicv_post_state_restore)(vcpu);
2721                 static_call_cond(kvm_x86_hwapic_irr_update)(vcpu, apic_find_highest_irr(apic));
2722                 static_call_cond(kvm_x86_hwapic_isr_update)(vcpu, apic_find_highest_isr(apic));
2723         }
2724         kvm_make_request(KVM_REQ_EVENT, vcpu);
2725         if (ioapic_in_kernel(vcpu->kvm))
2726                 kvm_rtc_eoi_tracking_restore_one(vcpu);
2727
2728         vcpu->arch.apic_arb_prio = 0;
2729
2730         return 0;
2731 }
2732
2733 void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
2734 {
2735         struct hrtimer *timer;
2736
2737         if (!lapic_in_kernel(vcpu) ||
2738                 kvm_can_post_timer_interrupt(vcpu))
2739                 return;
2740
2741         timer = &vcpu->arch.apic->lapic_timer.timer;
2742         if (hrtimer_cancel(timer))
2743                 hrtimer_start_expires(timer, HRTIMER_MODE_ABS_HARD);
2744 }
2745
2746 /*
2747  * apic_sync_pv_eoi_from_guest - called on vmexit or cancel interrupt
2748  *
2749  * Detect whether the guest triggered a PV EOI since the
2750  * last entry. If so, perform the EOI on the guest's behalf.
2751  * Clear PV EOI in guest memory in any case.
2752  */
2753 static void apic_sync_pv_eoi_from_guest(struct kvm_vcpu *vcpu,
2754                                         struct kvm_lapic *apic)
2755 {
2756         int vector;
2757         /*
2758          * PV EOI state is derived from KVM_APIC_PV_EOI_PENDING in host
2759          * and KVM_PV_EOI_ENABLED in guest memory as follows:
2760          *
2761          * KVM_APIC_PV_EOI_PENDING is unset:
2762          *      -> host disabled PV EOI.
2763          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is set:
2764          *      -> host enabled PV EOI, guest did not execute EOI yet.
2765          * KVM_APIC_PV_EOI_PENDING is set, KVM_PV_EOI_ENABLED is unset:
2766          *      -> host enabled PV EOI, guest executed EOI.
2767          */
2768         BUG_ON(!pv_eoi_enabled(vcpu));
2769
2770         if (pv_eoi_test_and_clr_pending(vcpu))
2771                 return;
2772         vector = apic_set_eoi(apic);
2773         trace_kvm_pv_eoi(apic, vector);
2774 }
2775
2776 void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu)
2777 {
2778         u32 data;
2779
2780         if (test_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention))
2781                 apic_sync_pv_eoi_from_guest(vcpu, vcpu->arch.apic);
2782
2783         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2784                 return;
2785
2786         if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2787                                   sizeof(u32)))
2788                 return;
2789
2790         apic_set_tpr(vcpu->arch.apic, data & 0xff);
2791 }
2792
2793 /*
2794  * apic_sync_pv_eoi_to_guest - called before vmentry
2795  *
2796  * Detect whether it is safe to enable PV EOI and,
2797  * if so, do so.
2798  */
2799 static void apic_sync_pv_eoi_to_guest(struct kvm_vcpu *vcpu,
2800                                         struct kvm_lapic *apic)
2801 {
2802         if (!pv_eoi_enabled(vcpu) ||
2803             /* IRR set or many bits in ISR: could be nested. */
2804             apic->irr_pending ||
2805             /* Cache not set: could be safe but we don't bother. */
2806             apic->highest_isr_cache == -1 ||
2807             /* Need EOI to update ioapic. */
2808             kvm_ioapic_handles_vector(apic, apic->highest_isr_cache)) {
2809                 /*
2810                  * PV EOI was disabled by apic_sync_pv_eoi_from_guest
2811                  * so we need not do anything here.
2812                  */
2813                 return;
2814         }
2815
2816         pv_eoi_set_pending(apic->vcpu);
2817 }
2818
2819 void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu)
2820 {
2821         u32 data, tpr;
2822         int max_irr, max_isr;
2823         struct kvm_lapic *apic = vcpu->arch.apic;
2824
2825         apic_sync_pv_eoi_to_guest(vcpu, apic);
2826
2827         if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention))
2828                 return;
2829
2830         tpr = kvm_lapic_get_reg(apic, APIC_TASKPRI) & 0xff;
2831         max_irr = apic_find_highest_irr(apic);
2832         if (max_irr < 0)
2833                 max_irr = 0;
2834         max_isr = apic_find_highest_isr(apic);
2835         if (max_isr < 0)
2836                 max_isr = 0;
2837         data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24);
2838
2839         kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data,
2840                                 sizeof(u32));
2841 }
2842
2843 int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr)
2844 {
2845         if (vapic_addr) {
2846                 if (kvm_gfn_to_hva_cache_init(vcpu->kvm,
2847                                         &vcpu->arch.apic->vapic_cache,
2848                                         vapic_addr, sizeof(u32)))
2849                         return -EINVAL;
2850                 __set_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2851         } else {
2852                 __clear_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention);
2853         }
2854
2855         vcpu->arch.apic->vapic_addr = vapic_addr;
2856         return 0;
2857 }
2858
2859 int kvm_x2apic_icr_write(struct kvm_lapic *apic, u64 data)
2860 {
2861         data &= ~APIC_ICR_BUSY;
2862
2863         kvm_apic_send_ipi(apic, (u32)data, (u32)(data >> 32));
2864         kvm_lapic_set_reg64(apic, APIC_ICR, data);
2865         trace_kvm_apic_write(APIC_ICR, data);
2866         return 0;
2867 }
2868
2869 static int kvm_lapic_msr_read(struct kvm_lapic *apic, u32 reg, u64 *data)
2870 {
2871         u32 low;
2872
2873         if (reg == APIC_ICR) {
2874                 *data = kvm_lapic_get_reg64(apic, APIC_ICR);
2875                 return 0;
2876         }
2877
2878         if (kvm_lapic_reg_read(apic, reg, 4, &low))
2879                 return 1;
2880
2881         *data = low;
2882
2883         return 0;
2884 }
2885
2886 static int kvm_lapic_msr_write(struct kvm_lapic *apic, u32 reg, u64 data)
2887 {
2888         /*
2889          * ICR is a 64-bit register in x2APIC mode (and Hyper-V PV vAPIC) and
2890          * can be written as such; all other registers remain accessible only
2891          * through 32-bit reads/writes.
2892          */
2893         if (reg == APIC_ICR)
2894                 return kvm_x2apic_icr_write(apic, data);
2895
2896         return kvm_lapic_reg_write(apic, reg, (u32)data);
2897 }
2898
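/*
 * x2APIC MSR 0x800 + N corresponds to xAPIC register offset N * 16, hence the
 * (msr - APIC_BASE_MSR) << 4 conversion below.
 */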
2899 int kvm_x2apic_msr_write(struct kvm_vcpu *vcpu, u32 msr, u64 data)
2900 {
2901         struct kvm_lapic *apic = vcpu->arch.apic;
2902         u32 reg = (msr - APIC_BASE_MSR) << 4;
2903
2904         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2905                 return 1;
2906
2907         return kvm_lapic_msr_write(apic, reg, data);
2908 }
2909
2910 int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
2911 {
2912         struct kvm_lapic *apic = vcpu->arch.apic;
2913         u32 reg = (msr - APIC_BASE_MSR) << 4;
2914
2915         if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
2916                 return 1;
2917
2918         if (reg == APIC_DFR)
2919                 return 1;
2920
2921         return kvm_lapic_msr_read(apic, reg, data);
2922 }
2923
2924 int kvm_hv_vapic_msr_write(struct kvm_vcpu *vcpu, u32 reg, u64 data)
2925 {
2926         if (!lapic_in_kernel(vcpu))
2927                 return 1;
2928
2929         return kvm_lapic_msr_write(vcpu->arch.apic, reg, data);
2930 }
2931
2932 int kvm_hv_vapic_msr_read(struct kvm_vcpu *vcpu, u32 reg, u64 *data)
2933 {
2934         if (!lapic_in_kernel(vcpu))
2935                 return 1;
2936
2937         return kvm_lapic_msr_read(vcpu->arch.apic, reg, data);
2938 }
2939
2940 int kvm_lapic_set_pv_eoi(struct kvm_vcpu *vcpu, u64 data, unsigned long len)
2941 {
2942         u64 addr = data & ~KVM_MSR_ENABLED;
2943         struct gfn_to_hva_cache *ghc = &vcpu->arch.pv_eoi.data;
2944         unsigned long new_len;
2945         int ret;
2946
2947         if (!IS_ALIGNED(addr, 4))
2948                 return 1;
2949
2950         if (data & KVM_MSR_ENABLED) {
2951                 if (addr == ghc->gpa && len <= ghc->len)
2952                         new_len = ghc->len;
2953                 else
2954                         new_len = len;
2955
2956                 ret = kvm_gfn_to_hva_cache_init(vcpu->kvm, ghc, addr, new_len);
2957                 if (ret)
2958                         return ret;
2959         }
2960
2961         vcpu->arch.pv_eoi.msr_val = data;
2962
2963         return 0;
2964 }
2965
2966 int kvm_apic_accept_events(struct kvm_vcpu *vcpu)
2967 {
2968         struct kvm_lapic *apic = vcpu->arch.apic;
2969         u8 sipi_vector;
2970         int r;
2971         unsigned long pe;
2972
2973         if (!lapic_in_kernel(vcpu))
2974                 return 0;
2975
2976         /*
2977          * Read pending events before calling the check_events
2978          * callback.
2979          */
2980         pe = smp_load_acquire(&apic->pending_events);
2981         if (!pe)
2982                 return 0;
2983
2984         if (is_guest_mode(vcpu)) {
2985                 r = kvm_check_nested_events(vcpu);
2986                 if (r < 0)
2987                         return r == -EBUSY ? 0 : r;
2988                 /*
2989                  * If an event has happened and caused a vmexit,
2990                  * we know INITs are latched and therefore
2991                  * we will not incorrectly deliver an APIC
2992                  * event instead of a vmexit.
2993                  */
2994         }
2995
2996         /*
2997          * INITs are latched while CPU is in specific states
2998          * (SMM, VMX root mode, SVM with GIF=0).
2999          * Because a CPU cannot be in these states immediately
3000          * after it has processed an INIT signal (and thus in
3001          * KVM_MP_STATE_INIT_RECEIVED state), just eat SIPIs
3002          * and leave the INIT pending.
3003          */
3004         if (kvm_vcpu_latch_init(vcpu)) {
3005                 WARN_ON_ONCE(vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED);
3006                 if (test_bit(KVM_APIC_SIPI, &pe))
3007                         clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3008                 return 0;
3009         }
3010
3011         if (test_bit(KVM_APIC_INIT, &pe)) {
3012                 clear_bit(KVM_APIC_INIT, &apic->pending_events);
3013                 kvm_vcpu_reset(vcpu, true);
3014                 if (kvm_vcpu_is_bsp(apic->vcpu))
3015                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3016                 else
3017                         vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
3018         }
3019         if (test_bit(KVM_APIC_SIPI, &pe)) {
3020                 clear_bit(KVM_APIC_SIPI, &apic->pending_events);
3021                 if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
3022                         /* evaluate pending_events before reading the vector */
3023                         smp_rmb();
3024                         sipi_vector = apic->sipi_vector;
3025                         static_call(kvm_x86_vcpu_deliver_sipi_vector)(vcpu, sipi_vector);
3026                         vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
3027                 }
3028         }
3029         return 0;
3030 }
3031
3032 void kvm_lapic_exit(void)
3033 {
3034         static_key_deferred_flush(&apic_hw_disabled);
3035         WARN_ON(static_branch_unlikely(&apic_hw_disabled.key));
3036         static_key_deferred_flush(&apic_sw_disabled);
3037         WARN_ON(static_branch_unlikely(&apic_sw_disabled.key));
3038 }