/* SPDX-License-Identifier: GPL-2.0 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kvm_host.h>
#include "x86.h"
#include "kvm_cache_regs.h"
#include "kvm_emulate.h"
#include "smm.h"
#include "cpuid.h"
#include "trace.h"

#define CHECK_SMRAM32_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00)

#define CHECK_SMRAM64_OFFSET(field, offset) \
        ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00)

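/*
 * Compile-time layout checks: every field of the 32-bit and 64-bit SMRAM
 * state-save structures must sit at its expected offset within the 512-byte
 * image that KVM reads and writes at SMBASE + 0xFE00.
 */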
static void check_smram_offsets(void)
{
        /* 32 bit SMRAM image */
        CHECK_SMRAM32_OFFSET(reserved1,                 0xFE00);
        CHECK_SMRAM32_OFFSET(smbase,                    0xFEF8);
        CHECK_SMRAM32_OFFSET(smm_revision,              0xFEFC);
        CHECK_SMRAM32_OFFSET(io_inst_restart,           0xFF00);
        CHECK_SMRAM32_OFFSET(auto_hlt_restart,          0xFF02);
        CHECK_SMRAM32_OFFSET(io_restart_rdi,            0xFF04);
        CHECK_SMRAM32_OFFSET(io_restart_rcx,            0xFF08);
        CHECK_SMRAM32_OFFSET(io_restart_rsi,            0xFF0C);
        CHECK_SMRAM32_OFFSET(io_restart_rip,            0xFF10);
        CHECK_SMRAM32_OFFSET(cr4,                       0xFF14);
        CHECK_SMRAM32_OFFSET(reserved2,                 0xFF18);
        CHECK_SMRAM32_OFFSET(int_shadow,                0xFF1A);
        CHECK_SMRAM32_OFFSET(reserved3,                 0xFF1B);
        CHECK_SMRAM32_OFFSET(ds,                        0xFF2C);
        CHECK_SMRAM32_OFFSET(fs,                        0xFF38);
        CHECK_SMRAM32_OFFSET(gs,                        0xFF44);
        CHECK_SMRAM32_OFFSET(idtr,                      0xFF50);
        CHECK_SMRAM32_OFFSET(tr,                        0xFF5C);
        CHECK_SMRAM32_OFFSET(gdtr,                      0xFF6C);
        CHECK_SMRAM32_OFFSET(ldtr,                      0xFF78);
        CHECK_SMRAM32_OFFSET(es,                        0xFF84);
        CHECK_SMRAM32_OFFSET(cs,                        0xFF90);
        CHECK_SMRAM32_OFFSET(ss,                        0xFF9C);
        CHECK_SMRAM32_OFFSET(es_sel,                    0xFFA8);
        CHECK_SMRAM32_OFFSET(cs_sel,                    0xFFAC);
        CHECK_SMRAM32_OFFSET(ss_sel,                    0xFFB0);
        CHECK_SMRAM32_OFFSET(ds_sel,                    0xFFB4);
        CHECK_SMRAM32_OFFSET(fs_sel,                    0xFFB8);
        CHECK_SMRAM32_OFFSET(gs_sel,                    0xFFBC);
        CHECK_SMRAM32_OFFSET(ldtr_sel,                  0xFFC0);
        CHECK_SMRAM32_OFFSET(tr_sel,                    0xFFC4);
        CHECK_SMRAM32_OFFSET(dr7,                       0xFFC8);
        CHECK_SMRAM32_OFFSET(dr6,                       0xFFCC);
        CHECK_SMRAM32_OFFSET(gprs,                      0xFFD0);
        CHECK_SMRAM32_OFFSET(eip,                       0xFFF0);
        CHECK_SMRAM32_OFFSET(eflags,                    0xFFF4);
        CHECK_SMRAM32_OFFSET(cr3,                       0xFFF8);
        CHECK_SMRAM32_OFFSET(cr0,                       0xFFFC);

        /* 64 bit SMRAM image */
        CHECK_SMRAM64_OFFSET(es,                        0xFE00);
        CHECK_SMRAM64_OFFSET(cs,                        0xFE10);
        CHECK_SMRAM64_OFFSET(ss,                        0xFE20);
        CHECK_SMRAM64_OFFSET(ds,                        0xFE30);
        CHECK_SMRAM64_OFFSET(fs,                        0xFE40);
        CHECK_SMRAM64_OFFSET(gs,                        0xFE50);
        CHECK_SMRAM64_OFFSET(gdtr,                      0xFE60);
        CHECK_SMRAM64_OFFSET(ldtr,                      0xFE70);
        CHECK_SMRAM64_OFFSET(idtr,                      0xFE80);
        CHECK_SMRAM64_OFFSET(tr,                        0xFE90);
        CHECK_SMRAM64_OFFSET(io_restart_rip,            0xFEA0);
        CHECK_SMRAM64_OFFSET(io_restart_rcx,            0xFEA8);
        CHECK_SMRAM64_OFFSET(io_restart_rsi,            0xFEB0);
        CHECK_SMRAM64_OFFSET(io_restart_rdi,            0xFEB8);
        CHECK_SMRAM64_OFFSET(io_restart_dword,          0xFEC0);
        CHECK_SMRAM64_OFFSET(reserved1,                 0xFEC4);
        CHECK_SMRAM64_OFFSET(io_inst_restart,           0xFEC8);
        CHECK_SMRAM64_OFFSET(auto_hlt_restart,          0xFEC9);
        CHECK_SMRAM64_OFFSET(amd_nmi_mask,              0xFECA);
        CHECK_SMRAM64_OFFSET(int_shadow,                0xFECB);
        CHECK_SMRAM64_OFFSET(reserved2,                 0xFECC);
        CHECK_SMRAM64_OFFSET(efer,                      0xFED0);
        CHECK_SMRAM64_OFFSET(svm_guest_flag,            0xFED8);
        CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa,        0xFEE0);
        CHECK_SMRAM64_OFFSET(svm_guest_virtual_int,     0xFEE8);
        CHECK_SMRAM64_OFFSET(reserved3,                 0xFEF0);
        CHECK_SMRAM64_OFFSET(smm_revison,               0xFEFC);
        CHECK_SMRAM64_OFFSET(smbase,                    0xFF00);
        CHECK_SMRAM64_OFFSET(reserved4,                 0xFF04);
        CHECK_SMRAM64_OFFSET(ssp,                       0xFF18);
        CHECK_SMRAM64_OFFSET(svm_guest_pat,             0xFF20);
        CHECK_SMRAM64_OFFSET(svm_host_efer,             0xFF28);
        CHECK_SMRAM64_OFFSET(svm_host_cr4,              0xFF30);
        CHECK_SMRAM64_OFFSET(svm_host_cr3,              0xFF38);
        CHECK_SMRAM64_OFFSET(svm_host_cr0,              0xFF40);
        CHECK_SMRAM64_OFFSET(cr4,                       0xFF48);
        CHECK_SMRAM64_OFFSET(cr3,                       0xFF50);
        CHECK_SMRAM64_OFFSET(cr0,                       0xFF58);
        CHECK_SMRAM64_OFFSET(dr7,                       0xFF60);
        CHECK_SMRAM64_OFFSET(dr6,                       0xFF68);
        CHECK_SMRAM64_OFFSET(rflags,                    0xFF70);
        CHECK_SMRAM64_OFFSET(rip,                       0xFF78);
        CHECK_SMRAM64_OFFSET(gprs,                      0xFF80);

        BUILD_BUG_ON(sizeof(union kvm_smram) != 512);
}

#undef CHECK_SMRAM64_OFFSET
#undef CHECK_SMRAM32_OFFSET

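/*
 * Toggle the SMM-related hflags on SMM entry/exit.  On exit, re-evaluate
 * pending events (a latched INIT or SMI) and force the PDPTRs to be
 * reloaded from guest memory.
 */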
void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm)
{
        trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm);

        if (entering_smm) {
                vcpu->arch.hflags |= HF_SMM_MASK;
        } else {
                vcpu->arch.hflags &= ~(HF_SMM_MASK | HF_SMM_INSIDE_NMI_MASK);

                /* Process a latched INIT or SMI, if any.  */
                kvm_make_request(KVM_REQ_EVENT, vcpu);

                /*
                 * Even if KVM_SET_SREGS2 loaded PDPTRs out of band,
                 * on SMM exit we still need to reload them from
                 * guest memory
                 */
                vcpu->arch.pdptrs_from_userspace = false;
        }

        kvm_mmu_reset_context(vcpu);
}

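/* Record a pending SMI and request event processing so it gets injected. */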
void process_smi(struct kvm_vcpu *vcpu)
{
        vcpu->arch.smi_pending = true;
        kvm_make_request(KVM_REQ_EVENT, vcpu);
}

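/*
 * Pack segment attributes (type, S, DPL, P, AVL, L, D/B, G) into the
 * descriptor-style flags word stored in the 32-bit SMRAM segment state.
 */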
static u32 enter_smm_get_segment_flags(struct kvm_segment *seg)
{
        u32 flags = 0;
        flags |= seg->g       << 23;
        flags |= seg->db      << 22;
        flags |= seg->l       << 21;
        flags |= seg->avl     << 20;
        flags |= seg->present << 15;
        flags |= seg->dpl     << 13;
        flags |= seg->s       << 12;
        flags |= seg->type    << 8;
        return flags;
}

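/*
 * Save one segment register into the 32-bit SMRAM image; the selector
 * lives in a separate field from the base/limit/flags state.
 */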
static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_32 *state,
                                  u32 *selector, int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        *selector = seg.selector;
        state->base = seg.base;
        state->limit = seg.limit;
        state->flags = enter_smm_get_segment_flags(&seg);
}

#ifdef CONFIG_X86_64
static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu,
                                  struct kvm_smm_seg_state_64 *state,
                                  int n)
{
        struct kvm_segment seg;

        kvm_get_segment(vcpu, &seg, n);
        state->selector = seg.selector;
        state->attributes = enter_smm_get_segment_flags(&seg) >> 8;
        state->limit = seg.limit;
        state->base = seg.base;
}
#endif

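/* Build the 32-bit SMRAM state-save image from the current vCPU state. */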
static void enter_smm_save_state_32(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_32 *smram)
{
        struct desc_ptr dt;
        unsigned long val;
        int i;

        smram->cr0     = kvm_read_cr0(vcpu);
        smram->cr3     = kvm_read_cr3(vcpu);
        smram->eflags  = kvm_get_rflags(vcpu);
        smram->eip     = kvm_rip_read(vcpu);

        for (i = 0; i < 8; i++)
                smram->gprs[i] = kvm_register_read_raw(vcpu, i);

        kvm_get_dr(vcpu, 6, &val);
        smram->dr6     = (u32)val;
        kvm_get_dr(vcpu, 7, &val);
        smram->dr7     = (u32)val;

        enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR);
        enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR);

        static_call(kvm_x86_get_gdt)(vcpu, &dt);
        smram->gdtr.base = dt.address;
        smram->gdtr.limit = dt.size;

        static_call(kvm_x86_get_idt)(vcpu, &dt);
        smram->idtr.base = dt.address;
        smram->idtr.limit = dt.size;

        enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES);
        enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS);
        enter_smm_save_seg_32(vcpu, &smram->ss, &smram->ss_sel, VCPU_SREG_SS);

        enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS);
        enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS);
        enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS);

        smram->cr4 = kvm_read_cr4(vcpu);
        smram->smm_revision = 0x00020000;
        smram->smbase = vcpu->arch.smbase;

        smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}

#ifdef CONFIG_X86_64
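/*
 * Build the 64-bit (long mode) SMRAM state-save image.  Note that the
 * GPRs are stored in reverse order (gprs[15] is RAX).
 */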
static void enter_smm_save_state_64(struct kvm_vcpu *vcpu,
                                    struct kvm_smram_state_64 *smram)
{
        struct desc_ptr dt;
        unsigned long val;
        int i;

        for (i = 0; i < 16; i++)
                smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i);

        smram->rip    = kvm_rip_read(vcpu);
        smram->rflags = kvm_get_rflags(vcpu);

        kvm_get_dr(vcpu, 6, &val);
        smram->dr6 = val;
        kvm_get_dr(vcpu, 7, &val);
        smram->dr7 = val;

        smram->cr0 = kvm_read_cr0(vcpu);
        smram->cr3 = kvm_read_cr3(vcpu);
        smram->cr4 = kvm_read_cr4(vcpu);

        smram->smbase = vcpu->arch.smbase;
        smram->smm_revison = 0x00020064;

        smram->efer = vcpu->arch.efer;

        enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR);

        static_call(kvm_x86_get_idt)(vcpu, &dt);
        smram->idtr.limit = dt.size;
        smram->idtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR);

        static_call(kvm_x86_get_gdt)(vcpu, &dt);
        smram->gdtr.limit = dt.size;
        smram->gdtr.base = dt.address;

        enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES);
        enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS);
        enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS);
        enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS);
        enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS);
        enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS);

        smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu);
}
#endif

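/*
 * Emulate SMI delivery: save the state-save image, let vendor code adjust
 * the vCPU, write the image to SMRAM, mask NMIs, and switch the vCPU to
 * the SMM execution environment (CS based at SMBASE, RIP = 0x8000, flat
 * 4 GiB data segments, paging and protection disabled).
 */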
void enter_smm(struct kvm_vcpu *vcpu)
{
        struct kvm_segment cs, ds;
        struct desc_ptr dt;
        unsigned long cr0;
        union kvm_smram smram;

        check_smram_offsets();

        memset(smram.bytes, 0, sizeof(smram.bytes));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                enter_smm_save_state_64(vcpu, &smram.smram64);
        else
#endif
                enter_smm_save_state_32(vcpu, &smram.smram32);

        /*
         * Give enter_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. leave guest mode) after we've saved the state into the
         * SMM state-save area.
         *
         * Kill the VM in the unlikely case of failure, because the VM
         * can be in undefined state in this case.
         */
        if (static_call(kvm_x86_enter_smm)(vcpu, &smram))
                goto error;

        kvm_smm_changed(vcpu, true);

        if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram)))
                goto error;

        if (static_call(kvm_x86_get_nmi_mask)(vcpu))
                vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK;
        else
                static_call(kvm_x86_set_nmi_mask)(vcpu, true);

        kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
        kvm_rip_write(vcpu, 0x8000);

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);

        cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG);
        static_call(kvm_x86_set_cr0)(vcpu, cr0);
        vcpu->arch.cr0 = cr0;

        static_call(kvm_x86_set_cr4)(vcpu, 0);

        /* Undocumented: IDT limit is set to zero on entry to SMM.  */
        dt.address = dt.size = 0;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1)))
                goto error;

        cs.selector = (vcpu->arch.smbase >> 4) & 0xffff;
        cs.base = vcpu->arch.smbase;

        ds.selector = 0;
        ds.base = 0;

        cs.limit    = ds.limit = 0xffffffff;
        cs.type     = ds.type = 0x3;
        cs.dpl      = ds.dpl = 0;
        cs.db       = ds.db = 0;
        cs.s        = ds.s = 1;
        cs.l        = ds.l = 0;
        cs.g        = ds.g = 1;
        cs.avl      = ds.avl = 0;
        cs.present  = ds.present = 1;
        cs.unusable = ds.unusable = 0;
        cs.padding  = ds.padding = 0;

        kvm_set_segment(vcpu, &cs, VCPU_SREG_CS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_DS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_ES);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_FS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_GS);
        kvm_set_segment(vcpu, &ds, VCPU_SREG_SS);

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                if (static_call(kvm_x86_set_efer)(vcpu, 0))
                        goto error;
#endif

        kvm_update_cpuid_runtime(vcpu);
        kvm_mmu_reset_context(vcpu);
        return;
error:
        kvm_vm_dead(vcpu->kvm);
}

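/*
 * Unpack an SMRAM flags word back into kvm_segment attributes; a
 * non-present segment is marked unusable.
 */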
static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags)
{
        desc->g    = (flags >> 23) & 1;
        desc->db   = (flags >> 22) & 1;
        desc->l    = (flags >> 21) & 1;
        desc->avl  = (flags >> 20) & 1;
        desc->present = (flags >> 15) & 1;
        desc->dpl  = (flags >> 13) & 3;
        desc->s    = (flags >> 12) & 1;
        desc->type = (flags >>  8) & 15;

        desc->unusable = !desc->present;
        desc->padding = 0;
}

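/* Restore one segment register from the 32-bit SMRAM image on RSM. */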
static int rsm_load_seg_32(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_32 *state,
                           u16 selector, int n)
{
        struct kvm_segment desc;

        desc.selector = selector;
        desc.base = state->base;
        desc.limit = state->limit;
        rsm_set_desc_flags(&desc, state->flags);
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}

#ifdef CONFIG_X86_64

static int rsm_load_seg_64(struct kvm_vcpu *vcpu,
                           const struct kvm_smm_seg_state_64 *state,
                           int n)
{
        struct kvm_segment desc;

        desc.selector = state->selector;
        rsm_set_desc_flags(&desc, state->attributes << 8);
        desc.limit = state->limit;
        desc.base = state->base;
        kvm_set_segment(vcpu, &desc, n);
        return X86EMUL_CONTINUE;
}
#endif

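/*
 * Reload CR3, CR4 and CR0 on RSM, ordering the writes so that every
 * intermediate combination is one the guest could legally have set.
 */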
static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu,
                                    u64 cr0, u64 cr3, u64 cr4)
{
        int bad;
        u64 pcid;

        /* In order to later set CR4.PCIDE, CR3[11:0] must be zero.  */
        pcid = 0;
        if (cr4 & X86_CR4_PCIDE) {
                pcid = cr3 & 0xfff;
                cr3 &= ~0xfff;
        }

        bad = kvm_set_cr3(vcpu, cr3);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        /*
         * First enable PAE, long mode needs it before CR0.PG = 1 is set.
         * Then enable protected mode.  However, PCID cannot be enabled
         * if EFER.LMA=0, so set it separately.
         */
        bad = kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        bad = kvm_set_cr0(vcpu, cr0);
        if (bad)
                return X86EMUL_UNHANDLEABLE;

        if (cr4 & X86_CR4_PCIDE) {
                bad = kvm_set_cr4(vcpu, cr4);
                if (bad)
                        return X86EMUL_UNHANDLEABLE;
                if (pcid) {
                        bad = kvm_set_cr3(vcpu, cr3 | pcid);
                        if (bad)
                                return X86EMUL_UNHANDLEABLE;
                }
        }

        return X86EMUL_CONTINUE;
}

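/* Restore vCPU state from the 32-bit SMRAM image on RSM. */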
static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_32 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED;
        ctxt->_eip = smstate->eip;

        for (i = 0; i < 8; i++)
                *reg_write(ctxt, i) = smstate->gprs[i];

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR);
        rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, VCPU_SREG_LDTR);

        dt.address = smstate->gdtr.base;
        dt.size = smstate->gdtr.limit;
        static_call(kvm_x86_set_gdt)(vcpu, &dt);

        dt.address = smstate->idtr.base;
        dt.size = smstate->idtr.limit;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES);
        rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS);
        rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS);

        rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS);
        rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS);
        rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS);

        vcpu->arch.smbase = smstate->smbase;

        r = rsm_enter_protected_mode(vcpu, smstate->cr0,
                                     smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return r;
}

#ifdef CONFIG_X86_64
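/* Restore vCPU state from the 64-bit SMRAM image on RSM. */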
static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt,
                             const struct kvm_smram_state_64 *smstate)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        struct desc_ptr dt;
        int i, r;

        for (i = 0; i < 16; i++)
                *reg_write(ctxt, i) = smstate->gprs[15 - i];

        ctxt->_eip   = smstate->rip;
        ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED;

        if (kvm_set_dr(vcpu, 6, smstate->dr6))
                return X86EMUL_UNHANDLEABLE;
        if (kvm_set_dr(vcpu, 7, smstate->dr7))
                return X86EMUL_UNHANDLEABLE;

        vcpu->arch.smbase = smstate->smbase;

        if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA))
                return X86EMUL_UNHANDLEABLE;

        rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR);

        dt.size = smstate->idtr.limit;
        dt.address = smstate->idtr.base;
        static_call(kvm_x86_set_idt)(vcpu, &dt);

        rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR);

        dt.size = smstate->gdtr.limit;
        dt.address = smstate->gdtr.base;
        static_call(kvm_x86_set_gdt)(vcpu, &dt);

        r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4);
        if (r != X86EMUL_CONTINUE)
                return r;

        rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES);
        rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS);
        rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS);
        rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS);
        rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS);
        rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS);

        static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0);
        ctxt->interruptibility = (u8)smstate->int_shadow;

        return X86EMUL_CONTINUE;
}
#endif

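/*
 * Emulate RSM: read the state-save image from SMRAM, drop the vCPU back to
 * a mode in which CR0/CR3/CR4/EFER can be loaded safely, give vendor code a
 * chance to adjust state (e.g. re-enter guest mode), then reload the saved
 * state.
 */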
int emulator_leave_smm(struct x86_emulate_ctxt *ctxt)
{
        struct kvm_vcpu *vcpu = ctxt->vcpu;
        unsigned long cr0;
        union kvm_smram smram;
        u64 smbase;
        int ret;

        smbase = vcpu->arch.smbase;

        ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram));
        if (ret < 0)
                return X86EMUL_UNHANDLEABLE;

        if ((vcpu->arch.hflags & HF_SMM_INSIDE_NMI_MASK) == 0)
                static_call(kvm_x86_set_nmi_mask)(vcpu, false);

        kvm_smm_changed(vcpu, false);

        /*
         * Get back to real mode, to prepare a safe state in which to load
         * CR0/CR3/CR4/EFER.  It's all a bit more complicated if the vCPU
         * supports long mode.
         */
#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                struct kvm_segment cs_desc;
                unsigned long cr4;

                /* Zero CR4.PCIDE before CR0.PG.  */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PCIDE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PCIDE);

                /* A 32-bit code segment is required to clear EFER.LMA.  */
                memset(&cs_desc, 0, sizeof(cs_desc));
                cs_desc.type = 0xb;
                cs_desc.s = cs_desc.g = cs_desc.present = 1;
                kvm_set_segment(vcpu, &cs_desc, VCPU_SREG_CS);
        }
#endif

        /* For the 64-bit case, this will clear EFER.LMA.  */
        cr0 = kvm_read_cr0(vcpu);
        if (cr0 & X86_CR0_PE)
                kvm_set_cr0(vcpu, cr0 & ~(X86_CR0_PG | X86_CR0_PE));

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) {
                unsigned long cr4, efer;

                /* Clear CR4.PAE before clearing EFER.LME. */
                cr4 = kvm_read_cr4(vcpu);
                if (cr4 & X86_CR4_PAE)
                        kvm_set_cr4(vcpu, cr4 & ~X86_CR4_PAE);

                /* And finally go back to 32-bit mode.  */
                efer = 0;
                kvm_set_msr(vcpu, MSR_EFER, efer);
        }
#endif

        /*
         * Give leave_smm() a chance to make ISA-specific changes to the vCPU
         * state (e.g. enter guest mode) before loading state from the SMM
         * state-save area.
         */
        if (static_call(kvm_x86_leave_smm)(vcpu, &smram))
                return X86EMUL_UNHANDLEABLE;

#ifdef CONFIG_X86_64
        if (guest_cpuid_has(vcpu, X86_FEATURE_LM))
                return rsm_load_state_64(ctxt, &smram.smram64);
        else
#endif
                return rsm_load_state_32(ctxt, &smram.smram32);
}