/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Asm versions of Xen pv-ops, suitable for direct use.
 *
 * We only bother with direct forms (i.e., vcpu in pda) of the
 * operations here; the indirect forms are better handled in C.
 */

#include <asm/thread_info.h>
#include <asm/processor-flags.h>
#include <asm/segment.h>
#include <asm/asm.h>

#include <xen/interface/xen.h>

#include <linux/linkage.h>

/* Pseudo-flag used for virtual NMI, which we don't implement yet */
#define XEN_EFLAGS_NMI	0x80000000
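/*
 * Note: bit 31 of EFLAGS is a reserved bit that real hardware always
 * reports as zero, so it is free to serve as a software-only marker in
 * the saved eflags slot.
 */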

/*
 * This is run where a normal iret would be run, with the same stack setup:
 *	8: eflags
 *	4: cs
 *	esp-> 0: eip
 *
 * This attempts to make sure that any pending events are dealt with
 * on return to usermode, but there is a small window in which an
 * event can happen just before entering usermode. If the nested
 * interrupt ends up setting one of the TIF_WORK_MASK pending work
 * flags, they will not be tested again before returning to
 * usermode. This means that a process can end up with pending work,
 * which will be unprocessed until the process enters and leaves the
 * kernel again, which could be an unbounded amount of time. This
 * means that a pending signal or reschedule event could be
 * indefinitely delayed.
 *
 * The fix is to notice a nested interrupt in the critical window, and
 * if one occurs, then fold the nested interrupt into the current
 * interrupt stack frame, and re-process it iteratively rather than
 * recursively. This means that it will exit via the normal path, and
 * all pending work will be dealt with appropriately.
 *
 * Because the nested interrupt handler needs to deal with the current
 * stack state in whatever form it's in, we keep things simple by only
 * using a single register which is pushed/popped on the stack.
 */
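
/*
 * POP_FS restores the %fs selector saved on the stack. If the popw
 * faults (e.g. because the saved selector is no longer valid), the
 * exception fixup below overwrites the saved value with the null
 * selector and retries, so the pop cannot be fatal.
 */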
.macro POP_FS
1:
	popw %fs
.pushsection .fixup, "ax"
2:	movw $0, (%esp)
	jmp 1b
.popsection
	_ASM_EXTABLE(1b,2b)
.endm

SYM_CODE_START(xen_iret)
	/* test eflags for special cases */
	testl $(X86_EFLAGS_VM | XEN_EFLAGS_NMI), 8(%esp)
	jnz hyper_iret

	push %eax
	ESP_OFFSET=4	# bytes pushed onto stack

	/* Store vcpu_info pointer for easy access */
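	/*
	 * xen_vcpu is a percpu variable: on SMP it has to be reached
	 * through the percpu segment, so %fs is temporarily loaded with
	 * __KERNEL_PERCPU and restored with POP_FS. On UP a plain
	 * %ss-relative access is enough (%ss is still the kernel value
	 * here, whereas the user data segments may already be loaded).
	 */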
#ifdef CONFIG_SMP
	pushw %fs
	movl $(__KERNEL_PERCPU), %eax
	movl %eax, %fs
	movl %fs:xen_vcpu, %eax
	POP_FS
#else
	movl %ss:xen_vcpu, %eax
#endif

	/* check IF state we're restoring */
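	/*
	 * The saved eflags sit 8 bytes up the original iret frame, plus
	 * ESP_OFFSET for the %eax pushed above; the extra +1 addresses the
	 * byte holding IF (bit 9), matching the mask shifted right by 8.
	 * The resulting ZF is consumed by the setz below.
	 */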
	testb $X86_EFLAGS_IF>>8, 8+1+ESP_OFFSET(%esp)

	/*
	 * Maybe enable events. Once this happens we could get a
	 * recursive event, so the critical region starts immediately
	 * afterwards. However, if that happens we don't end up
	 * resuming the code, so we don't have to be worried about
	 * being preempted to another CPU.
	 */
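	/*
	 * setz stores the ZF computed by the IF test above: restored IF
	 * clear -> mask = 1 (events stay off), restored IF set -> mask = 0.
	 */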
	setz %ss:XEN_vcpu_info_mask(%eax)
xen_iret_start_crit:

	/* check for unmasked and pending */
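	/*
	 * evtchn_upcall_pending and evtchn_upcall_mask are adjacent bytes in
	 * struct vcpu_info, so one 16-bit compare against 0x0001 checks
	 * "pending == 1 and mask == 0" in a single instruction; the jne/je
	 * below act on its ZF.
	 */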
	cmpw $0x0001, %ss:XEN_vcpu_info_pending(%eax)

	/*
	 * If there's something pending, mask events again so we can
	 * jump back into xen_hypervisor_callback. Otherwise do not
	 * touch XEN_vcpu_info_mask.
	 */
	jne 1f
	movb $1, %ss:XEN_vcpu_info_mask(%eax)

1:	popl %eax

	/*
	 * From this point on the registers are restored and the stack
	 * updated, so we don't need to worry about it if we're
	 * preempted
	 */
iret_restore_end:

	/*
	 * Jump to hypervisor_callback after fixing up the stack.
	 * Events are masked, so jumping out of the critical region is
	 * OK.
	 */
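	/* ZF here is still the result of the cmpw above: movb and popl do not change flags */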
	je xen_hypervisor_callback

1:	iret
xen_iret_end_crit:
	_ASM_EXTABLE(1b, iret_exc)
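	/*
	 * If the iret itself faults, the extable entry above sends the
	 * fault to iret_exc (the iret fixup label in the 32-bit kernel
	 * entry code).
	 */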

hyper_iret:
	/* put this out of line since it's very rarely used */
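	/*
	 * Each hypercall gets a 32-byte stub in the hypercall page. The
	 * iret hypercall never returns, so we jmp (rather than call) into
	 * its stub and let Xen perform the whole iret, which handles the
	 * VM86 and pseudo-NMI cases tested for above.
	 */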
	jmp hypercall_page + __HYPERVISOR_iret * 32
SYM_CODE_END(xen_iret)

	.globl xen_iret_start_crit, xen_iret_end_crit

/*
 * This is called by xen_hypervisor_callback in entry_32.S when it sees
 * that the EIP at the time of interrupt was between
 * xen_iret_start_crit and xen_iret_end_crit.
 *
 * The stack format at this point is:
 *	----------------
 *	 ss		: (ss/esp may be present if we came from usermode)
 *	 esp		:
 *	 eflags		}  outer exception info
 *	 cs		}
 *	 eip		}
 *	----------------
 *	 eax		: outer eax if it hasn't been restored
 *	----------------
 *	 eflags		}
 *	 cs		}  nested exception info
 *	 eip		}
 *	 return address	: (into xen_hypervisor_callback)
 *
 * In order to deliver the nested exception properly, we need to discard the
 * nested exception frame such that when we handle the exception, we do it
 * in the context of the outer exception rather than starting a new one.
 *
 * The only caveat is that if the outer eax hasn't been restored yet (i.e.
 * it's still on stack), we need to restore its value here.
 */
SYM_CODE_START(xen_iret_crit_fixup)
	/*
	 * Paranoia: Make sure we're really coming from kernel space.
	 * One could imagine a case where userspace jumps into the
	 * critical range address, but just before the CPU delivers a
	 * PF, it decides to deliver an interrupt instead. Unlikely?
	 * Definitely. Easy to avoid? Yes.
	 */
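	/*
	 * The nested CS is at 2*4(%esp) (above the return address and the
	 * nested EIP). Bit 1 of a selector is set only for a user RPL (3),
	 * so a nonzero result means we did not come from kernel space.
	 */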
	testb $2, 2*4(%esp)			/* nested CS */
	jnz 2f

	/*
	 * If eip is before iret_restore_end then stack
	 * hasn't been restored yet.
	 */
	cmpl $iret_restore_end, 1*4(%esp)
	jae 1f
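
	/*
	 * "ret $n" returns through the address on top of the stack (back
	 * into xen_hypervisor_callback) and then discards n further bytes,
	 * which is what folds the nested frame into the outer one.
	 */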
	movl 4*4(%esp), %eax			/* load outer EAX */
	ret $4*4				/* discard nested EIP, CS, and EFLAGS as
						 * well as the just restored EAX */

1:
	ret $3*4				/* discard nested EIP, CS, and EFLAGS */

2:
	ret
SYM_CODE_END(xen_iret_crit_fixup)