Commit | Line | Data |
---|---|---|
d3561b7f RR |
1 | /* Paravirtualization interfaces |
2 | Copyright (C) 2006 Rusty Russell IBM Corporation | |
3 | ||
4 | This program is free software; you can redistribute it and/or modify | |
5 | it under the terms of the GNU General Public License as published by | |
6 | the Free Software Foundation; either version 2 of the License, or | |
7 | (at your option) any later version. | |
8 | ||
9 | This program is distributed in the hope that it will be useful, | |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | GNU General Public License for more details. | |
13 | ||
14 | You should have received a copy of the GNU General Public License | |
15 | along with this program; if not, write to the Free Software | |
16 | Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
17 | */ | |
18 | #include <linux/errno.h> | |
19 | #include <linux/module.h> | |
20 | #include <linux/efi.h> | |
21 | #include <linux/bcd.h> | |
c9ccf30d | 22 | #include <linux/start_kernel.h> |
d3561b7f RR |
23 | |
24 | #include <asm/bug.h> | |
25 | #include <asm/paravirt.h> | |
26 | #include <asm/desc.h> | |
27 | #include <asm/setup.h> | |
28 | #include <asm/arch_hooks.h> | |
29 | #include <asm/time.h> | |
30 | #include <asm/irq.h> | |
31 | #include <asm/delay.h> | |
13623d79 RR |
32 | #include <asm/fixmap.h> |
33 | #include <asm/apic.h> | |
da181a8b | 34 | #include <asm/tlbflush.h> |
d3561b7f RR |
35 | |
/* nop stub: shared do-nothing implementation for paravirt hooks that
 * need no work on bare hardware (used for .arch_setup, .pte_update,
 * .pte_update_defer below). */
static void native_nop(void)
{
}
40 | ||
/* Default .banner hook: report which paravirt backend the kernel is
 * running on ("bare hardware" unless a backend replaced paravirt_ops). */
static void __init default_banner(void)
{
	printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
	       paravirt_ops.name);
}
46 | ||
/* Dispatch memory-map setup through the current paravirt backend. */
char *memory_setup(void)
{
	return paravirt_ops.memory_setup();
}
51 | ||
139ec7c4 RR |
/* Simple instruction patching code. */
/* DEF_NATIVE emits the native instruction sequence for one operation
 * into the image and exports start_/end_ labels around it, so
 * native_patch() can measure and copy it over a patch site.  Note the
 * flags variants pass the flags word in %eax (push %eax; popf /
 * pushf; pop %eax). */
#define DEF_NATIVE(name, code) \
	extern const char start_##name[], end_##name[]; \
	asm("start_" #name ": " code "; end_" #name ":")
DEF_NATIVE(cli, "cli");
DEF_NATIVE(sti, "sti");
DEF_NATIVE(popf, "push %eax; popf");
DEF_NATIVE(pushf, "pushf; pop %eax");
DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli");
DEF_NATIVE(iret, "iret");
DEF_NATIVE(sti_sysexit, "sti; sysexit");
63 | ||
/* Per-operation table of native replacement sequences, indexed by the
 * PARAVIRT_* patch type.  An entry with a NULL .start (i.e. any index
 * not listed here) means "no replacement available". */
static const struct native_insns
{
	const char *start, *end;
} native_insns[] = {
	[PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli },
	[PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti },
	[PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf },
	[PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf },
	[PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli },
	[PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret },
	[PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit },
};
76 | ||
77 | static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | |
78 | { | |
79 | unsigned int insn_len; | |
80 | ||
81 | /* Don't touch it if we don't have a replacement */ | |
82 | if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) | |
83 | return len; | |
84 | ||
85 | insn_len = native_insns[type].end - native_insns[type].start; | |
86 | ||
87 | /* Similarly if we can't fit replacement. */ | |
88 | if (len < insn_len) | |
89 | return len; | |
90 | ||
91 | memcpy(insns, native_insns[type].start, insn_len); | |
92 | return insn_len; | |
93 | } | |
94 | ||
d3561b7f RR |
/* Read debug register %db<regno>.  Only indices 0-3, 6 and 7 are valid
 * (those are the debug registers that exist on i386); any other index
 * is a kernel bug. */
static fastcall unsigned long native_get_debugreg(int regno)
{
	unsigned long val = 0; 	/* Damn you, gcc! */

	switch (regno) {
	case 0:
		asm("movl %%db0, %0" :"=r" (val)); break;
	case 1:
		asm("movl %%db1, %0" :"=r" (val)); break;
	case 2:
		asm("movl %%db2, %0" :"=r" (val)); break;
	case 3:
		asm("movl %%db3, %0" :"=r" (val)); break;
	case 6:
		asm("movl %%db6, %0" :"=r" (val)); break;
	case 7:
		asm("movl %%db7, %0" :"=r" (val)); break;
	default:
		BUG();
	}
	return val;
}
117 | ||
/* Write debug register %db<regno>.  Mirror of native_get_debugreg():
 * only indices 0-3, 6 and 7 are valid, anything else is a bug. */
static fastcall void native_set_debugreg(int regno, unsigned long value)
{
	switch (regno) {
	case 0:
		asm("movl %0,%%db0"	: /* no output */ :"r" (value));
		break;
	case 1:
		asm("movl %0,%%db1"	: /* no output */ :"r" (value));
		break;
	case 2:
		asm("movl %0,%%db2"	: /* no output */ :"r" (value));
		break;
	case 3:
		asm("movl %0,%%db3"	: /* no output */ :"r" (value));
		break;
	case 6:
		asm("movl %0,%%db6"	: /* no output */ :"r" (value));
		break;
	case 7:
		asm("movl %0,%%db7"	: /* no output */ :"r" (value));
		break;
	default:
		BUG();
	}
}
143 | ||
/* Interrupt-controller initialization, dispatched through the backend
 * (native_init_IRQ on bare hardware). */
void init_IRQ(void)
{
	paravirt_ops.init_IRQ();
}
148 | ||
/* Clear the TS (task-switched) flag in %cr0. */
static fastcall void native_clts(void)
{
	asm volatile ("clts");
}

/* Plain control-register accessors. */
static fastcall unsigned long native_read_cr0(void)
{
	unsigned long val;
	asm volatile("movl %%cr0,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr0(unsigned long val)
{
	asm volatile("movl %0,%%cr0": :"r" (val));
}

static fastcall unsigned long native_read_cr2(void)
{
	unsigned long val;
	asm volatile("movl %%cr2,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr2(unsigned long val)
{
	asm volatile("movl %0,%%cr2": :"r" (val));
}

static fastcall unsigned long native_read_cr3(void)
{
	unsigned long val;
	asm volatile("movl %%cr3,%0\n\t" :"=r" (val));
	return val;
}

static fastcall void native_write_cr3(unsigned long val)
{
	asm volatile("movl %0,%%cr3": :"r" (val));
}

static fastcall unsigned long native_read_cr4(void)
{
	unsigned long val;
	asm volatile("movl %%cr4,%0\n\t" :"=r" (val));
	return val;
}

static fastcall unsigned long native_read_cr4_safe(void)
{
	unsigned long val;
	/* This could fault if %cr4 does not exist; the __ex_table entry
	 * makes a fault skip the read, leaving val at its preloaded 0
	 * (the "0" (0) input constraint). */
	asm("1: movl %%cr4, %0 \n"
		"2: \n"
		".section __ex_table,\"a\" \n"
		".long 1b,2b \n"
		".previous \n"
		: "=r" (val): "0" (0));
	return val;
}

static fastcall void native_write_cr4(unsigned long val)
{
	asm volatile("movl %0,%%cr4": :"r" (val));
}
214 | ||
/* Return the current EFLAGS word. */
static fastcall unsigned long native_save_fl(void)
{
	unsigned long f;
	asm volatile("pushfl ; popl %0":"=g" (f): /* no input */);
	return f;
}

/* Load EFLAGS from f.  The "memory" clobber keeps memory accesses
 * from being reordered across the interrupt-flag change. */
static fastcall void native_restore_fl(unsigned long f)
{
	asm volatile("pushl %0 ; popfl": /* no output */
			     :"g" (f)
			     :"memory", "cc");
}

static fastcall void native_irq_disable(void)
{
	asm volatile("cli": : :"memory");
}

static fastcall void native_irq_enable(void)
{
	asm volatile("sti": : :"memory");
}

/* Enable interrupts and halt.  "sti" only takes effect after the
 * following instruction, so no interrupt can be taken (and missed)
 * between the sti and the hlt. */
static fastcall void native_safe_halt(void)
{
	asm volatile("sti; hlt": : :"memory");
}

/* Halt without touching the interrupt flag. */
static fastcall void native_halt(void)
{
	asm volatile("hlt": : :"memory");
}

/* Write back and invalidate the CPU caches. */
static fastcall void native_wbinvd(void)
{
	asm volatile("wbinvd": : :"memory");
}
253 | ||
/* Read MSR 'msr'.  If the rdmsr faults (e.g. nonexistent MSR) the
 * .fixup code stores -EFAULT in *err; on success *err is cleared to 0
 * by the xorl.  The 64-bit result comes back in edx:eax ("=A"). */
static fastcall unsigned long long native_read_msr(unsigned int msr, int *err)
{
	unsigned long long val;

	asm volatile("2: rdmsr ; xorl %0,%0\n"
		     "1:\n\t"
		     ".section .fixup,\"ax\"\n\t"
		     "3: movl %3,%0 ; jmp 1b\n\t"
		     ".previous\n\t"
		     ".section __ex_table,\"a\"\n"
		     " .align 4\n\t"
		     " .long 2b,3b\n\t"
		     ".previous"
		     : "=r" (*err), "=A" (val)
		     : "c" (msr), "i" (-EFAULT));

	return val;
}

/* Write the 64-bit 'val' to MSR 'msr' (low half in eax, high in edx).
 * Returns 0 on success, -EFAULT if the wrmsr faulted. */
static fastcall int native_write_msr(unsigned int msr, unsigned long long val)
{
	int err;
	asm volatile("2: wrmsr ; xorl %0,%0\n"
		     "1:\n\t"
		     ".section .fixup,\"ax\"\n\t"
		     "3: movl %4,%0 ; jmp 1b\n\t"
		     ".previous\n\t"
		     ".section __ex_table,\"a\"\n"
		     " .align 4\n\t"
		     " .long 2b,3b\n\t"
		     ".previous"
		     : "=a" (err)
		     : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)),
		       "i" (-EFAULT));
	return err;
}
290 | ||
/* Read the 64-bit time-stamp counter (rdtsc returns edx:eax, "=A"). */
static fastcall unsigned long long native_read_tsc(void)
{
	unsigned long long val;
	asm volatile("rdtsc" : "=A" (val));
	return val;
}

/* Read a performance-monitoring counter via rdpmc. */
static fastcall unsigned long long native_read_pmc(void)
{
	unsigned long long val;
	asm volatile("rdpmc" : "=A" (val));
	return val;
}
304 | ||
/* Load the TSS selector into the task register. */
static fastcall void native_load_tr_desc(void)
{
	asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8));
}

/* Load / store the GDT and IDT base+limit descriptors. */
static fastcall void native_load_gdt(const struct Xgt_desc_struct *dtr)
{
	asm volatile("lgdt %0"::"m" (*dtr));
}

static fastcall void native_load_idt(const struct Xgt_desc_struct *dtr)
{
	asm volatile("lidt %0"::"m" (*dtr));
}

static fastcall void native_store_gdt(struct Xgt_desc_struct *dtr)
{
	asm ("sgdt %0":"=m" (*dtr));
}

static fastcall void native_store_idt(struct Xgt_desc_struct *dtr)
{
	asm ("sidt %0":"=m" (*dtr));
}

/* Return the current task-register selector. */
static fastcall unsigned long native_store_tr(void)
{
	unsigned long tr;
	asm ("str %0":"=r" (tr));
	return tr;
}
336 | ||
/* Copy the thread's three TLS descriptors into this CPU's GDT,
 * unrolled via the temporary C() macro. */
static fastcall void native_load_tls(struct thread_struct *t, unsigned int cpu)
{
#define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i]
	C(0); C(1); C(2);
#undef C
}
343 | ||
344 | static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) | |
345 | { | |
346 | u32 *lp = (u32 *)((char *)dt + entry*8); | |
347 | lp[0] = entry_low; | |
348 | lp[1] = entry_high; | |
349 | } | |
350 | ||
351 | static fastcall void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | |
352 | { | |
353 | native_write_dt_entry(dt, entrynum, low, high); | |
354 | } | |
355 | ||
356 | static fastcall void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | |
357 | { | |
358 | native_write_dt_entry(dt, entrynum, low, high); | |
359 | } | |
360 | ||
361 | static fastcall void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | |
362 | { | |
363 | native_write_dt_entry(dt, entrynum, low, high); | |
364 | } | |
365 | ||
/* Point the TSS's ring-0 stack at the new thread's kernel stack, and
 * keep MSR_IA32_SYSENTER_CS in sync with the thread's sysenter %cs
 * (only rewritten when it actually changed). */
static fastcall void native_load_esp0(struct tss_struct *tss,
				      struct thread_struct *thread)
{
	tss->esp0 = thread->esp0;

	/* This can only happen when SEP is enabled, no need to test "SEP"arately */
	if (unlikely(tss->ss1 != thread->sysenter_cs)) {
		tss->ss1 = thread->sysenter_cs;
		wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0);
	}
}
377 | ||
/* Short I/O delay: a dummy byte write to port 0x80. */
static fastcall void native_io_delay(void)
{
	asm volatile("outb %al,$0x80");
}
382 | ||
da181a8b RR |
/* Flush this CPU's TLB (see __native_flush_tlb in tlbflush.h). */
static fastcall void native_flush_tlb(void)
{
	__native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 */
static fastcall void native_flush_tlb_global(void)
{
	__native_flush_tlb_global();
}

/* Flush the single TLB entry covering linear address 'addr'. */
static fastcall void native_flush_tlb_single(u32 addr)
{
	__native_flush_tlb_single(addr);
}
401 | ||
#ifndef CONFIG_X86_PAE
/* Without PAE a pte/pmd is a single 32-bit word, so a plain store
 * updates the whole entry at once. */
static fastcall void native_set_pte(pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
{
	*ptep = pteval;
}

static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	*pmdp = pmdval;
}

#else /* CONFIG_X86_PAE */

/* PAE ptes are two 32-bit words.  Write the high word first and the
 * low word (which carries the present bit on x86) last, with smp_wmb()
 * in between, so the entry never appears present with a stale high
 * word. */
static fastcall void native_set_pte(pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

static fastcall void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/* Replace an entry that may currently be live: first knock out the low
 * word, then install the new high and low words, ordered by smp_wmb(). */
static fastcall void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = pte.pte_high;
	smp_wmb();
	ptep->pte_low = pte.pte_low;
}

/* Install both halves with a single 64-bit set_64bit() store. */
static fastcall void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
{
	set_64bit((unsigned long long *)ptep,pte_val(pteval));
}

static fastcall void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
	set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
}

static fastcall void native_set_pud(pud_t *pudp, pud_t pudval)
{
	*pudp = pudval;
}

/* Clear the low word (present bit) before the high word. */
static fastcall void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
	ptep->pte_low = 0;
	smp_wmb();
	ptep->pte_high = 0;
}

/* Same low-word-first ordering for pmds, done via a u32 view. */
static fastcall void native_pmd_clear(pmd_t *pmd)
{
	u32 *tmp = (u32 *)pmd;
	*tmp = 0;
	smp_wmb();
	*(tmp + 1) = 0;
}
#endif /* CONFIG_X86_PAE */
473 | ||
d3561b7f RR |
/* These are in entry.S */
extern fastcall void native_iret(void);
extern fastcall void native_irq_enable_sysexit(void);

/* Run the backend's banner hook once core initcalls execute. */
static int __init print_banner(void)
{
	paravirt_ops.banner();
	return 0;
}
core_initcall(print_banner);

/* We simply declare start_kernel to be the paravirt probe of last resort. */
paravirt_probe(start_kernel);
487 | ||
d3561b7f RR |
/*
 * The default paravirt_ops: every hook points at the native ("bare
 * hardware") implementation above.  A paravirt backend may replace
 * these pointers with its own implementations.
 */
struct paravirt_ops paravirt_ops = {
	.name = "bare hardware",
	.paravirt_enabled = 0,
	.kernel_rpl = 0,

	/* Boot / setup hooks. */
	.patch = native_patch,
	.banner = default_banner,
	.arch_setup = native_nop,
	.memory_setup = machine_specific_memory_setup,
	.get_wallclock = native_get_wallclock,
	.set_wallclock = native_set_wallclock,
	.time_init = time_init_hook,
	.init_IRQ = native_init_IRQ,

	/* CPU state: cpuid, debug and control registers, flags, MSRs. */
	.cpuid = native_cpuid,
	.get_debugreg = native_get_debugreg,
	.set_debugreg = native_set_debugreg,
	.clts = native_clts,
	.read_cr0 = native_read_cr0,
	.write_cr0 = native_write_cr0,
	.read_cr2 = native_read_cr2,
	.write_cr2 = native_write_cr2,
	.read_cr3 = native_read_cr3,
	.write_cr3 = native_write_cr3,
	.read_cr4 = native_read_cr4,
	.read_cr4_safe = native_read_cr4_safe,
	.write_cr4 = native_write_cr4,
	.save_fl = native_save_fl,
	.restore_fl = native_restore_fl,
	.irq_disable = native_irq_disable,
	.irq_enable = native_irq_enable,
	.safe_halt = native_safe_halt,
	.halt = native_halt,
	.wbinvd = native_wbinvd,
	.read_msr = native_read_msr,
	.write_msr = native_write_msr,
	.read_tsc = native_read_tsc,
	.read_pmc = native_read_pmc,

	/* Descriptor tables and per-thread state. */
	.load_tr_desc = native_load_tr_desc,
	.set_ldt = native_set_ldt,
	.load_gdt = native_load_gdt,
	.load_idt = native_load_idt,
	.store_gdt = native_store_gdt,
	.store_idt = native_store_idt,
	.store_tr = native_store_tr,
	.load_tls = native_load_tls,
	.write_ldt_entry = native_write_ldt_entry,
	.write_gdt_entry = native_write_gdt_entry,
	.write_idt_entry = native_write_idt_entry,
	.load_esp0 = native_load_esp0,

	.set_iopl_mask = native_set_iopl_mask,
	.io_delay = native_io_delay,
	.const_udelay = __const_udelay,

#ifdef CONFIG_X86_LOCAL_APIC
	.apic_write = native_apic_write,
	.apic_write_atomic = native_apic_write_atomic,
	.apic_read = native_apic_read,
#endif

	/* TLB and page-table operations. */
	.flush_tlb_user = native_flush_tlb,
	.flush_tlb_kernel = native_flush_tlb_global,
	.flush_tlb_single = native_flush_tlb_single,

	.set_pte = native_set_pte,
	.set_pte_at = native_set_pte_at,
	.set_pmd = native_set_pmd,
	.pte_update = (void *)native_nop,
	.pte_update_defer = (void *)native_nop,
#ifdef CONFIG_X86_PAE
	.set_pte_atomic = native_set_pte_atomic,
	.set_pte_present = native_set_pte_present,
	.set_pud = native_set_pud,
	.pte_clear = native_pte_clear,
	.pmd_clear = native_pmd_clear,
#endif

	/* Kernel-exit paths, implemented in entry.S. */
	.irq_enable_sysexit = native_irq_enable_sysexit,
	.iret = native_iret,
};

/*
 * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops
 * semantics are subject to change. Hence we only do this
 * internal-only export of this, until it gets sorted out and
 * all lowlevel CPU ops used by modules are separately exported.
 */
EXPORT_SYMBOL_GPL(paravirt_ops);