x86/paravirt: Move items in pv_info under PARAVIRT_XXL umbrella
arch/x86/kernel/paravirt.c
/*  Paravirtualization interfaces
    Copyright (C) 2006 Rusty Russell IBM Corporation

    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

    2007 - x86_64 support added by Glauber de Oliveira Costa, Red Hat Inc
*/

#include <linux/errno.h>
#include <linux/init.h>
#include <linux/export.h>
#include <linux/efi.h>
#include <linux/bcd.h>
#include <linux/highmem.h>
#include <linux/kprobes.h>

#include <asm/bug.h>
#include <asm/paravirt.h>
#include <asm/debugreg.h>
#include <asm/desc.h>
#include <asm/setup.h>
#include <asm/pgtable.h>
#include <asm/time.h>
#include <asm/pgalloc.h>
#include <asm/irq.h>
#include <asm/delay.h>
#include <asm/fixmap.h>
#include <asm/apic.h>
#include <asm/tlbflush.h>
#include <asm/timer.h>
#include <asm/special_insns.h>
#include <asm/tlb.h>

/*
 * nop stub, which must not clobber anything *including the stack* to
 * avoid confusing the entry prologues.
 */
extern void _paravirt_nop(void);
asm (".pushsection .entry.text, \"ax\"\n"
     ".global _paravirt_nop\n"
     "_paravirt_nop:\n\t"
     "ret\n\t"
     ".size _paravirt_nop, . - _paravirt_nop\n\t"
     ".type _paravirt_nop, @function\n\t"
     ".popsection");
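
/*
 * The identity helpers below are special-cased by paravirt_patch_default():
 * a call to _paravirt_ident_32/64 at a patch site is not emitted as a call
 * at all but replaced via paravirt_patch_ident_32/64(), which typically
 * drop in a plain register move instead.
 */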

/* identity functions, which can be inlined */
u32 notrace _paravirt_ident_32(u32 x)
{
        return x;
}

u64 notrace _paravirt_ident_64(u64 x)
{
        return x;
}

void __init default_banner(void)
{
        printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
               pv_info.name);
}

/* Undefined instruction for dealing with missing ops pointers. */
static const unsigned char ud2a[] = { 0x0f, 0x0b };

struct branch {
        unsigned char opcode;
        u32 delta;
} __attribute__((packed));

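/*
 * Patching turns the default indirect call at a pv_ops call site into a
 * direct branch. The replacement is a five byte instruction: one opcode
 * byte (0xe8 CALL / 0xe9 JMP) plus a 32-bit displacement relative to the
 * end of the instruction. An illustrative example (not literal
 * disassembly):
 *
 *      before: call *pv_ops+OFF(%rip)  ; indirect, retpoline-unfriendly
 *      after:  call native_write_cr3   ; e8 <rel32>, direct
 */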
static unsigned paravirt_patch_call(void *insnbuf, const void *target,
                                    unsigned long addr, unsigned len)
{
        struct branch *b = insnbuf;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (len < 5) {
#ifdef CONFIG_RETPOLINE
                pr_warn_once("Failing to patch indirect CALL in %ps\n", (void *)addr);
#endif
                return len;     /* call too long for patch site */
        }

        b->opcode = 0xe8; /* call */
        b->delta = delta;
        BUILD_BUG_ON(sizeof(*b) != 5);

        return 5;
}

static unsigned paravirt_patch_jmp(void *insnbuf, const void *target,
                                   unsigned long addr, unsigned len)
{
        struct branch *b = insnbuf;
        unsigned long delta = (unsigned long)target - (addr+5);

        if (len < 5) {
#ifdef CONFIG_RETPOLINE
                pr_warn_once("Failing to patch indirect JMP in %ps\n", (void *)addr);
#endif
                return len;     /* jmp too long for patch site */
        }

        b->opcode = 0xe9;       /* jmp */
        b->delta = delta;

        return 5;
}
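
/*
 * virt_spin_lock_key defaults to true; native_pv_lock_init() switches it
 * off when X86_FEATURE_HYPERVISOR is absent, so bare metal keeps the
 * regular queued spinlocks instead of the hypervisor-friendly
 * test-and-set fallback.
 */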
DEFINE_STATIC_KEY_TRUE(virt_spin_lock_key);

void __init native_pv_lock_init(void)
{
        if (!static_cpu_has(X86_FEATURE_HYPERVISOR))
                static_branch_disable(&virt_spin_lock_key);
}
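
/*
 * Default patcher: map a patch type back to the function pointer it
 * refers to by treating pv_ops as a flat array of pointers; the type
 * value is the slot index (see PARAVIRT_PATCH()).
 */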
unsigned paravirt_patch_default(u8 type, void *insnbuf,
                                unsigned long addr, unsigned len)
{
        /*
         * Neat trick to map patch type back to the call within the
         * corresponding structure.
         */
        void *opfunc = *((void **)&pv_ops + type);
        unsigned ret;

        if (opfunc == NULL)
                /* If there's no function, patch it with a ud2a (BUG) */
                ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
        else if (opfunc == _paravirt_nop)
                ret = 0;

        /* identity functions just return their single argument */
        else if (opfunc == _paravirt_ident_32)
                ret = paravirt_patch_ident_32(insnbuf, len);
        else if (opfunc == _paravirt_ident_64)
                ret = paravirt_patch_ident_64(insnbuf, len);

        else if (type == PARAVIRT_PATCH(cpu.iret) ||
                 type == PARAVIRT_PATCH(cpu.usergs_sysret64))
                /* If operation requires a jmp, then jmp */
                ret = paravirt_patch_jmp(insnbuf, opfunc, addr, len);
        else
                /* Otherwise call the function. */
                ret = paravirt_patch_call(insnbuf, opfunc, addr, len);

        return ret;
}
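
/*
 * Copy a native instruction template into the patch site if it fits;
 * otherwise leave the site as it was and report its full length, so the
 * original code there keeps running unpatched.
 */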
unsigned paravirt_patch_insns(void *insnbuf, unsigned len,
                              const char *start, const char *end)
{
        unsigned insn_len = end - start;

        if (insn_len > len || start == NULL)
                insn_len = len;
        else
                memcpy(insnbuf, start, insn_len);

        return insn_len;
}
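
/*
 * Thin out-of-line wrappers: the __native_flush_tlb*() helpers are
 * inlines (from <asm/tlbflush.h>), but pv_ops needs real function
 * addresses to point at.
 */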
static void native_flush_tlb(void)
{
        __native_flush_tlb();
}

/*
 * Global pages have to be flushed a bit differently. Not a real
 * performance problem because this does not happen often.
 */
static void native_flush_tlb_global(void)
{
        __native_flush_tlb_global();
}

static void native_flush_tlb_one_user(unsigned long addr)
{
        __native_flush_tlb_one_user(addr);
}

struct static_key paravirt_steal_enabled;
struct static_key paravirt_steal_rq_enabled;

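/* On bare hardware no hypervisor steals CPU time, so report zero. */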
static u64 native_steal_clock(int cpu)
{
        return 0;
}

/* These are in entry.S */
extern void native_iret(void);
extern void native_usergs_sysret64(void);

static struct resource reserve_ioports = {
        .start = 0,
        .end = IO_SPACE_LIMIT,
        .name = "paravirt-ioport",
        .flags = IORESOURCE_IO | IORESOURCE_BUSY,
};

/*
 * Reserve the whole legacy IO space to prevent any legacy drivers
 * from wasting time probing for their hardware.  This is a fairly
 * brute-force approach to disabling all non-virtual drivers.
 *
 * Note that this must be called very early to have any effect.
 */
int paravirt_disable_iospace(void)
{
        return request_resource(&ioport_resource, &reserve_ioports);
}
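
/*
 * Lazy-mode tracking: a hypervisor may batch MMU updates (LAZY_MMU) or
 * context-switch state updates (LAZY_CPU) and apply them in one go. The
 * current mode is tracked per-CPU, and the enter/leave helpers BUG on
 * mismatched nesting.
 */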
static DEFINE_PER_CPU(enum paravirt_lazy_mode, paravirt_lazy_mode) = PARAVIRT_LAZY_NONE;

static inline void enter_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != PARAVIRT_LAZY_NONE);

        this_cpu_write(paravirt_lazy_mode, mode);
}

static void leave_lazy(enum paravirt_lazy_mode mode)
{
        BUG_ON(this_cpu_read(paravirt_lazy_mode) != mode);

        this_cpu_write(paravirt_lazy_mode, PARAVIRT_LAZY_NONE);
}

void paravirt_enter_lazy_mmu(void)
{
        enter_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_leave_lazy_mmu(void)
{
        leave_lazy(PARAVIRT_LAZY_MMU);
}

void paravirt_flush_lazy_mmu(void)
{
        preempt_disable();

        if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                arch_enter_lazy_mmu_mode();
        }

        preempt_enable();
}
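
/*
 * A context switch may interrupt a lazy MMU section: flush the pending
 * updates now and set TIF_LAZY_MMU_UPDATES so the mode is re-entered
 * when the task is switched back in.
 */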
void paravirt_start_context_switch(struct task_struct *prev)
{
        BUG_ON(preemptible());

        if (this_cpu_read(paravirt_lazy_mode) == PARAVIRT_LAZY_MMU) {
                arch_leave_lazy_mmu_mode();
                set_ti_thread_flag(task_thread_info(prev), TIF_LAZY_MMU_UPDATES);
        }
        enter_lazy(PARAVIRT_LAZY_CPU);
}

void paravirt_end_context_switch(struct task_struct *next)
{
        BUG_ON(preemptible());

        leave_lazy(PARAVIRT_LAZY_CPU);

        if (test_and_clear_ti_thread_flag(task_thread_info(next), TIF_LAZY_MMU_UPDATES))
                arch_enter_lazy_mmu_mode();
}

enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
{
        if (in_interrupt())
                return PARAVIRT_LAZY_NONE;

        return this_cpu_read(paravirt_lazy_mode);
}
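
/*
 * Everything in pv_info except .name is consumed only by
 * CONFIG_PARAVIRT_XXL code, hence the #ifdef below.
 */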
struct pv_info pv_info = {
        .name = "bare hardware",
#ifdef CONFIG_PARAVIRT_XXL
        .kernel_rpl = 0,
        .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */

#ifdef CONFIG_X86_64
        .extra_user_64bit_cs = __USER_CS,
#endif
#endif
};

#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
/* 32-bit pagetable entries */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
#else
/* 64-bit pagetable entries */
#define PTE_IDENT       __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
#endif
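
/*
 * Default ops table: every operation points at its native implementation
 * (or a nop), so a CONFIG_PARAVIRT kernel running without a hypervisor
 * behaves like bare metal. Guests overwrite individual entries during
 * early boot.
 */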
struct paravirt_patch_template pv_ops = {
        /* Init ops. */
        .init.patch             = native_patch,

        /* Time ops. */
        .time.sched_clock       = native_sched_clock,
        .time.steal_clock       = native_steal_clock,

        /* Cpu ops. */
        .cpu.cpuid              = native_cpuid,
        .cpu.get_debugreg       = native_get_debugreg,
        .cpu.set_debugreg       = native_set_debugreg,
        .cpu.read_cr0           = native_read_cr0,
        .cpu.write_cr0          = native_write_cr0,
        .cpu.write_cr4          = native_write_cr4,
#ifdef CONFIG_X86_64
        .cpu.read_cr8           = native_read_cr8,
        .cpu.write_cr8          = native_write_cr8,
#endif
        .cpu.wbinvd             = native_wbinvd,
        .cpu.read_msr           = native_read_msr,
        .cpu.write_msr          = native_write_msr,
        .cpu.read_msr_safe      = native_read_msr_safe,
        .cpu.write_msr_safe     = native_write_msr_safe,
        .cpu.read_pmc           = native_read_pmc,
        .cpu.load_tr_desc       = native_load_tr_desc,
        .cpu.set_ldt            = native_set_ldt,
        .cpu.load_gdt           = native_load_gdt,
        .cpu.load_idt           = native_load_idt,
        .cpu.store_tr           = native_store_tr,
        .cpu.load_tls           = native_load_tls,
#ifdef CONFIG_X86_64
        .cpu.load_gs_index      = native_load_gs_index,
#endif
        .cpu.write_ldt_entry    = native_write_ldt_entry,
        .cpu.write_gdt_entry    = native_write_gdt_entry,
        .cpu.write_idt_entry    = native_write_idt_entry,

        .cpu.alloc_ldt          = paravirt_nop,
        .cpu.free_ldt           = paravirt_nop,

        .cpu.load_sp0           = native_load_sp0,

#ifdef CONFIG_X86_64
        .cpu.usergs_sysret64    = native_usergs_sysret64,
#endif
        .cpu.iret               = native_iret,
        .cpu.swapgs             = native_swapgs,

        .cpu.set_iopl_mask      = native_set_iopl_mask,
        .cpu.io_delay           = native_io_delay,

        .cpu.start_context_switch       = paravirt_nop,
        .cpu.end_context_switch         = paravirt_nop,

        /* Irq ops. */
        .irq.save_fl            = __PV_IS_CALLEE_SAVE(native_save_fl),
        .irq.restore_fl         = __PV_IS_CALLEE_SAVE(native_restore_fl),
        .irq.irq_disable        = __PV_IS_CALLEE_SAVE(native_irq_disable),
        .irq.irq_enable         = __PV_IS_CALLEE_SAVE(native_irq_enable),
        .irq.safe_halt          = native_safe_halt,
        .irq.halt               = native_halt,

        /* Mmu ops. */
        .mmu.read_cr2           = native_read_cr2,
        .mmu.write_cr2          = native_write_cr2,
        .mmu.read_cr3           = __native_read_cr3,
        .mmu.write_cr3          = native_write_cr3,

        .mmu.flush_tlb_user     = native_flush_tlb,
        .mmu.flush_tlb_kernel   = native_flush_tlb_global,
        .mmu.flush_tlb_one_user = native_flush_tlb_one_user,
        .mmu.flush_tlb_others   = native_flush_tlb_others,
        .mmu.tlb_remove_table   =
                        (void (*)(struct mmu_gather *, void *))tlb_remove_page,

        .mmu.pgd_alloc          = __paravirt_pgd_alloc,
        .mmu.pgd_free           = paravirt_nop,

        .mmu.alloc_pte          = paravirt_nop,
        .mmu.alloc_pmd          = paravirt_nop,
        .mmu.alloc_pud          = paravirt_nop,
        .mmu.alloc_p4d          = paravirt_nop,
        .mmu.release_pte        = paravirt_nop,
        .mmu.release_pmd        = paravirt_nop,
        .mmu.release_pud        = paravirt_nop,
        .mmu.release_p4d        = paravirt_nop,

        .mmu.set_pte            = native_set_pte,
        .mmu.set_pte_at         = native_set_pte_at,
        .mmu.set_pmd            = native_set_pmd,

        .mmu.ptep_modify_prot_start     = __ptep_modify_prot_start,
        .mmu.ptep_modify_prot_commit    = __ptep_modify_prot_commit,

#if CONFIG_PGTABLE_LEVELS >= 3
#ifdef CONFIG_X86_PAE
        .mmu.set_pte_atomic     = native_set_pte_atomic,
        .mmu.pte_clear          = native_pte_clear,
        .mmu.pmd_clear          = native_pmd_clear,
#endif
        .mmu.set_pud            = native_set_pud,

        .mmu.pmd_val            = PTE_IDENT,
        .mmu.make_pmd           = PTE_IDENT,

#if CONFIG_PGTABLE_LEVELS >= 4
        .mmu.pud_val            = PTE_IDENT,
        .mmu.make_pud           = PTE_IDENT,

        .mmu.set_p4d            = native_set_p4d,

#if CONFIG_PGTABLE_LEVELS >= 5
        .mmu.p4d_val            = PTE_IDENT,
        .mmu.make_p4d           = PTE_IDENT,

        .mmu.set_pgd            = native_set_pgd,
#endif /* CONFIG_PGTABLE_LEVELS >= 5 */
#endif /* CONFIG_PGTABLE_LEVELS >= 4 */
#endif /* CONFIG_PGTABLE_LEVELS >= 3 */

        .mmu.pte_val            = PTE_IDENT,
        .mmu.pgd_val            = PTE_IDENT,

        .mmu.make_pte           = PTE_IDENT,
        .mmu.make_pgd           = PTE_IDENT,

        .mmu.dup_mmap           = paravirt_nop,
        .mmu.exit_mmap          = paravirt_nop,
        .mmu.activate_mm        = paravirt_nop,

        .mmu.lazy_mode = {
                .enter          = paravirt_nop,
                .leave          = paravirt_nop,
                .flush          = paravirt_nop,
        },

        .mmu.set_fixmap         = native_set_fixmap,

#if defined(CONFIG_PARAVIRT_SPINLOCKS)
        /* Lock ops. */
#ifdef CONFIG_SMP
        .lock.queued_spin_lock_slowpath = native_queued_spin_lock_slowpath,
        .lock.queued_spin_unlock        =
                                PV_CALLEE_SAVE(__native_queued_spin_unlock),
        .lock.wait                      = paravirt_nop,
        .lock.kick                      = paravirt_nop,
        .lock.vcpu_is_preempted         =
                                PV_CALLEE_SAVE(__native_vcpu_is_preempted),
#endif /* SMP */
#endif /* CONFIG_PARAVIRT_SPINLOCKS */
};

/* At this point, native_get/set_debugreg has real function entries */
NOKPROBE_SYMBOL(native_get_debugreg);
NOKPROBE_SYMBOL(native_set_debugreg);
NOKPROBE_SYMBOL(native_load_idt);

EXPORT_SYMBOL_GPL(pv_ops);
EXPORT_SYMBOL_GPL(pv_info);