arch/x86/xen/smp_pv.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 */
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
#include <linux/err.h>
#include <linux/slab.h>
#include <linux/smp.h>
#include <linux/irq_work.h>
#include <linux/tick.h>
#include <linux/nmi.h>
#include <linux/cpuhotplug.h>
#include <linux/stackprotector.h>
#include <linux/pgtable.h>

#include <asm/paravirt.h>
#include <asm/idtentry.h>
#include <asm/desc.h>
#include <asm/cpu.h>
#include <asm/apic.h>
#include <asm/io_apic.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>
#include <xen/interface/xenpmu.h>

#include <asm/spec-ctrl.h>
#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/xen.h>
#include <xen/page.h>
#include <xen/events.h>

#include <xen/hvc-console.h>
#include "xen-ops.h"
#include "mmu.h"
#include "smp.h"
#include "pmu.h"

cpumask_var_t xen_cpu_initialized_map;

static DEFINE_PER_CPU(struct xen_common_irq, xen_irq_work) = { .irq = -1 };
static DEFINE_PER_CPU(struct xen_common_irq, xen_pmu_irq) = { .irq = -1 };

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id);

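/*
 * Early bringup, run on the freshly started vCPU itself: initialize CR4,
 * synchronize with the CPU hotplug core, set up per-CPU state and
 * clockevents, then mark the CPU online and finally enable interrupts.
 */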
static void cpu_bringup(void)
{
        int cpu;

        cr4_init();
        cpuhp_ap_sync_alive();
        cpu_init();
        fpu__init_cpu();
        touch_softlockup_watchdog();

        /* PVH runs in ring 0 and allows us to do native syscalls. Yay! */
        if (!xen_feature(XENFEAT_supervisor_mode_kernel)) {
                xen_enable_sysenter();
                xen_enable_syscall();
        }
        cpu = smp_processor_id();
        smp_store_cpu_info(cpu);
        set_cpu_sibling_map(cpu);

        speculative_store_bypass_ht_init();

        xen_setup_cpu_clockevents();

        notify_cpu_starting(cpu);

        set_cpu_online(cpu, true);

        smp_mb();

        /* We can take interrupts now: we're officially "up". */
        local_irq_enable();
}

asmlinkage __visible void cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
}

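/* Tear down the PV-only per-CPU interrupts: the irq_work IPI and PMU VIRQ. */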
void xen_smp_intr_free_pv(unsigned int cpu)
{
        kfree(per_cpu(xen_irq_work, cpu).name);
        per_cpu(xen_irq_work, cpu).name = NULL;
        if (per_cpu(xen_irq_work, cpu).irq >= 0) {
                unbind_from_irqhandler(per_cpu(xen_irq_work, cpu).irq, NULL);
                per_cpu(xen_irq_work, cpu).irq = -1;
        }

        kfree(per_cpu(xen_pmu_irq, cpu).name);
        per_cpu(xen_pmu_irq, cpu).name = NULL;
        if (per_cpu(xen_pmu_irq, cpu).irq >= 0) {
                unbind_from_irqhandler(per_cpu(xen_pmu_irq, cpu).irq, NULL);
                per_cpu(xen_pmu_irq, cpu).irq = -1;
        }
}

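/*
 * Bind the PV-only per-CPU interrupts: the irq_work IPI and, when the
 * Xen PMU is in use, the VIRQ_XENPMU handler.  On failure everything
 * bound so far is unwound via xen_smp_intr_free_pv().
 */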
int xen_smp_intr_init_pv(unsigned int cpu)
{
        int rc;
        char *callfunc_name, *pmu_name;

        callfunc_name = kasprintf(GFP_KERNEL, "irqwork%d", cpu);
        per_cpu(xen_irq_work, cpu).name = callfunc_name;
        rc = bind_ipi_to_irqhandler(XEN_IRQ_WORK_VECTOR,
                                    cpu,
                                    xen_irq_work_interrupt,
                                    IRQF_PERCPU|IRQF_NOBALANCING,
                                    callfunc_name,
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(xen_irq_work, cpu).irq = rc;

        if (is_xen_pmu) {
                pmu_name = kasprintf(GFP_KERNEL, "pmu%d", cpu);
                per_cpu(xen_pmu_irq, cpu).name = pmu_name;
                rc = bind_virq_to_irqhandler(VIRQ_XENPMU, cpu,
                                             xen_pmu_irq_handler,
                                             IRQF_PERCPU|IRQF_NOBALANCING,
                                             pmu_name, NULL);
                if (rc < 0)
                        goto fail;
                per_cpu(xen_pmu_irq, cpu).irq = rc;
        }

        return 0;

 fail:
        xen_smp_intr_free_pv(cpu);
        return rc;
}

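/*
 * PV guests have no local APIC to enumerate, so register one synthetic
 * APIC ID per possible CPU and claim the configuration was found.
 */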
static void __init xen_pv_smp_config(void)
{
        u32 apicid = 0;
        int i;

        topology_register_boot_apic(apicid++);

        for (i = 1; i < nr_cpu_ids; i++)
                topology_register_apic(apicid++, CPU_ACPIID_INVALID, true);

        /* Pretend to be a properly enumerated system */
        smp_found_config = 1;
}

static void __init xen_pv_smp_prepare_boot_cpu(void)
{
        BUG_ON(smp_processor_id() != 0);
        native_smp_prepare_boot_cpu();

        if (!xen_feature(XENFEAT_writable_page_tables))
                /*
                 * We've switched to the "real" per-cpu gdt, so make
                 * sure the old memory can be recycled.
                 */
                make_lowmem_page_readwrite(xen_initial_gdt);

        xen_setup_vcpu_info_placement();

        /*
         * The alternatives logic (which patches the unlock/lock paths)
         * runs before the SMP bootup code is activated, so the spinlock
         * setup must happen before the core kernel is patched.
         * Otherwise only modules would be patched, but not the core
         * kernel.
         */
        xen_init_spinlocks();
}

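/*
 * Prepare for SMP bringup: refuse to run with the IO-APIC disabled
 * (nosmp/noapic), set up locks, the SSB mitigation, the PMU and the
 * boot CPU's IPIs, then clamp the possible map to max_cpus.
 */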
static void __init xen_pv_smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned int cpu;

        if (ioapic_is_disabled) {
                char *m = (max_cpus == 0) ?
                        "The nosmp parameter is incompatible with Xen; "
                        "use Xen dom0_max_vcpus=1 parameter" :
                        "The noapic parameter is incompatible with Xen";

                xen_raw_printk(m);
                panic(m);
        }
        xen_init_lock_cpu(0);

        smp_prepare_cpus_common();

        speculative_store_bypass_ht_init();

        xen_pmu_init(0);

        if (xen_smp_intr_init(0) || xen_smp_intr_init_pv(0))
                BUG();

        if (!alloc_cpumask_var(&xen_cpu_initialized_map, GFP_KERNEL))
                panic("could not allocate xen_cpu_initialized_map\n");

        cpumask_copy(xen_cpu_initialized_map, cpumask_of(0));

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
                        continue;
                set_cpu_possible(cpu, false);
        }

        for_each_possible_cpu(cpu)
                set_cpu_present(cpu, true);
}

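/*
 * Build the initial vcpu_guest_context for a secondary CPU: entry point,
 * segment registers, GDT frame, kernel stack, event callbacks and CR3,
 * then hand it to Xen via VCPUOP_initialise.  Runs at most once per CPU;
 * xen_cpu_initialized_map guards against re-initialization.
 */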
static int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
        struct vcpu_guest_context *ctxt;
        struct desc_struct *gdt;
        unsigned long gdt_mfn;

        if (cpumask_test_and_set_cpu(cpu, xen_cpu_initialized_map))
                return 0;

        ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
        if (ctxt == NULL) {
                cpumask_clear_cpu(cpu, xen_cpu_initialized_map);
                return -ENOMEM;
        }

        gdt = get_cpu_gdt_rw(cpu);

        /*
         * Bring up the CPU in cpu_bringup_and_idle() with the stack
         * pointing just below where pt_regs would be if it were a normal
         * kernel entry.
         */
        ctxt->user_regs.eip = (unsigned long)asm_cpu_bringup_and_idle;
        ctxt->flags = VGCF_IN_KERNEL;
        ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */
        ctxt->user_regs.ds = __USER_DS;
        ctxt->user_regs.es = __USER_DS;
        ctxt->user_regs.ss = __KERNEL_DS;
        ctxt->user_regs.cs = __KERNEL_CS;
        ctxt->user_regs.esp = (unsigned long)task_pt_regs(idle);

        xen_copy_trap_info(ctxt->trap_ctxt);

        BUG_ON((unsigned long)gdt & ~PAGE_MASK);

        gdt_mfn = arbitrary_virt_to_mfn(gdt);
        make_lowmem_page_readonly(gdt);
        make_lowmem_page_readonly(mfn_to_virt(gdt_mfn));

        ctxt->gdt_frames[0] = gdt_mfn;
        ctxt->gdt_ents      = GDT_ENTRIES;

        /*
         * Set SS:SP that Xen will use when entering guest kernel mode
         * from guest user mode.  Subsequent calls to load_sp0() can
         * change this value.
         */
        ctxt->kernel_ss = __KERNEL_DS;
        ctxt->kernel_sp = task_top_of_stack(idle);

        ctxt->gs_base_kernel = per_cpu_offset(cpu);
        ctxt->event_callback_eip    =
                (unsigned long)xen_asm_exc_xen_hypervisor_callback;
        ctxt->failsafe_callback_eip =
                (unsigned long)xen_failsafe_callback;
        per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);

        ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir));
        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt))
                BUG();

        kfree(ctxt);
        return 0;
}

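/*
 * Bring a secondary vCPU to life: set up its runstate area, mask its
 * event channel upcalls, initialize its context and PMU, then ask Xen
 * to start it running with VCPUOP_up.
 */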
static int xen_pv_kick_ap(unsigned int cpu, struct task_struct *idle)
{
        int rc;

        rc = common_cpu_up(cpu, idle);
        if (rc)
                return rc;

        xen_setup_runstate_info(cpu);

        /* make sure interrupts start blocked */
        per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

        rc = cpu_initialize_context(cpu, idle);
        if (rc)
                return rc;

        xen_pmu_init(cpu);

        /*
         * Why is this a BUG? If the hypercall fails then everything can be
         * rolled back, no?
         */
        BUG_ON(HYPERVISOR_vcpu_op(VCPUOP_up, xen_vcpu_nr(cpu), NULL));

        return 0;
}

static void xen_pv_poll_sync_state(void)
{
        HYPERVISOR_sched_op(SCHEDOP_yield, NULL);
}

#ifdef CONFIG_HOTPLUG_CPU
static int xen_pv_cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();

        if (cpu == 0)
                return -EBUSY;

        cpu_disable_common();

        load_cr3(swapper_pg_dir);
        return 0;
}

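/* Wait until Xen reports the dying vCPU as down, polling every HZ/10. */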
static void xen_pv_cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, xen_vcpu_nr(cpu), NULL)) {
                __set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(HZ/10);
        }
}

static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
{
        xen_smp_intr_free(cpu);
        xen_uninit_lock_cpu(cpu);
        xen_teardown_timer(cpu);
        xen_pmu_finish(cpu);
}

static void __noreturn xen_pv_play_dead(void) /* used only with HOTPLUG_CPU */
{
        play_dead_common();
        HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(smp_processor_id()), NULL);
        xen_cpu_bringup_again((unsigned long)task_pt_regs(current));
        BUG();
}

#else /* !CONFIG_HOTPLUG_CPU */
static int xen_pv_cpu_disable(void)
{
        return -ENOSYS;
}

static void xen_pv_cpu_die(unsigned int cpu)
{
        BUG();
}

static void xen_pv_cleanup_dead_cpu(unsigned int cpu)
{
        BUG();
}

static void __noreturn xen_pv_play_dead(void)
{
        BUG();
}

#endif
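
/* IPI callback used by stop_other_cpus(): take this vCPU down via Xen. */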
static void stop_self(void *v)
{
        int cpu = smp_processor_id();

        /* make sure we're not pinning something down */
        load_cr3(swapper_pg_dir);
        /* should set up a minimal gdt */

        set_cpu_online(cpu, false);

        HYPERVISOR_vcpu_op(VCPUOP_down, xen_vcpu_nr(cpu), NULL);
        BUG();
}

static void xen_pv_stop_other_cpus(int wait)
{
        smp_call_function(stop_self, NULL, wait);
}

static irqreturn_t xen_irq_work_interrupt(int irq, void *dev_id)
{
        irq_work_run();
        inc_irq_stat(apic_irq_work_irqs);

        return IRQ_HANDLED;
}

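/*
 * Count the available vCPUs by probing VCPUOP_is_up until the hypercall
 * rejects the vCPU id, then shrink nr_cpu_ids accordingly.
 */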
void __init xen_smp_count_cpus(void)
{
        unsigned int cpus;

        for (cpus = 0; cpus < nr_cpu_ids; cpus++) {
                if (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpus, NULL) < 0)
                        break;
        }

        pr_info("Xen PV: Detected %u vCPUs\n", cpus);
        if (cpus < nr_cpu_ids)
                set_nr_cpu_ids(cpus);
}

static const struct smp_ops xen_smp_ops __initconst = {
        .smp_prepare_boot_cpu = xen_pv_smp_prepare_boot_cpu,
        .smp_prepare_cpus = xen_pv_smp_prepare_cpus,
        .smp_cpus_done = xen_smp_cpus_done,

        .kick_ap_alive = xen_pv_kick_ap,
        .cpu_die = xen_pv_cpu_die,
        .cleanup_dead_cpu = xen_pv_cleanup_dead_cpu,
        .poll_sync_state = xen_pv_poll_sync_state,
        .cpu_disable = xen_pv_cpu_disable,
        .play_dead = xen_pv_play_dead,

        .stop_other_cpus = xen_pv_stop_other_cpus,
        .smp_send_reschedule = xen_smp_send_reschedule,

        .send_call_func_ipi = xen_smp_send_call_function_ipi,
        .send_call_func_single_ipi = xen_smp_send_call_function_single_ipi,
};

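/*
 * Install the PV smp_ops and stop the generic code from looking for
 * BIOS MP tables, which don't exist under Xen PV.
 */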
void __init xen_smp_init(void)
{
        smp_ops = xen_smp_ops;

        /* Avoid searching for BIOS MP tables */
        x86_init.mpparse.find_mptable           = x86_init_noop;
        x86_init.mpparse.early_parse_smp_cfg    = x86_init_noop;

        /* Xen PV Dom0 has halfway-sane topology information via CPUID/MADT */
        if (xen_initial_domain())
                x86_init.mpparse.parse_smp_cfg  = x86_init_noop;
        else
                x86_init.mpparse.parse_smp_cfg  = xen_pv_smp_config;
}