// SPDX-License-Identifier: GPL-2.0
/*
 *  S390 version
 *    Copyright IBM Corp. 1999, 2012
 *    Author(s): Hartmut Penner (hp@de.ibm.com),
 *               Martin Schwidefsky (schwidefsky@de.ibm.com)
 *
 *  Derived from "arch/i386/kernel/setup.c"
 *    Copyright (C) 1995, Linus Torvalds
 */

/*
 * This file handles the architecture-dependent parts of initialization
 */

#define KMSG_COMPONENT "setup"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/errno.h>
#include <linux/export.h>
#include <linux/sched.h>
#include <linux/sched/task.h>
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/memblock.h>
#include <linux/mm.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/random.h>
#include <linux/user.h>
#include <linux/tty.h>
#include <linux/ioport.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/initrd.h>
#include <linux/root_dev.h>
#include <linux/console.h>
#include <linux/kernel_stat.h>
#include <linux/dma-map-ops.h>
#include <linux/device.h>
#include <linux/notifier.h>
#include <linux/pfn.h>
#include <linux/ctype.h>
#include <linux/reboot.h>
#include <linux/topology.h>
#include <linux/kexec.h>
#include <linux/crash_dump.h>
#include <linux/memory.h>
#include <linux/compat.h>
#include <linux/start_kernel.h>
#include <linux/hugetlb.h>
#include <linux/kmemleak.h>

#include <asm/archrandom.h>
#include <asm/boot_data.h>
#include <asm/ipl.h>
#include <asm/facility.h>
#include <asm/smp.h>
#include <asm/mmu_context.h>
#include <asm/cpcmd.h>
#include <asm/abs_lowcore.h>
#include <asm/nmi.h>
#include <asm/irq.h>
#include <asm/page.h>
#include <asm/ptrace.h>
#include <asm/sections.h>
#include <asm/ebcdic.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include <asm/stacktrace.h>
#include <asm/sysinfo.h>
#include <asm/numa.h>
#include <asm/alternative.h>
#include <asm/nospec-branch.h>
#include <asm/mem_detect.h>
#include <asm/maccess.h>
#include <asm/uv.h>
#include <asm/asm-offsets.h>
#include "entry.h"

/*
 * Machine setup..
 */
unsigned int console_mode = 0;
EXPORT_SYMBOL(console_mode);

unsigned int console_devno = -1;
EXPORT_SYMBOL(console_devno);

unsigned int console_irq = -1;
EXPORT_SYMBOL(console_irq);

/*
 * Some code and data need to stay below 2 GB, even when the kernel is
 * relocated above 2 GB, because they have to use 31-bit addresses.
 * Such code and data are part of the .amode31 section.
 */
unsigned long __amode31_ref __samode31 = (unsigned long)&_samode31;
unsigned long __amode31_ref __eamode31 = (unsigned long)&_eamode31;
unsigned long __amode31_ref __stext_amode31 = (unsigned long)&_stext_amode31;
unsigned long __amode31_ref __etext_amode31 = (unsigned long)&_etext_amode31;
struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;

/*
 * Control registers CR2, CR5 and CR15 are initialized with addresses
 * of tables that must be placed below 2G, which is why those tables are
 * part of the AMODE31 sections.
 * Because the AMODE31 sections are relocated below 2G at startup,
 * the content of control registers CR2, CR5 and CR15 must be updated
 * with new addresses after the relocation. The control registers are
 * first initialized in head64.S and then updated again after the AMODE31
 * relocation. The relevant AMODE31 tables are accessed indirectly via
 * pointers placed in the .amode31.refs linker section. Those pointers are
 * updated automatically during the AMODE31 relocation and therefore always
 * contain valid addresses within the AMODE31 sections.
 */

static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);

static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
        [1] = 0xffffffffffffffff
};

static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0,
        0x80000000, 0, 0, 0
};

static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
        0, 0, 0x89000000, 0,
        0, 0, 0x8a000000, 0
};

static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
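
/*
 * Note: these pointers live in the .amode31.refs section, so after
 * relocate_amode31_section() below has added the relocation offset to
 * them, they reference the relocated copies of the tables above.
 */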

int __bootdata(noexec_disabled);
unsigned long __bootdata(ident_map_size);
struct mem_detect_info __bootdata(mem_detect);
struct initrd_data __bootdata(initrd_data);
unsigned long __bootdata(pgalloc_pos);
unsigned long __bootdata(pgalloc_end);
unsigned long __bootdata(pgalloc_low);

unsigned long __bootdata_preserved(__kaslr_offset);
unsigned long __bootdata(__amode31_base);
unsigned int __bootdata_preserved(zlib_dfltcc_support);
EXPORT_SYMBOL(zlib_dfltcc_support);
u64 __bootdata_preserved(stfle_fac_list[16]);
EXPORT_SYMBOL(stfle_fac_list);
u64 __bootdata_preserved(alt_stfle_fac_list[16]);
struct oldmem_data __bootdata_preserved(oldmem_data);

unsigned long VMALLOC_START;
EXPORT_SYMBOL(VMALLOC_START);

unsigned long VMALLOC_END;
EXPORT_SYMBOL(VMALLOC_END);

struct page *vmemmap;
EXPORT_SYMBOL(vmemmap);
unsigned long vmemmap_size;

unsigned long MODULES_VADDR;
unsigned long MODULES_END;

/* An array with a pointer to the lowcore of every CPU. */
struct lowcore *lowcore_ptr[NR_CPUS];
EXPORT_SYMBOL(lowcore_ptr);

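/*
 * Enabled if the breaking-event-address-register (BEAR) enhancement
 * facility (facility bit 193) is installed; see setup_arch() below.
 */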
DEFINE_STATIC_KEY_FALSE(cpu_has_bear);

/*
 * The Write Back bit position in the physaddr is given by the SLPC PCI.
 * Leaving the mask zero always uses write through, which is safe.
 */
unsigned long mio_wb_bit_mask __ro_after_init;

/*
 * This is set up by the setup routine at boot time.
 * For S390 we need to find out what we have to set up,
 * using address 0x10400 ...
 */

#include <asm/setup.h>

/*
 * condev= and conmode= setup parameters.
 */

static int __init condev_setup(char *str)
{
        int vdev;

        vdev = simple_strtoul(str, &str, 0);
        if (vdev >= 0 && vdev < 65536) {
                console_devno = vdev;
                console_irq = -1;
        }
        return 1;
}

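/*
 * Example: booting with "condev=0x5000" selects device number 0x5000 as
 * the console device and resets console_irq so that the subchannel is
 * determined again.
 */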
__setup("condev=", condev_setup);

static void __init set_preferred_console(void)
{
        if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
                add_preferred_console("ttyS", 0, NULL);
        else if (CONSOLE_IS_3270)
                add_preferred_console("tty3270", 0, NULL);
        else if (CONSOLE_IS_VT220)
                add_preferred_console("ttysclp", 0, NULL);
        else if (CONSOLE_IS_HVC)
                add_preferred_console("hvc", 0, NULL);
}

static int __init conmode_setup(char *str)
{
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
        if (!strcmp(str, "hwc") || !strcmp(str, "sclp"))
                SET_CONSOLE_SCLP;
#endif
#if defined(CONFIG_TN3215_CONSOLE)
        if (!strcmp(str, "3215"))
                SET_CONSOLE_3215;
#endif
#if defined(CONFIG_TN3270_CONSOLE)
        if (!strcmp(str, "3270"))
                SET_CONSOLE_3270;
#endif
        set_preferred_console();
        return 1;
}

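/*
 * Example: "conmode=3270" forces the 3270 console driver (if built in);
 * "conmode=sclp" or its legacy alias "hwc" selects the SCLP console.
 */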
__setup("conmode=", conmode_setup);

static void __init conmode_default(void)
{
        char query_buffer[1024];
        char *ptr;

        if (MACHINE_IS_VM) {
                cpcmd("QUERY CONSOLE", query_buffer, 1024, NULL);
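                /*
                 * A (hypothetical) CP reply looks like
                 * "CONS 0009 ... SUBCHANNEL = 0000": the device number is
                 * parsed as hex starting at offset 5, the subchannel
                 * number as hex following the "SUBCHANNEL =" string.
                 */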
                console_devno = simple_strtoul(query_buffer + 5, NULL, 16);
                ptr = strstr(query_buffer, "SUBCHANNEL =");
                console_irq = simple_strtoul(ptr + 13, NULL, 16);
                cpcmd("QUERY TERM", query_buffer, 1024, NULL);
                ptr = strstr(query_buffer, "CONMODE");
                /*
                 * Set the conmode to 3215 so that the device recognition
                 * will set the cu_type of the console to 3215. If the
                 * conmode is 3270 and we don't set it back then both
                 * 3215 and the 3270 driver will try to access the console
                 * device (3215 as console and 3270 as normal tty).
                 */
                cpcmd("TERM CONMODE 3215", NULL, 0, NULL);
                if (ptr == NULL) {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                        return;
                }
                if (str_has_prefix(ptr + 8, "3270")) {
#if defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                } else if (str_has_prefix(ptr + 8, "3215")) {
#if defined(CONFIG_TN3215_CONSOLE)
                        SET_CONSOLE_3215;
#elif defined(CONFIG_TN3270_CONSOLE)
                        SET_CONSOLE_3270;
#elif defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                        SET_CONSOLE_SCLP;
#endif
                }
        } else if (MACHINE_IS_KVM) {
                if (sclp.has_vt220 && IS_ENABLED(CONFIG_SCLP_VT220_CONSOLE))
                        SET_CONSOLE_VT220;
                else if (sclp.has_linemode && IS_ENABLED(CONFIG_SCLP_CONSOLE))
                        SET_CONSOLE_SCLP;
                else
                        SET_CONSOLE_HVC;
        } else {
#if defined(CONFIG_SCLP_CONSOLE) || defined(CONFIG_SCLP_VT220_CONSOLE)
                SET_CONSOLE_SCLP;
#endif
        }
}

#ifdef CONFIG_CRASH_DUMP
static void __init setup_zfcpdump(void)
{
        if (!is_ipl_type_dump())
                return;
        if (oldmem_data.start)
                return;
        strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
        console_loglevel = 2;
}
#else
static inline void setup_zfcpdump(void) {}
#endif /* CONFIG_CRASH_DUMP */

/*
 * Reboot, halt and power_off stubs. They just call _machine_restart,
 * _machine_halt or _machine_power_off.
 */

void machine_restart(char *command)
{
        if ((!in_interrupt() && !in_atomic()) || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_restart(command);
}

void machine_halt(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_halt();
}

void machine_power_off(void)
{
        if (!in_interrupt() || oops_in_progress)
                /*
                 * Only unblank the console if we are called in enabled
                 * context or a bust_spinlocks cleared the way for us.
                 */
                console_unblank();
        _machine_power_off();
}

/*
 * Dummy power off function.
 */
void (*pm_power_off)(void) = machine_power_off;
EXPORT_SYMBOL_GPL(pm_power_off);

void *restart_stack;

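/*
 * Allocate a kernel stack. With CONFIG_VMAP_STACK the stack is backed by
 * virtually mapped pages, so an overflow runs into a guard page instead
 * of silently corrupting adjacent memory.
 */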
unsigned long stack_alloc(void)
{
#ifdef CONFIG_VMAP_STACK
        void *ret;

        ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
                             NUMA_NO_NODE, __builtin_return_address(0));
        kmemleak_not_leak(ret);
        return (unsigned long)ret;
#else
        return __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
#endif
}

void stack_free(unsigned long stack)
{
#ifdef CONFIG_VMAP_STACK
        vfree((void *) stack);
#else
        free_pages(stack, THREAD_SIZE_ORDER);
#endif
}

int __init arch_early_irq_init(void)
{
        unsigned long stack;

        stack = __get_free_pages(GFP_KERNEL, THREAD_SIZE_ORDER);
        if (!stack)
                panic("Couldn't allocate async stack");
        S390_lowcore.async_stack = stack + STACK_INIT_OFFSET;
        return 0;
}

void __init arch_call_rest_init(void)
{
        unsigned long stack;

        smp_reinit_ipl_cpu();
        stack = stack_alloc();
        if (!stack)
                panic("Couldn't allocate kernel stack");
        current->stack = (void *) stack;
#ifdef CONFIG_VMAP_STACK
        current->stack_vm_area = (void *) stack;
#endif
        set_task_stack_end_magic(current);
        stack += STACK_INIT_OFFSET;
        S390_lowcore.kernel_stack = stack;
        call_on_stack_noreturn(rest_init, stack);
}

static void __init setup_lowcore(void)
{
        struct lowcore *lc, *abs_lc;
        unsigned long mcck_stack;

        /*
         * Setup lowcore for boot cpu
         */
        BUILD_BUG_ON(sizeof(struct lowcore) != LC_PAGES * PAGE_SIZE);
        lc = memblock_alloc_low(sizeof(*lc), sizeof(*lc));
        if (!lc)
                panic("%s: Failed to allocate %zu bytes align=%zx\n",
                      __func__, sizeof(*lc), sizeof(*lc));

        lc->restart_psw.mask = PSW_KERNEL_BITS & ~PSW_MASK_DAT;
        lc->restart_psw.addr = __pa(restart_int_handler);
        lc->external_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->external_new_psw.addr = (unsigned long) ext_int_handler;
        lc->svc_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->svc_new_psw.addr = (unsigned long) system_call;
        lc->program_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->program_new_psw.addr = (unsigned long) pgm_check_handler;
        lc->mcck_new_psw.mask = PSW_KERNEL_BITS;
        lc->mcck_new_psw.addr = (unsigned long) mcck_int_handler;
        lc->io_new_psw.mask = PSW_KERNEL_BITS | PSW_MASK_MCHECK;
        lc->io_new_psw.addr = (unsigned long) io_int_handler;
        lc->clock_comparator = clock_comparator_max;
        lc->nodat_stack = ((unsigned long) &init_thread_union)
                + THREAD_SIZE - STACK_FRAME_OVERHEAD - sizeof(struct pt_regs);
        lc->current_task = (unsigned long)&init_task;
        lc->lpp = LPP_MAGIC;
        lc->machine_flags = S390_lowcore.machine_flags;
        lc->preempt_count = S390_lowcore.preempt_count;
        nmi_alloc_mcesa_early(&lc->mcesad);
        lc->sys_enter_timer = S390_lowcore.sys_enter_timer;
        lc->exit_timer = S390_lowcore.exit_timer;
        lc->user_timer = S390_lowcore.user_timer;
        lc->system_timer = S390_lowcore.system_timer;
        lc->steal_timer = S390_lowcore.steal_timer;
        lc->last_update_timer = S390_lowcore.last_update_timer;
        lc->last_update_clock = S390_lowcore.last_update_clock;

        /*
         * Allocate the global restart stack which is the same for
         * all CPUs in case *one* of them does a PSW restart.
         */
        restart_stack = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
        if (!restart_stack)
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, THREAD_SIZE, THREAD_SIZE);
        restart_stack += STACK_INIT_OFFSET;

        /*
         * Set up PSW restart to call ipl.c:do_restart(). Copy the relevant
         * restart data to the absolute zero lowcore. This is necessary if
         * PSW restart is done on an offline CPU that has lowcore zero.
         */
        lc->restart_stack = (unsigned long) restart_stack;
        lc->restart_fn = (unsigned long) do_restart;
        lc->restart_data = 0;
        lc->restart_source = -1U;
        __ctl_store(lc->cregs_save_area, 0, 15);

        mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
        if (!mcck_stack)
                panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
                      __func__, THREAD_SIZE, THREAD_SIZE);
        lc->mcck_stack = mcck_stack + STACK_INIT_OFFSET;

        lc->spinlock_lockval = arch_spin_lockval(0);
        lc->spinlock_index = 0;
        arch_spin_lock_setup(0);
        lc->return_lpswe = gen_lpswe(__LC_RETURN_PSW);
        lc->return_mcck_lpswe = gen_lpswe(__LC_RETURN_MCCK_PSW);
        lc->preempt_count = PREEMPT_DISABLED;
        lc->kernel_asce = S390_lowcore.kernel_asce;
        lc->user_asce = S390_lowcore.user_asce;

        abs_lc = get_abs_lowcore();
        abs_lc->restart_stack = lc->restart_stack;
        abs_lc->restart_fn = lc->restart_fn;
        abs_lc->restart_data = lc->restart_data;
        abs_lc->restart_source = lc->restart_source;
        abs_lc->restart_psw = lc->restart_psw;
        abs_lc->restart_flags = RESTART_FLAG_CTLREGS;
        memcpy(abs_lc->cregs_save_area, lc->cregs_save_area, sizeof(abs_lc->cregs_save_area));
        abs_lc->program_new_psw = lc->program_new_psw;
        abs_lc->mcesad = lc->mcesad;
        put_abs_lowcore(abs_lc);

        set_prefix(__pa(lc));
        lowcore_ptr[0] = lc;
        if (abs_lowcore_map(0, lowcore_ptr[0], false))
                panic("Couldn't setup absolute lowcore");
}

static struct resource code_resource = {
        .name  = "Kernel code",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource data_resource = {
        .name = "Kernel data",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource bss_resource = {
        .name = "Kernel bss",
        .flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM,
};

static struct resource __initdata *standard_resources[] = {
        &code_resource,
        &data_resource,
        &bss_resource,
};

static void __init setup_resources(void)
{
        struct resource *res, *std_res, *sub_res;
        phys_addr_t start, end;
        int j;
        u64 i;

        code_resource.start = (unsigned long) _text;
        code_resource.end = (unsigned long) _etext - 1;
        data_resource.start = (unsigned long) _etext;
        data_resource.end = (unsigned long) _edata - 1;
        bss_resource.start = (unsigned long) __bss_start;
        bss_resource.end = (unsigned long) __bss_stop - 1;

        for_each_mem_range(i, &start, &end) {
                res = memblock_alloc(sizeof(*res), 8);
                if (!res)
                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                              __func__, sizeof(*res), 8);
                res->flags = IORESOURCE_BUSY | IORESOURCE_SYSTEM_RAM;

                res->name = "System RAM";
                res->start = start;
                /*
                 * In memblock, end points to the first byte after the
                 * range while in resources, end points to the last byte in
                 * the range.
                 */
                res->end = end - 1;
                request_resource(&iomem_resource, res);

                for (j = 0; j < ARRAY_SIZE(standard_resources); j++) {
                        std_res = standard_resources[j];
                        if (std_res->start < res->start ||
                            std_res->start > res->end)
                                continue;
                        if (std_res->end > res->end) {
                                sub_res = memblock_alloc(sizeof(*sub_res), 8);
                                if (!sub_res)
                                        panic("%s: Failed to allocate %zu bytes align=0x%x\n",
                                              __func__, sizeof(*sub_res), 8);
                                *sub_res = *std_res;
                                sub_res->end = res->end;
                                std_res->start = res->end + 1;
                                request_resource(res, sub_res);
                        } else {
                                request_resource(res, std_res);
                        }
                }
        }
#ifdef CONFIG_CRASH_DUMP
        /*
         * Re-add removed crash kernel memory as reserved memory. This makes
         * sure it will be mapped with the identity mapping and struct pages
         * will be created, so it can be resized later on.
         * However add it later since the crash kernel resource should not be
         * part of the System RAM resource.
         */
        if (crashk_res.end) {
                memblock_add_node(crashk_res.start, resource_size(&crashk_res),
                                  0, MEMBLOCK_NONE);
                memblock_reserve(crashk_res.start, resource_size(&crashk_res));
                insert_resource(&iomem_resource, &crashk_res);
        }
#endif
}

static void __init setup_memory_end(void)
{
        max_pfn = max_low_pfn = PFN_DOWN(ident_map_size);
        pr_notice("The maximum memory size is %luMB\n", ident_map_size >> 20);
}

#ifdef CONFIG_CRASH_DUMP

/*
 * When kdump is enabled, we have to ensure that no memory from the area
 * [0 - crashkernel memory size] is set offline - it will be exchanged with
 * the crashkernel memory region when kdump is triggered. The crashkernel
 * memory region can never get offlined (pages are unmovable).
 */
static int kdump_mem_notifier(struct notifier_block *nb,
                              unsigned long action, void *data)
{
        struct memory_notify *arg = data;

        if (action != MEM_GOING_OFFLINE)
                return NOTIFY_OK;
        if (arg->start_pfn < PFN_DOWN(resource_size(&crashk_res)))
                return NOTIFY_BAD;
        return NOTIFY_OK;
}

static struct notifier_block kdump_mem_nb = {
        .notifier_call = kdump_mem_notifier,
};

#endif

/*
 * Reserve page tables created by decompressor
 */
static void __init reserve_pgtables(void)
{
        memblock_reserve(pgalloc_pos, pgalloc_end - pgalloc_pos);
}

/*
 * Reserve memory for kdump kernel to be loaded with kexec
 */
static void __init reserve_crashkernel(void)
{
#ifdef CONFIG_CRASH_DUMP
        unsigned long long crash_base, crash_size;
        phys_addr_t low, high;
        int rc;

        rc = parse_crashkernel(boot_command_line, ident_map_size, &crash_size,
                               &crash_base);

        crash_base = ALIGN(crash_base, KEXEC_CRASH_MEM_ALIGN);
        crash_size = ALIGN(crash_size, KEXEC_CRASH_MEM_ALIGN);
        if (rc || crash_size == 0)
                return;

        if (memblock.memory.regions[0].size < crash_size) {
                pr_info("crashkernel reservation failed: %s\n",
                        "first memory chunk must be at least crashkernel size");
                return;
        }

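        /*
         * "low"/"high" delimit the candidate range: if we are already
         * running as a kdump kernel (oldmem_data.start is set) and the
         * requested area fits into the old crashkernel region, that
         * region is simply reused; otherwise a suitable range is
         * allocated from memblock below.
         */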
        low = crash_base ?: oldmem_data.start;
        high = low + crash_size;
        if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
                /* The crashkernel fits into OLDMEM, reuse OLDMEM */
                crash_base = low;
        } else {
                /* Find suitable area in free memory */
                low = max_t(unsigned long, crash_size, sclp.hsa_size);
                high = crash_base ? crash_base + crash_size : ULONG_MAX;

                if (crash_base && crash_base < low) {
                        pr_info("crashkernel reservation failed: %s\n",
                                "crash_base too low");
                        return;
                }
                low = crash_base ?: low;
                crash_base = memblock_phys_alloc_range(crash_size,
                                                       KEXEC_CRASH_MEM_ALIGN,
                                                       low, high);
        }

        if (!crash_base) {
                pr_info("crashkernel reservation failed: %s\n",
                        "no suitable area found");
                return;
        }

        if (register_memory_notifier(&kdump_mem_nb)) {
                memblock_phys_free(crash_base, crash_size);
                return;
        }

        if (!oldmem_data.start && MACHINE_IS_VM)
                diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
        memblock_remove(crash_base, crash_size);
        pr_info("Reserving %lluMB of memory at %lluMB "
                "for crashkernel (System RAM: %luMB)\n",
                crash_size >> 20, crash_base >> 20,
                (unsigned long)memblock.memory.total_size >> 20);
        os_info_crashkernel_add(crash_base, crash_size);
#endif
}

/*
 * Reserve the initrd from being used by memblock
 */
static void __init reserve_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
        if (!initrd_data.start || !initrd_data.size)
                return;
        initrd_start = (unsigned long)__va(initrd_data.start);
        initrd_end = initrd_start + initrd_data.size;
        memblock_reserve(initrd_data.start, initrd_data.size);
#endif
}

/*
 * Reserve the memory area used to pass the certificate lists
 */
static void __init reserve_certificate_list(void)
{
        if (ipl_cert_list_addr)
                memblock_reserve(ipl_cert_list_addr, ipl_cert_list_size);
}

static void __init reserve_mem_detect_info(void)
{
        unsigned long start, size;

        get_mem_detect_reserved(&start, &size);
        if (size)
                memblock_reserve(start, size);
}

static void __init free_mem_detect_info(void)
{
        unsigned long start, size;

        get_mem_detect_reserved(&start, &size);
        if (size)
                memblock_phys_free(start, size);
}

static const char * __init get_mem_info_source(void)
{
        switch (mem_detect.info_source) {
        case MEM_DETECT_SCLP_STOR_INFO:
                return "sclp storage info";
        case MEM_DETECT_DIAG260:
                return "diag260";
        case MEM_DETECT_SCLP_READ_INFO:
                return "sclp read info";
        case MEM_DETECT_BIN_SEARCH:
                return "binary search";
        }
        return "none";
}

static void __init memblock_add_mem_detect_info(void)
{
        unsigned long start, end;
        int i;

        pr_debug("physmem info source: %s (%hhd)\n",
                 get_mem_info_source(), mem_detect.info_source);
        /* keep memblock lists close to the kernel */
        memblock_set_bottom_up(true);
        for_each_mem_detect_usable_block(i, &start, &end)
                memblock_add(start, end - start);
        for_each_mem_detect_block(i, &start, &end)
                memblock_physmem_add(start, end - start);
        memblock_set_bottom_up(false);
        memblock_set_node(0, ULONG_MAX, &memblock.memory, 0);
}

/*
 * Check for initrd being in usable memory
 */
static void __init check_initrd(void)
{
#ifdef CONFIG_BLK_DEV_INITRD
        if (initrd_data.start && initrd_data.size &&
            !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
                pr_err("The initial RAM disk does not fit into the memory\n");
                memblock_phys_free(initrd_data.start, initrd_data.size);
                initrd_start = initrd_end = 0;
        }
#endif
}

/*
 * Reserve memory used for lowcore/command line/kernel image.
 */
static void __init reserve_kernel(void)
{
        memblock_reserve(0, STARTUP_NORMAL_OFFSET);
        memblock_reserve(OLDMEM_BASE, sizeof(unsigned long));
        memblock_reserve(OLDMEM_SIZE, sizeof(unsigned long));
        memblock_reserve(__amode31_base, __eamode31 - __samode31);
        memblock_reserve(__pa(sclp_early_sccb), EXT_SCCB_READ_SCP);
        memblock_reserve(__pa(_stext), _end - _stext);
}

static void __init setup_memory(void)
{
        phys_addr_t start, end;
        u64 i;

        /*
         * Init storage key for present memory
         */
        for_each_mem_range(i, &start, &end)
                storage_key_init_range(start, end);

        psw_set_key(PAGE_DEFAULT_KEY);
}

static void __init relocate_amode31_section(void)
{
        unsigned long amode31_size = __eamode31 - __samode31;
        long amode31_offset = __amode31_base - __samode31;
        long *ptr;

        pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);

        /* Move original AMODE31 section to the new one */
        memmove((void *)__amode31_base, (void *)__samode31, amode31_size);
        /* Zero out the old AMODE31 section to catch invalid accesses within it */
        memset((void *)__samode31, 0, amode31_size);

        /* Update all AMODE31 region references */
        for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
                *ptr += amode31_offset;
}
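
/*
 * For illustration (addresses made up): with __samode31 = 0x1000000 and
 * __amode31_base = 0x10000, amode31_offset is -0xff0000, so adding it to
 * every pointer in .amode31.refs makes those pointers address the
 * relocated copy of the section.
 */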

/* This must be called after AMODE31 relocation */
static void __init setup_cr(void)
{
        union ctlreg2 cr2;
        union ctlreg5 cr5;
        union ctlreg15 cr15;

        __ctl_duct[1] = (unsigned long)__ctl_aste;
        __ctl_duct[2] = (unsigned long)__ctl_aste;
        __ctl_duct[4] = (unsigned long)__ctl_duald;

        /* Update control registers CR2, CR5 and CR15 */
        __ctl_store(cr2.val, 2, 2);
        __ctl_store(cr5.val, 5, 5);
        __ctl_store(cr15.val, 15, 15);
        cr2.ducto = (unsigned long)__ctl_duct >> 6;
        cr5.pasteo = (unsigned long)__ctl_duct >> 6;
        cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
        __ctl_load(cr2.val, 2, 2);
        __ctl_load(cr5.val, 5, 5);
        __ctl_load(cr15.val, 15, 15);
}
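
/*
 * Note: the shift counts above correspond to the bit positions of the
 * DUCT origin (CR2.DUCTO), PASTE origin (CR5.PASTEO) and linkage-stack
 * entry address (CR15.LSEA) fields; the referenced tables are aligned
 * strictly enough (64 resp. 8 bytes) that no address bits are lost.
 */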

/*
 * Add system information as device randomness
 */
static void __init setup_randomness(void)
{
        struct sysinfo_3_2_2 *vmms;

        vmms = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
        if (!vmms)
                panic("Failed to allocate memory for sysinfo structure\n");
        if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
                add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
        memblock_free(vmms, PAGE_SIZE);

        if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
                static_branch_enable(&s390_arch_random_available);
}

/*
 * Find the correct size for the task_struct. This depends on
 * the size of the struct fpu at the end of the thread_struct
 * which is embedded in the task_struct.
 */
static void __init setup_task_size(void)
{
        int task_size = sizeof(struct task_struct);

        if (!MACHINE_HAS_VX) {
                task_size -= sizeof(__vector128) * __NUM_VXRS;
                task_size += sizeof(freg_t) * __NUM_FPRS;
        }
        arch_task_struct_size = task_size;
}
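
/*
 * Example: without the vector facility the 32 x 16-byte vector registers
 * are not saved, only the 16 x 8-byte floating point registers, which
 * shrinks each task_struct by 384 bytes.
 */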

/*
 * Issue diagnose 318 to set the control program name and
 * version codes.
 */
static void __init setup_control_program_code(void)
{
        union diag318_info diag318_info = {
                .cpnc = CPNC_LINUX,
                .cpvc = 0,
        };

        if (!sclp.has_diag318)
                return;

        diag_stat_inc(DIAG_STAT_X318);
        asm volatile("diag %0,0,0x318\n" : : "d" (diag318_info.val));
}

/*
 * Print the component list from the IPL report
 */
static void __init log_component_list(void)
{
        struct ipl_rb_component_entry *ptr, *end;
        char *str;

        if (!early_ipl_comp_list_addr)
                return;
        if (ipl_block.hdr.flags & IPL_PL_FLAG_SIPL)
                pr_info("Linux is running with Secure-IPL enabled\n");
        else
                pr_info("Linux is running with Secure-IPL disabled\n");
        ptr = (void *) early_ipl_comp_list_addr;
        end = (void *) ptr + early_ipl_comp_list_size;
        pr_info("The IPL report contains the following components:\n");
        while (ptr < end) {
                if (ptr->flags & IPL_RB_COMPONENT_FLAG_SIGNED) {
                        if (ptr->flags & IPL_RB_COMPONENT_FLAG_VERIFIED)
                                str = "signed, verified";
                        else
                                str = "signed, verification failed";
                } else {
                        str = "not signed";
                }
                pr_info("%016llx - %016llx (%s)\n",
                        ptr->addr, ptr->addr + ptr->len, str);
                ptr++;
        }
}

/*
 * Setup function called from init/main.c just after the banner
 * was printed.
 */

void __init setup_arch(char **cmdline_p)
{
        /*
         * print what head.S has found out about the machine
         */
        if (MACHINE_IS_VM)
                pr_info("Linux is running as a z/VM "
                        "guest operating system in 64-bit mode\n");
        else if (MACHINE_IS_KVM)
                pr_info("Linux is running under KVM in 64-bit mode\n");
        else if (MACHINE_IS_LPAR)
                pr_info("Linux is running natively in 64-bit mode\n");
        else
                pr_info("Linux is running as a guest in 64-bit mode\n");

        log_component_list();

        /* Have one command line that is parsed and saved in /proc/cmdline */
        /* boot_command_line has been already set up in early.c */
        *cmdline_p = boot_command_line;

        ROOT_DEV = Root_RAM0;

        setup_initial_init_mm(_text, _etext, _edata, _end);

        if (IS_ENABLED(CONFIG_EXPOLINE_AUTO))
                nospec_auto_detect();

        jump_label_init();
        parse_early_param();
#ifdef CONFIG_CRASH_DUMP
        /* Deactivate elfcorehdr= kernel parameter */
        elfcorehdr_addr = ELFCORE_ADDR_MAX;
#endif

        os_info_init();
        setup_ipl();
        setup_task_size();
        setup_control_program_code();

        /* Do some memory reservations *before* memory is added to memblock */
        reserve_pgtables();
        reserve_kernel();
        reserve_initrd();
        reserve_certificate_list();
        reserve_mem_detect_info();
        memblock_set_current_limit(ident_map_size);
        memblock_allow_resize();

        /* Get information about *all* installed memory */
        memblock_add_mem_detect_info();

        free_mem_detect_info();
        setup_memory_end();
        memblock_dump_all();
        setup_memory();

        relocate_amode31_section();
        setup_cr();
        setup_uv();
        dma_contiguous_reserve(ident_map_size);
        vmcp_cma_reserve();
        if (MACHINE_HAS_EDAT2)
                hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);

        check_initrd();
        reserve_crashkernel();
#ifdef CONFIG_CRASH_DUMP
        /*
         * Be aware that smp_save_dump_secondary_cpus() triggers a system reset.
         * Therefore CPU and device initialization should be done afterwards.
         */
        smp_save_dump_secondary_cpus();
#endif

        setup_resources();
        setup_lowcore();
        smp_fill_possible_mask();
        cpu_detect_mhz_feature();
        cpu_init();
        numa_setup();
        smp_detect_cpus();
        topology_init_early();

        if (test_facility(193))
                static_branch_enable(&cpu_has_bear);

        /*
         * Create kernel page tables.
         */
        paging_init();

        /*
         * After paging_init created the kernel page table, the new PSWs
         * in lowcore can now run with DAT enabled.
         */
#ifdef CONFIG_CRASH_DUMP
        smp_save_dump_ipl_cpu();
#endif

        /* Setup default console */
        conmode_default();
        set_preferred_console();

        apply_alternative_instructions();
        if (IS_ENABLED(CONFIG_EXPOLINE))
                nospec_init_branches();

        /* Setup zfcp/nvme dump support */
        setup_zfcpdump();

        /* Add system specific data to the random pool */
        setup_randomness();
}