powerpc/64: Hard code cache geometry on POWER8
[linux-2.6-block.git] / arch / powerpc / kernel / setup_64.c
CommitLineData
40ef8cbc
PM
1/*
2 *
3 * Common boot and setup code.
4 *
5 * Copyright (C) 2001 PPC64 Team, IBM Corp
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version
10 * 2 of the License, or (at your option) any later version.
11 */
12
7191b615 13#define DEBUG
40ef8cbc 14
4b16f8e2 15#include <linux/export.h>
40ef8cbc
PM
16#include <linux/string.h>
17#include <linux/sched.h>
18#include <linux/init.h>
19#include <linux/kernel.h>
20#include <linux/reboot.h>
21#include <linux/delay.h>
22#include <linux/initrd.h>
40ef8cbc
PM
23#include <linux/seq_file.h>
24#include <linux/ioport.h>
25#include <linux/console.h>
26#include <linux/utsname.h>
27#include <linux/tty.h>
28#include <linux/root_dev.h>
29#include <linux/notifier.h>
30#include <linux/cpu.h>
31#include <linux/unistd.h>
32#include <linux/serial.h>
33#include <linux/serial_8250.h>
7a0268fa 34#include <linux/bootmem.h>
12d04eef 35#include <linux/pci.h>
945feb17 36#include <linux/lockdep.h>
95f72d1e 37#include <linux/memblock.h>
a5d86257 38#include <linux/memory.h>
c54b2bf1 39#include <linux/nmi.h>
a6146888 40
40ef8cbc 41#include <asm/io.h>
0cc4746c 42#include <asm/kdump.h>
40ef8cbc
PM
43#include <asm/prom.h>
44#include <asm/processor.h>
45#include <asm/pgtable.h>
40ef8cbc
PM
46#include <asm/smp.h>
47#include <asm/elf.h>
48#include <asm/machdep.h>
49#include <asm/paca.h>
40ef8cbc
PM
50#include <asm/time.h>
51#include <asm/cputable.h>
52#include <asm/sections.h>
53#include <asm/btext.h>
54#include <asm/nvram.h>
55#include <asm/setup.h>
40ef8cbc
PM
56#include <asm/rtas.h>
57#include <asm/iommu.h>
58#include <asm/serial.h>
59#include <asm/cache.h>
60#include <asm/page.h>
61#include <asm/mmu.h>
40ef8cbc 62#include <asm/firmware.h>
f78541dc 63#include <asm/xmon.h>
dcad47fc 64#include <asm/udbg.h>
593e537b 65#include <asm/kexec.h>
d36b4c4f 66#include <asm/code-patching.h>
5d31a96e 67#include <asm/livepatch.h>
d3cbff1b 68#include <asm/opal.h>
b1923caa 69#include <asm/cputhreads.h>
40ef8cbc
PM
70
71#ifdef DEBUG
72#define DBG(fmt...) udbg_printf(fmt)
73#else
74#define DBG(fmt...)
75#endif
76
8246aca7 77int spinning_secondaries;
40ef8cbc
PM
78u64 ppc64_pft_size;
79
dabcafd3 80struct ppc64_caches ppc64_caches = {
e2827fe5
BH
81 .l1d = {
82 .block_size = 0x40,
83 .log_block_size = 6,
84 },
85 .l1i = {
86 .block_size = 0x40,
87 .log_block_size = 6
88 },
dabcafd3 89};
40ef8cbc
PM
90EXPORT_SYMBOL_GPL(ppc64_caches);
91
28efc35f 92#if defined(CONFIG_PPC_BOOK3E) && defined(CONFIG_SMP)
b1923caa 93void __init setup_tlb_core_data(void)
28efc35f
SW
94{
95 int cpu;
96
82d86de2
SW
97 BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0);
98
28efc35f
SW
99 for_each_possible_cpu(cpu) {
100 int first = cpu_first_thread_sibling(cpu);
101
d9e1831a
SW
102 /*
103 * If we boot via kdump on a non-primary thread,
104 * make sure we point at the thread that actually
105 * set up this TLB.
106 */
107 if (cpu_first_thread_sibling(boot_cpuid) == first)
108 first = boot_cpuid;
109
28efc35f
SW
110 paca[cpu].tcd_ptr = &paca[first].tcd;
111
112 /*
113 * If we have threads, we need either tlbsrx.
114 * or e6500 tablewalk mode, or else TLB handlers
115 * will be racy and could produce duplicate entries.
116 */
117 if (smt_enabled_at_boot >= 2 &&
118 !mmu_has_feature(MMU_FTR_USE_TLBRSRV) &&
119 book3e_htw_mode != PPC_HTW_E6500) {
120 /* Should we panic instead? */
121 WARN_ONCE("%s: unsupported MMU configuration -- expect problems\n",
122 __func__);
123 }
124 }
125}
28efc35f
SW
126#endif
127
40ef8cbc
PM
128#ifdef CONFIG_SMP
129
954e6da5 130static char *smt_enabled_cmdline;
40ef8cbc
PM
131
132/* Look for ibm,smt-enabled OF option */
b1923caa 133void __init check_smt_enabled(void)
40ef8cbc
PM
134{
135 struct device_node *dn;
a7f67bdf 136 const char *smt_option;
40ef8cbc 137
954e6da5
NF
138 /* Default to enabling all threads */
139 smt_enabled_at_boot = threads_per_core;
40ef8cbc 140
954e6da5
NF
141 /* Allow the command line to overrule the OF option */
142 if (smt_enabled_cmdline) {
143 if (!strcmp(smt_enabled_cmdline, "on"))
144 smt_enabled_at_boot = threads_per_core;
145 else if (!strcmp(smt_enabled_cmdline, "off"))
146 smt_enabled_at_boot = 0;
147 else {
1618bd53 148 int smt;
954e6da5
NF
149 int rc;
150
1618bd53 151 rc = kstrtoint(smt_enabled_cmdline, 10, &smt);
954e6da5
NF
152 if (!rc)
153 smt_enabled_at_boot =
1618bd53 154 min(threads_per_core, smt);
954e6da5
NF
155 }
156 } else {
157 dn = of_find_node_by_path("/options");
158 if (dn) {
159 smt_option = of_get_property(dn, "ibm,smt-enabled",
160 NULL);
161
162 if (smt_option) {
163 if (!strcmp(smt_option, "on"))
164 smt_enabled_at_boot = threads_per_core;
165 else if (!strcmp(smt_option, "off"))
166 smt_enabled_at_boot = 0;
167 }
168
169 of_node_put(dn);
170 }
171 }
40ef8cbc
PM
172}
173
174/* Look for smt-enabled= cmdline option */
175static int __init early_smt_enabled(char *p)
176{
954e6da5 177 smt_enabled_cmdline = p;
40ef8cbc
PM
178 return 0;
179}
180early_param("smt-enabled", early_smt_enabled);
181
40ef8cbc
PM
182#endif /* CONFIG_SMP */
183
25e13814 184/** Fix up paca fields required for the boot cpu */
009776ba 185static void __init fixup_boot_paca(void)
25e13814
ME
186{
187 /* The boot cpu is started */
188 get_paca()->cpu_start = 1;
189 /* Allow percpu accesses to work until we setup percpu data */
190 get_paca()->data_offset = 0;
191}
192
009776ba 193static void __init configure_exceptions(void)
8f619b54 194{
633440f1 195 /*
d3cbff1b
BH
196 * Setup the trampolines from the lowmem exception vectors
197 * to the kdump kernel when not using a relocatable kernel.
633440f1 198 */
d3cbff1b
BH
199 setup_kdump_trampoline();
200
201 /* Under a PAPR hypervisor, we need hypercalls */
202 if (firmware_has_feature(FW_FEATURE_SET_MODE)) {
203 /* Enable AIL if possible */
204 pseries_enable_reloc_on_exc();
205
206 /*
207 * Tell the hypervisor that we want our exceptions to
208 * be taken in little endian mode.
209 *
210 * We don't call this for big endian as our calling convention
211 * makes us always enter in BE, and the call may fail under
212 * some circumstances with kdump.
213 */
214#ifdef __LITTLE_ENDIAN__
215 pseries_little_endian_exceptions();
216#endif
217 } else {
218 /* Set endian mode using OPAL */
219 if (firmware_has_feature(FW_FEATURE_OPAL))
220 opal_configure_cores();
221
c0a36013 222 /* AIL on native is done in cpu_ready_for_interrupts() */
8f619b54
BH
223 }
224}
225
d3cbff1b
BH
226static void cpu_ready_for_interrupts(void)
227{
c0a36013
BH
228 /*
229 * Enable AIL if supported, and we are in hypervisor mode. This
230 * is called once for every processor.
231 *
232 * If we are not in hypervisor mode the job is done once for
233 * the whole partition in configure_exceptions().
234 */
235 if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
236 early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
237 unsigned long lpcr = mfspr(SPRN_LPCR);
238 mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
239 }
240
d3cbff1b
BH
241 /* Set IR and DR in PACA MSR */
242 get_paca()->kernel_msr = MSR_KERNEL;
243}
244
40ef8cbc
PM
245/*
246 * Early initialization entry point. This is called by head.S
247 * with MMU translation disabled. We rely on the "feature" of
248 * the CPU that ignores the top 2 bits of the address in real
249 * mode so we can access kernel globals normally provided we
250 * only toy with things in the RMO region. From here, we do
95f72d1e 251 * some early parsing of the device-tree to setup our MEMBLOCK
40ef8cbc
PM
252 * data structures, and allocate & initialize the hash table
253 * and segment tables so we can start running with translation
254 * enabled.
255 *
256 * It is this function which will call the probe() callback of
257 * the various platform types and copy the matching one to the
258 * global ppc_md structure. Your platform can eventually do
259 * some very early initializations from the probe() routine, but
260 * this is not recommended, be very careful as, for example, the
261 * device-tree is not accessible via normal means at this point.
262 */
263
264void __init early_setup(unsigned long dt_ptr)
265{
6a7e4064
GL
266 static __initdata struct paca_struct boot_paca;
267
24d96495
BH
268 /* -------- printk is _NOT_ safe to use here ! ------- */
269
42c4aaad 270 /* Identify CPU type */
974a76f5 271 identify_cpu(0, mfspr(SPRN_PVR));
42c4aaad 272
33dbcf72 273 /* Assume we're on cpu 0 for now. Don't write to the paca yet! */
1426d5a3
ME
274 initialise_paca(&boot_paca, 0);
275 setup_paca(&boot_paca);
25e13814 276 fixup_boot_paca();
33dbcf72 277
24d96495
BH
278 /* -------- printk is now safe to use ------- */
279
f2fd2513
BH
280 /* Enable early debugging if any specified (see udbg.h) */
281 udbg_early_init();
282
e8222502 283 DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
40ef8cbc 284
40ef8cbc 285 /*
3c607ce2
LV
286 * Do early initialization using the flattened device
287 * tree, such as retrieving the physical memory map or
288 * calculating/retrieving the hash table size.
40ef8cbc
PM
289 */
290 early_init_devtree(__va(dt_ptr));
291
4df20460 292 /* Now we know the logical id of our boot cpu, setup the paca. */
1426d5a3 293 setup_paca(&paca[boot_cpuid]);
25e13814 294 fixup_boot_paca();
4df20460 295
63c254a5 296 /*
d3cbff1b
BH
297 * Configure exception handlers. This include setting up trampolines
298 * if needed, setting exception endian mode, etc...
63c254a5 299 */
d3cbff1b 300 configure_exceptions();
0cc4746c 301
c4bd6cb8
BH
302 /* Apply all the dynamic patching */
303 apply_feature_fixups();
97f6e0cc 304 setup_feature_keys();
c4bd6cb8 305
9e8066f3
ME
306 /* Initialize the hash table or TLB handling */
307 early_init_mmu();
308
a944a9c4
BH
309 /*
310 * At this point, we can let interrupts switch to virtual mode
311 * (the MMU has been setup), so adjust the MSR in the PACA to
8f619b54 312 * have IR and DR set and enable AIL if it exists
a944a9c4 313 */
8f619b54 314 cpu_ready_for_interrupts();
a944a9c4 315
40ef8cbc 316 DBG(" <- early_setup()\n");
7191b615
BH
317
318#ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
319 /*
320 * This needs to be done *last* (after the above DBG() even)
321 *
322 * Right after we return from this function, we turn on the MMU
323 * which means the real-mode access trick that btext does will
324 * no longer work, it needs to switch to using a real MMU
325 * mapping. This call will ensure that it does
326 */
327 btext_map();
328#endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */
40ef8cbc
PM
329}
330
799d6046
PM
331#ifdef CONFIG_SMP
332void early_setup_secondary(void)
333{
103b7827 334 /* Mark interrupts disabled in PACA */
757c74d2 335 get_paca()->soft_enabled = 0;
799d6046 336
757c74d2
BH
337 /* Initialize the hash table or TLB handling */
338 early_init_mmu_secondary();
a944a9c4
BH
339
340 /*
341 * At this point, we can let interrupts switch to virtual mode
342 * (the MMU has been setup), so adjust the MSR in the PACA to
343 * have IR and DR set.
344 */
8f619b54 345 cpu_ready_for_interrupts();
799d6046
PM
346}
347
348#endif /* CONFIG_SMP */
40ef8cbc 349
da665885 350#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
567cf94d
SW
351static bool use_spinloop(void)
352{
353 if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
354 return true;
355
356 /*
357 * When book3e boots from kexec, the ePAPR spin table does
358 * not get used.
359 */
360 return of_property_read_bool(of_chosen, "linux,booted-from-kexec");
361}
362
b8f51021
ME
363void smp_release_cpus(void)
364{
758438a7 365 unsigned long *ptr;
9d07bc84 366 int i;
b8f51021 367
567cf94d
SW
368 if (!use_spinloop())
369 return;
370
b8f51021
ME
371 DBG(" -> smp_release_cpus()\n");
372
373 /* All secondary cpus are spinning on a common spinloop, release them
374 * all now so they can start to spin on their individual paca
375 * spinloops. For non SMP kernels, the secondary cpus never get out
376 * of the common spinloop.
1f6a93e4 377 */
b8f51021 378
758438a7
ME
379 ptr = (unsigned long *)((unsigned long)&__secondary_hold_spinloop
380 - PHYSICAL_START);
2751b628 381 *ptr = ppc_function_entry(generic_secondary_smp_init);
9d07bc84
BH
382
383 /* And wait a bit for them to catch up */
384 for (i = 0; i < 100000; i++) {
385 mb();
386 HMT_low();
7ac87abb 387 if (spinning_secondaries == 0)
9d07bc84
BH
388 break;
389 udelay(1);
390 }
7ac87abb 391 DBG("spinning_secondaries = %d\n", spinning_secondaries);
b8f51021
ME
392
393 DBG(" <- smp_release_cpus()\n");
394}
da665885 395#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
b8f51021 396
40ef8cbc 397/*
799d6046
PM
398 * Initialize some remaining members of the ppc64_caches and systemcfg
399 * structures
40ef8cbc
PM
400 * (at least until we get rid of them completely). This is mostly some
401 * cache information about the CPU that will be used by cache flush
402 * routines and/or provided to userland
403 */
e2827fe5
BH
404
405static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize,
406 u32 bsize, u32 sets)
407{
408 info->size = size;
409 info->sets = sets;
410 info->line_size = lsize;
411 info->block_size = bsize;
412 info->log_block_size = __ilog2(bsize);
413 info->blocks_per_page = PAGE_SIZE / bsize;
414}
415
416static bool __init parse_cache_info(struct device_node *np,
417 bool icache,
418 struct ppc_cache_info *info)
419{
420 static const char *ipropnames[] __initdata = {
421 "i-cache-size",
422 "i-cache-sets",
423 "i-cache-block-size",
424 "i-cache-line-size",
425 };
426 static const char *dpropnames[] __initdata = {
427 "d-cache-size",
428 "d-cache-sets",
429 "d-cache-block-size",
430 "d-cache-line-size",
431 };
432 const char **propnames = icache ? ipropnames : dpropnames;
433 const __be32 *sizep, *lsizep, *bsizep, *setsp;
434 u32 size, lsize, bsize, sets;
435 bool success = true;
436
437 size = 0;
438 sets = -1u;
439 lsize = bsize = cur_cpu_spec->dcache_bsize;
440 sizep = of_get_property(np, propnames[0], NULL);
441 if (sizep != NULL)
442 size = be32_to_cpu(*sizep);
443 setsp = of_get_property(np, propnames[1], NULL);
444 if (setsp != NULL)
445 sets = be32_to_cpu(*setsp);
446 bsizep = of_get_property(np, propnames[2], NULL);
447 lsizep = of_get_property(np, propnames[3], NULL);
448 if (bsizep == NULL)
449 bsizep = lsizep;
450 if (lsizep != NULL)
451 lsize = be32_to_cpu(*lsizep);
452 if (bsizep != NULL)
453 bsize = be32_to_cpu(*bsizep);
454 if (sizep == NULL || bsizep == NULL || lsizep == NULL)
455 success = false;
456
457 /*
458 * OF is weird .. it represents fully associative caches
459 * as "1 way" which doesn't make much sense and doesn't
460 * leave room for direct mapped. We'll assume that 0
461 * in OF means direct mapped for that reason.
462 */
463 if (sets == 1)
464 sets = 0;
465 else if (sets == 0)
466 sets = 1;
467
468 init_cache_info(info, size, lsize, bsize, sets);
469
470 return success;
471}
472
b1923caa 473void __init initialize_cache_info(void)
40ef8cbc 474{
608b4214
BH
475 struct device_node *cpu = NULL, *l2, *l3 = NULL;
476 u32 pvr;
40ef8cbc
PM
477
478 DBG(" -> initialize_cache_info()\n");
479
608b4214
BH
480 /*
481 * All shipping POWER8 machines have a firmware bug that
482 * puts incorrect information in the device-tree. This will
483 * be (hopefully) fixed for future chips but for now hard
484 * code the values if we are running on one of these
485 */
486 pvr = PVR_VER(mfspr(SPRN_PVR));
487 if (pvr == PVR_POWER8 || pvr == PVR_POWER8E ||
488 pvr == PVR_POWER8NVL) {
489 /* size lsize blk sets */
490 init_cache_info(&ppc64_caches.l1i, 0x8000, 128, 128, 32);
491 init_cache_info(&ppc64_caches.l1d, 0x10000, 128, 128, 64);
492 init_cache_info(&ppc64_caches.l2, 0x80000, 128, 0, 512);
493 init_cache_info(&ppc64_caches.l3, 0x800000, 128, 0, 8192);
494 } else
495 cpu = of_find_node_by_type(NULL, "cpu");
40ef8cbc 496
e2827fe5
BH
497 /*
498 * We're assuming *all* of the CPUs have the same
499 * d-cache and i-cache sizes... -Peter
500 */
65e01f38
BH
501 if (cpu) {
502 if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
e2827fe5
BH
503 DBG("Argh, can't find dcache properties !\n");
504
65e01f38 505 if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
e2827fe5 506 DBG("Argh, can't find icache properties !\n");
65e01f38
BH
507
508 /*
509 * Try to find the L2 and L3 if any. Assume they are
510 * unified and use the D-side properties.
511 */
512 l2 = of_find_next_cache_node(cpu);
513 of_node_put(cpu);
514 if (l2) {
515 parse_cache_info(l2, false, &ppc64_caches.l2);
516 l3 = of_find_next_cache_node(l2);
517 of_node_put(l2);
518 }
519 if (l3) {
520 parse_cache_info(l3, false, &ppc64_caches.l3);
521 of_node_put(l3);
522 }
40ef8cbc
PM
523 }
524
9df549af 525 /* For use by binfmt_elf */
e2827fe5
BH
526 dcache_bsize = ppc64_caches.l1d.block_size;
527 icache_bsize = ppc64_caches.l1i.block_size;
9df549af 528
40ef8cbc
PM
529 DBG(" <- initialize_cache_info()\n");
530}
531
40bd587a
BH
532/* This returns the limit below which memory accesses to the linear
533 * mapping are guaranteed not to cause a TLB or SLB miss. This is
534 * used to allocate interrupt or emergency stacks for which our
535 * exception entry path doesn't deal with being interrupted.
536 */
009776ba 537static __init u64 safe_stack_limit(void)
095c7965 538{
40bd587a
BH
539#ifdef CONFIG_PPC_BOOK3E
540 /* Freescale BookE bolts the entire linear mapping */
541 if (mmu_has_feature(MMU_FTR_TYPE_FSL_E))
542 return linear_map_top;
543 /* Other BookE, we assume the first GB is bolted */
544 return 1ul << 30;
545#else
546 /* BookS, the first segment is bolted */
547 if (mmu_has_feature(MMU_FTR_1T_SEGMENT))
095c7965 548 return 1UL << SID_SHIFT_1T;
095c7965 549 return 1UL << SID_SHIFT;
40bd587a 550#endif
095c7965
AB
551}
552
b1923caa 553void __init irqstack_early_init(void)
40ef8cbc 554{
40bd587a 555 u64 limit = safe_stack_limit();
40ef8cbc
PM
556 unsigned int i;
557
558 /*
8f4da26e
AB
559 * Interrupt stacks must be in the first segment since we
560 * cannot afford to take SLB misses on them.
40ef8cbc 561 */
0e551954 562 for_each_possible_cpu(i) {
3c726f8d 563 softirq_ctx[i] = (struct thread_info *)
95f72d1e 564 __va(memblock_alloc_base(THREAD_SIZE,
095c7965 565 THREAD_SIZE, limit));
3c726f8d 566 hardirq_ctx[i] = (struct thread_info *)
95f72d1e 567 __va(memblock_alloc_base(THREAD_SIZE,
095c7965 568 THREAD_SIZE, limit));
40ef8cbc
PM
569 }
570}
40ef8cbc 571
2d27cfd3 572#ifdef CONFIG_PPC_BOOK3E
b1923caa 573void __init exc_lvl_early_init(void)
2d27cfd3
BH
574{
575 unsigned int i;
160c7324 576 unsigned long sp;
2d27cfd3
BH
577
578 for_each_possible_cpu(i) {
160c7324
TC
579 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
580 critirq_ctx[i] = (struct thread_info *)__va(sp);
581 paca[i].crit_kstack = __va(sp + THREAD_SIZE);
582
583 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
584 dbgirq_ctx[i] = (struct thread_info *)__va(sp);
585 paca[i].dbg_kstack = __va(sp + THREAD_SIZE);
586
587 sp = memblock_alloc(THREAD_SIZE, THREAD_SIZE);
588 mcheckirq_ctx[i] = (struct thread_info *)__va(sp);
589 paca[i].mc_kstack = __va(sp + THREAD_SIZE);
2d27cfd3 590 }
d36b4c4f
KG
591
592 if (cpu_has_feature(CPU_FTR_DEBUG_LVL_EXC))
565c2f24 593 patch_exception(0x040, exc_debug_debug_book3e);
2d27cfd3 594}
2d27cfd3
BH
595#endif
596
40ef8cbc
PM
597/*
598 * Stack space used when we detect a bad kernel stack pointer, and
729b0f71
MS
599 * early in SMP boots before relocation is enabled. Exclusive emergency
600 * stack for machine checks.
40ef8cbc 601 */
b1923caa 602void __init emergency_stack_init(void)
40ef8cbc 603{
095c7965 604 u64 limit;
40ef8cbc
PM
605 unsigned int i;
606
607 /*
608 * Emergency stacks must be under 256MB, we cannot afford to take
609 * SLB misses on them. The ABI also requires them to be 128-byte
610 * aligned.
611 *
612 * Since we use these as temporary stacks during secondary CPU
613 * bringup, we need to get at them in real mode. This means they
614 * must also be within the RMO region.
615 */
40bd587a 616 limit = min(safe_stack_limit(), ppc64_rma_size);
40ef8cbc 617
3243d874 618 for_each_possible_cpu(i) {
5d31a96e
ME
619 struct thread_info *ti;
620 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
621 klp_init_thread_info(ti);
622 paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
729b0f71
MS
623
624#ifdef CONFIG_PPC_BOOK3S_64
625 /* emergency stack for machine check exception handling. */
5d31a96e
ME
626 ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
627 klp_init_thread_info(ti);
628 paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
729b0f71 629#endif
3243d874 630 }
40ef8cbc
PM
631}
632
7a0268fa 633#ifdef CONFIG_SMP
c2a7e818
TH
634#define PCPU_DYN_SIZE ()
635
636static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
7a0268fa 637{
c2a7e818
TH
638 return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
639 __pa(MAX_DMA_ADDRESS));
640}
7a0268fa 641
c2a7e818
TH
642static void __init pcpu_fc_free(void *ptr, size_t size)
643{
644 free_bootmem(__pa(ptr), size);
645}
7a0268fa 646
c2a7e818
TH
647static int pcpu_cpu_distance(unsigned int from, unsigned int to)
648{
649 if (cpu_to_node(from) == cpu_to_node(to))
650 return LOCAL_DISTANCE;
651 else
652 return REMOTE_DISTANCE;
653}
654
ae01f84b
AB
655unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
656EXPORT_SYMBOL(__per_cpu_offset);
657
c2a7e818
TH
658void __init setup_per_cpu_areas(void)
659{
660 const size_t dyn_size = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
661 size_t atom_size;
662 unsigned long delta;
663 unsigned int cpu;
664 int rc;
665
666 /*
667 * Linear mapping is one of 4K, 1M and 16M. For 4K, no need
668 * to group units. For larger mappings, use 1M atom which
669 * should be large enough to contain a number of units.
670 */
671 if (mmu_linear_psize == MMU_PAGE_4K)
672 atom_size = PAGE_SIZE;
673 else
674 atom_size = 1 << 20;
675
676 rc = pcpu_embed_first_chunk(0, dyn_size, atom_size, pcpu_cpu_distance,
677 pcpu_fc_alloc, pcpu_fc_free);
678 if (rc < 0)
679 panic("cannot initialize percpu area (err=%d)", rc);
680
681 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
ae01f84b
AB
682 for_each_possible_cpu(cpu) {
683 __per_cpu_offset[cpu] = delta + pcpu_unit_offsets[cpu];
684 paca[cpu].data_offset = __per_cpu_offset[cpu];
685 }
7a0268fa
AB
686}
687#endif
4cb3cee0 688
a5d86257
AB
689#ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
690unsigned long memory_block_size_bytes(void)
691{
692 if (ppc_md.memory_block_size)
693 return ppc_md.memory_block_size();
694
695 return MIN_MEMORY_BLOCK_SIZE;
696}
697#endif
4cb3cee0 698
ecd73cc5 699#if defined(CONFIG_PPC_INDIRECT_PIO) || defined(CONFIG_PPC_INDIRECT_MMIO)
4cb3cee0
BH
700struct ppc_pci_io ppc_pci_io;
701EXPORT_SYMBOL(ppc_pci_io);
ecd73cc5 702#endif
c54b2bf1
AB
703
704#ifdef CONFIG_HARDLOCKUP_DETECTOR
705u64 hw_nmi_get_sample_period(int watchdog_thresh)
706{
707 return ppc_proc_freq * watchdog_thresh;
708}
709
710/*
711 * The hardlockup detector breaks PMU event based branches and is likely
712 * to get false positives in KVM guests, so disable it by default.
713 */
714static int __init disable_hardlockup_detector(void)
715{
d19d5efd 716 hardlockup_detector_disable();
c54b2bf1
AB
717
718 return 0;
719}
720early_initcall(disable_hardlockup_detector);
721#endif