ARM: 5776/1: Check compiler version and EABI support when adding ARM unwind support.
[linux-2.6-block.git] / arch / arm / mm / mmu.c
CommitLineData
d111e8f9
RK
1/*
2 * linux/arch/arm/mm/mmu.c
3 *
4 * Copyright (C) 1995-2005 Russell King
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 as
8 * published by the Free Software Foundation.
9 */
ae8f1541 10#include <linux/module.h>
d111e8f9
RK
11#include <linux/kernel.h>
12#include <linux/errno.h>
13#include <linux/init.h>
14#include <linux/bootmem.h>
15#include <linux/mman.h>
16#include <linux/nodemask.h>
17
0ba8b9b2 18#include <asm/cputype.h>
d111e8f9 19#include <asm/mach-types.h>
37efe642 20#include <asm/sections.h>
3f973e22 21#include <asm/cachetype.h>
d111e8f9
RK
22#include <asm/setup.h>
23#include <asm/sizes.h>
e616c591 24#include <asm/smp_plat.h>
d111e8f9 25#include <asm/tlb.h>
d73cd428 26#include <asm/highmem.h>
d111e8f9
RK
27
28#include <asm/mach/arch.h>
29#include <asm/mach/map.h>
30
31#include "mm.h"
32
33DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
34
d111e8f9
RK
35/*
36 * empty_zero_page is a special page that is used for
37 * zero-initialized data and COW.
38 */
39struct page *empty_zero_page;
3653f3ab 40EXPORT_SYMBOL(empty_zero_page);
d111e8f9
RK
41
42/*
43 * The pmd table for the upper-most set of pages.
44 */
45pmd_t *top_pmd;
46
ae8f1541
RK
47#define CPOLICY_UNCACHED 0
48#define CPOLICY_BUFFERED 1
49#define CPOLICY_WRITETHROUGH 2
50#define CPOLICY_WRITEBACK 3
51#define CPOLICY_WRITEALLOC 4
52
53static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
54static unsigned int ecc_mask __initdata = 0;
44b18693 55pgprot_t pgprot_user;
ae8f1541
RK
56pgprot_t pgprot_kernel;
57
44b18693 58EXPORT_SYMBOL(pgprot_user);
ae8f1541
RK
59EXPORT_SYMBOL(pgprot_kernel);
60
61struct cachepolicy {
62 const char policy[16];
63 unsigned int cr_mask;
64 unsigned int pmd;
65 unsigned int pte;
66};
67
68static struct cachepolicy cache_policies[] __initdata = {
69 {
70 .policy = "uncached",
71 .cr_mask = CR_W|CR_C,
72 .pmd = PMD_SECT_UNCACHED,
bb30f36f 73 .pte = L_PTE_MT_UNCACHED,
ae8f1541
RK
74 }, {
75 .policy = "buffered",
76 .cr_mask = CR_C,
77 .pmd = PMD_SECT_BUFFERED,
bb30f36f 78 .pte = L_PTE_MT_BUFFERABLE,
ae8f1541
RK
79 }, {
80 .policy = "writethrough",
81 .cr_mask = 0,
82 .pmd = PMD_SECT_WT,
bb30f36f 83 .pte = L_PTE_MT_WRITETHROUGH,
ae8f1541
RK
84 }, {
85 .policy = "writeback",
86 .cr_mask = 0,
87 .pmd = PMD_SECT_WB,
bb30f36f 88 .pte = L_PTE_MT_WRITEBACK,
ae8f1541
RK
89 }, {
90 .policy = "writealloc",
91 .cr_mask = 0,
92 .pmd = PMD_SECT_WBWA,
bb30f36f 93 .pte = L_PTE_MT_WRITEALLOC,
ae8f1541
RK
94 }
95};
96
97/*
6cbdc8c5 98 * These are useful for identifying cache coherency
ae8f1541
RK
99 * problems by allowing the cache or the cache and
100 * writebuffer to be turned off. (Note: the write
101 * buffer should not be on and the cache off).
102 */
103static void __init early_cachepolicy(char **p)
104{
105 int i;
106
107 for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
108 int len = strlen(cache_policies[i].policy);
109
110 if (memcmp(*p, cache_policies[i].policy, len) == 0) {
111 cachepolicy = i;
112 cr_alignment &= ~cache_policies[i].cr_mask;
113 cr_no_alignment &= ~cache_policies[i].cr_mask;
114 *p += len;
115 break;
116 }
117 }
118 if (i == ARRAY_SIZE(cache_policies))
119 printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
11179d8c
CM
120 if (cpu_architecture() >= CPU_ARCH_ARMv6) {
121 printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
122 cachepolicy = CPOLICY_WRITEBACK;
123 }
ae8f1541
RK
124 flush_cache_all();
125 set_cr(cr_alignment);
126}
127__early_param("cachepolicy=", early_cachepolicy);
128
129static void __init early_nocache(char **__unused)
130{
131 char *p = "buffered";
132 printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
133 early_cachepolicy(&p);
134}
135__early_param("nocache", early_nocache);
136
137static void __init early_nowrite(char **__unused)
138{
139 char *p = "uncached";
140 printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
141 early_cachepolicy(&p);
142}
143__early_param("nowb", early_nowrite);
144
145static void __init early_ecc(char **p)
146{
147 if (memcmp(*p, "on", 2) == 0) {
148 ecc_mask = PMD_PROTECTION;
149 *p += 2;
150 } else if (memcmp(*p, "off", 3) == 0) {
151 ecc_mask = 0;
152 *p += 3;
153 }
154}
155__early_param("ecc=", early_ecc);
156
157static int __init noalign_setup(char *__unused)
158{
159 cr_alignment &= ~CR_A;
160 cr_no_alignment &= ~CR_A;
161 set_cr(cr_alignment);
162 return 1;
163}
164__setup("noalign", noalign_setup);
165
255d1f86
RK
166#ifndef CONFIG_SMP
167void adjust_cr(unsigned long mask, unsigned long set)
168{
169 unsigned long flags;
170
171 mask &= ~CR_A;
172
173 set &= mask;
174
175 local_irq_save(flags);
176
177 cr_no_alignment = (cr_no_alignment & ~mask) | set;
178 cr_alignment = (cr_alignment & ~mask) | set;
179
180 set_cr((get_cr() & ~mask) | set);
181
182 local_irq_restore(flags);
183}
184#endif
185
/*
 * Base protection bits shared by all device mappings: PTE bits for page
 * mappings and PMD bits for section mappings.  The expansions are
 * parenthesized so they combine safely with any surrounding operator.
 */
#define PROT_PTE_DEVICE		(L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE)
#define PROT_SECT_DEVICE	(PMD_TYPE_SECT|PMD_SECT_AP_WRITE)
0af92bef 188
b29e9f5e 189static struct mem_type mem_types[] = {
0af92bef 190 [MT_DEVICE] = { /* Strongly ordered / ARMv6 shared device */
bb30f36f
RK
191 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
192 L_PTE_SHARED,
0af92bef 193 .prot_l1 = PMD_TYPE_TABLE,
b1cce6b1 194 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_S,
0af92bef
RK
195 .domain = DOMAIN_IO,
196 },
197 [MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
bb30f36f 198 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
0af92bef 199 .prot_l1 = PMD_TYPE_TABLE,
b1cce6b1 200 .prot_sect = PROT_SECT_DEVICE,
0af92bef
RK
201 .domain = DOMAIN_IO,
202 },
203 [MT_DEVICE_CACHED] = { /* ioremap_cached */
bb30f36f 204 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
0af92bef
RK
205 .prot_l1 = PMD_TYPE_TABLE,
206 .prot_sect = PROT_SECT_DEVICE | PMD_SECT_WB,
207 .domain = DOMAIN_IO,
208 },
1ad77a87 209 [MT_DEVICE_WC] = { /* ioremap_wc */
bb30f36f 210 .prot_pte = PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
0af92bef 211 .prot_l1 = PMD_TYPE_TABLE,
b1cce6b1 212 .prot_sect = PROT_SECT_DEVICE,
0af92bef 213 .domain = DOMAIN_IO,
ae8f1541 214 },
ebb4c658
RK
215 [MT_UNCACHED] = {
216 .prot_pte = PROT_PTE_DEVICE,
217 .prot_l1 = PMD_TYPE_TABLE,
218 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
219 .domain = DOMAIN_IO,
220 },
ae8f1541 221 [MT_CACHECLEAN] = {
9ef79635 222 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
ae8f1541
RK
223 .domain = DOMAIN_KERNEL,
224 },
225 [MT_MINICLEAN] = {
9ef79635 226 .prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
ae8f1541
RK
227 .domain = DOMAIN_KERNEL,
228 },
229 [MT_LOW_VECTORS] = {
230 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
231 L_PTE_EXEC,
232 .prot_l1 = PMD_TYPE_TABLE,
233 .domain = DOMAIN_USER,
234 },
235 [MT_HIGH_VECTORS] = {
236 .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
237 L_PTE_USER | L_PTE_EXEC,
238 .prot_l1 = PMD_TYPE_TABLE,
239 .domain = DOMAIN_USER,
240 },
241 [MT_MEMORY] = {
9ef79635 242 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
ae8f1541
RK
243 .domain = DOMAIN_KERNEL,
244 },
245 [MT_ROM] = {
9ef79635 246 .prot_sect = PMD_TYPE_SECT,
ae8f1541
RK
247 .domain = DOMAIN_KERNEL,
248 },
e4707dd3
PW
249 [MT_MEMORY_NONCACHED] = {
250 .prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
251 .domain = DOMAIN_KERNEL,
252 },
ae8f1541
RK
253};
254
b29e9f5e
RK
255const struct mem_type *get_mem_type(unsigned int type)
256{
257 return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
258}
69d3a84a 259EXPORT_SYMBOL(get_mem_type);
b29e9f5e 260
ae8f1541
RK
261/*
262 * Adjust the PMD section entries according to the CPU in use.
263 */
264static void __init build_mem_type_table(void)
265{
266 struct cachepolicy *cp;
267 unsigned int cr = get_cr();
bb30f36f 268 unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
ae8f1541
RK
269 int cpu_arch = cpu_architecture();
270 int i;
271
11179d8c 272 if (cpu_arch < CPU_ARCH_ARMv6) {
ae8f1541 273#if defined(CONFIG_CPU_DCACHE_DISABLE)
11179d8c
CM
274 if (cachepolicy > CPOLICY_BUFFERED)
275 cachepolicy = CPOLICY_BUFFERED;
ae8f1541 276#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
11179d8c
CM
277 if (cachepolicy > CPOLICY_WRITETHROUGH)
278 cachepolicy = CPOLICY_WRITETHROUGH;
ae8f1541 279#endif
11179d8c 280 }
ae8f1541
RK
281 if (cpu_arch < CPU_ARCH_ARMv5) {
282 if (cachepolicy >= CPOLICY_WRITEALLOC)
283 cachepolicy = CPOLICY_WRITEBACK;
284 ecc_mask = 0;
285 }
bb30f36f
RK
286#ifdef CONFIG_SMP
287 cachepolicy = CPOLICY_WRITEALLOC;
288#endif
ae8f1541 289
1ad77a87 290 /*
b1cce6b1
RK
291 * Strip out features not present on earlier architectures.
292 * Pre-ARMv5 CPUs don't have TEX bits. Pre-ARMv6 CPUs or those
293 * without extended page tables don't have the 'Shared' bit.
1ad77a87 294 */
b1cce6b1
RK
295 if (cpu_arch < CPU_ARCH_ARMv5)
296 for (i = 0; i < ARRAY_SIZE(mem_types); i++)
297 mem_types[i].prot_sect &= ~PMD_SECT_TEX(7);
298 if ((cpu_arch < CPU_ARCH_ARMv6 || !(cr & CR_XP)) && !cpu_is_xsc3())
299 for (i = 0; i < ARRAY_SIZE(mem_types); i++)
300 mem_types[i].prot_sect &= ~PMD_SECT_S;
ae8f1541
RK
301
302 /*
b1cce6b1
RK
303 * ARMv5 and lower, bit 4 must be set for page tables (was: cache
304 * "update-able on write" bit on ARM610). However, Xscale and
305 * Xscale3 require this bit to be cleared.
ae8f1541 306 */
b1cce6b1 307 if (cpu_is_xscale() || cpu_is_xsc3()) {
9ef79635 308 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
ae8f1541 309 mem_types[i].prot_sect &= ~PMD_BIT4;
9ef79635
RK
310 mem_types[i].prot_l1 &= ~PMD_BIT4;
311 }
312 } else if (cpu_arch < CPU_ARCH_ARMv6) {
313 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
ae8f1541
RK
314 if (mem_types[i].prot_l1)
315 mem_types[i].prot_l1 |= PMD_BIT4;
9ef79635
RK
316 if (mem_types[i].prot_sect)
317 mem_types[i].prot_sect |= PMD_BIT4;
318 }
319 }
ae8f1541 320
b1cce6b1
RK
321 /*
322 * Mark the device areas according to the CPU/architecture.
323 */
324 if (cpu_is_xsc3() || (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP))) {
325 if (!cpu_is_xsc3()) {
326 /*
327 * Mark device regions on ARMv6+ as execute-never
328 * to prevent speculative instruction fetches.
329 */
330 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_XN;
331 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_XN;
332 mem_types[MT_DEVICE_CACHED].prot_sect |= PMD_SECT_XN;
333 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_XN;
334 }
335 if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
336 /*
337 * For ARMv7 with TEX remapping,
338 * - shared device is SXCB=1100
339 * - nonshared device is SXCB=0100
340 * - write combine device mem is SXCB=0001
341 * (Uncached Normal memory)
342 */
343 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1);
344 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(1);
345 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
346 } else if (cpu_is_xsc3()) {
347 /*
348 * For Xscale3,
349 * - shared device is TEXCB=00101
350 * - nonshared device is TEXCB=01000
351 * - write combine device mem is TEXCB=00100
352 * (Inner/Outer Uncacheable in xsc3 parlance)
353 */
354 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_TEX(1) | PMD_SECT_BUFFERED;
355 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
356 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
357 } else {
358 /*
359 * For ARMv6 and ARMv7 without TEX remapping,
360 * - shared device is TEXCB=00001
361 * - nonshared device is TEXCB=01000
362 * - write combine device mem is TEXCB=00100
363 * (Uncached Normal in ARMv6 parlance).
364 */
365 mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;
366 mem_types[MT_DEVICE_NONSHARED].prot_sect |= PMD_SECT_TEX(2);
367 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
368 }
369 } else {
370 /*
371 * On others, write combining is "Uncached/Buffered"
372 */
373 mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_BUFFERABLE;
374 }
375
376 /*
377 * Now deal with the memory-type mappings
378 */
ae8f1541 379 cp = &cache_policies[cachepolicy];
bb30f36f
RK
380 vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;
381
382#ifndef CONFIG_SMP
383 /*
384 * Only use write-through for non-SMP systems
385 */
386 if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
387 vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
388#endif
ae8f1541
RK
389
390 /*
391 * Enable CPU-specific coherency if supported.
392 * (Only available on XSC3 at the moment.)
393 */
b1cce6b1
RK
394 if (arch_is_coherent() && cpu_is_xsc3())
395 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
ae8f1541
RK
396
397 /*
398 * ARMv6 and above have extended page tables.
399 */
400 if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
ae8f1541
RK
401 /*
402 * Mark cache clean areas and XIP ROM read only
403 * from SVC mode and no access from userspace.
404 */
405 mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
406 mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
407 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
408
ae8f1541
RK
409#ifdef CONFIG_SMP
410 /*
411 * Mark memory with the "shared" attribute for SMP systems
412 */
413 user_pgprot |= L_PTE_SHARED;
414 kern_pgprot |= L_PTE_SHARED;
bb30f36f 415 vecs_pgprot |= L_PTE_SHARED;
ae8f1541 416 mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
e4707dd3 417 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_S;
ae8f1541
RK
418#endif
419 }
420
e4707dd3
PW
421 /*
422 * Non-cacheable Normal - intended for memory areas that must
423 * not cause dirty cache line writebacks when used
424 */
425 if (cpu_arch >= CPU_ARCH_ARMv6) {
426 if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
427 /* Non-cacheable Normal is XCB = 001 */
428 mem_types[MT_MEMORY_NONCACHED].prot_sect |=
429 PMD_SECT_BUFFERED;
430 } else {
431 /* For both ARMv6 and non-TEX-remapping ARMv7 */
432 mem_types[MT_MEMORY_NONCACHED].prot_sect |=
433 PMD_SECT_TEX(1);
434 }
435 } else {
436 mem_types[MT_MEMORY_NONCACHED].prot_sect |= PMD_SECT_BUFFERABLE;
437 }
438
ae8f1541
RK
439 for (i = 0; i < 16; i++) {
440 unsigned long v = pgprot_val(protection_map[i]);
bb30f36f 441 protection_map[i] = __pgprot(v | user_pgprot);
ae8f1541
RK
442 }
443
bb30f36f
RK
444 mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
445 mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;
ae8f1541 446
44b18693 447 pgprot_user = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
ae8f1541
RK
448 pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
449 L_PTE_DIRTY | L_PTE_WRITE |
450 L_PTE_EXEC | kern_pgprot);
451
452 mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
453 mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
454 mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
455 mem_types[MT_ROM].prot_sect |= cp->pmd;
456
457 switch (cp->pmd) {
458 case PMD_SECT_WT:
459 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
460 break;
461 case PMD_SECT_WB:
462 case PMD_SECT_WBWA:
463 mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
464 break;
465 }
466 printk("Memory policy: ECC %sabled, Data cache %s\n",
467 ecc_mask ? "en" : "dis", cp->policy);
2497f0a8
RK
468
469 for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
470 struct mem_type *t = &mem_types[i];
471 if (t->prot_l1)
472 t->prot_l1 |= PMD_DOMAIN(t->domain);
473 if (t->prot_sect)
474 t->prot_sect |= PMD_DOMAIN(t->domain);
475 }
ae8f1541
RK
476}
477
478#define vectors_base() (vectors_high() ? 0xffff0000 : 0)
479
24e6c699
RK
480static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
481 unsigned long end, unsigned long pfn,
482 const struct mem_type *type)
ae8f1541 483{
24e6c699 484 pte_t *pte;
ae8f1541 485
24e6c699
RK
486 if (pmd_none(*pmd)) {
487 pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
488 __pmd_populate(pmd, __pa(pte) | type->prot_l1);
489 }
ae8f1541 490
24e6c699
RK
491 pte = pte_offset_kernel(pmd, addr);
492 do {
40d192b6 493 set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
24e6c699
RK
494 pfn++;
495 } while (pte++, addr += PAGE_SIZE, addr != end);
ae8f1541
RK
496}
497
24e6c699
RK
498static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
499 unsigned long end, unsigned long phys,
500 const struct mem_type *type)
ae8f1541 501{
24e6c699 502 pmd_t *pmd = pmd_offset(pgd, addr);
ae8f1541 503
24e6c699
RK
504 /*
505 * Try a section mapping - end, addr and phys must all be aligned
506 * to a section boundary. Note that PMDs refer to the individual
507 * L1 entries, whereas PGDs refer to a group of L1 entries making
508 * up one logical pointer to an L2 table.
509 */
510 if (((addr | end | phys) & ~SECTION_MASK) == 0) {
511 pmd_t *p = pmd;
ae8f1541 512
24e6c699
RK
513 if (addr & SECTION_SIZE)
514 pmd++;
515
516 do {
517 *pmd = __pmd(phys | type->prot_sect);
518 phys += SECTION_SIZE;
519 } while (pmd++, addr += SECTION_SIZE, addr != end);
ae8f1541 520
24e6c699
RK
521 flush_pmd_entry(p);
522 } else {
523 /*
524 * No need to loop; pte's aren't interested in the
525 * individual L1 entries.
526 */
527 alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
528 }
ae8f1541
RK
529}
530
4a56c1e4
RK
531static void __init create_36bit_mapping(struct map_desc *md,
532 const struct mem_type *type)
533{
534 unsigned long phys, addr, length, end;
535 pgd_t *pgd;
536
537 addr = md->virtual;
538 phys = (unsigned long)__pfn_to_phys(md->pfn);
539 length = PAGE_ALIGN(md->length);
540
541 if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
542 printk(KERN_ERR "MM: CPU does not support supersection "
543 "mapping for 0x%08llx at 0x%08lx\n",
544 __pfn_to_phys((u64)md->pfn), addr);
545 return;
546 }
547
548 /* N.B. ARMv6 supersections are only defined to work with domain 0.
549 * Since domain assignments can in fact be arbitrary, the
550 * 'domain == 0' check below is required to insure that ARMv6
551 * supersections are only allocated for domain 0 regardless
552 * of the actual domain assignments in use.
553 */
554 if (type->domain) {
555 printk(KERN_ERR "MM: invalid domain in supersection "
556 "mapping for 0x%08llx at 0x%08lx\n",
557 __pfn_to_phys((u64)md->pfn), addr);
558 return;
559 }
560
561 if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
562 printk(KERN_ERR "MM: cannot create mapping for "
563 "0x%08llx at 0x%08lx invalid alignment\n",
564 __pfn_to_phys((u64)md->pfn), addr);
565 return;
566 }
567
568 /*
569 * Shift bits [35:32] of address into bits [23:20] of PMD
570 * (See ARMv6 spec).
571 */
572 phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);
573
574 pgd = pgd_offset_k(addr);
575 end = addr + length;
576 do {
577 pmd_t *pmd = pmd_offset(pgd, addr);
578 int i;
579
580 for (i = 0; i < 16; i++)
581 *pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);
582
583 addr += SUPERSECTION_SIZE;
584 phys += SUPERSECTION_SIZE;
585 pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
586 } while (addr != end);
587}
588
ae8f1541
RK
589/*
590 * Create the page directory entries and any necessary
591 * page tables for the mapping specified by `md'. We
592 * are able to cope here with varying sizes and address
593 * offsets, and we take full advantage of sections and
594 * supersections.
595 */
596void __init create_mapping(struct map_desc *md)
597{
24e6c699 598 unsigned long phys, addr, length, end;
d5c98176 599 const struct mem_type *type;
24e6c699 600 pgd_t *pgd;
ae8f1541
RK
601
602 if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
603 printk(KERN_WARNING "BUG: not creating mapping for "
604 "0x%08llx at 0x%08lx in user region\n",
605 __pfn_to_phys((u64)md->pfn), md->virtual);
606 return;
607 }
608
609 if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
610 md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
611 printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
612 "overlaps vmalloc space\n",
613 __pfn_to_phys((u64)md->pfn), md->virtual);
614 }
615
d5c98176 616 type = &mem_types[md->type];
ae8f1541
RK
617
618 /*
619 * Catch 36-bit addresses
620 */
4a56c1e4
RK
621 if (md->pfn >= 0x100000) {
622 create_36bit_mapping(md, type);
623 return;
ae8f1541
RK
624 }
625
7b9c7b4d 626 addr = md->virtual & PAGE_MASK;
24e6c699 627 phys = (unsigned long)__pfn_to_phys(md->pfn);
7b9c7b4d 628 length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));
ae8f1541 629
24e6c699 630 if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
ae8f1541
RK
631 printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
632 "be mapped using pages, ignoring.\n",
24e6c699 633 __pfn_to_phys(md->pfn), addr);
ae8f1541
RK
634 return;
635 }
636
24e6c699
RK
637 pgd = pgd_offset_k(addr);
638 end = addr + length;
639 do {
640 unsigned long next = pgd_addr_end(addr, end);
ae8f1541 641
24e6c699 642 alloc_init_section(pgd, addr, next, phys, type);
ae8f1541 643
24e6c699
RK
644 phys += next - addr;
645 addr = next;
646 } while (pgd++, addr != end);
ae8f1541
RK
647}
648
649/*
650 * Create the architecture specific mappings
651 */
652void __init iotable_init(struct map_desc *io_desc, int nr)
653{
654 int i;
655
656 for (i = 0; i < nr; i++)
657 create_mapping(io_desc + i);
658}
659
6c5da7ac
RK
660static unsigned long __initdata vmalloc_reserve = SZ_128M;
661
662/*
663 * vmalloc=size forces the vmalloc area to be exactly 'size'
664 * bytes. This can be used to increase (or decrease) the vmalloc
665 * area - the default is 128m.
666 */
667static void __init early_vmalloc(char **arg)
668{
669 vmalloc_reserve = memparse(*arg, arg);
670
671 if (vmalloc_reserve < SZ_16M) {
672 vmalloc_reserve = SZ_16M;
673 printk(KERN_WARNING
674 "vmalloc area too small, limiting to %luMB\n",
675 vmalloc_reserve >> 20);
676 }
9210807c
NP
677
678 if (vmalloc_reserve > VMALLOC_END - (PAGE_OFFSET + SZ_32M)) {
679 vmalloc_reserve = VMALLOC_END - (PAGE_OFFSET + SZ_32M);
680 printk(KERN_WARNING
681 "vmalloc area is too big, limiting to %luMB\n",
682 vmalloc_reserve >> 20);
683 }
6c5da7ac
RK
684}
685__early_param("vmalloc=", early_vmalloc);
686
/*
 * Lowest virtual address of the vmalloc reservation, as a pointer.
 * The whole expansion is parenthesized so the cast cannot be split
 * from its operand by a neighbouring operator.
 */
#define VMALLOC_MIN	((void *)(VMALLOC_END - vmalloc_reserve))
688
4b5f32ce 689static void __init sanity_check_meminfo(void)
60296c71 690{
dde5828f 691 int i, j, highmem = 0;
60296c71 692
4b5f32ce 693 for (i = 0, j = 0; i < meminfo.nr_banks; i++) {
a1bbaec0
NP
694 struct membank *bank = &meminfo.bank[j];
695 *bank = meminfo.bank[i];
60296c71 696
a1bbaec0 697#ifdef CONFIG_HIGHMEM
dde5828f
RK
698 if (__va(bank->start) > VMALLOC_MIN ||
699 __va(bank->start) < (void *)PAGE_OFFSET)
700 highmem = 1;
701
702 bank->highmem = highmem;
703
a1bbaec0
NP
704 /*
705 * Split those memory banks which are partially overlapping
706 * the vmalloc area greatly simplifying things later.
707 */
708 if (__va(bank->start) < VMALLOC_MIN &&
709 bank->size > VMALLOC_MIN - __va(bank->start)) {
710 if (meminfo.nr_banks >= NR_BANKS) {
711 printk(KERN_CRIT "NR_BANKS too low, "
712 "ignoring high memory\n");
713 } else {
714 memmove(bank + 1, bank,
715 (meminfo.nr_banks - i) * sizeof(*bank));
716 meminfo.nr_banks++;
717 i++;
718 bank[1].size -= VMALLOC_MIN - __va(bank->start);
719 bank[1].start = __pa(VMALLOC_MIN - 1) + 1;
dde5828f 720 bank[1].highmem = highmem = 1;
a1bbaec0
NP
721 j++;
722 }
723 bank->size = VMALLOC_MIN - __va(bank->start);
724 }
725#else
041d785f
RK
726 bank->highmem = highmem;
727
a1bbaec0
NP
728 /*
729 * Check whether this memory bank would entirely overlap
730 * the vmalloc area.
731 */
3fd9825c 732 if (__va(bank->start) >= VMALLOC_MIN ||
f0bba9f9 733 __va(bank->start) < (void *)PAGE_OFFSET) {
a1bbaec0
NP
734 printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
735 "(vmalloc region overlap).\n",
736 bank->start, bank->start + bank->size - 1);
737 continue;
738 }
60296c71 739
a1bbaec0
NP
740 /*
741 * Check whether this memory bank would partially overlap
742 * the vmalloc area.
743 */
744 if (__va(bank->start + bank->size) > VMALLOC_MIN ||
745 __va(bank->start + bank->size) < __va(bank->start)) {
746 unsigned long newsize = VMALLOC_MIN - __va(bank->start);
747 printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
748 "to -%.8lx (vmalloc region overlap).\n",
749 bank->start, bank->start + bank->size - 1,
750 bank->start + newsize - 1);
751 bank->size = newsize;
752 }
753#endif
754 j++;
60296c71 755 }
e616c591
RK
756#ifdef CONFIG_HIGHMEM
757 if (highmem) {
758 const char *reason = NULL;
759
760 if (cache_is_vipt_aliasing()) {
761 /*
762 * Interactions between kmap and other mappings
763 * make highmem support with aliasing VIPT caches
764 * rather difficult.
765 */
766 reason = "with VIPT aliasing cache";
767#ifdef CONFIG_SMP
768 } else if (tlb_ops_need_broadcast()) {
769 /*
770 * kmap_high needs to occasionally flush TLB entries,
771 * however, if the TLB entries need to be broadcast
772 * we may deadlock:
773 * kmap_high(irqs off)->flush_all_zero_pkmaps->
774 * flush_tlb_kernel_range->smp_call_function_many
775 * (must not be called with irqs off)
776 */
777 reason = "without hardware TLB ops broadcasting";
778#endif
779 }
780 if (reason) {
781 printk(KERN_CRIT "HIGHMEM is not supported %s, ignoring high memory\n",
782 reason);
783 while (j > 0 && meminfo.bank[j - 1].highmem)
784 j--;
785 }
786 }
787#endif
4b5f32ce 788 meminfo.nr_banks = j;
60296c71
LB
789}
790
4b5f32ce 791static inline void prepare_page_table(void)
d111e8f9
RK
792{
793 unsigned long addr;
794
795 /*
796 * Clear out all the mappings below the kernel image.
797 */
ab4f2ee1 798 for (addr = 0; addr < MODULES_VADDR; addr += PGDIR_SIZE)
d111e8f9
RK
799 pmd_clear(pmd_off_k(addr));
800
801#ifdef CONFIG_XIP_KERNEL
802 /* The XIP kernel is mapped in the module area -- skip over it */
37efe642 803 addr = ((unsigned long)_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
d111e8f9
RK
804#endif
805 for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
806 pmd_clear(pmd_off_k(addr));
807
808 /*
809 * Clear out all the kernel space mappings, except for the first
810 * memory bank, up to the end of the vmalloc region.
811 */
4b5f32ce 812 for (addr = __phys_to_virt(bank_phys_end(&meminfo.bank[0]));
d111e8f9
RK
813 addr < VMALLOC_END; addr += PGDIR_SIZE)
814 pmd_clear(pmd_off_k(addr));
815}
816
817/*
818 * Reserve the various regions of node 0
819 */
820void __init reserve_node_zero(pg_data_t *pgdat)
821{
822 unsigned long res_size = 0;
823
824 /*
825 * Register the kernel text and data with bootmem.
826 * Note that this can only be in node 0.
827 */
828#ifdef CONFIG_XIP_KERNEL
37efe642 829 reserve_bootmem_node(pgdat, __pa(_data), _end - _data,
72a7fe39 830 BOOTMEM_DEFAULT);
d111e8f9 831#else
37efe642 832 reserve_bootmem_node(pgdat, __pa(_stext), _end - _stext,
72a7fe39 833 BOOTMEM_DEFAULT);
d111e8f9
RK
834#endif
835
836 /*
837 * Reserve the page tables. These are already in use,
838 * and can only be in node 0.
839 */
840 reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
72a7fe39 841 PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);
d111e8f9
RK
842
843 /*
844 * Hmm... This should go elsewhere, but we really really need to
845 * stop things allocating the low memory; ideally we need a better
846 * implementation of GFP_DMA which does not assume that DMA-able
847 * memory starts at zero.
848 */
849 if (machine_is_integrator() || machine_is_cintegrator())
850 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
851
852 /*
853 * These should likewise go elsewhere. They pre-reserve the
854 * screen memory region at the start of main system memory.
855 */
856 if (machine_is_edb7211())
857 res_size = 0x00020000;
858 if (machine_is_p720t())
859 res_size = 0x00014000;
860
bbf6f280
BD
861 /* H1940 and RX3715 need to reserve this for suspend */
862
863 if (machine_is_h1940() || machine_is_rx3715()) {
72a7fe39
BW
864 reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
865 BOOTMEM_DEFAULT);
866 reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
867 BOOTMEM_DEFAULT);
9073341c
BD
868 }
869
81854f82
MV
870 if (machine_is_palmld() || machine_is_palmtx()) {
871 reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
872 BOOTMEM_EXCLUSIVE);
873 reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
874 BOOTMEM_EXCLUSIVE);
875 }
876
e6c3f4b8
TSC
877 if (machine_is_treo680()) {
878 reserve_bootmem_node(pgdat, 0xa0000000, 0x1000,
879 BOOTMEM_EXCLUSIVE);
880 reserve_bootmem_node(pgdat, 0xa2000000, 0x1000,
881 BOOTMEM_EXCLUSIVE);
882 }
883
81854f82
MV
884 if (machine_is_palmt5())
885 reserve_bootmem_node(pgdat, 0xa0200000, 0x1000,
886 BOOTMEM_EXCLUSIVE);
887
d98aac75
LW
888 /*
889 * U300 - This platform family can share physical memory
890 * between two ARM cpus, one running Linux and the other
891 * running another OS.
892 */
893 if (machine_is_u300()) {
894#ifdef CONFIG_MACH_U300_SINGLE_RAM
895#if ((CONFIG_MACH_U300_ACCESS_MEM_SIZE & 1) == 1) && \
896 CONFIG_MACH_U300_2MB_ALIGNMENT_FIX
897 res_size = 0x00100000;
898#endif
899#endif
900 }
901
d111e8f9
RK
902#ifdef CONFIG_SA1111
903 /*
904 * Because of the SA1111 DMA bug, we want to preserve our
905 * precious DMA-able memory...
906 */
907 res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
908#endif
909 if (res_size)
72a7fe39
BW
910 reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
911 BOOTMEM_DEFAULT);
d111e8f9
RK
912}
913
914/*
915 * Set up device the mappings. Since we clear out the page tables for all
916 * mappings above VMALLOC_END, we will remove any debug device mappings.
917 * This means you have to be careful how you debug this function, or any
918 * called function. This means you can't use any function or debugging
919 * method which may touch any device, otherwise the kernel _will_ crash.
920 */
921static void __init devicemaps_init(struct machine_desc *mdesc)
922{
923 struct map_desc map;
924 unsigned long addr;
925 void *vectors;
926
927 /*
928 * Allocate the vector page early.
929 */
930 vectors = alloc_bootmem_low_pages(PAGE_SIZE);
d111e8f9
RK
931
932 for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
933 pmd_clear(pmd_off_k(addr));
934
935 /*
936 * Map the kernel if it is XIP.
937 * It is always first in the modulearea.
938 */
939#ifdef CONFIG_XIP_KERNEL
940 map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
ab4f2ee1 941 map.virtual = MODULES_VADDR;
37efe642 942 map.length = ((unsigned long)_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
d111e8f9
RK
943 map.type = MT_ROM;
944 create_mapping(&map);
945#endif
946
947 /*
948 * Map the cache flushing regions.
949 */
950#ifdef FLUSH_BASE
951 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
952 map.virtual = FLUSH_BASE;
953 map.length = SZ_1M;
954 map.type = MT_CACHECLEAN;
955 create_mapping(&map);
956#endif
957#ifdef FLUSH_BASE_MINICACHE
958 map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
959 map.virtual = FLUSH_BASE_MINICACHE;
960 map.length = SZ_1M;
961 map.type = MT_MINICLEAN;
962 create_mapping(&map);
963#endif
964
965 /*
966 * Create a mapping for the machine vectors at the high-vectors
967 * location (0xffff0000). If we aren't using high-vectors, also
968 * create a mapping at the low-vectors virtual address.
969 */
970 map.pfn = __phys_to_pfn(virt_to_phys(vectors));
971 map.virtual = 0xffff0000;
972 map.length = PAGE_SIZE;
973 map.type = MT_HIGH_VECTORS;
974 create_mapping(&map);
975
976 if (!vectors_high()) {
977 map.virtual = 0;
978 map.type = MT_LOW_VECTORS;
979 create_mapping(&map);
980 }
981
982 /*
983 * Ask the machine support to map in the statically mapped devices.
984 */
985 if (mdesc->map_io)
986 mdesc->map_io();
987
988 /*
989 * Finally flush the caches and tlb to ensure that we're in a
990 * consistent state wrt the writebuffer. This also ensures that
991 * any write-allocated cache lines in the vector page are written
992 * back. After this point, we can start to touch devices again.
993 */
994 local_flush_tlb_all();
995 flush_cache_all();
996}
997
d73cd428
NP
998static void __init kmap_init(void)
999{
1000#ifdef CONFIG_HIGHMEM
1001 pmd_t *pmd = pmd_off_k(PKMAP_BASE);
1002 pte_t *pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
1003 BUG_ON(!pmd_none(*pmd) || !pte);
1004 __pmd_populate(pmd, __pa(pte) | _PAGE_KERNEL_TABLE);
1005 pkmap_page_table = pte + PTRS_PER_PTE;
1006#endif
1007}
1008
d111e8f9
RK
1009/*
1010 * paging_init() sets up the page tables, initialises the zone memory
1011 * maps, and sets up the zero page, bad page and bad page tables.
1012 */
4b5f32ce 1013void __init paging_init(struct machine_desc *mdesc)
d111e8f9
RK
1014{
1015 void *zero_page;
1016
1017 build_mem_type_table();
4b5f32ce
NP
1018 sanity_check_meminfo();
1019 prepare_page_table();
1020 bootmem_init();
d111e8f9 1021 devicemaps_init(mdesc);
d73cd428 1022 kmap_init();
d111e8f9
RK
1023
1024 top_pmd = pmd_off_k(0xffff0000);
1025
1026 /*
6ce1b871
JL
1027 * allocate the zero page. Note that this always succeeds and
1028 * returns a zeroed result.
d111e8f9
RK
1029 */
1030 zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
d111e8f9
RK
1031 empty_zero_page = virt_to_page(zero_page);
1032 flush_dcache_page(empty_zero_page);
1033}
ae8f1541
RK
1034
1035/*
1036 * In order to soft-boot, we need to insert a 1:1 mapping in place of
1037 * the user-mode pages. This will then ensure that we have predictable
1038 * results when turning the mmu off
1039 */
1040void setup_mm_for_reboot(char mode)
1041{
1042 unsigned long base_pmdval;
1043 pgd_t *pgd;
1044 int i;
1045
1046 if (current->mm && current->mm->pgd)
1047 pgd = current->mm->pgd;
1048 else
1049 pgd = init_mm.pgd;
1050
1051 base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
1052 if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
1053 base_pmdval |= PMD_BIT4;
1054
1055 for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
1056 unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
1057 pmd_t *pmd;
1058
1059 pmd = pmd_off(pgd, i << PGDIR_SHIFT);
1060 pmd[0] = __pmd(pmdval);
1061 pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
1062 flush_pmd_entry(pmd);
1063 }
1064}