[ARM] fix VFP+softfloat binaries
/*
 *  linux/arch/arm/mm/mmu.c
 *
 *  Copyright (C) 1995-2005 Russell King
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/mman.h>
#include <linux/nodemask.h>

#include <asm/cputype.h>
#include <asm/mach-types.h>
#include <asm/setup.h>
#include <asm/sizes.h>
#include <asm/tlb.h>

#include <asm/mach/arch.h>
#include <asm/mach/map.h>

#include "mm.h"

DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);

/*
 * empty_zero_page is a special page that is used for
 * zero-initialized data and COW.
 */
struct page *empty_zero_page;
EXPORT_SYMBOL(empty_zero_page);

/*
 * The pmd table for the upper-most set of pages.
 */
pmd_t *top_pmd;

#define CPOLICY_UNCACHED	0
#define CPOLICY_BUFFERED	1
#define CPOLICY_WRITETHROUGH	2
#define CPOLICY_WRITEBACK	3
#define CPOLICY_WRITEALLOC	4

static unsigned int cachepolicy __initdata = CPOLICY_WRITEBACK;
static unsigned int ecc_mask __initdata = 0;
pgprot_t pgprot_user;
pgprot_t pgprot_kernel;

EXPORT_SYMBOL(pgprot_user);
EXPORT_SYMBOL(pgprot_kernel);

struct cachepolicy {
	const char	policy[16];
	unsigned int	cr_mask;
	unsigned int	pmd;
	unsigned int	pte;
};

static struct cachepolicy cache_policies[] __initdata = {
	{
		.policy		= "uncached",
		.cr_mask	= CR_W|CR_C,
		.pmd		= PMD_SECT_UNCACHED,
		.pte		= L_PTE_MT_UNCACHED,
	}, {
		.policy		= "buffered",
		.cr_mask	= CR_C,
		.pmd		= PMD_SECT_BUFFERED,
		.pte		= L_PTE_MT_BUFFERABLE,
	}, {
		.policy		= "writethrough",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WT,
		.pte		= L_PTE_MT_WRITETHROUGH,
	}, {
		.policy		= "writeback",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WB,
		.pte		= L_PTE_MT_WRITEBACK,
	}, {
		.policy		= "writealloc",
		.cr_mask	= 0,
		.pmd		= PMD_SECT_WBWA,
		.pte		= L_PTE_MT_WRITEALLOC,
	}
};

/*
 * These are useful for identifying cache coherency
 * problems by allowing the cache or the cache and
 * writebuffer to be turned off.  (Note: the write
 * buffer should not be on and the cache off).
 */
static void __init early_cachepolicy(char **p)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(cache_policies); i++) {
		int len = strlen(cache_policies[i].policy);

		if (memcmp(*p, cache_policies[i].policy, len) == 0) {
			cachepolicy = i;
			cr_alignment &= ~cache_policies[i].cr_mask;
			cr_no_alignment &= ~cache_policies[i].cr_mask;
			*p += len;
			break;
		}
	}
	if (i == ARRAY_SIZE(cache_policies))
		printk(KERN_ERR "ERROR: unknown or unsupported cache policy\n");
	if (cpu_architecture() >= CPU_ARCH_ARMv6) {
		printk(KERN_WARNING "Only cachepolicy=writeback supported on ARMv6 and later\n");
		cachepolicy = CPOLICY_WRITEBACK;
	}
	flush_cache_all();
	set_cr(cr_alignment);
}
__early_param("cachepolicy=", early_cachepolicy);
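
/*
 * For example (illustrative): booting with "cachepolicy=writethrough"
 * selects the "writethrough" entry in cache_policies[] above; on ARMv6
 * and later the request is downgraded to writeback with a warning.
 */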

static void __init early_nocache(char **__unused)
{
	char *p = "buffered";
	printk(KERN_WARNING "nocache is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(&p);
}
__early_param("nocache", early_nocache);

static void __init early_nowrite(char **__unused)
{
	char *p = "uncached";
	printk(KERN_WARNING "nowb is deprecated; use cachepolicy=%s\n", p);
	early_cachepolicy(&p);
}
__early_param("nowb", early_nowrite);

static void __init early_ecc(char **p)
{
	if (memcmp(*p, "on", 2) == 0) {
		ecc_mask = PMD_PROTECTION;
		*p += 2;
	} else if (memcmp(*p, "off", 3) == 0) {
		ecc_mask = 0;
		*p += 3;
	}
}
__early_param("ecc=", early_ecc);

static int __init noalign_setup(char *__unused)
{
	cr_alignment &= ~CR_A;
	cr_no_alignment &= ~CR_A;
	set_cr(cr_alignment);
	return 1;
}
__setup("noalign", noalign_setup);

#ifndef CONFIG_SMP
void adjust_cr(unsigned long mask, unsigned long set)
{
	unsigned long flags;

	mask &= ~CR_A;

	set &= mask;

	local_irq_save(flags);

	cr_no_alignment = (cr_no_alignment & ~mask) | set;
	cr_alignment = (cr_alignment & ~mask) | set;

	set_cr((get_cr() & ~mask) | set);

	local_irq_restore(flags);
}
#endif

#define PROT_PTE_DEVICE		L_PTE_PRESENT|L_PTE_YOUNG|L_PTE_DIRTY|L_PTE_WRITE
#define PROT_SECT_DEVICE	PMD_TYPE_SECT|PMD_SECT_XN|PMD_SECT_AP_WRITE

static struct mem_type mem_types[] = {
	[MT_DEVICE] = {		  /* Strongly ordered / ARMv6 shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_SHARED |
				  L_PTE_SHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_UNCACHED,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_NONSHARED] = { /* ARMv6 non-shared device */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_NONSHARED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_TEX(2),
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_CACHED] = {	  /* ioremap_cached */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_CACHED,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_WB,
		.domain		= DOMAIN_IO,
	},
	[MT_DEVICE_WC] = {	/* ioremap_wc */
		.prot_pte	= PROT_PTE_DEVICE | L_PTE_MT_DEV_WC,
		.prot_l1	= PMD_TYPE_TABLE,
		.prot_sect	= PROT_SECT_DEVICE | PMD_SECT_BUFFERABLE,
		.domain		= DOMAIN_IO,
	},
	[MT_CACHECLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_MINICLEAN] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_XN | PMD_SECT_MINICACHE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_LOW_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_EXEC,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_HIGH_VECTORS] = {
		.prot_pte  = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
				L_PTE_USER | L_PTE_EXEC,
		.prot_l1   = PMD_TYPE_TABLE,
		.domain    = DOMAIN_USER,
	},
	[MT_MEMORY] = {
		.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
		.domain    = DOMAIN_KERNEL,
	},
	[MT_ROM] = {
		.prot_sect = PMD_TYPE_SECT,
		.domain    = DOMAIN_KERNEL,
	},
};

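/*
 * Each entry above gives the L2 (pte), L1 table pointer and L1 section
 * protection bits for one memory type, plus the ARM domain it belongs
 * to.  These values are only a baseline: build_mem_type_table() below
 * adjusts them for the CPU architecture and cache policy in use before
 * any mapping is created.
 */
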
const struct mem_type *get_mem_type(unsigned int type)
{
	return type < ARRAY_SIZE(mem_types) ? &mem_types[type] : NULL;
}

/*
 * Adjust the PMD section entries according to the CPU in use.
 */
static void __init build_mem_type_table(void)
{
	struct cachepolicy *cp;
	unsigned int cr = get_cr();
	unsigned int user_pgprot, kern_pgprot, vecs_pgprot;
	int cpu_arch = cpu_architecture();
	int i;

	if (cpu_arch < CPU_ARCH_ARMv6) {
#if defined(CONFIG_CPU_DCACHE_DISABLE)
		if (cachepolicy > CPOLICY_BUFFERED)
			cachepolicy = CPOLICY_BUFFERED;
#elif defined(CONFIG_CPU_DCACHE_WRITETHROUGH)
		if (cachepolicy > CPOLICY_WRITETHROUGH)
			cachepolicy = CPOLICY_WRITETHROUGH;
#endif
	}
	if (cpu_arch < CPU_ARCH_ARMv5) {
		if (cachepolicy >= CPOLICY_WRITEALLOC)
			cachepolicy = CPOLICY_WRITEBACK;
		ecc_mask = 0;
	}
#ifdef CONFIG_SMP
	cachepolicy = CPOLICY_WRITEALLOC;
#endif

	/*
	 * On non-Xscale3 ARMv5-and-older systems, use CB=01
	 * (Uncached/Buffered) for ioremap_wc() mappings.  On XScale3
	 * and ARMv6+, use TEXCB=00100 mappings (Inner/Outer Uncacheable
	 * in xsc3 parlance, Uncached Normal in ARMv6 parlance).
	 */
	if (cpu_is_xsc3() || cpu_arch >= CPU_ARCH_ARMv6) {
		mem_types[MT_DEVICE_WC].prot_sect |= PMD_SECT_TEX(1);
		mem_types[MT_DEVICE_WC].prot_sect &= ~PMD_SECT_BUFFERABLE;
	}

	/*
	 * On ARMv5 and lower, bit 4 must be set for page tables (this was
	 * the cache "update-able on write" bit on ARM610).  However,
	 * Xscale cores require this bit to be cleared.
	 */
	if (cpu_is_xscale()) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			mem_types[i].prot_sect &= ~PMD_BIT4;
			mem_types[i].prot_l1 &= ~PMD_BIT4;
		}
	} else if (cpu_arch < CPU_ARCH_ARMv6) {
		for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
			if (mem_types[i].prot_l1)
				mem_types[i].prot_l1 |= PMD_BIT4;
			if (mem_types[i].prot_sect)
				mem_types[i].prot_sect |= PMD_BIT4;
		}
	}

	cp = &cache_policies[cachepolicy];
	vecs_pgprot = kern_pgprot = user_pgprot = cp->pte;

#ifndef CONFIG_SMP
	/*
	 * Only use write-through for non-SMP systems
	 */
	if (cpu_arch >= CPU_ARCH_ARMv5 && cachepolicy > CPOLICY_WRITETHROUGH)
		vecs_pgprot = cache_policies[CPOLICY_WRITETHROUGH].pte;
#endif

	/*
	 * Enable CPU-specific coherency if supported.
	 * (Only available on XSC3 at the moment.)
	 */
	if (arch_is_coherent()) {
		if (cpu_is_xsc3()) {
			mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
			mem_types[MT_MEMORY].prot_pte |= L_PTE_SHARED;
		}
	}

	/*
	 * ARMv6 and above have extended page tables.
	 */
	if (cpu_arch >= CPU_ARCH_ARMv6 && (cr & CR_XP)) {
		/*
		 * Mark cache clean areas and XIP ROM read only
		 * from SVC mode and no access from userspace.
		 */
		mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;

		/*
		 * Mark the device area as "shared device"
		 */
		mem_types[MT_DEVICE].prot_sect |= PMD_SECT_BUFFERED;

#ifdef CONFIG_SMP
		/*
		 * Mark memory with the "shared" attribute for SMP systems
		 */
		user_pgprot |= L_PTE_SHARED;
		kern_pgprot |= L_PTE_SHARED;
		vecs_pgprot |= L_PTE_SHARED;
		mem_types[MT_MEMORY].prot_sect |= PMD_SECT_S;
#endif
	}

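	/*
	 * protection_map[] holds the sixteen vm_flags combinations of
	 * VM_{READ,WRITE,EXEC,SHARED}; fold the user page protection
	 * bits chosen above into each of them.
	 */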
	for (i = 0; i < 16; i++) {
		unsigned long v = pgprot_val(protection_map[i]);
		protection_map[i] = __pgprot(v | user_pgprot);
	}

	mem_types[MT_LOW_VECTORS].prot_pte |= vecs_pgprot;
	mem_types[MT_HIGH_VECTORS].prot_pte |= vecs_pgprot;

	if (cpu_arch < CPU_ARCH_ARMv5)
		mem_types[MT_MINICLEAN].prot_sect &= ~PMD_SECT_TEX(1);

	pgprot_user   = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG | user_pgprot);
	pgprot_kernel = __pgprot(L_PTE_PRESENT | L_PTE_YOUNG |
				 L_PTE_DIRTY | L_PTE_WRITE |
				 L_PTE_EXEC | kern_pgprot);

	mem_types[MT_LOW_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_HIGH_VECTORS].prot_l1 |= ecc_mask;
	mem_types[MT_MEMORY].prot_sect |= ecc_mask | cp->pmd;
	mem_types[MT_ROM].prot_sect |= cp->pmd;

	switch (cp->pmd) {
	case PMD_SECT_WT:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WT;
		break;
	case PMD_SECT_WB:
	case PMD_SECT_WBWA:
		mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_WB;
		break;
	}
	printk("Memory policy: ECC %sabled, Data cache %s\n",
		ecc_mask ? "en" : "dis", cp->policy);

	for (i = 0; i < ARRAY_SIZE(mem_types); i++) {
		struct mem_type *t = &mem_types[i];
		if (t->prot_l1)
			t->prot_l1 |= PMD_DOMAIN(t->domain);
		if (t->prot_sect)
			t->prot_sect |= PMD_DOMAIN(t->domain);
	}
}

#define vectors_base()	(vectors_high() ? 0xffff0000 : 0)

static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr,
				  unsigned long end, unsigned long pfn,
				  const struct mem_type *type)
{
	pte_t *pte;

	if (pmd_none(*pmd)) {
		pte = alloc_bootmem_low_pages(2 * PTRS_PER_PTE * sizeof(pte_t));
		__pmd_populate(pmd, __pa(pte) | type->prot_l1);
	}

	pte = pte_offset_kernel(pmd, addr);
	do {
		set_pte_ext(pte, pfn_pte(pfn, __pgprot(type->prot_pte)), 0);
		pfn++;
	} while (pte++, addr += PAGE_SIZE, addr != end);
}

static void __init alloc_init_section(pgd_t *pgd, unsigned long addr,
				      unsigned long end, unsigned long phys,
				      const struct mem_type *type)
{
	pmd_t *pmd = pmd_offset(pgd, addr);

	/*
	 * Try a section mapping - end, addr and phys must all be aligned
	 * to a section boundary.  Note that PMDs refer to the individual
	 * L1 entries, whereas PGDs refer to a group of L1 entries making
	 * up one logical pointer to an L2 table.
	 */
	if (((addr | end | phys) & ~SECTION_MASK) == 0) {
		pmd_t *p = pmd;

		if (addr & SECTION_SIZE)
			pmd++;

		do {
			*pmd = __pmd(phys | type->prot_sect);
			phys += SECTION_SIZE;
		} while (pmd++, addr += SECTION_SIZE, addr != end);

		flush_pmd_entry(p);
	} else {
		/*
		 * No need to loop; pte's aren't interested in the
		 * individual L1 entries.
		 */
		alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type);
	}
}

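/*
 * Supersections map 16MB at a time: each one is described by 16
 * identical, consecutive L1 entries, and bits [35:32] of the physical
 * address are carried in the descriptor, which is what allows mappings
 * above the 32-bit physical boundary on ARMv6 and XSC3.
 */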
static void __init create_36bit_mapping(struct map_desc *md,
					const struct mem_type *type)
{
	unsigned long phys, addr, length, end;
	pgd_t *pgd;

	addr = md->virtual;
	phys = (unsigned long)__pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length);

	if (!(cpu_architecture() >= CPU_ARCH_ARMv6 || cpu_is_xsc3())) {
		printk(KERN_ERR "MM: CPU does not support supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/* N.B.	ARMv6 supersections are only defined to work with domain 0.
	 *	Since domain assignments can in fact be arbitrary, the
	 *	'domain == 0' check below is required to ensure that ARMv6
	 *	supersections are only allocated for domain 0 regardless
	 *	of the actual domain assignments in use.
	 */
	if (type->domain) {
		printk(KERN_ERR "MM: invalid domain in supersection "
		       "mapping for 0x%08llx at 0x%08lx\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	if ((addr | length | __pfn_to_phys(md->pfn)) & ~SUPERSECTION_MASK) {
		printk(KERN_ERR "MM: cannot create mapping for "
		       "0x%08llx at 0x%08lx invalid alignment\n",
		       __pfn_to_phys((u64)md->pfn), addr);
		return;
	}

	/*
	 * Shift bits [35:32] of address into bits [23:20] of PMD
	 * (See ARMv6 spec).
	 */
	phys |= (((md->pfn >> (32 - PAGE_SHIFT)) & 0xF) << 20);

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		pmd_t *pmd = pmd_offset(pgd, addr);
		int i;

		for (i = 0; i < 16; i++)
			*pmd++ = __pmd(phys | type->prot_sect | PMD_SECT_SUPER);

		addr += SUPERSECTION_SIZE;
		phys += SUPERSECTION_SIZE;
		pgd += SUPERSECTION_SIZE >> PGDIR_SHIFT;
	} while (addr != end);
}

/*
 * Create the page directory entries and any necessary
 * page tables for the mapping specified by `md'.  We
 * are able to cope here with varying sizes and address
 * offsets, and we take full advantage of sections and
 * supersections.
 */
void __init create_mapping(struct map_desc *md)
{
	unsigned long phys, addr, length, end;
	const struct mem_type *type;
	pgd_t *pgd;

	if (md->virtual != vectors_base() && md->virtual < TASK_SIZE) {
		printk(KERN_WARNING "BUG: not creating mapping for "
		       "0x%08llx at 0x%08lx in user region\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
		return;
	}

	if ((md->type == MT_DEVICE || md->type == MT_ROM) &&
	    md->virtual >= PAGE_OFFSET && md->virtual < VMALLOC_END) {
		printk(KERN_WARNING "BUG: mapping for 0x%08llx at 0x%08lx "
		       "overlaps vmalloc space\n",
		       __pfn_to_phys((u64)md->pfn), md->virtual);
	}

	type = &mem_types[md->type];

	/*
	 * Catch 36-bit addresses
	 */
	if (md->pfn >= 0x100000) {
		create_36bit_mapping(md, type);
		return;
	}

	addr = md->virtual & PAGE_MASK;
	phys = (unsigned long)__pfn_to_phys(md->pfn);
	length = PAGE_ALIGN(md->length + (md->virtual & ~PAGE_MASK));

	if (type->prot_l1 == 0 && ((addr | phys | length) & ~SECTION_MASK)) {
		printk(KERN_WARNING "BUG: map for 0x%08lx at 0x%08lx can not "
		       "be mapped using pages, ignoring.\n",
		       __pfn_to_phys(md->pfn), addr);
		return;
	}

	pgd = pgd_offset_k(addr);
	end = addr + length;
	do {
		unsigned long next = pgd_addr_end(addr, end);

		alloc_init_section(pgd, addr, next, phys, type);

		phys += next - addr;
		addr = next;
	} while (pgd++, addr != end);
}

/*
 * Create the architecture specific mappings
 */
void __init iotable_init(struct map_desc *io_desc, int nr)
{
	int i;

	for (i = 0; i < nr; i++)
		create_mapping(io_desc + i);
}

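/*
 * Illustrative use from a machine's ->map_io() callback (the names and
 * addresses below are examples only, not a real platform definition):
 *
 *	static struct map_desc example_io_desc[] __initdata = {
 *		{
 *			.virtual	= 0xf8000000,
 *			.pfn		= __phys_to_pfn(0x40000000),
 *			.length		= SZ_1M,
 *			.type		= MT_DEVICE,
 *		},
 *	};
 *
 *	iotable_init(example_io_desc, ARRAY_SIZE(example_io_desc));
 */
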
static unsigned long __initdata vmalloc_reserve = SZ_128M;

/*
 * vmalloc=size forces the vmalloc area to be exactly 'size'
 * bytes. This can be used to increase (or decrease) the vmalloc
 * area - the default is 128m.
 */
static void __init early_vmalloc(char **arg)
{
	vmalloc_reserve = memparse(*arg, arg);

	if (vmalloc_reserve < SZ_16M) {
		vmalloc_reserve = SZ_16M;
		printk(KERN_WARNING
			"vmalloc area too small, limiting to %luMB\n",
			vmalloc_reserve >> 20);
	}
}
__early_param("vmalloc=", early_vmalloc);
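
/*
 * For example (illustrative): booting with "vmalloc=256M" reserves
 * 256MB for the vmalloc/ioremap area instead of the default 128MB;
 * anything below 16MB is clamped to 16MB.
 */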

#define VMALLOC_MIN	(void *)(VMALLOC_END - vmalloc_reserve)

static int __init check_membank_valid(struct membank *mb)
{
	/*
	 * Reject this memory region if it has a zero size or an
	 * invalid node number.
	 */
	if (mb->size == 0 || mb->node >= MAX_NUMNODES)
		return 0;

	/*
	 * Check whether this memory region would entirely overlap
	 * the vmalloc area.
	 */
	if (phys_to_virt(mb->start) >= VMALLOC_MIN) {
		printk(KERN_NOTICE "Ignoring RAM at %.8lx-%.8lx "
			"(vmalloc region overlap).\n",
			mb->start, mb->start + mb->size - 1);
		return 0;
	}

	/*
	 * Check whether this memory region would partially overlap
	 * the vmalloc area.
	 */
	if (phys_to_virt(mb->start + mb->size) < phys_to_virt(mb->start) ||
	    phys_to_virt(mb->start + mb->size) > VMALLOC_MIN) {
		unsigned long newsize = VMALLOC_MIN - phys_to_virt(mb->start);

		printk(KERN_NOTICE "Truncating RAM at %.8lx-%.8lx "
			"to -%.8lx (vmalloc region overlap).\n",
			mb->start, mb->start + mb->size - 1,
			mb->start + newsize - 1);
		mb->size = newsize;
	}

	return 1;
}

static void __init sanity_check_meminfo(struct meminfo *mi)
{
	int i, j;

	for (i = 0, j = 0; i < mi->nr_banks; i++) {
		if (check_membank_valid(&mi->bank[i]))
			mi->bank[j++] = mi->bank[i];
	}
	mi->nr_banks = j;
}

static inline void prepare_page_table(struct meminfo *mi)
{
	unsigned long addr;

	/*
	 * Clear out all the mappings below the kernel image.
	 */
	for (addr = 0; addr < MODULE_START; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

#ifdef CONFIG_XIP_KERNEL
	/* The XIP kernel is mapped in the module area -- skip over it */
	addr = ((unsigned long)&_etext + PGDIR_SIZE - 1) & PGDIR_MASK;
#endif
	for ( ; addr < PAGE_OFFSET; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Clear out all the kernel space mappings, except for the first
	 * memory bank, up to the end of the vmalloc region.
	 */
	for (addr = __phys_to_virt(mi->bank[0].start + mi->bank[0].size);
	     addr < VMALLOC_END; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));
}

/*
 * Reserve the various regions of node 0
 */
void __init reserve_node_zero(pg_data_t *pgdat)
{
	unsigned long res_size = 0;

	/*
	 * Register the kernel text and data with bootmem.
	 * Note that this can only be in node 0.
	 */
#ifdef CONFIG_XIP_KERNEL
	reserve_bootmem_node(pgdat, __pa(&__data_start), &_end - &__data_start,
			BOOTMEM_DEFAULT);
#else
	reserve_bootmem_node(pgdat, __pa(&_stext), &_end - &_stext,
			BOOTMEM_DEFAULT);
#endif

	/*
	 * Reserve the page tables.  These are already in use,
	 * and can only be in node 0.
	 */
	reserve_bootmem_node(pgdat, __pa(swapper_pg_dir),
			PTRS_PER_PGD * sizeof(pgd_t), BOOTMEM_DEFAULT);

	/*
	 * Hmm... This should go elsewhere, but we really really need to
	 * stop things allocating the low memory; ideally we need a better
	 * implementation of GFP_DMA which does not assume that DMA-able
	 * memory starts at zero.
	 */
	if (machine_is_integrator() || machine_is_cintegrator())
		res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;

	/*
	 * These should likewise go elsewhere.  They pre-reserve the
	 * screen memory region at the start of main system memory.
	 */
	if (machine_is_edb7211())
		res_size = 0x00020000;
	if (machine_is_p720t())
		res_size = 0x00014000;

	/* H1940 and RX3715 need to reserve this for suspend */

	if (machine_is_h1940() || machine_is_rx3715()) {
		reserve_bootmem_node(pgdat, 0x30003000, 0x1000,
				BOOTMEM_DEFAULT);
		reserve_bootmem_node(pgdat, 0x30081000, 0x1000,
				BOOTMEM_DEFAULT);
	}

#ifdef CONFIG_SA1111
	/*
	 * Because of the SA1111 DMA bug, we want to preserve our
	 * precious DMA-able memory...
	 */
	res_size = __pa(swapper_pg_dir) - PHYS_OFFSET;
#endif
	if (res_size)
		reserve_bootmem_node(pgdat, PHYS_OFFSET, res_size,
				BOOTMEM_DEFAULT);
}

/*
 * Set up the device mappings.  Since we clear out the page tables for
 * all mappings above VMALLOC_END, we will remove any debug device
 * mappings.  This means you have to be careful how you debug this
 * function, or any called function: you cannot use any function or
 * debugging method which may touch any device, otherwise the kernel
 * _will_ crash.
 */
static void __init devicemaps_init(struct machine_desc *mdesc)
{
	struct map_desc map;
	unsigned long addr;
	void *vectors;

	/*
	 * Allocate the vector page early.
	 */
	vectors = alloc_bootmem_low_pages(PAGE_SIZE);
	BUG_ON(!vectors);

	for (addr = VMALLOC_END; addr; addr += PGDIR_SIZE)
		pmd_clear(pmd_off_k(addr));

	/*
	 * Map the kernel if it is XIP.
	 * It is always first in the module area.
	 */
#ifdef CONFIG_XIP_KERNEL
	map.pfn = __phys_to_pfn(CONFIG_XIP_PHYS_ADDR & SECTION_MASK);
	map.virtual = MODULE_START;
	map.length = ((unsigned long)&_etext - map.virtual + ~SECTION_MASK) & SECTION_MASK;
	map.type = MT_ROM;
	create_mapping(&map);
#endif

	/*
	 * Map the cache flushing regions.
	 */
#ifdef FLUSH_BASE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS);
	map.virtual = FLUSH_BASE;
	map.length = SZ_1M;
	map.type = MT_CACHECLEAN;
	create_mapping(&map);
#endif
#ifdef FLUSH_BASE_MINICACHE
	map.pfn = __phys_to_pfn(FLUSH_BASE_PHYS + SZ_1M);
	map.virtual = FLUSH_BASE_MINICACHE;
	map.length = SZ_1M;
	map.type = MT_MINICLEAN;
	create_mapping(&map);
#endif

	/*
	 * Create a mapping for the machine vectors at the high-vectors
	 * location (0xffff0000).  If we aren't using high-vectors, also
	 * create a mapping at the low-vectors virtual address.
	 */
	map.pfn = __phys_to_pfn(virt_to_phys(vectors));
	map.virtual = 0xffff0000;
	map.length = PAGE_SIZE;
	map.type = MT_HIGH_VECTORS;
	create_mapping(&map);

	if (!vectors_high()) {
		map.virtual = 0;
		map.type = MT_LOW_VECTORS;
		create_mapping(&map);
	}

	/*
	 * Ask the machine support to map in the statically mapped devices.
	 */
	if (mdesc->map_io)
		mdesc->map_io();

	/*
	 * Finally flush the caches and tlb to ensure that we're in a
	 * consistent state wrt the writebuffer.  This also ensures that
	 * any write-allocated cache lines in the vector page are written
	 * back.  After this point, we can start to touch devices again.
	 */
	local_flush_tlb_all();
	flush_cache_all();
}

/*
 * paging_init() sets up the page tables, initialises the zone memory
 * maps, and sets up the zero page, bad page and bad page tables.
 */
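/*
 * Note the ordering below: the memory type table is built and the
 * meminfo sanitised before any mapping is created, the old page tables
 * are cleared before bootmem is initialised, and the device mappings
 * (which wipe everything above VMALLOC_END) come last.
 */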
void __init paging_init(struct meminfo *mi, struct machine_desc *mdesc)
{
	void *zero_page;

	build_mem_type_table();
	sanity_check_meminfo(mi);
	prepare_page_table(mi);
	bootmem_init(mi);
	devicemaps_init(mdesc);

	top_pmd = pmd_off_k(0xffff0000);

	/*
	 * allocate the zero page.  Note that we count on this going ok.
	 */
	zero_page = alloc_bootmem_low_pages(PAGE_SIZE);
	memzero(zero_page, PAGE_SIZE);
	empty_zero_page = virt_to_page(zero_page);
	flush_dcache_page(empty_zero_page);
}

/*
 * In order to soft-boot, we need to insert a 1:1 mapping in place of
 * the user-mode pages.  This will then ensure that we have predictable
 * results when turning the mmu off
 */
void setup_mm_for_reboot(char mode)
{
	unsigned long base_pmdval;
	pgd_t *pgd;
	int i;

	if (current->mm && current->mm->pgd)
		pgd = current->mm->pgd;
	else
		pgd = init_mm.pgd;

	base_pmdval = PMD_SECT_AP_WRITE | PMD_SECT_AP_READ | PMD_TYPE_SECT;
	if (cpu_architecture() <= CPU_ARCH_ARMv5TEJ && !cpu_is_xscale())
		base_pmdval |= PMD_BIT4;

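	/*
	 * Each pgd entry covers 2MB and is implemented as a pair of 1MB
	 * section entries, so both pmd[0] and pmd[1] are written with the
	 * flat 1:1 section mapping below.
	 */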
	for (i = 0; i < FIRST_USER_PGD_NR + USER_PTRS_PER_PGD; i++, pgd++) {
		unsigned long pmdval = (i << PGDIR_SHIFT) | base_pmdval;
		pmd_t *pmd;

		pmd = pmd_off(pgd, i << PGDIR_SHIFT);
		pmd[0] = __pmd(pmdval);
		pmd[1] = __pmd(pmdval + (1 << (PGDIR_SHIFT - 1)));
		flush_pmd_entry(pmd);
	}
}