#ifndef _I386_PGTABLE_H
#define _I386_PGTABLE_H

/*
 * The Linux memory management assumes a three-level page table setup. On
 * the i386, we use that, but "fold" the mid level into the top-level page
 * table, so that we physically have the same two-level page table as the
 * i386 mmu expects.
 *
 * This file contains the functions and defines necessary to modify and use
 * the i386 page table tree.
 */
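/*
 * Added illustration (a minimal sketch, not part of the original header):
 * software always walks the full set of page table levels, and the folded
 * levels collapse to no-ops at compile time:
 *
 *        pgd_t *pgd = pgd_offset(mm, addr);
 *        pud_t *pud = pud_offset(pgd, addr);     - always folded on i386
 *        pmd_t *pmd = pmd_offset(pud, addr);     - folded again when !PAE
 *        pte_t *pte = pte_offset_map(pmd, addr);
 *
 * On a non-PAE build pmd_offset() simply reinterprets the pgd entry, so
 * the hardware still sees the ordinary two-level i386 tree.
 */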
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/fixmap.h>
#include <linux/threads.h>
#include <asm/paravirt.h>

#include <linux/bitops.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>

struct mm_struct;
struct vm_area_struct;
/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc..
 */
#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
extern unsigned long empty_zero_page[1024];
extern pgd_t swapper_pg_dir[1024];
extern struct kmem_cache *pmd_cache;
extern spinlock_t pgd_lock;
extern struct page *pgd_list;
void check_pgt_cache(void);

void pmd_ctor(struct kmem_cache *, void *);
void pgtable_cache_init(void);
void paging_init(void);
/*
 * The Linux x86 paging architecture is 'compile-time dual-mode': it
 * implements both the traditional 2-level x86 page tables and the
 * newer 3-level PAE-mode page tables.
 */
#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level-defs.h>
# define PMD_SIZE        (1UL << PMD_SHIFT)
# define PMD_MASK        (~(PMD_SIZE-1))
#else
# include <asm/pgtable-2level-defs.h>
#endif

#define PGDIR_SIZE        (1UL << PGDIR_SHIFT)
#define PGDIR_MASK        (~(PGDIR_SIZE-1))

#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)

#define TWOLEVEL_PGDIR_SHIFT        22
#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
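/*
 * Added worked example (illustrative, assuming the default PAGE_OFFSET of
 * 0xC0000000 and a non-PAE build, so PGDIR_SHIFT == 22 and
 * PTRS_PER_PGD == 1024):
 *
 *        USER_PGD_PTRS   = 0xC0000000 >> 22 = 768
 *        KERNEL_PGD_PTRS = 1024 - 768       = 256
 *
 * i.e. the bottom 3GB of each pgd maps user space and the top 1GB is
 * reserved for the kernel.
 */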
/* Just an arbitrary offset to the start of the vmalloc VM area: the
 * current 8MB value just means that there will be an 8MB "hole" after the
 * physical memory until the kernel virtual memory starts. That means that
 * any out-of-bounds memory accesses will hopefully be caught.
 * The vmalloc() routines leave a hole of 4kB between each vmalloced
 * area for the same reason. ;)
 */
#define VMALLOC_OFFSET        (8*1024*1024)
#define VMALLOC_START        (((unsigned long) high_memory + \
                        2*VMALLOC_OFFSET-1) & ~(VMALLOC_OFFSET-1))
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END        (PKMAP_BASE-2*PAGE_SIZE)
#else
# define VMALLOC_END        (FIXADDR_START-2*PAGE_SIZE)
#endif
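/*
 * Added worked example (hypothetical numbers): with 896MB of lowmem,
 * high_memory is PAGE_OFFSET + 896MB == 0xF8000000, and
 *
 *        VMALLOC_START = (0xF8000000 + 2*0x800000 - 1) & ~0x7FFFFF
 *                      = 0xF8800000
 *
 * which leaves exactly one VMALLOC_OFFSET (8MB) guard hole above the
 * direct mapping.
 */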
/*
 * Define this if things work differently on an i386 and an i486:
 * it will (on an i486) warn about kernel memory accesses that are
 * done without an 'access_ok(VERIFY_WRITE,..)'
 */
#undef TEST_ACCESS_OK

/* The boot page tables (all created as a single array) */
extern unsigned long pg0[];

#define pte_present(x)        ((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
/* To avoid harmful races, pmd_none(x) should check only the lower word when PAE */
#define pmd_none(x)        (!(unsigned long)pmd_val(x))
#define pmd_present(x)        (pmd_val(x) & _PAGE_PRESENT)
#define pmd_bad(x)        ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)

#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))

/*
 * The following only work if pte_present() is true.
 * Undefined behaviour if not..
 */
static inline int pte_dirty(pte_t pte)        { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_young(pte_t pte)        { return pte_val(pte) & _PAGE_ACCESSED; }
static inline int pte_write(pte_t pte)        { return pte_val(pte) & _PAGE_RW; }
static inline int pte_huge(pte_t pte)        { return pte_val(pte) & _PAGE_PSE; }

/*
 * The following only works if pte_present() is not true.
 */
static inline int pte_file(pte_t pte)        { return pte_val(pte) & _PAGE_FILE; }

static inline pte_t pte_mkclean(pte_t pte)        { return __pte(pte_val(pte) & ~_PAGE_DIRTY); }
static inline pte_t pte_mkold(pte_t pte)        { return __pte(pte_val(pte) & ~_PAGE_ACCESSED); }
static inline pte_t pte_wrprotect(pte_t pte)        { return __pte(pte_val(pte) & ~_PAGE_RW); }
static inline pte_t pte_mkdirty(pte_t pte)        { return __pte(pte_val(pte) | _PAGE_DIRTY); }
static inline pte_t pte_mkyoung(pte_t pte)        { return __pte(pte_val(pte) | _PAGE_ACCESSED); }
static inline pte_t pte_mkwrite(pte_t pte)        { return __pte(pte_val(pte) | _PAGE_RW); }
static inline pte_t pte_mkhuge(pte_t pte)        { return __pte(pte_val(pte) | _PAGE_PSE); }

#ifdef CONFIG_X86_PAE
# include <asm/pgtable-3level.h>
#else
# include <asm/pgtable-2level.h>
#endif
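/*
 * Added usage sketch (illustration only): the accessors above are pure
 * value transformations, so callers compose them and install the result
 * through the proper setters, e.g.:
 *
 *        pte_t pte = mk_pte(page, vma->vm_page_prot);
 *        if (vma->vm_flags & VM_WRITE)
 *                pte = pte_mkwrite(pte_mkdirty(pte));
 *        set_pte_at(mm, addr, ptep, pte);
 */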
#ifndef CONFIG_PARAVIRT
/*
 * Rules for using pte_update - it must be called after any PTE update which
 * has not been done using the set_pte / clear_pte interfaces. It is used by
 * shadow mode hypervisors to resynchronize the shadow page tables. Kernel PTE
 * updates should either be sets, clears, or set_pte_atomic for P->P
 * transitions, which means this hook should only be called for user PTEs.
 * This hook implies a P->P protection or access change has taken place, which
 * requires a subsequent TLB flush. The notification can optionally be delayed
 * until the TLB flush event by using the pte_update_defer form of the
 * interface, but care must be taken to ensure that the flush happens while
 * still holding the same page table lock so that the shadow and primary pages
 * do not become out of sync on SMP.
 */
#define pte_update(mm, addr, ptep)                do { } while (0)
#define pte_update_defer(mm, addr, ptep)        do { } while (0)
#endif
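/*
 * Added illustration of the rule above (a sketch, not original code): a
 * P->P permission change on a user PTE done by a direct word write must
 * notify and flush under the same page table lock:
 *
 *        ptep->pte_low = entry.pte_low;
 *        pte_update_defer(vma->vm_mm, addr, ptep);
 *        flush_tlb_page(vma, addr);
 *
 * ptep_set_access_flags() below is an in-tree instance of this pattern.
 */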
/* local pte updates need not use xchg for locking */
static inline pte_t native_local_ptep_get_and_clear(pte_t *ptep)
{
        pte_t res = *ptep;

        /* Pure native function needs no input for mm, addr */
        native_pte_clear(NULL, 0, ptep);
        return res;
}
/*
 * We only update the dirty/accessed state if we set
 * the dirty bit by hand in the kernel, since the hardware
 * will do the accessed bit for us, and we don't want to
 * race with other CPUs that might be updating the dirty
 * bit at the same time.
 */
#define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
#define ptep_set_access_flags(vma, address, ptep, entry, dirty)        \
({                                                                \
        int __changed = !pte_same(*(ptep), entry);                \
        if (__changed && dirty) {                                \
                (ptep)->pte_low = (entry).pte_low;                \
                pte_update_defer((vma)->vm_mm, (address), (ptep));        \
                flush_tlb_page(vma, address);                        \
        }                                                        \
        __changed;                                                \
})
#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young(vma, addr, ptep) ({                \
        int __ret = 0;                                                \
        if (pte_young(*(ptep)))                                        \
                __ret = test_and_clear_bit(_PAGE_BIT_ACCESSED,        \
                                                &(ptep)->pte_low);        \
        if (__ret)                                                \
                pte_update((vma)->vm_mm, addr, ptep);                \
        __ret;                                                        \
})
#define __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(vma, address, ptep)                \
({                                                                \
        int __young;                                                \
        __young = ptep_test_and_clear_young((vma), (address), (ptep));        \
        if (__young)                                                \
                flush_tlb_page(vma, address);                        \
        __young;                                                \
})
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR
static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        pte_t pte = native_ptep_get_and_clear(ptep);
        pte_update(mm, addr, ptep);
        return pte;
}
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long addr, pte_t *ptep, int full)
{
        pte_t pte;
        if (full) {
                /*
                 * Full address destruction in progress; paravirt does not
                 * care about updates and native needs no locking
                 */
                pte = native_local_ptep_get_and_clear(ptep);
        } else {
                pte = ptep_get_and_clear(mm, addr, ptep);
        }
        return pte;
}
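/*
 * Added usage note (not original): generic mm code passes full == 1 only
 * during whole-address-space teardown (tlb->fullmm, i.e. process exit),
 * where skipping the locked xchg and the paravirt notification is safe
 * because no other user of the mm can observe the PTEs any more.
 */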
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
{
        clear_bit(_PAGE_BIT_RW, &ptep->pte_low);
        pte_update(mm, addr, ptep);
}
/*
 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
 *
 *  dst - pointer to pgd range anywhere on a pgd page
 *  src - pointer to pgd range anywhere on a pgd page
 *  count - the number of pgds to copy.
 *
 * dst and src can be on the same page, but the range must not overlap,
 * and must not cross a page boundary.
 */
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
        memcpy(dst, src, count * sizeof(pgd_t));
}
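/*
 * Added usage sketch (illustrative, using this header's own macros): pgd
 * setup code uses this to seed a fresh pgd with the shared kernel
 * mappings, along the lines of:
 *
 *        clone_pgd_range(new_pgd + USER_PGD_PTRS,
 *                        swapper_pg_dir + USER_PGD_PTRS,
 *                        KERNEL_PGD_PTRS);
 */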
/*
 * Macro to mark a page protection value as "uncacheable".
 * On processors which do not support it, this is a no-op.
 */
#define pgprot_noncached(prot)        ((boot_cpu_data.x86 > 3)        \
        ? (__pgprot(pgprot_val(prot) | _PAGE_PCD | _PAGE_PWT)) : (prot))

/*
 * Conversion functions: convert a page and protection to a page entry,
 * and a page entry and page directory to the page they refer to.
 */

#define mk_pte(page, pgprot)        pfn_pte(page_to_pfn(page), (pgprot))
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
        pte.pte_low &= _PAGE_CHG_MASK;
        pte.pte_low |= pgprot_val(newprot);
#ifdef CONFIG_X86_PAE
        /*
         * Chop off the NX bit (if present), and add the NX portion of
         * the newprot (if present):
         */
        pte.pte_high &= ~(1 << (_PAGE_BIT_NX - 32));
        pte.pte_high |= (pgprot_val(newprot) >> 32) &
                        (__supported_pte_mask >> 32);
#endif
        return pte;
}
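/*
 * Added illustration (a sketch of how generic code can use pte_modify(),
 * not original): an mprotect-style protection change clears the entry,
 * transforms it, and writes it back through the proper setter:
 *
 *        pte_t pte = ptep_get_and_clear(mm, addr, ptep);
 *        set_pte_at(mm, addr, ptep, pte_modify(pte, newprot));
 */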
#define pmd_large(pmd) \
        ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT))

/*
 * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
 *
 * this macro returns the index of the entry in the pgd page which would
 * control the given virtual address
 */
#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
#define pgd_index_k(addr) pgd_index(addr)
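/*
 * Added worked example (non-PAE, so PGDIR_SHIFT == 22 and
 * PTRS_PER_PGD == 1024): for the address 0xC0100000,
 *
 *        pgd_index(0xC0100000) = (0xC0100000 >> 22) & 1023 = 768
 *
 * i.e. the first kernel slot when PAGE_OFFSET is 0xC0000000.
 */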
/*
 * pgd_offset() returns a (pgd_t *)
 * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
 */
#define pgd_offset(mm, address) ((mm)->pgd+pgd_index(address))

/*
 * a shortcut which implies the use of the kernel's pgd, instead
 * of a process's
 */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)

/*
 * the pmd page can be thought of as an array like this: pmd_t[PTRS_PER_PMD]
 *
 * this macro returns the index of the entry in the pmd page which would
 * control the given virtual address
 */
#define pmd_index(address) \
        (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
/*
 * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
 *
 * this macro returns the index of the entry in the pte page which would
 * control the given virtual address
 */
#define pte_index(address) \
        (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
#define pte_offset_kernel(dir, address) \
        ((pte_t *) pmd_page_vaddr(*(dir)) + pte_index(address))

#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT))

#define pmd_page_vaddr(pmd) \
        ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
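/*
 * Added illustration (a sketch putting the helpers together, not
 * original): a software lookup of the kernel PTE for a virtual address:
 *
 *        pgd_t *pgd = pgd_offset_k(addr);
 *        pud_t *pud = pud_offset(pgd, addr);
 *        pmd_t *pmd = pmd_offset(pud, addr);
 *        pte_t *pte = pte_offset_kernel(pmd, addr);
 *
 * which is essentially what lookup_address() below does, plus a
 * pmd_large() check for large pages.
 */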
/*
 * Helper function that returns the kernel pagetable entry controlling
 * the virtual address 'address'. NULL means no pagetable entry present.
 * NOTE: the return type is pte_t but if the pmd is PSE then we return it
 * as a pte too.
 */
extern pte_t *lookup_address(unsigned long address);
/*
 * Make a given kernel text page executable/non-executable.
 * Returns the previous executability setting of that page (which
 * is used to restore the previous state). Used by the SMP bootup code.
 * NOTE: this is an __init function for security reasons.
 */
#ifdef CONFIG_X86_PAE
extern int set_kernel_exec(unsigned long vaddr, int enable);
#else
static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0; }
#endif
#if defined(CONFIG_HIGHPTE)
#define pte_offset_map(dir, address) \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE0) + pte_index(address))
#define pte_offset_map_nested(dir, address) \
        ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)), KM_PTE1) + pte_index(address))
#define pte_unmap(pte) kunmap_atomic(pte, KM_PTE0)
#define pte_unmap_nested(pte) kunmap_atomic(pte, KM_PTE1)
#else
#define pte_offset_map(dir, address) \
        ((pte_t *)page_address(pmd_page(*(dir))) + pte_index(address))
#define pte_offset_map_nested(dir, address) pte_offset_map(dir, address)
#define pte_unmap(pte) do { } while (0)
#define pte_unmap_nested(pte) do { } while (0)
#endif
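/*
 * Added usage sketch (illustrative): with CONFIG_HIGHPTE the pte page may
 * live in highmem, so the mapping is a short-lived kmap_atomic slot and
 * every map must be paired with an unmap:
 *
 *        pte_t *pte = pte_offset_map(pmd, addr);
 *        ... examine or update *pte ...
 *        pte_unmap(pte);
 *
 * The _nested variants exist so that two pte pages (e.g. the source and
 * destination of a fork-time copy) can be mapped at once via the two
 * KM_PTE slots.
 */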
/* Clear a kernel PTE and flush it from the TLB */
#define kpte_clear_flush(ptep, vaddr)                \
do {                                                \
        pte_clear(&init_mm, vaddr, ptep);        \
        __flush_tlb_one(vaddr);                        \
} while (0)
/*
 * The i386 doesn't have any external MMU info: the kernel page
 * tables contain all the necessary information.
 */
#define update_mmu_cache(vma, address, pte) do { } while (0)

void native_pagetable_setup_start(pgd_t *base);
void native_pagetable_setup_done(pgd_t *base);

#ifndef CONFIG_PARAVIRT
static inline void paravirt_pagetable_setup_start(pgd_t *base)
{
        native_pagetable_setup_start(base);
}

static inline void paravirt_pagetable_setup_done(pgd_t *base)
{
        native_pagetable_setup_done(base);
}
#endif        /* !CONFIG_PARAVIRT */
#endif /* !__ASSEMBLY__ */

/*
 * kern_addr_valid() is (1) for FLATMEM and (0) for
 * SPARSEMEM and DISCONTIGMEM
 */
#ifdef CONFIG_FLATMEM
#define kern_addr_valid(addr)        (1)
#else
#define kern_addr_valid(kaddr)        (0)
#endif
#define io_remap_pfn_range(vma, vaddr, pfn, size, prot)        \
        remap_pfn_range(vma, vaddr, pfn, size, prot)

#include <asm-generic/pgtable.h>

#endif /* _I386_PGTABLE_H */