/*
 * Copyright 2002 Andi Kleen, SuSE Labs.
 * Thanks to Ben LaHaise for precious feedback.
 */
#include <linux/highmem.h>
#include <linux/bootmem.h>
#include <linux/module.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/mm.h>

#include <asm/e820.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>
#include <asm/uaccess.h>
#include <asm/pgalloc.h>

/*
 * The current flushing context - we pass it instead of 5 arguments:
 */
struct cpa_data {
        unsigned long vaddr;
        pgprot_t mask_set;
        pgprot_t mask_clr;
        int numpages;
        int flushtlb;
};

static inline int
within(unsigned long addr, unsigned long start, unsigned long end)
{
        return addr >= start && addr < end;
}

/*
 * Flushing functions
 */

/**
 * clflush_cache_range - flush a cache range with clflush
 * @vaddr:	virtual start address
 * @size:	number of bytes to flush
 *
 * clflush is an unordered instruction which needs fencing with mfence
 * to avoid ordering issues.
 */
void clflush_cache_range(void *vaddr, unsigned int size)
{
        void *vend = vaddr + size - 1;

        mb();

        for (; vaddr < vend; vaddr += boot_cpu_data.x86_clflush_size)
                clflush(vaddr);
        /*
         * Flush any possible final partial cacheline:
         */
        clflush(vend);

        mb();
}
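As a side note, the loop above deliberately stops before the last byte and then issues one more clflush on vend. A minimal user-space sketch (not kernel code; the 64-byte line size and the address/size values are assumptions for illustration) shows the stride arithmetic:

```c
/*
 * Illustrative user-space sketch, not kernel code. Assumes a 64-byte
 * cache line; the kernel uses boot_cpu_data.x86_clflush_size instead.
 */
#include <stdio.h>
#include <stdint.h>

#define CLFLUSH_SIZE 64

int main(void)
{
        uintptr_t addr = 0x1003, size = 130;            /* arbitrary example values */
        uintptr_t vaddr, vend = addr + size - 1;        /* last byte, as in the kernel loop */

        for (vaddr = addr; vaddr < vend; vaddr += CLFLUSH_SIZE)
                printf("flush line containing %#lx\n", (unsigned long)vaddr);

        /*
         * The strides started from an unaligned address, so the line that
         * holds the final byte may not have been covered yet:
         */
        printf("flush trailing line containing %#lx\n", (unsigned long)vend);
        return 0;
}
```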
static void __cpa_flush_all(void *arg)
{
        unsigned long cache = (unsigned long)arg;

        /*
         * Flush all to work around errata in early Athlons regarding
         * large page flushing.
         */
        __flush_tlb_all();

        if (cache && boot_cpu_data.x86_model >= 4)
                wbinvd();
}

static void cpa_flush_all(unsigned long cache)
{
        BUG_ON(irqs_disabled());

        on_each_cpu(__cpa_flush_all, (void *) cache, 1, 1);
}

static void __cpa_flush_range(void *arg)
{
        /*
         * We could optimize that further and do individual per page
         * tlb invalidates for a low number of pages. Caveat: we must
         * flush the high aliases on 64bit as well.
         */
        __flush_tlb_all();
}

static void cpa_flush_range(unsigned long start, int numpages, int cache)
{
        unsigned int i, level;
        unsigned long addr;

        BUG_ON(irqs_disabled());
        WARN_ON(PAGE_ALIGN(start) != start);

        on_each_cpu(__cpa_flush_range, NULL, 1, 1);

        if (!cache)
                return;

        /*
         * We only need to flush on one CPU,
         * clflush is a MESI-coherent instruction that
         * will cause all other CPUs to flush the same
         * cachelines:
         */
        for (i = 0, addr = start; i < numpages; i++, addr += PAGE_SIZE) {
                pte_t *pte = lookup_address(addr, &level);

                /*
                 * Only flush present addresses:
                 */
                if (pte && (pte_val(*pte) & _PAGE_PRESENT))
                        clflush_cache_range((void *) addr, PAGE_SIZE);
        }
}

#define HIGH_MAP_START	__START_KERNEL_map
#define HIGH_MAP_END	(__START_KERNEL_map + KERNEL_TEXT_SIZE)

/*
 * Converts a virtual address to an x86-64 highmap address
 */
static unsigned long virt_to_highmap(void *address)
{
#ifdef CONFIG_X86_64
        return __pa((unsigned long)address) + HIGH_MAP_START - phys_base;
#else
        return (unsigned long)address;
#endif
}

/*
 * Certain areas of memory on x86 require very specific protection flags,
 * for example the BIOS area or kernel text. Callers don't always get this
 * right (again, ioremap() on BIOS memory is not uncommon) so this function
 * checks and fixes these known static required protection bits.
 */
static inline pgprot_t static_protections(pgprot_t prot, unsigned long address)
{
        pgprot_t forbidden = __pgprot(0);

        /*
         * The BIOS area between 640k and 1Mb needs to be executable for
         * PCI BIOS based config access (CONFIG_PCI_GOBIOS) support.
         */
        if (within(__pa(address), BIOS_BEGIN, BIOS_END))
                pgprot_val(forbidden) |= _PAGE_NX;

        /*
         * The kernel text needs to be executable for obvious reasons.
         * This does not cover __inittext, since that is gone later on.
         */
        if (within(address, (unsigned long)_text, (unsigned long)_etext))
                pgprot_val(forbidden) |= _PAGE_NX;
        /*
         * Do the same for the x86-64 high kernel mapping
         */
        if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext)))
                pgprot_val(forbidden) |= _PAGE_NX;

        /* The .rodata section needs to be read-only */
        if (within(address, (unsigned long)__start_rodata,
                                (unsigned long)__end_rodata))
                pgprot_val(forbidden) |= _PAGE_RW;
        /*
         * Do the same for the x86-64 high kernel mapping
         */
        if (within(address, virt_to_highmap(__start_rodata),
                                virt_to_highmap(__end_rodata)))
                pgprot_val(forbidden) |= _PAGE_RW;

        prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden));

        return prot;
}
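The check-and-mask pattern used by static_protections() can be illustrated with a small stand-alone sketch. This is not the kernel implementation; the flag values and the is_text/is_rodata parameters are hypothetical stand-ins for the real _PAGE_* bits and section-range checks:

```c
/*
 * Stand-alone sketch of the "forbidden bits" idea. The flag values are
 * hypothetical stand-ins, not the real x86 _PAGE_* definitions.
 */
#include <stdio.h>

#define X_PAGE_RW 0x2UL   /* stand-in for _PAGE_RW */
#define X_PAGE_NX 0x8UL   /* stand-in for _PAGE_NX */

static unsigned long protect(unsigned long prot, int is_text, int is_rodata)
{
        unsigned long forbidden = 0;

        if (is_text)            /* text must stay executable */
                forbidden |= X_PAGE_NX;
        if (is_rodata)          /* rodata must stay read-only */
                forbidden |= X_PAGE_RW;

        return prot & ~forbidden;
}

int main(void)
{
        /* request RW+NX on a rodata page: the RW bit is silently dropped */
        printf("%#lx\n", protect(X_PAGE_RW | X_PAGE_NX, 0, 1));  /* prints 0x8 */
        return 0;
}
```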
/*
 * Lookup the page table entry for a virtual address. Return a pointer
 * to the entry and the level of the mapping.
 *
 * Note: We return pud and pmd either when the entry is marked large
 * or when the present bit is not set. Otherwise we would return a
 * pointer to a nonexisting mapping.
 */
pte_t *lookup_address(unsigned long address, int *level)
{
        pgd_t *pgd = pgd_offset_k(address);
        pud_t *pud;
        pmd_t *pmd;

        *level = PG_LEVEL_NONE;

        if (pgd_none(*pgd))
                return NULL;

        pud = pud_offset(pgd, address);
        if (pud_none(*pud))
                return NULL;

        *level = PG_LEVEL_1G;
        if (pud_large(*pud) || !pud_present(*pud))
                return (pte_t *)pud;

        pmd = pmd_offset(pud, address);
        if (pmd_none(*pmd))
                return NULL;

        *level = PG_LEVEL_2M;
        if (pmd_large(*pmd) || !pmd_present(*pmd))
                return (pte_t *)pmd;

        *level = PG_LEVEL_4K;

        return pte_offset_kernel(pmd, address);
}
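An illustrative fragment (kernel context assumed; dump_mapping() is an invented helper, not part of pageattr.c) of how a caller typically inspects the returned level before using the entry, mirroring the loop in cpa_flush_range() above:

```c
/*
 * Illustrative fragment only; kernel context assumed. Only lookup_address(),
 * pte_val(), pte_pfn() and the PG_LEVEL_* values come from the code above.
 */
static void dump_mapping(unsigned long vaddr)
{
        int level;
        pte_t *pte = lookup_address(vaddr, &level);

        if (!pte || !(pte_val(*pte) & _PAGE_PRESENT)) {
                printk(KERN_INFO "%lx: not mapped\n", vaddr);
                return;
        }
        /*
         * level tells the caller how much address space the entry covers:
         * PG_LEVEL_4K -> pte, PG_LEVEL_2M -> pmd, PG_LEVEL_1G -> pud.
         */
        printk(KERN_INFO "%lx: pfn %lx, level %d\n",
               vaddr, (unsigned long)pte_pfn(*pte), level);
}
```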
/*
 * Set the new pmd in all the pgds we know about:
 */
static void __set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte)
{
        /* change init_mm */
        set_pte_atomic(kpte, pte);
#ifdef CONFIG_X86_32
        if (!SHARED_KERNEL_PMD) {
                struct page *page;

                list_for_each_entry(page, &pgd_list, lru) {
                        pgd_t *pgd;
                        pud_t *pud;
                        pmd_t *pmd;

                        pgd = (pgd_t *)page_address(page) + pgd_index(address);
                        pud = pud_offset(pgd, address);
                        pmd = pmd_offset(pud, address);
                        set_pte_atomic((pte_t *)pmd, pte);
                }
        }
#endif
}

static int
try_preserve_large_page(pte_t *kpte, unsigned long address,
                        struct cpa_data *cpa)
{
        unsigned long nextpage_addr, numpages, pmask, psize, flags;
        pte_t new_pte, old_pte, *tmp;
        pgprot_t old_prot, new_prot;
        int level, do_split = 1;

        spin_lock_irqsave(&pgd_lock, flags);
        /*
         * Check for races, another CPU might have split this page
         * up already:
         */
        tmp = lookup_address(address, &level);
        if (tmp != kpte)
                goto out_unlock;

        switch (level) {
        case PG_LEVEL_2M:
                psize = PMD_PAGE_SIZE;
                pmask = PMD_PAGE_MASK;
                break;
#ifdef CONFIG_X86_64
        case PG_LEVEL_1G:
                psize = PMD_PAGE_SIZE;
                pmask = PMD_PAGE_MASK;
                break;
#endif
        default:
                do_split = -EINVAL;
                goto out_unlock;
        }

        /*
         * Calculate the number of pages that fit into this large
         * page starting at address:
         */
        nextpage_addr = (address + psize) & pmask;
        numpages = (nextpage_addr - address) >> PAGE_SHIFT;
        if (numpages < cpa->numpages)
                cpa->numpages = numpages;

        /*
         * We are safe now. Check whether the new pgprot is the same:
         */
        old_pte = *kpte;
        old_prot = new_prot = pte_pgprot(old_pte);

        pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
        pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);
        new_prot = static_protections(new_prot, address);

        /*
         * If there are no changes, return. cpa->numpages has been
         * updated above:
         */
        if (pgprot_val(new_prot) == pgprot_val(old_prot)) {
                do_split = 0;
                goto out_unlock;
        }

        /*
         * We need to change the attributes. Check whether we can
         * change the large page in one go. We request a split when
         * the address is not aligned and the number of pages is
         * smaller than the number of pages in the large page. Note
         * that we limited the number of possible pages already to
         * the number of pages in the large page.
         */
        if (address == (nextpage_addr - psize) && cpa->numpages == numpages) {
                /*
                 * The address is aligned and the number of pages
                 * covers the full page.
                 */
                new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));
                __set_pmd_pte(kpte, address, new_pte);
                cpa->flushtlb = 1;
                do_split = 0;
        }

out_unlock:
        spin_unlock_irqrestore(&pgd_lock, flags);

        return do_split;
}
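The alignment arithmetic is easier to see with concrete numbers. A small user-space sketch, assuming a 2 MB large page and arbitrary example values for the address and the requested page count:

```c
/*
 * Illustrative user-space sketch of the alignment arithmetic above, using a
 * 2 MB large page. The address and request size are arbitrary example values.
 */
#include <stdio.h>

#define PAGE_SHIFT      12
#define PMD_SIZE        (1UL << 21)             /* 2 MB large page */
#define PMD_MASK        (~(PMD_SIZE - 1))

int main(void)
{
        unsigned long address = 0xffff810000233000UL;   /* inside some 2 MB page */
        unsigned long request = 1024;                    /* pages the caller asked for */

        unsigned long nextpage_addr = (address + PMD_SIZE) & PMD_MASK;
        unsigned long numpages = (nextpage_addr - address) >> PAGE_SHIFT;

        /*
         * Only 'numpages' 4k pages fit in the current large page, so the
         * preservation check clamps the request to that:
         */
        printf("next large page at %#lx, %lu pages fit, clamp %lu -> %lu\n",
               nextpage_addr, numpages, request,
               request < numpages ? request : numpages);

        /*
         * The large page can only be kept whole if the address is aligned
         * and the (clamped) request covers all of it:
         */
        printf("aligned: %d, covers whole page: %d\n",
               address == (nextpage_addr - PMD_SIZE),
               numpages == PMD_SIZE >> PAGE_SHIFT);
        return 0;
}
```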
static int split_large_page(pte_t *kpte, unsigned long address)
{
        unsigned long flags, pfn, pfninc = 1;
        gfp_t gfp_flags = GFP_KERNEL;
        unsigned int i, level;
        pte_t *pbase, *tmp;
        pgprot_t ref_prot;
        struct page *base;

#ifdef CONFIG_DEBUG_PAGEALLOC
        gfp_flags = GFP_ATOMIC | __GFP_NOWARN;
#endif
        base = alloc_pages(gfp_flags, 0);
        if (!base)
                return -ENOMEM;

        spin_lock_irqsave(&pgd_lock, flags);
        /*
         * Check for races, another CPU might have split this page
         * up for us already:
         */
        tmp = lookup_address(address, &level);
        if (tmp != kpte)
                goto out_unlock;

        pbase = (pte_t *)page_address(base);
#ifdef CONFIG_X86_32
        paravirt_alloc_pt(&init_mm, page_to_pfn(base));
#endif
        ref_prot = pte_pgprot(pte_clrhuge(*kpte));

#ifdef CONFIG_X86_64
        if (level == PG_LEVEL_1G) {
                pfninc = PMD_PAGE_SIZE >> PAGE_SHIFT;
                pgprot_val(ref_prot) |= _PAGE_PSE;
        }
#endif

        /*
         * Get the target pfn from the original entry:
         */
        pfn = pte_pfn(*kpte);
        for (i = 0; i < PTRS_PER_PTE; i++, pfn += pfninc)
                set_pte(&pbase[i], pfn_pte(pfn, ref_prot));

        /*
         * Install the new, split up pagetable. Important details here:
         *
         * On Intel the NX bit of all levels must be cleared to make a
         * page executable. See section 4.13.2 of the Intel 64 and IA-32
         * Architectures Software Developer's Manual.
         *
         * Mark the entry present. The current mapping might be
         * set to not present, which we preserved above.
         */
        ref_prot = pte_pgprot(pte_mkexec(pte_clrhuge(*kpte)));
        pgprot_val(ref_prot) |= _PAGE_PRESENT;
        __set_pmd_pte(kpte, address, mk_pte(base, ref_prot));
        base = NULL;

out_unlock:
        spin_unlock_irqrestore(&pgd_lock, flags);

        if (base)
                __free_pages(base, 0);

        return 0;
}

static int __change_page_attr(unsigned long address, struct cpa_data *cpa)
{
        int level, do_split, err;
        struct page *kpte_page;
        pte_t *kpte;

repeat:
        kpte = lookup_address(address, &level);
        if (!kpte)
                return -EINVAL;

        kpte_page = virt_to_page(kpte);
        BUG_ON(PageLRU(kpte_page));
        BUG_ON(PageCompound(kpte_page));

        if (level == PG_LEVEL_4K) {
                pte_t new_pte, old_pte = *kpte;
                pgprot_t new_prot = pte_pgprot(old_pte);

                if (!pte_val(old_pte)) {
                        printk(KERN_WARNING "CPA: called for zero pte. "
                               "vaddr = %lx cpa->vaddr = %lx\n", address,
                               cpa->vaddr);
                        WARN_ON(1);
                        return -EINVAL;
                }

                pgprot_val(new_prot) &= ~pgprot_val(cpa->mask_clr);
                pgprot_val(new_prot) |= pgprot_val(cpa->mask_set);

                new_prot = static_protections(new_prot, address);

                /*
                 * We need to keep the pfn from the existing PTE,
                 * after all we're only going to change its attributes,
                 * not the memory it points to.
                 */
                new_pte = pfn_pte(pte_pfn(old_pte), canon_pgprot(new_prot));

                /*
                 * Do we really change anything?
                 */
                if (pte_val(old_pte) != pte_val(new_pte)) {
                        set_pte_atomic(kpte, new_pte);
                        cpa->flushtlb = 1;
                }
                cpa->numpages = 1;
                return 0;
        }

        /*
         * Check whether we can keep the large page intact
         * and just change the pte:
         */
        do_split = try_preserve_large_page(kpte, address, cpa);
        /*
         * When the range fits into the existing large page,
         * return. cpa->numpages and cpa->flushtlb have been updated
         * in try_preserve_large_page():
         */
        if (do_split <= 0)
                return do_split;

        /*
         * We have to split the large page:
         */
        err = split_large_page(kpte, address);
        if (!err) {
                cpa->flushtlb = 1;
                goto repeat;
        }

        return err;
}
/**
 * change_page_attr_addr - Change page table attributes in linear mapping
 * @cpa: cpa_data descriptor holding the virtual address, the number of
 *	 pages and the attribute masks to set and clear
 *
 * Change page attributes of a page in the direct mapping. This is a variant
 * of change_page_attr() that also works on memory holes that do not have
 * mem_map entry (pfn_valid() is false).
 *
 * See change_page_attr() documentation for more details.
 *
 * Modules and drivers should use the set_memory_* APIs instead.
 */
static int change_page_attr_addr(struct cpa_data *cpa)
{
        int err;
        unsigned long address = cpa->vaddr;

#ifdef CONFIG_X86_64
        unsigned long phys_addr = __pa(address);

        /*
         * If we are inside the high mapped kernel range, then we
         * fixup the low mapping first. __va() returns the virtual
         * address in the linear mapping:
         */
        if (within(address, HIGH_MAP_START, HIGH_MAP_END))
                address = (unsigned long) __va(phys_addr);
#endif

        err = __change_page_attr(address, cpa);
        if (err)
                return err;

#ifdef CONFIG_X86_64
        /*
         * If the physical address is inside the kernel map, we need
         * to touch the high mapped kernel as well:
         */
        if (within(phys_addr, 0, KERNEL_TEXT_SIZE)) {
                /*
                 * Calc the high mapping address. See __phys_addr()
                 * for the non-obvious details.
                 *
                 * Note that NX and other required permissions are
                 * checked in static_protections().
                 */
                address = phys_addr + HIGH_MAP_START - phys_base;

                /*
                 * Our high aliases are imprecise, because we check
                 * everything between 0 and KERNEL_TEXT_SIZE, so do
                 * not propagate lookup failures back to users:
                 */
                __change_page_attr(address, cpa);
        }
#endif
        return err;
}

static int __change_page_attr_set_clr(struct cpa_data *cpa)
{
        int ret, numpages = cpa->numpages;

        while (numpages) {
                /*
                 * Store the remaining nr of pages for the large page
                 * preservation check.
                 */
                cpa->numpages = numpages;
                ret = change_page_attr_addr(cpa);
                if (ret)
                        return ret;

                /*
                 * Adjust the number of pages with the result of the
                 * CPA operation. Either a large page has been
                 * preserved or a single page update happened.
                 */
                BUG_ON(cpa->numpages > numpages);
                numpages -= cpa->numpages;
                cpa->vaddr += cpa->numpages * PAGE_SIZE;
        }
        return 0;
}
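The effect of that bookkeeping can be sketched in user space. This is purely illustrative: one_step() is a stand-in for change_page_attr_addr(), and the step sizes are invented.

```c
/*
 * Illustrative user-space simulation of the loop above: each "CPA" step
 * reports how many pages it actually handled via cpa.numpages, and the loop
 * advances the address by that amount.
 */
#include <stdio.h>

#define PAGE_SIZE 4096UL

struct cpa_sim { unsigned long vaddr; unsigned long numpages; };

/* stand-in for change_page_attr_addr(): pretend the first call hit a large
 * page covering 461 remaining pages and every later call handled one page */
static void one_step(struct cpa_sim *cpa, int first)
{
        cpa->numpages = first ? 461 : 1;
}

int main(void)
{
        struct cpa_sim cpa = { .vaddr = 0x233000, .numpages = 0 };
        unsigned long numpages = 600;
        int first = 1;

        while (numpages) {
                cpa.numpages = numpages;        /* pages still to do */
                one_step(&cpa, first);
                first = 0;
                numpages -= cpa.numpages;       /* consume what the step handled */
                cpa.vaddr += cpa.numpages * PAGE_SIZE;
        }
        printf("done at %#lx\n", cpa.vaddr);
        return 0;
}
```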
static inline int cache_attr(pgprot_t attr)
{
        return pgprot_val(attr) &
                (_PAGE_PAT | _PAGE_PAT_LARGE | _PAGE_PWT | _PAGE_PCD);
}

static int change_page_attr_set_clr(unsigned long addr, int numpages,
                                    pgprot_t mask_set, pgprot_t mask_clr)
{
        struct cpa_data cpa;
        int ret, cache;

        /*
         * Check whether we are requested to change a feature that is
         * not supported:
         */
        mask_set = canon_pgprot(mask_set);
        mask_clr = canon_pgprot(mask_clr);
        if (!pgprot_val(mask_set) && !pgprot_val(mask_clr))
                return 0;

        cpa.vaddr = addr;
        cpa.numpages = numpages;
        cpa.mask_set = mask_set;
        cpa.mask_clr = mask_clr;
        cpa.flushtlb = 0;

        ret = __change_page_attr_set_clr(&cpa);

        /*
         * Check whether we really changed something:
         */
        if (!cpa.flushtlb)
                return ret;

        /*
         * No need to flush the caches, when we did not set any of the
         * caching attributes:
         */
        cache = cache_attr(mask_set);

        /*
         * On success we use clflush, when the CPU supports it, to
         * avoid the wbinvd. If the CPU does not support it, and in the
         * error case, we fall back to cpa_flush_all (which uses
         * wbinvd):
         */
        if (!ret && cpu_has_clflush)
                cpa_flush_range(addr, numpages, cache);
        else
                cpa_flush_all(cache);

        return ret;
}
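Only the caching-related bits force a cache flush. A hedged fragment (kernel context assumed; the function name is invented) makes the policy concrete:

```c
/*
 * Illustrative fragment only: which masks count as "caching attributes".
 * set_memory_uc() sets PCD|PWT, so cache is non-zero and the range gets
 * clflushed (or wbinvd on CPUs without clflush); set_memory_nx() only
 * touches NX, so only the TLBs need flushing.
 */
static void flush_policy_example(void)
{
        int uc = cache_attr(__pgprot(_PAGE_PCD | _PAGE_PWT));   /* non-zero */
        int nx = cache_attr(__pgprot(_PAGE_NX));                /* zero */

        printk(KERN_DEBUG "uc needs cache flush: %d, nx: %d\n", !!uc, !!nx);
}
```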
static inline int change_page_attr_set(unsigned long addr, int numpages,
                                       pgprot_t mask)
{
        return change_page_attr_set_clr(addr, numpages, mask, __pgprot(0));
}

static inline int change_page_attr_clear(unsigned long addr, int numpages,
                                         pgprot_t mask)
{
        return change_page_attr_set_clr(addr, numpages, __pgprot(0), mask);
}

int set_memory_uc(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages,
                                    __pgprot(_PAGE_PCD | _PAGE_PWT));
}
EXPORT_SYMBOL(set_memory_uc);

int set_memory_wb(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages,
                                      __pgprot(_PAGE_PCD | _PAGE_PWT));
}
EXPORT_SYMBOL(set_memory_wb);

int set_memory_x(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_NX));
}
EXPORT_SYMBOL(set_memory_x);

int set_memory_nx(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_NX));
}
EXPORT_SYMBOL(set_memory_nx);

int set_memory_ro(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_RW));
}

int set_memory_rw(unsigned long addr, int numpages)
{
        return change_page_attr_set(addr, numpages, __pgprot(_PAGE_RW));
}

int set_memory_np(unsigned long addr, int numpages)
{
        return change_page_attr_clear(addr, numpages, __pgprot(_PAGE_PRESENT));
}

int set_pages_uc(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_uc(addr, numpages);
}
EXPORT_SYMBOL(set_pages_uc);

int set_pages_wb(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_wb(addr, numpages);
}
EXPORT_SYMBOL(set_pages_wb);

int set_pages_x(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_x(addr, numpages);
}
EXPORT_SYMBOL(set_pages_x);

int set_pages_nx(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_nx(addr, numpages);
}
EXPORT_SYMBOL(set_pages_nx);

int set_pages_ro(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_ro(addr, numpages);
}

int set_pages_rw(struct page *page, int numpages)
{
        unsigned long addr = (unsigned long)page_address(page);

        return set_memory_rw(addr, numpages);
}
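For completeness, a hedged usage sketch of the exported set_memory_* API from a hypothetical driver module. It is not taken from this file; the module boilerplate, buffer size and names are illustrative assumptions:

```c
/*
 * Hypothetical example module: make a small kernel buffer uncached via the
 * set_memory_* API exported above, and restore write-back before freeing.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/gfp.h>
#include <asm/cacheflush.h>     /* set_memory_uc()/set_memory_wb() */

static unsigned long buf;
#define BUF_PAGES 4

static int __init uc_example_init(void)
{
        int ret;

        buf = __get_free_pages(GFP_KERNEL, 2);  /* order 2 = 4 pages */
        if (!buf)
                return -ENOMEM;

        /* switch the kernel's direct mapping of these pages to uncached */
        ret = set_memory_uc(buf, BUF_PAGES);
        if (ret)
                free_pages(buf, 2);
        return ret;
}

static void __exit uc_example_exit(void)
{
        /* restore write-back before handing the pages back to the allocator */
        set_memory_wb(buf, BUF_PAGES);
        free_pages(buf, 2);
}

module_init(uc_example_init);
module_exit(uc_example_exit);
MODULE_LICENSE("GPL");
```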
#ifdef CONFIG_DEBUG_PAGEALLOC

static int __set_pages_p(struct page *page, int numpages)
{
        struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
                                .numpages = numpages,
                                .mask_set = __pgprot(_PAGE_PRESENT | _PAGE_RW),
                                .mask_clr = __pgprot(0)};

        return __change_page_attr_set_clr(&cpa);
}

static int __set_pages_np(struct page *page, int numpages)
{
        struct cpa_data cpa = { .vaddr = (unsigned long) page_address(page),
                                .numpages = numpages,
                                .mask_set = __pgprot(0),
                                .mask_clr = __pgprot(_PAGE_PRESENT | _PAGE_RW)};

        return __change_page_attr_set_clr(&cpa);
}

void kernel_map_pages(struct page *page, int numpages, int enable)
{
        if (PageHighMem(page))
                return;
        if (!enable) {
                debug_check_no_locks_freed(page_address(page),
                                           numpages * PAGE_SIZE);
        }

        /*
         * If the page allocator is not up yet, do not call c_p_a():
         */
        if (!debug_pagealloc_enabled)
                return;

        /*
         * The return value is ignored - the calls cannot fail,
         * large pages are disabled at boot time:
         */
        if (enable)
                __set_pages_p(page, numpages);
        else
                __set_pages_np(page, numpages);

        /*
         * We should perform an IPI and flush all tlbs,
         * but that can deadlock, so flush only the current cpu:
         */
        __flush_tlb_all();
}
#endif

/*
 * The testcases use internal knowledge of the implementation that shouldn't
 * be exposed to the rest of the kernel. Include these directly here.
 */
#ifdef CONFIG_CPA_DEBUG
#include "pageattr-test.c"
#endif