Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * Copyright 2002 Andi Kleen, SuSE Labs. | |
3 | * Thanks to Ben LaHaise for precious feedback. | |
4 | */ | |
5 | ||
1da177e4 LT |
6 | #include <linux/mm.h> |
7 | #include <linux/sched.h> | |
8 | #include <linux/highmem.h> | |
9 | #include <linux/module.h> | |
10 | #include <linux/slab.h> | |
11 | #include <asm/uaccess.h> | |
12 | #include <asm/processor.h> | |
13 | #include <asm/tlbflush.h> | |
c9b02a24 | 14 | #include <asm/pgalloc.h> |
f8af095d | 15 | #include <asm/sections.h> |
1da177e4 LT |
16 | |
/* Held around page-attribute changes and while df_list is detached. */
static DEFINE_SPINLOCK(cpa_lock);
/* Page-table pages queued by save_page() until global_flush_tlb() drains them. */
static LIST_HEAD(df_list);
19 | ||
20 | ||
21 | pte_t *lookup_address(unsigned long address) | |
22 | { | |
23 | pgd_t *pgd = pgd_offset_k(address); | |
24 | pud_t *pud; | |
25 | pmd_t *pmd; | |
26 | if (pgd_none(*pgd)) | |
27 | return NULL; | |
28 | pud = pud_offset(pgd, address); | |
29 | if (pud_none(*pud)) | |
30 | return NULL; | |
31 | pmd = pmd_offset(pud, address); | |
32 | if (pmd_none(*pmd)) | |
33 | return NULL; | |
34 | if (pmd_large(*pmd)) | |
35 | return (pte_t *)pmd; | |
36 | return pte_offset_kernel(pmd, address); | |
37 | } | |
38 | ||
f8af095d DJ |
39 | static struct page *split_large_page(unsigned long address, pgprot_t prot, |
40 | pgprot_t ref_prot) | |
1da177e4 LT |
41 | { |
42 | int i; | |
43 | unsigned long addr; | |
44 | struct page *base; | |
45 | pte_t *pbase; | |
46 | ||
47 | spin_unlock_irq(&cpa_lock); | |
48 | base = alloc_pages(GFP_KERNEL, 0); | |
49 | spin_lock_irq(&cpa_lock); | |
50 | if (!base) | |
51 | return NULL; | |
52 | ||
84d1c054 NP |
53 | /* |
54 | * page_private is used to track the number of entries in | |
55 | * the page table page that have non standard attributes. | |
56 | */ | |
57 | SetPagePrivate(base); | |
58 | page_private(base) = 0; | |
59 | ||
1da177e4 LT |
60 | address = __pa(address); |
61 | addr = address & LARGE_PAGE_MASK; | |
62 | pbase = (pte_t *)page_address(base); | |
fdb4c338 | 63 | paravirt_alloc_pt(&init_mm, page_to_pfn(base)); |
1da177e4 | 64 | for (i = 0; i < PTRS_PER_PTE; i++, addr += PAGE_SIZE) { |
c9b02a24 | 65 | set_pte(&pbase[i], pfn_pte(addr >> PAGE_SHIFT, |
f8af095d | 66 | addr == address ? prot : ref_prot)); |
1da177e4 LT |
67 | } |
68 | return base; | |
69 | } | |
70 | ||
018d2ad0 | 71 | static void cache_flush_page(struct page *p) |
1da177e4 | 72 | { |
6619a8fb | 73 | void *adr = page_address(p); |
018d2ad0 AK |
74 | int i; |
75 | for (i = 0; i < PAGE_SIZE; i += boot_cpu_data.x86_clflush_size) | |
6619a8fb | 76 | clflush(adr+i); |
018d2ad0 AK |
77 | } |
78 | ||
79 | static void flush_kernel_map(void *arg) | |
80 | { | |
81 | struct list_head *lh = (struct list_head *)arg; | |
82 | struct page *p; | |
3760dd6e | 83 | |
018d2ad0 | 84 | /* High level code is not ready for clflush yet */ |
d3f3c934 | 85 | if (0 && cpu_has_clflush) { |
018d2ad0 AK |
86 | list_for_each_entry (p, lh, lru) |
87 | cache_flush_page(p); | |
3760dd6e | 88 | } else if (boot_cpu_data.x86_model >= 4) |
4bb0d3ec | 89 | wbinvd(); |
3760dd6e | 90 | |
1da177e4 LT |
91 | /* Flush all to work around Errata in early athlons regarding |
92 | * large page flushing. | |
93 | */ | |
94 | __flush_tlb_all(); | |
95 | } | |
96 | ||
97 | static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) | |
98 | { | |
99 | struct page *page; | |
100 | unsigned long flags; | |
101 | ||
102 | set_pte_atomic(kpte, pte); /* change init_mm */ | |
5311ab62 | 103 | if (SHARED_KERNEL_PMD) |
1da177e4 LT |
104 | return; |
105 | ||
106 | spin_lock_irqsave(&pgd_lock, flags); | |
107 | for (page = pgd_list; page; page = (struct page *)page->index) { | |
108 | pgd_t *pgd; | |
109 | pud_t *pud; | |
110 | pmd_t *pmd; | |
111 | pgd = (pgd_t *)page_address(page) + pgd_index(address); | |
112 | pud = pud_offset(pgd, address); | |
113 | pmd = pmd_offset(pud, address); | |
114 | set_pte_atomic((pte_t *)pmd, pte); | |
115 | } | |
116 | spin_unlock_irqrestore(&pgd_lock, flags); | |
117 | } | |
118 | ||
119 | /* | |
120 | * No more special protections in this 2/4MB area - revert to a | |
121 | * large page again. | |
122 | */ | |
123 | static inline void revert_page(struct page *kpte_page, unsigned long address) | |
124 | { | |
f8af095d DJ |
125 | pgprot_t ref_prot; |
126 | pte_t *linear; | |
127 | ||
128 | ref_prot = | |
129 | ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) | |
130 | ? PAGE_KERNEL_LARGE_EXEC : PAGE_KERNEL_LARGE; | |
131 | ||
132 | linear = (pte_t *) | |
1da177e4 LT |
133 | pmd_offset(pud_offset(pgd_offset_k(address), address), address); |
134 | set_pmd_pte(linear, address, | |
135 | pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, | |
f8af095d | 136 | ref_prot)); |
1da177e4 LT |
137 | } |
138 | ||
65d2f0bc AK |
139 | static inline void save_page(struct page *kpte_page) |
140 | { | |
141 | if (!test_and_set_bit(PG_arch_1, &kpte_page->flags)) | |
142 | list_add(&kpte_page->lru, &df_list); | |
143 | } | |
144 | ||
1da177e4 LT |
145 | static int |
146 | __change_page_attr(struct page *page, pgprot_t prot) | |
147 | { | |
148 | pte_t *kpte; | |
149 | unsigned long address; | |
150 | struct page *kpte_page; | |
151 | ||
152 | BUG_ON(PageHighMem(page)); | |
153 | address = (unsigned long)page_address(page); | |
154 | ||
155 | kpte = lookup_address(address); | |
156 | if (!kpte) | |
157 | return -EINVAL; | |
158 | kpte_page = virt_to_page(kpte); | |
65d2f0bc AK |
159 | BUG_ON(PageLRU(kpte_page)); |
160 | BUG_ON(PageCompound(kpte_page)); | |
161 | ||
1da177e4 | 162 | if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { |
d01ad8dd | 163 | if (!pte_huge(*kpte)) { |
1da177e4 LT |
164 | set_pte_atomic(kpte, mk_pte(page, prot)); |
165 | } else { | |
f8af095d DJ |
166 | pgprot_t ref_prot; |
167 | struct page *split; | |
168 | ||
169 | ref_prot = | |
170 | ((address & LARGE_PAGE_MASK) < (unsigned long)&_etext) | |
171 | ? PAGE_KERNEL_EXEC : PAGE_KERNEL; | |
172 | split = split_large_page(address, prot, ref_prot); | |
1da177e4 LT |
173 | if (!split) |
174 | return -ENOMEM; | |
f8af095d | 175 | set_pmd_pte(kpte,address,mk_pte(split, ref_prot)); |
1da177e4 | 176 | kpte_page = split; |
84d1c054 NP |
177 | } |
178 | page_private(kpte_page)++; | |
d01ad8dd | 179 | } else if (!pte_huge(*kpte)) { |
1da177e4 | 180 | set_pte_atomic(kpte, mk_pte(page, PAGE_KERNEL)); |
84d1c054 NP |
181 | BUG_ON(page_private(kpte_page) == 0); |
182 | page_private(kpte_page)--; | |
1da177e4 LT |
183 | } else |
184 | BUG(); | |
185 | ||
186 | /* | |
187 | * If the pte was reserved, it means it was created at boot | |
188 | * time (not via split_large_page) and in turn we must not | |
189 | * replace it with a largepage. | |
190 | */ | |
65d2f0bc AK |
191 | |
192 | save_page(kpte_page); | |
1da177e4 | 193 | if (!PageReserved(kpte_page)) { |
84d1c054 | 194 | if (cpu_has_pse && (page_private(kpte_page) == 0)) { |
c119ecce | 195 | paravirt_release_pt(page_to_pfn(kpte_page)); |
1da177e4 LT |
196 | revert_page(kpte_page, address); |
197 | } | |
198 | } | |
199 | return 0; | |
200 | } | |
201 | ||
018d2ad0 | 202 | static inline void flush_map(struct list_head *l) |
1da177e4 | 203 | { |
018d2ad0 | 204 | on_each_cpu(flush_kernel_map, l, 1, 1); |
1da177e4 LT |
205 | } |
206 | ||
207 | /* | |
208 | * Change the page attributes of a page in the linear mapping. | |
209 | * | |
210 | * This should be used when a page is mapped with a different caching policy | |
211 | * than write-back somewhere - some CPUs do not like it when mappings with | |
212 | * different caching policies exist. This changes the page attributes of the | |
213 | * in kernel linear mapping too. | |
214 | * | |
215 | * The caller needs to ensure that there are no conflicting mappings elsewhere. | |
216 | * This function only deals with the kernel linear map. | |
217 | * | |
218 | * Caller must call global_flush_tlb() after this. | |
219 | */ | |
220 | int change_page_attr(struct page *page, int numpages, pgprot_t prot) | |
221 | { | |
222 | int err = 0; | |
223 | int i; | |
224 | unsigned long flags; | |
225 | ||
226 | spin_lock_irqsave(&cpa_lock, flags); | |
227 | for (i = 0; i < numpages; i++, page++) { | |
228 | err = __change_page_attr(page, prot); | |
229 | if (err) | |
230 | break; | |
231 | } | |
232 | spin_unlock_irqrestore(&cpa_lock, flags); | |
233 | return err; | |
234 | } | |
235 | ||
236 | void global_flush_tlb(void) | |
626ab0e6 ON |
237 | { |
238 | struct list_head l; | |
1da177e4 LT |
239 | struct page *pg, *next; |
240 | ||
241 | BUG_ON(irqs_disabled()); | |
242 | ||
243 | spin_lock_irq(&cpa_lock); | |
626ab0e6 | 244 | list_replace_init(&df_list, &l); |
1da177e4 | 245 | spin_unlock_irq(&cpa_lock); |
018d2ad0 | 246 | flush_map(&l); |
3760dd6e | 247 | list_for_each_entry_safe(pg, next, &l, lru) { |
65d2f0bc AK |
248 | list_del(&pg->lru); |
249 | clear_bit(PG_arch_1, &pg->flags); | |
250 | if (PageReserved(pg) || !cpu_has_pse || page_private(pg) != 0) | |
251 | continue; | |
252 | ClearPagePrivate(pg); | |
1da177e4 | 253 | __free_page(pg); |
3760dd6e | 254 | } |
626ab0e6 | 255 | } |
1da177e4 LT |
256 | |
257 | #ifdef CONFIG_DEBUG_PAGEALLOC | |
258 | void kernel_map_pages(struct page *page, int numpages, int enable) | |
259 | { | |
260 | if (PageHighMem(page)) | |
261 | return; | |
de5097c2 | 262 | if (!enable) |
f9b8404c IM |
263 | debug_check_no_locks_freed(page_address(page), |
264 | numpages * PAGE_SIZE); | |
de5097c2 | 265 | |
1da177e4 LT |
266 | /* the return value is ignored - the calls cannot fail, |
267 | * large pages are disabled at boot time. | |
268 | */ | |
269 | change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0)); | |
270 | /* we should perform an IPI and flush all tlbs, | |
271 | * but that can deadlock->flush only current cpu. | |
272 | */ | |
273 | __flush_tlb_all(); | |
274 | } | |
275 | #endif | |
276 | ||
/* Export the two non-static entry points of this file: change_page_attr() and global_flush_tlb(). */
277 | EXPORT_SYMBOL(change_page_attr); | |
278 | EXPORT_SYMBOL(global_flush_tlb); |