#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifdef HAVE_GENERIC_MMU_GATHER

static bool tlb_next_batch(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

        batch = tlb->active;
        if (batch->next) {
                tlb->active = batch->next;
                return true;
        }

        if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
                return false;

        batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        if (!batch)
                return false;

        tlb->batch_count++;
        batch->next = NULL;
        batch->nr = 0;
        batch->max = MAX_GATHER_BATCH;

        tlb->active->next = batch;
        tlb->active = batch;

        return true;
}

void arch_tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                unsigned long start, unsigned long end)
{
        tlb->mm = mm;

        /* Is it from 0 to ~0? */
        tlb->fullmm = !(start | (end+1));
        tlb->need_flush_all = 0;
        tlb->local.next = NULL;
        tlb->local.nr = 0;
        tlb->local.max = ARRAY_SIZE(tlb->__pages);
        tlb->active = &tlb->local;
        tlb->batch_count = 0;

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb->batch = NULL;
#endif
        tlb->page_size = 0;

        __tlb_reset_range(tlb);
}

void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

#ifdef CONFIG_HAVE_RCU_TABLE_FREE
        tlb_table_flush(tlb);
#endif
        for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
                free_pages_and_swap_cache(batch->pages, batch->nr);
                batch->nr = 0;
        }
        tlb->active = &tlb->local;
}

void tlb_flush_mmu(struct mmu_gather *tlb)
{
        tlb_flush_mmu_tlbonly(tlb);
        tlb_flush_mmu_free(tlb);
}

/* tlb_finish_mmu
 *      Called at the end of the shootdown operation to free up any resources
 *      that were required.
 */
void arch_tlb_finish_mmu(struct mmu_gather *tlb,
                unsigned long start, unsigned long end, bool force)
{
        struct mmu_gather_batch *batch, *next;

        if (force) {
                __tlb_reset_range(tlb);
                __tlb_adjust_range(tlb, start, end - start);
        }

        tlb_flush_mmu(tlb);

        /* keep the page table cache within bounds */
        check_pgt_cache();

        for (batch = tlb->local.next; batch; batch = next) {
                next = batch->next;
                free_pages((unsigned long)batch, 0);
        }
        tlb->local.next = NULL;
}

/* __tlb_remove_page
 *      Must perform the equivalent to __free_pte(pte_get_and_clear(ptep)), while
 *      handling the additional races in SMP caused by other CPUs caching valid
 *      mappings in their TLBs. When we run out of page slots we must call
 *      tlb_flush_mmu(). Returns true if the caller should flush.
 */
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
        struct mmu_gather_batch *batch;

        VM_BUG_ON(!tlb->end);
        VM_WARN_ON(tlb->page_size != page_size);

        batch = tlb->active;
        /*
         * Add the page and check if we are full. If so
         * force a flush.
         */
        batch->pages[batch->nr++] = page;
        if (batch->nr == batch->max) {
                if (!tlb_next_batch(tlb))
                        return true;
                batch = tlb->active;
        }
        VM_BUG_ON_PAGE(batch->nr > batch->max, page);

        return false;
}
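
/*
 * Illustrative caller pattern (a sketch, not part of this file): wrappers
 * such as tlb_remove_page_size() in <asm-generic/tlb.h> are expected to
 * flush when this function reports a full gather:
 *
 *      if (__tlb_remove_page_size(tlb, page, PAGE_SIZE))
 *              tlb_flush_mmu(tlb);
 */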

#endif /* HAVE_GENERIC_MMU_GATHER */

#ifdef CONFIG_HAVE_RCU_TABLE_FREE

/*
 * See the comment near struct mmu_table_batch.
 */

/*
 * If we want tlb_remove_table() to imply TLB invalidates.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
#ifdef CONFIG_HAVE_RCU_TABLE_INVALIDATE
        /*
         * Invalidate page-table caches used by hardware walkers. Then we still
         * need to RCU-sched wait while freeing the pages because software
         * walkers can still be in-flight.
         */
        tlb_flush_mmu_tlbonly(tlb);
#endif
}

static void tlb_remove_table_smp_sync(void *arg)
{
        /* Simply deliver the interrupt */
}

static void tlb_remove_table_one(void *table)
{
        /*
         * This isn't an RCU grace period and hence the page-tables cannot be
         * assumed to be actually RCU-freed.
         *
         * It is however sufficient for software page-table walkers that rely on
         * IRQ disabling. See the comment near struct mmu_table_batch.
         */
        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
        __tlb_remove_table(table);
}
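
/*
 * A minimal sketch (an assumption, for illustration only) of the kind of
 * IRQ-disabling software walker the synchronous IPI above waits for; the
 * smp_call_function() call cannot return while such a walk is in progress:
 *
 *      unsigned long flags;
 *
 *      local_irq_save(flags);
 *      walk_page_tables(mm, addr);     // hypothetical gup_fast-style walker
 *      local_irq_restore(flags);
 */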

static void tlb_remove_table_rcu(struct rcu_head *head)
{
        struct mmu_table_batch *batch;
        int i;

        batch = container_of(head, struct mmu_table_batch, rcu);

        for (i = 0; i < batch->nr; i++)
                __tlb_remove_table(batch->tables[i]);

        free_page((unsigned long)batch);
}

void tlb_table_flush(struct mmu_gather *tlb)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch) {
                tlb_table_invalidate(tlb);
                call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
                *batch = NULL;
        }
}

void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
                        tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }

        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);
}
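
/*
 * Illustrative sketch (an assumption, not taken from this file): an
 * architecture selecting CONFIG_HAVE_RCU_TABLE_FREE routes its page-table
 * freeing hooks through tlb_remove_table() instead of freeing the page
 * directly, along the lines of:
 *
 *      #define __pte_free_tlb(tlb, ptep, address)             \
 *              tlb_remove_table((tlb), pte_to_table(ptep))     // pte_to_table() is hypothetical
 */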

#endif /* CONFIG_HAVE_RCU_TABLE_FREE */

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 * @start: start of the region that will be removed from the page-table
 * @end: end of the region that will be removed from the page-table
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm. The @start and @end are set to 0 and -1
 * respectively when @mm is without users and we're going to destroy
 * the full address space (exit/execve).
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                unsigned long start, unsigned long end)
{
        arch_tlb_gather_mmu(tlb, mm, start, end);
        inc_tlb_flush_pending(tlb->mm);
}
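
/*
 * Typical on-stack usage (a sketch; compare callers such as
 * zap_page_range() in mm/memory.c):
 *
 *      struct mmu_gather tlb;
 *
 *      tlb_gather_mmu(&tlb, mm, start, end);
 *      ... unmap the range, feeding pages to tlb_remove_page() ...
 *      tlb_finish_mmu(&tlb, start, end);
 */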

void tlb_finish_mmu(struct mmu_gather *tlb,
                unsigned long start, unsigned long end)
{
        /*
         * If parallel threads are doing PTE changes on the same range under a
         * non-exclusive lock (e.g., mmap_sem read-side) but defer the TLB
         * flush by batching, a thread observing pte_none or !pte_dirty can
         * skip flushing even though a stale TLB entry is still live, so flush
         * the TLB forcefully if we detect parallel PTE batching threads.
         */
        bool force = mm_tlb_flush_nested(tlb->mm);

        arch_tlb_finish_mmu(tlb, start, end, force);
        dec_tlb_flush_pending(tlb->mm);
}