powerpc/64s: Fix page table fragment refcount race vs speculative references
[linux-2.6-block.git] / arch / powerpc / mm / mmu_context_book3s64.c
CommitLineData
14cf11af
PM
1/*
2 * MMU context allocation for 64-bit kernels.
3 *
4 * Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
10 *
11 */
12
14cf11af
PM
13#include <linux/sched.h>
14#include <linux/kernel.h>
15#include <linux/errno.h>
16#include <linux/string.h>
17#include <linux/types.h>
18#include <linux/mm.h>
4fb158f6 19#include <linux/pkeys.h>
14cf11af
PM
20#include <linux/spinlock.h>
21#include <linux/idr.h>
4b16f8e2 22#include <linux/export.h>
5a0e3ad6 23#include <linux/gfp.h>
851d2e2f 24#include <linux/slab.h>
14cf11af
PM
25
26#include <asm/mmu_context.h>
5c1f6ee9 27#include <asm/pgalloc.h>
14cf11af
PM
28
/* Allocator from which every MMU context id in this file is drawn. */
static DEFINE_IDA(mmu_context_ida);
/* Taken around every ida_get_new_above()/ida_remove() call on the IDA. */
static DEFINE_SPINLOCK(mmu_context_lock);
14cf11af 31
c1ff840d 32static int alloc_context_id(int min_id, int max_id)
14cf11af 33{
c1ff840d 34 int index, err;
14cf11af
PM
35
36again:
7317ac87 37 if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
14cf11af
PM
38 return -ENOMEM;
39
40 spin_lock(&mmu_context_lock);
c1ff840d 41 err = ida_get_new_above(&mmu_context_ida, min_id, &index);
14cf11af
PM
42 spin_unlock(&mmu_context_lock);
43
44 if (err == -EAGAIN)
45 goto again;
46 else if (err)
47 return err;
48
c1ff840d 49 if (index > max_id) {
f86c9747 50 spin_lock(&mmu_context_lock);
7317ac87 51 ida_remove(&mmu_context_ida, index);
f86c9747 52 spin_unlock(&mmu_context_lock);
14cf11af
PM
53 return -ENOMEM;
54 }
55
e85a4710
AG
56 return index;
57}
a336f2f5 58
82228e36
AK
59void hash__reserve_context_id(int id)
60{
61 int rc, result = 0;
62
63 do {
64 if (!ida_pre_get(&mmu_context_ida, GFP_KERNEL))
65 break;
66
67 spin_lock(&mmu_context_lock);
68 rc = ida_get_new_above(&mmu_context_ida, id, &result);
69 spin_unlock(&mmu_context_lock);
70 } while (rc == -EAGAIN);
71
72 WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
73}
74
a336f2f5
ME
75int hash__alloc_context_id(void)
76{
e6f81a92
AK
77 unsigned long max;
78
79 if (mmu_has_feature(MMU_FTR_68_BIT_VA))
80 max = MAX_USER_CONTEXT;
81 else
82 max = MAX_USER_CONTEXT_65BIT_VA;
83
84 return alloc_context_id(MIN_USER_CONTEXT, max);
a336f2f5
ME
85}
86EXPORT_SYMBOL_GPL(hash__alloc_context_id);
87
760573c1
ME
88static int hash__init_new_context(struct mm_struct *mm)
89{
90 int index;
91
92 index = hash__alloc_context_id();
93 if (index < 0)
94 return index;
95
96 /*
97 * The old code would re-promote on fork, we don't do that when using
98 * slices as it could cause problem promoting slices that have been
99 * forced down to 4K.
100 *
101 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
102 * explicitly against context.id == 0. This ensures that we properly
103 * initialize context slice details for newly allocated mm's (which will
104 * have id == 0) and don't alter context slice inherited via fork (which
105 * will have id != 0).
106 *
107 * We should not be calling init_new_context() on init_mm. Hence a
108 * check against 0 is OK.
109 */
110 if (mm->context.id == 0)
1753dd18 111 slice_init_new_context_exec(mm);
760573c1
ME
112
113 subpage_prot_init_new_context(mm);
114
4fb158f6 115 pkey_mm_init(mm);
760573c1
ME
116 return index;
117}
118
119static int radix__init_new_context(struct mm_struct *mm)
7e381c0f
AK
120{
121 unsigned long rts_field;
a25bd72b 122 int index, max_id;
760573c1 123
a25bd72b
BH
124 max_id = (1 << mmu_pid_bits) - 1;
125 index = alloc_context_id(mmu_base_pid, max_id);
760573c1
ME
126 if (index < 0)
127 return index;
7e381c0f
AK
128
129 /*
130 * set the process table entry,
131 */
b23d9c5b 132 rts_field = radix__get_tree_size();
7e381c0f 133 process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);
760573c1 134
3a6a0470
BH
135 /*
136 * Order the above store with subsequent update of the PID
137 * register (at which point HW can start loading/caching
138 * the entry) and the corresponding load by the MMU from
139 * the L2 cache.
140 */
141 asm volatile("ptesync;isync" : : : "memory");
142
1ab66d1f
AP
143 mm->context.npu_context = NULL;
144
760573c1 145 return index;
7e381c0f 146}
e85a4710
AG
147
148int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
149{
150 int index;
151
760573c1
ME
152 if (radix_enabled())
153 index = radix__init_new_context(mm);
154 else
155 index = hash__init_new_context(mm);
156
e85a4710
AG
157 if (index < 0)
158 return index;
159
9dfe5c53 160 mm->context.id = index;
14cf11af 161
5c1f6ee9 162 mm->context.pte_frag = NULL;
8a6c697b 163 mm->context.pmd_frag = NULL;
15b244a8 164#ifdef CONFIG_SPAPR_TCE_IOMMU
88f54a35 165 mm_iommu_init(mm);
5c1f6ee9 166#endif
a619e59c 167 atomic_set(&mm->context.active_cpus, 0);
aff6f8cb 168 atomic_set(&mm->context.copros, 0);
a619e59c 169
14cf11af
PM
170 return 0;
171}
172
e85a4710 173void __destroy_context(int context_id)
14cf11af
PM
174{
175 spin_lock(&mmu_context_lock);
7317ac87 176 ida_remove(&mmu_context_ida, context_id);
14cf11af 177 spin_unlock(&mmu_context_lock);
e85a4710
AG
178}
179EXPORT_SYMBOL_GPL(__destroy_context);
14cf11af 180
f384796c
AK
181static void destroy_contexts(mm_context_t *ctx)
182{
183 int index, context_id;
184
185 spin_lock(&mmu_context_lock);
186 for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
187 context_id = ctx->extended_id[index];
188 if (context_id)
189 ida_remove(&mmu_context_ida, context_id);
190 }
191 spin_unlock(&mmu_context_lock);
192}
193
8a6c697b 194static void pte_frag_destroy(void *pte_frag)
5c1f6ee9
AK
195{
196 int count;
5c1f6ee9
AK
197 struct page *page;
198
5c1f6ee9
AK
199 page = virt_to_page(pte_frag);
200 /* drop all the pending references */
201 count = ((unsigned long)pte_frag & ~PAGE_MASK) >> PTE_FRAG_SIZE_SHIFT;
202 /* We allow PTE_FRAG_NR fragments from a PTE page */
4231aba0 203 if (atomic_sub_and_test(PTE_FRAG_NR - count, &page->pt_frag_refcount)) {
5c1f6ee9 204 pgtable_page_dtor(page);
4231aba0 205 __free_page(page);
5c1f6ee9
AK
206 }
207}
208
8a6c697b
AK
209static void pmd_frag_destroy(void *pmd_frag)
210{
211 int count;
212 struct page *page;
213
214 page = virt_to_page(pmd_frag);
215 /* drop all the pending references */
216 count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
217 /* We allow PTE_FRAG_NR fragments from a PTE page */
4231aba0 218 if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
8a6c697b 219 pgtable_pmd_page_dtor(page);
4231aba0 220 __free_page(page);
8a6c697b
AK
221 }
222}
223
34c604d2 224static void destroy_pagetable_cache(struct mm_struct *mm)
8a6c697b
AK
225{
226 void *frag;
227
228 frag = mm->context.pte_frag;
229 if (frag)
230 pte_frag_destroy(frag);
231
232 frag = mm->context.pmd_frag;
233 if (frag)
234 pmd_frag_destroy(frag);
235 return;
236}
237
e85a4710
AG
238void destroy_context(struct mm_struct *mm)
239{
15b244a8 240#ifdef CONFIG_SPAPR_TCE_IOMMU
4b6fad70 241 WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
15b244a8 242#endif
30b49ec7
NP
243 if (radix_enabled())
244 WARN_ON(process_tb[mm->context.id].prtb0 != 0);
245 else
246 subpage_prot_free(mm);
f384796c 247 destroy_contexts(&mm->context);
30b49ec7
NP
248 mm->context.id = MMU_NO_CONTEXT;
249}
250
251void arch_exit_mmap(struct mm_struct *mm)
252{
34c604d2
NP
253 destroy_pagetable_cache(mm);
254
c6bb0b8d
BH
255 if (radix_enabled()) {
256 /*
257 * Radix doesn't have a valid bit in the process table
258 * entries. However we know that at least P9 implementation
259 * will avoid caching an entry with an invalid RTS field,
260 * and 0 is invalid. So this will do.
30b49ec7
NP
261 *
262 * This runs before the "fullmm" tlb flush in exit_mmap,
263 * which does a RIC=2 tlbie to clear the process table
264 * entry. See the "fullmm" comments in tlb-radix.c.
265 *
266 * No barrier required here after the store because
267 * this process will do the invalidate, which starts with
268 * ptesync.
c6bb0b8d
BH
269 */
270 process_tb[mm->context.id].prtb0 = 0;
30b49ec7 271 }
14cf11af 272}
7e381c0f
AK
273
#ifdef CONFIG_PPC_RADIX_MMU
/*
 * Switch the radix MMU to @next by writing its context id to the PID
 * SPR, followed by an isync.  @prev is not used.
 */
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif /* CONFIG_PPC_RADIX_MMU */