[S390] fix s390 assembler code alignments
arch/s390/mm/pgtable.c (linux-2.6-block.git)
/*
 *    Copyright IBM Corp. 2007,2009
 *    Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/smp.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/spinlock.h>
#include <linux/module.h>
#include <linux/quicklist.h>
#include <linux/rcupdate.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/mmu_context.h>

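/*
 * A crst (region/segment) table has 2048 entries and is allocated as a
 * block of 2^ALLOC_ORDER pages: 4 bytes per entry on 31 bit (8 KB, order 1),
 * 8 bytes per entry on 64 bit (16 KB, order 2).  A page table has 256
 * entries and only needs a fragment of a 4 KB page: 1 KB on 31 bit
 * (four fragments per page, FRAG_MASK 0x0f) or 2 KB on 64 bit (two
 * fragments per page, FRAG_MASK 0x03).
 */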
#ifndef CONFIG_64BIT
#define ALLOC_ORDER	1
#define FRAG_MASK	0x0f
#else
#define ALLOC_ORDER	2
#define FRAG_MASK	0x03
#endif

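/*
 * The vmalloc area grows downwards from VMALLOC_END.  The "vmalloc=<size>"
 * early parameter moves VMALLOC_START down to enlarge the area.
 */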
unsigned long VMALLOC_START = VMALLOC_END - VMALLOC_SIZE;
EXPORT_SYMBOL(VMALLOC_START);

static int __init parse_vmalloc(char *arg)
{
        if (!arg)
                return -EINVAL;
        VMALLOC_START = (VMALLOC_END - memparse(arg, &arg)) & PAGE_MASK;
        return 0;
}
early_param("vmalloc", parse_vmalloc);

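/*
 * page_to_phys() is used as the table address below; this works because
 * the s390 kernel maps real memory 1:1, so the physical address of the
 * allocated pages can be used directly as a kernel pointer.
 */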
unsigned long *crst_table_alloc(struct mm_struct *mm)
{
        struct page *page = alloc_pages(GFP_KERNEL, ALLOC_ORDER);

        if (!page)
                return NULL;
        return (unsigned long *) page_to_phys(page);
}

void crst_table_free(struct mm_struct *mm, unsigned long *table)
{
        free_pages((unsigned long) table, ALLOC_ORDER);
}

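/*
 * On 64 bit the address space starts out with a 2 GB limit (asce_limit
 * 1UL << 31).  crst_table_upgrade() adds region table levels on top of
 * the current top-level table until the requested limit is covered: a
 * region-third table extends the space to 4 TB (1UL << 42), a
 * region-second table to 8 PB (1UL << 53).  crst_table_downgrade()
 * removes levels again.
 */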
#ifdef CONFIG_64BIT
int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
{
        unsigned long *table, *pgd;
        unsigned long entry;

        BUG_ON(limit > (1UL << 53));
repeat:
        table = crst_table_alloc(mm);
        if (!table)
                return -ENOMEM;
        spin_lock_bh(&mm->page_table_lock);
        if (mm->context.asce_limit < limit) {
                pgd = (unsigned long *) mm->pgd;
                if (mm->context.asce_limit <= (1UL << 31)) {
                        entry = _REGION3_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                } else {
                        entry = _REGION2_ENTRY_EMPTY;
                        mm->context.asce_limit = 1UL << 53;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION2;
                }
                crst_table_init(table, entry);
                pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
                mm->pgd = (pgd_t *) table;
                mm->task_size = mm->context.asce_limit;
                table = NULL;
        }
        spin_unlock_bh(&mm->page_table_lock);
        if (table)
                crst_table_free(mm, table);
        if (mm->context.asce_limit < limit)
                goto repeat;
        update_mm(mm, current);
        return 0;
}

void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
{
        pgd_t *pgd;

        if (mm->context.asce_limit <= limit)
                return;
        __tlb_flush_mm(mm);
        while (mm->context.asce_limit > limit) {
                pgd = mm->pgd;
                switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
                case _REGION_ENTRY_TYPE_R2:
                        mm->context.asce_limit = 1UL << 42;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_REGION3;
                        break;
                case _REGION_ENTRY_TYPE_R3:
                        mm->context.asce_limit = 1UL << 31;
                        mm->context.asce_bits = _ASCE_TABLE_LENGTH |
                                                _ASCE_USER_BITS |
                                                _ASCE_TYPE_SEGMENT;
                        break;
                default:
                        BUG();
                }
                mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
                mm->task_size = mm->context.asce_limit;
                crst_table_free(mm, (unsigned long *) pgd);
        }
        update_mm(mm, current);
}
#endif

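/*
 * The low bits of page->_mapcount are (ab)used as an allocation bitmap
 * for the 1K/2K page table fragments of a 4K page.  atomic_xor_bits()
 * toggles the given bits atomically and returns the new value.
 */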
static inline unsigned int atomic_xor_bits(atomic_t *v, unsigned int bits)
{
        unsigned int old, new;

        do {
                old = atomic_read(v);
                new = old ^ bits;
        } while (atomic_cmpxchg(v, old, new) != old);
        return new;
}

/*
 * page table entry allocation/free routines.
 */
#ifdef CONFIG_PGSTE
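/*
 * With CONFIG_PGSTE each page table gets a full 4K page: the lower half
 * holds the 256 pte entries, the upper half the page table status
 * extensions (pgstes) needed by KVM.  Setting _mapcount to 3 marks both
 * fragments of the page as used, so it is never handed out as a normal
 * page table fragment.
 */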
static inline unsigned long *page_table_alloc_pgste(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;

        page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
        if (!page)
                return NULL;
        pgtable_page_ctor(page);
        atomic_set(&page->_mapcount, 3);
        table = (unsigned long *) page_to_phys(page);
        clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE/2);
        clear_table(table + PTRS_PER_PTE, 0, PAGE_SIZE/2);
        return table;
}

static inline void page_table_free_pgste(unsigned long *table)
{
        struct page *page;

        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        pgtable_page_dtor(page);
        atomic_set(&page->_mapcount, -1);
        __free_page(page);
}
#endif

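/*
 * Pages with unused page table fragments are kept on
 * mm->context.pgtable_list.  The allocation bits in page->_mapcount track
 * which fragments are in use; the upper nibble marks fragments whose free
 * is still pending in an RCU grace period.
 */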
unsigned long *page_table_alloc(struct mm_struct *mm)
{
        struct page *page;
        unsigned long *table;
        unsigned int mask, bit;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_alloc_pgste(mm);
#endif
        /* Allocate fragments of a 4K page as 1K/2K page table */
        spin_lock_bh(&mm->context.list_lock);
        mask = FRAG_MASK;
        if (!list_empty(&mm->context.pgtable_list)) {
                page = list_first_entry(&mm->context.pgtable_list,
                                        struct page, lru);
                table = (unsigned long *) page_to_phys(page);
                mask = atomic_read(&page->_mapcount);
                mask = mask | (mask >> 4);
        }
        if ((mask & FRAG_MASK) == FRAG_MASK) {
                spin_unlock_bh(&mm->context.list_lock);
                page = alloc_page(GFP_KERNEL|__GFP_REPEAT);
                if (!page)
                        return NULL;
                pgtable_page_ctor(page);
                atomic_set(&page->_mapcount, 1);
                table = (unsigned long *) page_to_phys(page);
                clear_table(table, _PAGE_TYPE_EMPTY, PAGE_SIZE);
                spin_lock_bh(&mm->context.list_lock);
                list_add(&page->lru, &mm->context.pgtable_list);
        } else {
                for (bit = 1; mask & bit; bit <<= 1)
                        table += PTRS_PER_PTE;
                mask = atomic_xor_bits(&page->_mapcount, bit);
                if ((mask & FRAG_MASK) == FRAG_MASK)
                        list_del(&page->lru);
        }
        spin_unlock_bh(&mm->context.list_lock);
        return table;
}

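/*
 * The fragment bit of a page table is derived from its offset within the
 * 4K page: offset / (PTRS_PER_PTE * sizeof(pte_t)) gives the fragment
 * index.  Once all allocation bits are cleared the backing page is freed.
 */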
void page_table_free(struct mm_struct *mm, unsigned long *table)
{
        struct page *page;
        unsigned int bit, mask;

#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm))
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        bit = 1 << ((__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t)));
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit);
        if (mask & FRAG_MASK)
                list_add(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        if (mask == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}

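/*
 * With CONFIG_HAVE_RCU_TABLE_FREE page tables are freed via the generic
 * mmu_gather RCU machinery: page_table_free_rcu() marks the fragment as
 * pending (bit << 4) and hands the table to tlb_remove_table() with the
 * fragment bits encoded in the low bits of the pointer; the real free
 * happens later in __tlb_remove_table() after the grace period.
 */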
#ifdef CONFIG_HAVE_RCU_TABLE_FREE

static void __page_table_free_rcu(void *table, unsigned bit)
{
        struct page *page;

#ifdef CONFIG_PGSTE
        if (bit == FRAG_MASK)
                return page_table_free_pgste(table);
#endif
        /* Free 1K/2K page table fragment of a 4K page */
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        if (atomic_xor_bits(&page->_mapcount, bit) == 0) {
                pgtable_page_dtor(page);
                atomic_set(&page->_mapcount, -1);
                __free_page(page);
        }
}

void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table)
{
        struct mm_struct *mm;
        struct page *page;
        unsigned int bit, mask;

        mm = tlb->mm;
#ifdef CONFIG_PGSTE
        if (mm_has_pgste(mm)) {
                table = (unsigned long *) (__pa(table) | FRAG_MASK);
                tlb_remove_table(tlb, table);
                return;
        }
#endif
        bit = 1 << ((__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t)));
        page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
        spin_lock_bh(&mm->context.list_lock);
        if ((atomic_read(&page->_mapcount) & FRAG_MASK) != FRAG_MASK)
                list_del(&page->lru);
        mask = atomic_xor_bits(&page->_mapcount, bit | (bit << 4));
        if (mask & FRAG_MASK)
                list_add_tail(&page->lru, &mm->context.pgtable_list);
        spin_unlock_bh(&mm->context.list_lock);
        table = (unsigned long *) (__pa(table) | (bit << 4));
        tlb_remove_table(tlb, table);
}

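/*
 * __tlb_remove_table() is called by the generic mmu_gather code after the
 * RCU grace period.  The low bits of the pointer encode what is freed:
 * zero means a full crst table (freed with free_pages()), non-zero is the
 * fragment mask of a page table handled by __page_table_free_rcu().
 */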
void __tlb_remove_table(void *_table)
{
        void *table = (void *)((unsigned long) _table & PAGE_MASK);
        unsigned type = (unsigned long) _table & ~PAGE_MASK;

        if (type)
                __page_table_free_rcu(table, type);
        else
                free_pages((unsigned long) table, ALLOC_ORDER);
}

#endif

/*
 * Switch on pgstes for the current userspace process (for KVM).
 */
int s390_enable_sie(void)
{
        struct task_struct *tsk = current;
        struct mm_struct *mm, *old_mm;

        /* Do we have a switched amode? If not, we cannot do sie */
        if (user_mode == HOME_SPACE_MODE)
                return -EINVAL;

        /* Do we have pgstes? If yes, we are done */
        if (mm_has_pgste(tsk->mm))
                return 0;

        /* Let's check if we are allowed to replace the mm */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                task_unlock(tsk);
                return -EINVAL;
        }
        task_unlock(tsk);

        /* We copy the mm and let dup_mm create the page tables with pgstes */
        tsk->mm->context.alloc_pgste = 1;
        mm = dup_mm(tsk);
        tsk->mm->context.alloc_pgste = 0;
        if (!mm)
                return -ENOMEM;

        /* Now let's check again if something happened */
        task_lock(tsk);
        if (!tsk->mm || atomic_read(&tsk->mm->mm_users) > 1 ||
#ifdef CONFIG_AIO
            !hlist_empty(&tsk->mm->ioctx_list) ||
#endif
            tsk->mm != tsk->active_mm) {
                mmput(mm);
                task_unlock(tsk);
                return -EINVAL;
        }

        /* OK, we are alone. No ptrace, no threads, etc. */
        old_mm = tsk->mm;
        tsk->mm = tsk->active_mm = mm;
        preempt_disable();
        update_mm(mm, tsk);
        atomic_inc(&mm->context.attach_count);
        atomic_dec(&old_mm->context.attach_count);
        cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
        preempt_enable();
        task_unlock(tsk);
        mmput(old_mm);
        return 0;
}
EXPORT_SYMBOL_GPL(s390_enable_sie);

#if defined(CONFIG_DEBUG_PAGEALLOC) && defined(CONFIG_HIBERNATION)
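/*
 * kernel_page_present() probes the page with "lra" (load real address):
 * the instruction sets condition code 0 only if the address can be
 * translated, and the cc is extracted with ipm/srl.  Hibernation uses
 * this to skip pages that DEBUG_PAGEALLOC has unmapped.
 */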
bool kernel_page_present(struct page *page)
{
        unsigned long addr;
        int cc;

        addr = page_to_phys(page);
        asm volatile(
                "	lra	%1,0(%1)\n"
                "	ipm	%0\n"
                "	srl	%0,28"
                : "=d" (cc), "+a" (addr) : : "cc");
        return cc == 0;
}
#endif /* CONFIG_HIBERNATION && CONFIG_DEBUG_PAGEALLOC */