/*
 * Page fault handler for SH with an MMU.
 *
 * Copyright (C) 1999  Niibe Yutaka
 * Copyright (C) 2003 - 2009  Paul Mundt
 *
 * Based on linux/arch/i386/mm/fault.c:
 *  Copyright (C) 1995  Linus Torvalds
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 */
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
#include <linux/kprobes.h>
#include <linux/perf_event.h>
#include <asm/io_trapped.h>
#include <asm/system.h>
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

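/*
 * Give any registered kprobes fault handler a chance to claim a
 * kernel-mode fault before normal handling runs. Returns non-zero
 * if a kprobe consumed the fault.
 */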
static inline int notify_page_fault(struct pt_regs *regs, int trap)
{
	int ret = 0;

	if (kprobes_built_in() && !user_mode(regs)) {
		preempt_disable();
		if (kprobe_running() && kprobe_fault_handler(regs, trap))
			ret = 1;
		preempt_enable();
	}

	return ret;
}

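/*
 * Sync the entries covering @address in @pgd with the kernel
 * reference page table (init_mm.pgd). Returns the reference pmd on
 * success, or NULL when syncing cannot resolve the fault, either
 * because the reference table has no mapping for the address or
 * because the tables were already in sync.
 */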
static inline pmd_t *vmalloc_sync_one(pgd_t *pgd, unsigned long address)
{
	unsigned index = pgd_index(address);
	pgd_t *pgd_k;
	pud_t *pud, *pud_k;
	pmd_t *pmd, *pmd_k;

	pgd += index;
	pgd_k = init_mm.pgd + index;

	if (!pgd_present(*pgd_k))
		return NULL;

	pud = pud_offset(pgd, address);
	pud_k = pud_offset(pgd_k, address);
	if (!pud_present(*pud_k))
		return NULL;

	if (!pud_present(*pud))
		set_pud(pud, *pud_k);

	pmd = pmd_offset(pud, address);
	pmd_k = pmd_offset(pud_k, address);
	if (!pmd_present(*pmd_k))
		return NULL;

	if (!pmd_present(*pmd))
		set_pmd(pmd, *pmd_k);
	else {
		/*
		 * The page tables are fully synchronised so there must
		 * be another reason for the fault. Return NULL here to
		 * signal that we have not taken care of the fault.
		 */
		BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
		return NULL;
	}

	return pmd_k;
}

/*
 * Handle a fault on the vmalloc or module mapping area
 */
static noinline int vmalloc_fault(unsigned long address)
{
	pgd_t *pgd_k;
	pmd_t *pmd_k;
	pte_t *pte_k;

	/* Make sure we are in vmalloc/module/P3 area: */
	if (!(address >= VMALLOC_START && address < P3_ADDR_MAX))
		return -1;

	/*
	 * Synchronize this task's top level page-table
	 * with the 'reference' page table.
	 *
	 * Do _not_ use "current" here. We might be inside
	 * an interrupt in the middle of a task switch..
	 */
	pgd_k = get_TTB();
	pmd_k = vmalloc_sync_one(pgd_k, address);
	if (!pmd_k)
		return -1;

	pte_k = pte_offset_kernel(pmd_k, address);
	if (!pte_present(*pte_k))
		return -1;

	return 0;
}

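/*
 * Kernel and user virtual addresses are split at TASK_SIZE; anything
 * at or above it belongs to the kernel.
 */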
static int fault_in_kernel_space(unsigned long address)
{
	return address >= TASK_SIZE;
}

/*
 * This routine handles page faults.  It determines the address,
 * and the problem, and then passes it off to one of the appropriate
 * routines.
 */
asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
					unsigned long writeaccess,
					unsigned long address)
{
	unsigned long vec;
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	int fault;
	siginfo_t info;

	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
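	/* Note which exception vector got us here; kprobes needs it. */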
	vec = lookup_exception_vector();

	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (unlikely(fault_in_kernel_space(address))) {
		if (vmalloc_fault(address) >= 0)
			return;
		if (notify_page_fault(regs, vec))
			return;

		goto bad_area_nosemaphore;
	}

	if (unlikely(notify_page_fault(regs, vec)))
		return;

	/* Only enable interrupts if they were on before the fault */
	if ((regs->sr & SR_IMASK) != SR_IMASK)
		local_irq_enable();

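	/* Record the fault with perf's software event counters. */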
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, 0, regs, address);

	/*
	 * If we're in an interrupt, have no user context or are running
	 * in an atomic region then we must not take the fault:
	 */
	if (in_atomic() || !mm)
		goto no_context;

	down_read(&mm->mmap_sem);

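	/*
	 * Look up the VMA covering the faulting address; an access just
	 * below a VM_GROWSDOWN mapping is treated as stack growth.
	 */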
	vma = find_vma(mm, address);
	if (!vma)
		goto bad_area;
	if (vma->vm_start <= address)
		goto good_area;
	if (!(vma->vm_flags & VM_GROWSDOWN))
		goto bad_area;
	if (expand_stack(vma, address))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */
good_area:
	si_code = SEGV_ACCERR;
	if (writeaccess) {
		if (!(vma->vm_flags & VM_WRITE))
			goto bad_area;
	} else {
		if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
			goto bad_area;
	}

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */
	fault = handle_mm_fault(mm, vma, address, writeaccess ? FAULT_FLAG_WRITE : 0);
	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		BUG();
	}
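	/*
	 * A major fault had to go to the backing store; a minor fault
	 * was satisfied from memory. Account them separately.
	 */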
	if (fault & VM_FAULT_MAJOR) {
		tsk->maj_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1, 0,
			      regs, address);
	} else {
		tsk->min_flt++;
		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
			      regs, address);
	}

	up_read(&mm->mmap_sem);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	up_read(&mm->mmap_sem);

bad_area_nosemaphore:
	if (user_mode(regs)) {
		info.si_signo = SIGSEGV;
		info.si_errno = 0;
		info.si_code = si_code;
		info.si_addr = (void *)address;
		force_sig_info(SIGSEGV, &info, tsk);
		return;
	}

no_context:
	/* Are we prepared to handle this kernel fault? */
	if (fixup_exception(regs))
		return;

	if (handle_trapped_io(regs, address))
		return;

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */
	bust_spinlocks(1);

	if (oops_may_print()) {
		unsigned long page;

		if (address < PAGE_SIZE)
			printk(KERN_ALERT "Unable to handle kernel NULL "
					  "pointer dereference");
		else
			printk(KERN_ALERT "Unable to handle kernel paging "
					  "request");
		printk(" at virtual address %08lx\n", address);
		printk(KERN_ALERT "pc = %08lx\n", regs->pc);
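		/*
		 * Walk the page table by hand, starting from the TTB
		 * register (the hardware page table base), and dump the
		 * top-level and pte entries for the faulting address.
		 */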
		page = (unsigned long)get_TTB();
		if (page) {
			page = ((__typeof__(page) *)page)[address >> PGDIR_SHIFT];
			printk(KERN_ALERT "*pde = %08lx\n", page);
			if (page & _PAGE_PRESENT) {
				page &= PAGE_MASK;
				address &= 0x003ff000;
				page = ((__typeof__(page) *)
						__va(page))[address >>
							    PAGE_SHIFT];
				printk(KERN_ALERT "*pte = %08lx\n", page);
			}
		}
	}

	die("Oops", regs, writeaccess);
	bust_spinlocks(0);
	do_exit(SIGKILL);

/*
 * We ran out of memory, or some other thing happened to us that made
 * us unable to handle the page fault gracefully.
 */
out_of_memory:
	up_read(&mm->mmap_sem);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	up_read(&mm->mmap_sem);

	/*
	 * Send a sigbus, regardless of whether we were in kernel
	 * or user mode.
	 */
	info.si_signo = SIGBUS;
	info.si_errno = 0;
	info.si_code = BUS_ADRERR;
	info.si_addr = (void *)address;
	force_sig_info(SIGBUS, &info, tsk);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;
}

/*
 * Called with interrupts disabled.
 */
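/*
 * TLB miss fast path: walk the page tables for the faulting address
 * and, if a usable PTE is found, mark it young (and dirty on a write)
 * and load it into the TLB. A non-zero return means the miss could
 * not be handled here and the full page fault path must run.
 */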
asmlinkage int __kprobes
handle_tlbmiss(struct pt_regs *regs, unsigned long writeaccess,
	       unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *pte;
	pte_t entry;

	/*
	 * We don't take page faults for P1, P2, and parts of P4, these
	 * are always mapped, whether it be due to legacy behaviour in
	 * 29-bit mode, or due to PMB configuration in 32-bit mode.
	 */
	if (address >= P3SEG && address < P3_ADDR_MAX) {
		pgd = pgd_offset_k(address);
	} else {
		if (unlikely(address >= TASK_SIZE || !current->mm))
			return 1;

		pgd = pgd_offset(current->mm, address);
	}

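	/* Walk down the page tables; punt to the slow path on any hole. */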
	pud = pud_offset(pgd, address);
	if (pud_none_or_clear_bad(pud))
		return 1;
	pmd = pmd_offset(pud, address);
	if (pmd_none_or_clear_bad(pmd))
		return 1;
	pte = pte_offset_kernel(pmd, address);
	entry = *pte;
	if (unlikely(pte_none(entry) || pte_not_present(entry)))
		return 1;
	if (unlikely(writeaccess && !pte_write(entry)))
		return 1;

	if (writeaccess)
		entry = pte_mkdirty(entry);
	entry = pte_mkyoung(entry);

	set_pte(pte, entry);

361 | #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SMP) |
362 | /* | |
8010fbe7 PM |
363 | * SH-4 does not set MMUCR.RC to the corresponding TLB entry in |
364 | * the case of an initial page write exception, so we need to | |
365 | * flush it in order to avoid potential TLB entry duplication. | |
a602cc05 | 366 | */ |
8010fbe7 PM |
367 | if (writeaccess == 2) |
368 | local_flush_tlb_one(get_asid(), address & PAGE_MASK); | |
a602cc05 HS |
369 | #endif |
370 | ||
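	/* Finally, load the updated entry into the TLB. */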
	update_mmu_cache(NULL, address, pte);

	return 0;
}