// SPDX-License-Identifier: GPL-2.0
// Copyright (C) 2005-2017 Andes Technology Corporation

#include <linux/extable.h>
#include <linux/module.h>
#include <linux/signal.h>
#include <linux/ptrace.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/hardirq.h>
#include <linux/uaccess.h>
#include <linux/perf_event.h>

#include <asm/tlbflush.h>

extern void die(const char *str, struct pt_regs *regs, long err);

/*
 * This is useful to dump out the page tables associated with
 * 'addr' in mm 'mm'.
 */
void show_pte(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	if (!mm)
		mm = &init_mm;

	pr_alert("pgd = %p\n", mm->pgd);
	pgd = pgd_offset(mm, addr);
	pr_alert("[%08lx] *pgd=%08lx", addr, pgd_val(*pgd));

	do {
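		/*
		 * Walk pgd -> p4d -> pud -> pmd (and finally the pte) for
		 * 'addr'; the do { ... } while (0) wrapper lets the walk
		 * bail out at any level with a plain 'break'.
		 */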
		p4d_t *p4d;
		pud_t *pud;
		pmd_t *pmd;

		if (pgd_none(*pgd))
			break;

		if (pgd_bad(*pgd)) {
			pr_alert("(bad)");
			break;
		}

		p4d = p4d_offset(pgd, addr);
		pud = pud_offset(p4d, addr);
		pmd = pmd_offset(pud, addr);
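		/*
		 * When PTRS_PER_PMD == 1 the pmd level is folded, so its
		 * value would only repeat the pgd value printed above;
		 * print it separately only when it is a real level.
		 */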
#if PTRS_PER_PMD != 1
		pr_alert(", *pmd=%08lx", pmd_val(*pmd));
#endif

		if (pmd_none(*pmd))
			break;

		if (pmd_bad(*pmd)) {
			pr_alert("(bad)");
			break;
		}

		if (IS_ENABLED(CONFIG_HIGHMEM)) {
			pte_t *pte;
			/*
			 * Go through pte_offset_map()/pte_unmap() so a page
			 * table that lives in highmem gets a temporary
			 * kernel mapping while we read it.
			 */
			pte = pte_offset_map(pmd, addr);
			pr_alert(", *pte=%08lx", pte_val(*pte));
			pte_unmap(pte);
		}
	} while (0);

	pr_alert("\n");
}

void do_page_fault(unsigned long entry, unsigned long addr,
		   unsigned int error_code, struct pt_regs *regs)
{
	struct task_struct *tsk;
	struct mm_struct *mm;
	struct vm_area_struct *vma;
	int si_code;
	vm_fault_t fault;
	unsigned int mask = VM_ACCESS_FLAGS;
	unsigned int flags = FAULT_FLAG_DEFAULT;

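	/*
	 * Keep only the instruction-fetch flag and the exception-type
	 * field of the raw error code; those are the only bits
	 * consulted (and reported) below.
	 */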
	error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE);
	tsk = current;
	mm = tsk->mm;
	si_code = SEGV_MAPERR;
	/*
	 * We fault-in kernel-space virtual memory on-demand. The
	 * 'reference' page table is init_mm.pgd.
	 *
	 * NOTE! We MUST NOT take any locks for this case. We may
	 * be in an interrupt or a critical region, and should
	 * only copy the information from the master page table,
	 * nothing more.
	 */
	if (addr >= TASK_SIZE) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;

		if (addr >= TASK_SIZE && addr < VMALLOC_END
		    && (entry == ENTRY_PTE_NOT_PRESENT))
			goto vmalloc_fault;
		else
			goto no_context;
	}

	/* Send a signal to the task for handling the unaligned access. */
	if (entry == ENTRY_GENERAL_EXCPETION
	    && error_code == ETYPE_ALIGNMENT_CHECK) {
		if (user_mode(regs))
			goto bad_area_nosemaphore;
		else
			goto no_context;
	}

	/*
	 * If we're in an interrupt or have no user
	 * context, we must not take the fault..
	 */
	if (unlikely(faulthandler_disabled() || !mm))
		goto no_context;

	/*
	 * As per x86, we may deadlock here. However, since the kernel only
	 * validly references user space from well defined areas of the code,
	 * we can bug out early if this is from code which shouldn't.
	 */
	if (unlikely(!mmap_read_trylock(mm))) {
		if (!user_mode(regs) &&
		    !search_exception_tables(instruction_pointer(regs)))
			goto no_context;
retry:
		mmap_read_lock(mm);
	} else {
		/*
		 * The above mmap_read_trylock() might have succeeded in
		 * which case, we'll have missed the might_sleep() from
		 * mmap_read_lock().
		 */
		might_sleep();
		if (IS_ENABLED(CONFIG_DEBUG_VM)) {
			if (!user_mode(regs) &&
			    !search_exception_tables(instruction_pointer(regs)))
				goto no_context;
		}
	}

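	/*
	 * find_vma() returns the first VMA that ends above 'addr'. If
	 * 'addr' lies below vma->vm_start, the access is valid only if
	 * the VMA is a stack that may grow down to cover it.
	 */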
	vma = find_vma(mm, addr);

	if (unlikely(!vma))
		goto bad_area;

	if (vma->vm_start <= addr)
		goto good_area;

	if (unlikely(!(vma->vm_flags & VM_GROWSDOWN)))
		goto bad_area;

	if (unlikely(expand_stack(vma, addr)))
		goto bad_area;

	/*
	 * Ok, we have a good vm_area for this memory access, so
	 * we can handle it..
	 */

good_area:
	si_code = SEGV_ACCERR;

	/* first do some preliminary protection checks */
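	/*
	 * Map the TLB entry type onto the access rights the VMA must
	 * grant: a not-present fault needs VM_EXEC for an instruction
	 * fetch and VM_READ | VM_WRITE for a data access, while a TLB
	 * "misc" fault encodes the violated permission in its error
	 * type (read, write, execute, or a write to a clean page).
	 */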
	if (entry == ENTRY_PTE_NOT_PRESENT) {
		if (error_code & ITYPE_mskINST)
			mask = VM_EXEC;
		else
			mask = VM_READ | VM_WRITE;
	} else if (entry == ENTRY_TLB_MISC) {
		switch (error_code & ITYPE_mskETYPE) {
		case RD_PROT:
			mask = VM_READ;
			break;
		case WRT_PROT:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case NOEXEC:
			mask = VM_EXEC;
			break;
		case PAGE_MODIFY:
			mask = VM_WRITE;
			flags |= FAULT_FLAG_WRITE;
			break;
		case ACC_BIT:
			BUG();
		default:
			break;
		}
	}
	if (!(vma->vm_flags & mask))
		goto bad_area;

	/*
	 * If for any reason at all we couldn't handle the fault,
	 * make sure we exit gracefully rather than endlessly redo
	 * the fault.
	 */

	fault = handle_mm_fault(vma, addr, flags);

	/*
	 * If we need to retry but a fatal signal is pending, handle the
	 * signal first. We do not need to release the mmap_lock because it
	 * would already be released in __lock_page_or_retry in mm/filemap.c.
	 */
	if (fault_signal_pending(fault, regs)) {
		if (!user_mode(regs))
			goto no_context;
		return;
	}

	if (unlikely(fault & VM_FAULT_ERROR)) {
		if (fault & VM_FAULT_OOM)
			goto out_of_memory;
		else if (fault & VM_FAULT_SIGBUS)
			goto do_sigbus;
		else
			goto bad_area;
	}

	/*
	 * Major/minor page fault accounting is only done on the initial
	 * attempt. If we go through a retry, it is extremely likely that
	 * the page will be found in page cache at that point.
	 */
	perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
	if (flags & FAULT_FLAG_ALLOW_RETRY) {
		if (fault & VM_FAULT_MAJOR) {
			tsk->maj_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ,
				      1, regs, addr);
		} else {
			tsk->min_flt++;
			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN,
				      1, regs, addr);
		}
		if (fault & VM_FAULT_RETRY) {
			flags |= FAULT_FLAG_TRIED;

			/* No need to mmap_read_unlock(mm) as we would
			 * have already released it in __lock_page_or_retry
			 * in mm/filemap.c.
			 */
			goto retry;
		}
	}

	mmap_read_unlock(mm);
	return;

	/*
	 * Something tried to access memory that isn't in our memory map..
	 * Fix it, but check if it's kernel or user first..
	 */
bad_area:
	mmap_read_unlock(mm);

bad_area_nosemaphore:

	/* User mode accesses just cause a SIGSEGV */

	if (user_mode(regs)) {
		tsk->thread.address = addr;
		tsk->thread.error_code = error_code;
		tsk->thread.trap_no = entry;
		force_sig_fault(SIGSEGV, si_code, (void __user *)addr);
		return;
	}

no_context:

	/* Are we prepared to handle this kernel fault?
	 *
	 * (The kernel has valid exception-points in the source
	 * when it accesses user-memory. When it fails in one
	 * of those points, we find it in a table and do a jump
	 * to some fixup code that loads an appropriate error
	 * code)
	 */

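	/*
	 * Each exception-table entry pairs the address of an instruction
	 * that may legitimately fault (e.g. in a uaccess helper) with a
	 * fixup address; resuming at the fixup typically makes the
	 * helper return -EFAULT instead of taking the kernel down.
	 */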
	{
		const struct exception_table_entry *entry;

		entry = search_exception_tables(instruction_pointer(regs));
		if (entry != NULL) {
			/* Adjust the instruction pointer in the stackframe */
			instruction_pointer(regs) = entry->fixup;
			return;
		}
	}

	/*
	 * Oops. The kernel tried to access some bad page. We'll have to
	 * terminate things with extreme prejudice.
	 */

	bust_spinlocks(1);
	pr_alert("Unable to handle kernel %s at virtual address %08lx\n",
		 (addr < PAGE_SIZE) ? "NULL pointer dereference" :
		 "paging request", addr);

	show_pte(mm, addr);
	die("Oops", regs, error_code);
	bust_spinlocks(0);
	do_exit(SIGKILL);

	return;

	/*
	 * We ran out of memory, or some other thing happened to us that made
	 * us unable to handle the page fault gracefully.
	 */

out_of_memory:
	mmap_read_unlock(mm);
	if (!user_mode(regs))
		goto no_context;
	pagefault_out_of_memory();
	return;

do_sigbus:
	mmap_read_unlock(mm);

	/* Kernel mode? Handle exceptions or die */
	if (!user_mode(regs))
		goto no_context;

	/* Send a SIGBUS */
	tsk->thread.address = addr;
	tsk->thread.error_code = error_code;
	tsk->thread.trap_no = entry;
	force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr);

	return;

vmalloc_fault:
	{
		/*
		 * Synchronize this task's top level page-table
		 * with the 'reference' page table.
		 *
		 * Use current_pgd instead of tsk->active_mm->pgd
		 * since the latter might be unavailable if this
		 * code is executed in an inopportunely timed irq
		 * (e.g. inside schedule(), between switch_mm and
		 * switch_to...).
		 */

		unsigned int index = pgd_index(addr);
		pgd_t *pgd, *pgd_k;
		p4d_t *p4d, *p4d_k;
		pud_t *pud, *pud_k;
		pmd_t *pmd, *pmd_k;
		pte_t *pte_k;

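		/*
		 * Fetch the active top-level table from the L1 physical
		 * page table base register (L1_PPTB) rather than trusting
		 * tsk->active_mm, which, as the comment above notes, may
		 * be stale when we fault inside a context switch.
		 */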
		pgd = (pgd_t *) __va(__nds32__mfsr(NDS32_SR_L1_PPTB)) + index;
		pgd_k = init_mm.pgd + index;

		if (!pgd_present(*pgd_k))
			goto no_context;

		p4d = p4d_offset(pgd, addr);
		p4d_k = p4d_offset(pgd_k, addr);
		if (!p4d_present(*p4d_k))
			goto no_context;

		pud = pud_offset(p4d, addr);
		pud_k = pud_offset(p4d_k, addr);
		if (!pud_present(*pud_k))
			goto no_context;

		pmd = pmd_offset(pud, addr);
		pmd_k = pmd_offset(pud_k, addr);
		if (!pmd_present(*pmd_k))
			goto no_context;

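		/*
		 * Copy the missing kernel entry from the reference table;
		 * if one is already present it must agree with init_mm's,
		 * otherwise the page tables are corrupt.
		 */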
		if (!pmd_present(*pmd))
			set_pmd(pmd, *pmd_k);
		else
			BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));

		/*
		 * Since the vmalloc area is global, we don't
		 * need to copy individual PTEs, it is enough to
		 * copy the pgd pointer into the pte page of the
		 * root task. If that is there, we'll find our pte if
		 * it exists.
		 */

		/* Make sure the actual PTE exists as well to
		 * catch kernel vmalloc-area accesses to non-mapped
		 * addresses. If we don't do this, this will just
		 * silently loop forever.
		 */

		pte_k = pte_offset_kernel(pmd_k, addr);
		if (!pte_present(*pte_k))
			goto no_context;

		return;
	}
}
415 | } |