Commit | Line | Data |
---|---|---|
588cb88c | 1 | // SPDX-License-Identifier: GPL-2.0-or-later |
07037db5 PD |
2 | /* |
3 | * Copyright (C) 2009 Sunplus Core Technology Co., Ltd. | |
4 | * Lennox Wu <lennox.wu@sunplusct.com> | |
5 | * Chen Liqin <liqin.chen@sunplusct.com> | |
6 | * Copyright (C) 2012 Regents of the University of California | |
07037db5 PD |
7 | */ |
8 | ||
9 | ||
10 | #include <linux/mm.h> | |
11 | #include <linux/kernel.h> | |
12 | #include <linux/interrupt.h> | |
13 | #include <linux/perf_event.h> | |
14 | #include <linux/signal.h> | |
15 | #include <linux/uaccess.h> | |
c22b0bcb | 16 | #include <linux/kprobes.h> |
47513f24 | 17 | #include <linux/kfence.h> |
07037db5 | 18 | |
07037db5 | 19 | #include <asm/ptrace.h> |
bf587caa | 20 | #include <asm/tlbflush.h> |
07037db5 | 21 | |
ffaee272 PW |
22 | #include "../kernel/head.h" |
23 | ||
21733cb5 EL |
24 | static void die_kernel_fault(const char *msg, unsigned long addr, |
25 | struct pt_regs *regs) | |
26 | { | |
27 | bust_spinlocks(1); | |
28 | ||
29 | pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg, | |
30 | addr); | |
31 | ||
32 | bust_spinlocks(0); | |
33 | die(regs, "Oops"); | |
0e25498f | 34 | make_task_dead(SIGKILL); |
21733cb5 EL |
35 | } |
36 | ||
cac4d1dc PE |
37 | static inline void no_context(struct pt_regs *regs, unsigned long addr) |
38 | { | |
21733cb5 EL |
39 | const char *msg; |
40 | ||
cac4d1dc PE |
41 | /* Are we prepared to handle this kernel fault? */ |
42 | if (fixup_exception(regs)) | |
43 | return; | |
44 | ||
45 | /* | |
46 | * Oops. The kernel tried to access some bad page. We'll have to | |
47 | * terminate things with extreme prejudice. | |
48 | */ | |
47513f24 LS |
49 | if (addr < PAGE_SIZE) |
50 | msg = "NULL pointer dereference"; | |
51 | else { | |
52 | if (kfence_handle_page_fault(addr, regs->cause == EXC_STORE_PAGE_FAULT, regs)) | |
53 | return; | |
54 | ||
55 | msg = "paging request"; | |
56 | } | |
57 | ||
21733cb5 | 58 | die_kernel_fault(msg, addr, regs); |
cac4d1dc PE |
59 | } |
60 | ||
6c11ffbf PE |
61 | static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault) |
62 | { | |
7a75f3d4 PE |
63 | if (fault & VM_FAULT_OOM) { |
64 | /* | |
65 | * We ran out of memory, call the OOM killer, and return the userspace | |
66 | * (which will retry the fault, or kill us if we got oom-killed). | |
67 | */ | |
68 | if (!user_mode(regs)) { | |
69 | no_context(regs, addr); | |
70 | return; | |
71 | } | |
72 | pagefault_out_of_memory(); | |
6c11ffbf | 73 | return; |
7a75f3d4 PE |
74 | } else if (fault & VM_FAULT_SIGBUS) { |
75 | /* Kernel mode? Handle exceptions or die */ | |
76 | if (!user_mode(regs)) { | |
77 | no_context(regs, addr); | |
78 | return; | |
79 | } | |
80 | do_trap(regs, SIGBUS, BUS_ADRERR, addr); | |
6c11ffbf PE |
81 | return; |
82 | } | |
7a75f3d4 | 83 | BUG(); |
6c11ffbf PE |
84 | } |
85 | ||
a51271d9 PE |
86 | static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr) |
87 | { | |
88 | /* | |
89 | * Something tried to access memory that isn't in our memory map. | |
90 | * Fix it, but check if it's kernel or user first. | |
91 | */ | |
92 | mmap_read_unlock(mm); | |
93 | /* User mode accesses just cause a SIGSEGV */ | |
94 | if (user_mode(regs)) { | |
95 | do_trap(regs, SIGSEGV, code, addr); | |
96 | return; | |
97 | } | |
98 | ||
99 | no_context(regs, addr); | |
100 | } | |
101 | ||
2baa6d95 | 102 | static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr) |
ac416a72 PE |
103 | { |
104 | pgd_t *pgd, *pgd_k; | |
7da9ca3f CH |
105 | pud_t *pud_k; |
106 | p4d_t *p4d_k; | |
107 | pmd_t *pmd_k; | |
ac416a72 PE |
108 | pte_t *pte_k; |
109 | int index; | |
bcacf5f6 | 110 | unsigned long pfn; |
ac416a72 PE |
111 | |
112 | /* User mode accesses just cause a SIGSEGV */ | |
113 | if (user_mode(regs)) | |
114 | return do_trap(regs, SIGSEGV, code, addr); | |
115 | ||
116 | /* | |
117 | * Synchronize this task's top level page-table | |
118 | * with the 'reference' page table. | |
119 | * | |
120 | * Do _not_ use "tsk->active_mm->pgd" here. | |
121 | * We might be inside an interrupt in the middle | |
122 | * of a task switch. | |
123 | */ | |
124 | index = pgd_index(addr); | |
bcacf5f6 LS |
125 | pfn = csr_read(CSR_SATP) & SATP_PPN; |
126 | pgd = (pgd_t *)pfn_to_virt(pfn) + index; | |
ac416a72 PE |
127 | pgd_k = init_mm.pgd + index; |
128 | ||
129 | if (!pgd_present(*pgd_k)) { | |
130 | no_context(regs, addr); | |
131 | return; | |
132 | } | |
133 | set_pgd(pgd, *pgd_k); | |
134 | ||
ac416a72 PE |
135 | p4d_k = p4d_offset(pgd_k, addr); |
136 | if (!p4d_present(*p4d_k)) { | |
137 | no_context(regs, addr); | |
138 | return; | |
139 | } | |
140 | ||
ac416a72 PE |
141 | pud_k = pud_offset(p4d_k, addr); |
142 | if (!pud_present(*pud_k)) { | |
143 | no_context(regs, addr); | |
144 | return; | |
145 | } | |
146 | ||
147 | /* | |
148 | * Since the vmalloc area is global, it is unnecessary | |
149 | * to copy individual PTEs | |
150 | */ | |
ac416a72 PE |
151 | pmd_k = pmd_offset(pud_k, addr); |
152 | if (!pmd_present(*pmd_k)) { | |
153 | no_context(regs, addr); | |
154 | return; | |
155 | } | |
ac416a72 PE |
156 | |
157 | /* | |
158 | * Make sure the actual PTE exists as well to | |
159 | * catch kernel vmalloc-area accesses to non-mapped | |
160 | * addresses. If we don't do this, this will just | |
161 | * silently loop forever. | |
162 | */ | |
163 | pte_k = pte_offset_kernel(pmd_k, addr); | |
164 | if (!pte_present(*pte_k)) { | |
165 | no_context(regs, addr); | |
166 | return; | |
167 | } | |
168 | ||
169 | /* | |
170 | * The kernel assumes that TLBs don't cache invalid | |
171 | * entries, but in RISC-V, SFENCE.VMA specifies an | |
172 | * ordering constraint, not a cache flush; it is | |
173 | * necessary even after writing invalid entries. | |
174 | */ | |
175 | local_flush_tlb_page(addr); | |
176 | } | |
177 | ||
afb8c6fe PE |
178 | static inline bool access_error(unsigned long cause, struct vm_area_struct *vma) |
179 | { | |
180 | switch (cause) { | |
181 | case EXC_INST_PAGE_FAULT: | |
182 | if (!(vma->vm_flags & VM_EXEC)) { | |
183 | return true; | |
184 | } | |
185 | break; | |
186 | case EXC_LOAD_PAGE_FAULT: | |
7ab72c59 AB |
187 | /* Write implies read */ |
188 | if (!(vma->vm_flags & (VM_READ | VM_WRITE))) { | |
afb8c6fe PE |
189 | return true; |
190 | } | |
191 | break; | |
192 | case EXC_STORE_PAGE_FAULT: | |
193 | if (!(vma->vm_flags & VM_WRITE)) { | |
194 | return true; | |
195 | } | |
196 | break; | |
197 | default: | |
198 | panic("%s: unhandled cause %lu", __func__, cause); | |
199 | } | |
200 | return false; | |
201 | } | |
202 | ||
07037db5 PD |
203 | /* |
204 | * This routine handles page faults. It determines the address and the | |
205 | * problem, and then passes it off to one of the appropriate routines. | |
206 | */ | |
207 | asmlinkage void do_page_fault(struct pt_regs *regs) | |
208 | { | |
209 | struct task_struct *tsk; | |
210 | struct vm_area_struct *vma; | |
211 | struct mm_struct *mm; | |
212 | unsigned long addr, cause; | |
dde16072 | 213 | unsigned int flags = FAULT_FLAG_DEFAULT; |
50a7ca3c SJ |
214 | int code = SEGV_MAPERR; |
215 | vm_fault_t fault; | |
07037db5 | 216 | |
a4c3733d CH |
217 | cause = regs->cause; |
218 | addr = regs->badaddr; | |
07037db5 PD |
219 | |
220 | tsk = current; | |
221 | mm = tsk->mm; | |
222 | ||
c22b0bcb GR |
223 | if (kprobe_page_fault(regs, cause)) |
224 | return; | |
225 | ||
07037db5 PD |
226 | /* |
227 | * Fault-in kernel-space virtual memory on-demand. | |
228 | * The 'reference' page table is init_mm.pgd. | |
229 | * | |
230 | * NOTE! We MUST NOT take any locks for this case. We may | |
231 | * be in an interrupt or a critical region, and should | |
232 | * only copy the information from the master page table, | |
233 | * nothing more. | |
234 | */ | |
7cc8c75b | 235 | if (unlikely((addr >= VMALLOC_START) && (addr < VMALLOC_END))) { |
ac416a72 PE |
236 | vmalloc_fault(regs, code, addr); |
237 | return; | |
238 | } | |
07037db5 | 239 | |
2bfc6cd8 AG |
240 | #ifdef CONFIG_64BIT |
241 | /* | |
242 | * Modules in 64bit kernels lie in their own virtual region which is not | |
243 | * in the vmalloc region, but dealing with page faults in this region | |
244 | * or the vmalloc region amounts to doing the same thing: checking that | |
245 | * the mapping exists in init_mm.pgd and updating user page table, so | |
246 | * just use vmalloc_fault. | |
247 | */ | |
248 | if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) { | |
249 | vmalloc_fault(regs, code, addr); | |
250 | return; | |
251 | } | |
252 | #endif | |
07037db5 | 253 | /* Enable interrupts if they were enabled in the parent context. */ |
a4c3733d | 254 | if (likely(regs->status & SR_PIE)) |
07037db5 PD |
255 | local_irq_enable(); |
256 | ||
257 | /* | |
258 | * If we're in an interrupt, have no user context, or are running | |
259 | * in an atomic region, then we must not take the fault. | |
260 | */ | |
cac4d1dc | 261 | if (unlikely(faulthandler_disabled() || !mm)) { |
74784081 | 262 | tsk->thread.bad_cause = cause; |
cac4d1dc PE |
263 | no_context(regs, addr); |
264 | return; | |
265 | } | |
07037db5 PD |
266 | |
267 | if (user_mode(regs)) | |
268 | flags |= FAULT_FLAG_USER; | |
269 | ||
21855cac EL |
270 | if (!user_mode(regs) && addr < TASK_SIZE && |
271 | unlikely(!(regs->status & SR_SUM))) | |
272 | die_kernel_fault("access to user memory without uaccess routines", | |
273 | addr, regs); | |
274 | ||
07037db5 PD |
275 | perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); |
276 | ||
67474301 PE |
277 | if (cause == EXC_STORE_PAGE_FAULT) |
278 | flags |= FAULT_FLAG_WRITE; | |
a960c132 PE |
279 | else if (cause == EXC_INST_PAGE_FAULT) |
280 | flags |= FAULT_FLAG_INSTRUCTION; | |
07037db5 | 281 | retry: |
d8ed45c5 | 282 | mmap_read_lock(mm); |
07037db5 | 283 | vma = find_vma(mm, addr); |
a51271d9 | 284 | if (unlikely(!vma)) { |
74784081 | 285 | tsk->thread.bad_cause = cause; |
a51271d9 PE |
286 | bad_area(regs, mm, code, addr); |
287 | return; | |
288 | } | |
07037db5 PD |
289 | if (likely(vma->vm_start <= addr)) |
290 | goto good_area; | |
a51271d9 | 291 | if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) { |
74784081 | 292 | tsk->thread.bad_cause = cause; |
a51271d9 PE |
293 | bad_area(regs, mm, code, addr); |
294 | return; | |
295 | } | |
296 | if (unlikely(expand_stack(vma, addr))) { | |
74784081 | 297 | tsk->thread.bad_cause = cause; |
a51271d9 PE |
298 | bad_area(regs, mm, code, addr); |
299 | return; | |
300 | } | |
07037db5 PD |
301 | |
302 | /* | |
303 | * Ok, we have a good vm_area for this memory access, so | |
304 | * we can handle it. | |
305 | */ | |
306 | good_area: | |
307 | code = SEGV_ACCERR; | |
308 | ||
afb8c6fe | 309 | if (unlikely(access_error(cause, vma))) { |
74784081 | 310 | tsk->thread.bad_cause = cause; |
afb8c6fe PE |
311 | bad_area(regs, mm, code, addr); |
312 | return; | |
07037db5 PD |
313 | } |
314 | ||
315 | /* | |
316 | * If for any reason at all we could not handle the fault, | |
317 | * make sure we exit gracefully rather than endlessly redo | |
318 | * the fault. | |
319 | */ | |
5ac365a4 | 320 | fault = handle_mm_fault(vma, addr, flags, regs); |
07037db5 PD |
321 | |
322 | /* | |
323 | * If we need to retry but a fatal signal is pending, handle the | |
c1e8d7c6 | 324 | * signal first. We do not need to release the mmap_lock because it |
07037db5 PD |
325 | * would already be released in __lock_page_or_retry in mm/filemap.c. |
326 | */ | |
4ef87322 | 327 | if (fault_signal_pending(fault, regs)) |
07037db5 PD |
328 | return; |
329 | ||
d9272525 PX |
330 | /* The fault is fully completed (including releasing mmap lock) */ |
331 | if (fault & VM_FAULT_COMPLETED) | |
332 | return; | |
333 | ||
36ef159f | 334 | if (unlikely(fault & VM_FAULT_RETRY)) { |
43632871 PE |
335 | flags |= FAULT_FLAG_TRIED; |
336 | ||
337 | /* | |
338 | * No need to mmap_read_unlock(mm) as we would | |
339 | * have already released it in __lock_page_or_retry | |
340 | * in mm/filemap.c. | |
341 | */ | |
342 | goto retry; | |
07037db5 PD |
343 | } |
344 | ||
d8ed45c5 | 345 | mmap_read_unlock(mm); |
bda281d5 PE |
346 | |
347 | if (unlikely(fault & VM_FAULT_ERROR)) { | |
74784081 | 348 | tsk->thread.bad_cause = cause; |
6c11ffbf | 349 | mm_fault_error(regs, addr, fault); |
cac4d1dc PE |
350 | return; |
351 | } | |
07037db5 | 352 | return; |
07037db5 | 353 | } |
2349a3b2 | 354 | NOKPROBE_SYMBOL(do_page_fault); |