mm: fault feedback #1
[linux-2.6-block.git] / arch / sh64 / mm / fault.c
CommitLineData
1da177e4
LT
1/*
2 * This file is subject to the terms and conditions of the GNU General Public
3 * License. See the file "COPYING" in the main directory of this archive
4 * for more details.
5 *
6 * arch/sh64/mm/fault.c
7 *
8 * Copyright (C) 2000, 2001 Paolo Alberelli
9 * Copyright (C) 2003 Richard Curnow (/proc/tlb, bug fixes)
10 * Copyright (C) 2003 Paul Mundt
11 *
12 */
13
14#include <linux/signal.h>
15#include <linux/rwsem.h>
16#include <linux/sched.h>
17#include <linux/kernel.h>
18#include <linux/errno.h>
19#include <linux/string.h>
20#include <linux/types.h>
21#include <linux/ptrace.h>
22#include <linux/mman.h>
23#include <linux/mm.h>
24#include <linux/smp.h>
1da177e4
LT
25#include <linux/interrupt.h>
26
27#include <asm/system.h>
28#include <asm/io.h>
29#include <asm/tlb.h>
30#include <asm/uaccess.h>
31#include <asm/pgalloc.h>
32#include <asm/mmu_context.h>
33#include <asm/registers.h> /* required by inline asm statements */
34
#if defined(CONFIG_SH64_PROC_TLB)
#include <linux/init.h>
#include <linux/proc_fs.h>

/*
 * Statistics reported through the "tlb" procfs entry.  Objects with static
 * storage duration start at zero, so no explicit initialisers are needed.
 */

/* Number of calls made to each TLB maintenance entry point in this file. */
static unsigned long long calls_to_update_mmu_cache;
static unsigned long long calls_to_flush_tlb_page;
static unsigned long long calls_to_flush_tlb_range;
static unsigned long long calls_to_flush_tlb_mm;
static unsigned long long calls_to_flush_tlb_all;

/* Page fault counters; these two have external linkage. */
unsigned long long calls_to_do_slow_page_fault;
unsigned long long calls_to_do_fast_page_fault;

/* Histogram of the range sizes (in pages) passed to flush_tlb_range(). */
static unsigned long long flush_tlb_range_1;
static unsigned long long flush_tlb_range_2;
static unsigned long long flush_tlb_range_3_4;
static unsigned long long flush_tlb_range_5_7;
static unsigned long long flush_tlb_range_8_11;
static unsigned long long flush_tlb_range_12_15;
static unsigned long long flush_tlb_range_16_up;

/* Reported by the procfs entry; nothing in this file increments it. */
static unsigned long long page_not_present;

#endif
59
/* Defined elsewhere; called from do_page_fault() on an unhandled kernel fault. */
extern void die(const char *, struct pt_regs *, long);

/*
 * PFLAG(bits, name): evaluates to the stringified 'name' argument when
 * (bits & name) is non-zero, and to "" otherwise.
 *
 * PPROT(name): PFLAG applied to pgprot_val(prot); a variable called 'prot'
 * must be in scope where the macro is expanded.
 *
 * NOTE(review): PPROT hands its argument to PFLAG through an ordinary macro
 * parameter, so the argument is macro-expanded before PFLAG stringifies it.
 * If the _PAGE_* names are object-like macros the printed text is their
 * expansion rather than the flag name - confirm whether that is intended.
 */
#define PFLAG(bits, name)	((((bits) & (name))) ? #name : "")
#define PPROT(name)		PFLAG(pgprot_val(prot), name)
64
65static inline void print_prots(pgprot_t prot)
66{
67 printk("prot is 0x%08lx\n",pgprot_val(prot));
68
69 printk("%s %s %s %s %s\n",PPROT(_PAGE_SHARED),PPROT(_PAGE_READ),
70 PPROT(_PAGE_EXECUTE),PPROT(_PAGE_WRITE),PPROT(_PAGE_USER));
71}
72
73static inline void print_vma(struct vm_area_struct *vma)
74{
75 printk("vma start 0x%08lx\n", vma->vm_start);
76 printk("vma end 0x%08lx\n", vma->vm_end);
77
78 print_prots(vma->vm_page_prot);
79 printk("vm_flags 0x%08lx\n", vma->vm_flags);
80}
81
82static inline void print_task(struct task_struct *tsk)
83{
84 printk("Task pid %d\n", tsk->pid);
85}
86
87static pte_t *lookup_pte(struct mm_struct *mm, unsigned long address)
88{
89 pgd_t *dir;
90 pmd_t *pmd;
91 pte_t *pte;
92 pte_t entry;
93
94 dir = pgd_offset(mm, address);
95 if (pgd_none(*dir)) {
96 return NULL;
97 }
98
99 pmd = pmd_offset(dir, address);
100 if (pmd_none(*pmd)) {
101 return NULL;
102 }
103
104 pte = pte_offset_kernel(pmd, address);
105 entry = *pte;
106
107 if (pte_none(entry)) {
108 return NULL;
109 }
110 if (!pte_present(entry)) {
111 return NULL;
112 }
113
114 return pte;
115}
116
117/*
118 * This routine handles page faults. It determines the address,
119 * and the problem, and then passes it off to one of the appropriate
120 * routines.
121 */
122asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long writeaccess,
123 unsigned long textaccess, unsigned long address)
124{
125 struct task_struct *tsk;
126 struct mm_struct *mm;
127 struct vm_area_struct * vma;
128 const struct exception_table_entry *fixup;
129 pte_t *pte;
130
131#if defined(CONFIG_SH64_PROC_TLB)
132 ++calls_to_do_slow_page_fault;
133#endif
134
135 /* SIM
136 * Note this is now called with interrupts still disabled
137 * This is to cope with being called for a missing IO port
0a354775 138 * address with interrupts disabled. This should be fixed as
1da177e4
LT
139 * soon as we have a better 'fast path' miss handler.
140 *
141 * Plus take care how you try and debug this stuff.
142 * For example, writing debug data to a port which you
143 * have just faulted on is not going to work.
144 */
145
146 tsk = current;
147 mm = tsk->mm;
148
149 /* Not an IO address, so reenable interrupts */
150 local_irq_enable();
151
152 /*
153 * If we're in an interrupt or have no user
154 * context, we must not take the fault..
155 */
6edaf68a 156 if (in_atomic() || !mm)
1da177e4
LT
157 goto no_context;
158
159 /* TLB misses upon some cache flushes get done under cli() */
160 down_read(&mm->mmap_sem);
161
162 vma = find_vma(mm, address);
163
164 if (!vma) {
165#ifdef DEBUG_FAULT
166 print_task(tsk);
167 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
168 __FUNCTION__,__LINE__,
169 address,regs->pc,textaccess,writeaccess);
170 show_regs(regs);
171#endif
172 goto bad_area;
173 }
174 if (vma->vm_start <= address) {
175 goto good_area;
176 }
177
178 if (!(vma->vm_flags & VM_GROWSDOWN)) {
179#ifdef DEBUG_FAULT
180 print_task(tsk);
181 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
182 __FUNCTION__,__LINE__,
183 address,regs->pc,textaccess,writeaccess);
184 show_regs(regs);
185
186 print_vma(vma);
187#endif
188 goto bad_area;
189 }
190 if (expand_stack(vma, address)) {
191#ifdef DEBUG_FAULT
192 print_task(tsk);
193 printk("%s:%d fault, address is 0x%08x PC %016Lx textaccess %d writeaccess %d\n",
194 __FUNCTION__,__LINE__,
195 address,regs->pc,textaccess,writeaccess);
196 show_regs(regs);
197#endif
198 goto bad_area;
199 }
200/*
201 * Ok, we have a good vm_area for this memory access, so
202 * we can handle it..
203 */
204good_area:
205 if (textaccess) {
206 if (!(vma->vm_flags & VM_EXEC))
207 goto bad_area;
208 } else {
209 if (writeaccess) {
210 if (!(vma->vm_flags & VM_WRITE))
211 goto bad_area;
212 } else {
213 if (!(vma->vm_flags & VM_READ))
214 goto bad_area;
215 }
216 }
217
218 /*
219 * If for any reason at all we couldn't handle the fault,
220 * make sure we exit gracefully rather than endlessly redo
221 * the fault.
222 */
223survive:
224 switch (handle_mm_fault(mm, vma, address, writeaccess)) {
6e346228 225 case VM_FAULT_MINOR:
1da177e4
LT
226 tsk->min_flt++;
227 break;
6e346228 228 case VM_FAULT_MAJOR:
1da177e4
LT
229 tsk->maj_flt++;
230 break;
6e346228 231 case VM_FAULT_SIGBUS:
1da177e4
LT
232 goto do_sigbus;
233 default:
234 goto out_of_memory;
235 }
236 /* If we get here, the page fault has been handled. Do the TLB refill
237 now from the newly-setup PTE, to avoid having to fault again right
238 away on the same instruction. */
239 pte = lookup_pte (mm, address);
240 if (!pte) {
241 /* From empirical evidence, we can get here, due to
242 !pte_present(pte). (e.g. if a swap-in occurs, and the page
243 is swapped back out again before the process that wanted it
244 gets rescheduled?) */
245 goto no_pte;
246 }
247
248 __do_tlb_refill(address, textaccess, pte);
249
250no_pte:
251
252 up_read(&mm->mmap_sem);
253 return;
254
255/*
256 * Something tried to access memory that isn't in our memory map..
257 * Fix it, but check if it's kernel or user first..
258 */
259bad_area:
260#ifdef DEBUG_FAULT
261 printk("fault:bad area\n");
262#endif
263 up_read(&mm->mmap_sem);
264
265 if (user_mode(regs)) {
266 static int count=0;
267 siginfo_t info;
268 if (count < 4) {
269 /* This is really to help debug faults when starting
270 * usermode, so only need a few */
271 count++;
272 printk("user mode bad_area address=%08lx pid=%d (%s) pc=%08lx\n",
273 address, current->pid, current->comm,
274 (unsigned long) regs->pc);
275#if 0
276 show_regs(regs);
277#endif
278 }
f400e198 279 if (is_init(tsk)) {
1da177e4
LT
280 panic("INIT had user mode bad_area\n");
281 }
282 tsk->thread.address = address;
283 tsk->thread.error_code = writeaccess;
284 info.si_signo = SIGSEGV;
285 info.si_errno = 0;
286 info.si_addr = (void *) address;
287 force_sig_info(SIGSEGV, &info, tsk);
288 return;
289 }
290
291no_context:
292#ifdef DEBUG_FAULT
293 printk("fault:No context\n");
294#endif
295 /* Are we prepared to handle this kernel fault? */
296 fixup = search_exception_tables(regs->pc);
297 if (fixup) {
298 regs->pc = fixup->fixup;
299 return;
300 }
301
302/*
303 * Oops. The kernel tried to access some bad page. We'll have to
304 * terminate things with extreme prejudice.
305 *
306 */
307 if (address < PAGE_SIZE)
308 printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference");
309 else
310 printk(KERN_ALERT "Unable to handle kernel paging request");
311 printk(" at virtual address %08lx\n", address);
312 printk(KERN_ALERT "pc = %08Lx%08Lx\n", regs->pc >> 32, regs->pc & 0xffffffff);
313 die("Oops", regs, writeaccess);
314 do_exit(SIGKILL);
315
316/*
317 * We ran out of memory, or some other thing happened to us that made
318 * us unable to handle the page fault gracefully.
319 */
320out_of_memory:
f400e198 321 if (is_init(current)) {
1da177e4
LT
322 panic("INIT out of memory\n");
323 yield();
324 goto survive;
325 }
326 printk("fault:Out of memory\n");
327 up_read(&mm->mmap_sem);
f400e198 328 if (is_init(current)) {
1da177e4
LT
329 yield();
330 down_read(&mm->mmap_sem);
331 goto survive;
332 }
333 printk("VM: killing process %s\n", tsk->comm);
334 if (user_mode(regs))
335 do_exit(SIGKILL);
336 goto no_context;
337
338do_sigbus:
339 printk("fault:Do sigbus\n");
340 up_read(&mm->mmap_sem);
341
342 /*
343 * Send a sigbus, regardless of whether we were in kernel
344 * or user mode.
345 */
346 tsk->thread.address = address;
347 tsk->thread.error_code = writeaccess;
348 tsk->thread.trap_no = 14;
349 force_sig(SIGBUS, tsk);
350
351 /* Kernel mode? Handle exceptions or die */
352 if (!user_mode(regs))
353 goto no_context;
354}
355
356
357void flush_tlb_all(void);
358
359void update_mmu_cache(struct vm_area_struct * vma,
360 unsigned long address, pte_t pte)
361{
362#if defined(CONFIG_SH64_PROC_TLB)
363 ++calls_to_update_mmu_cache;
364#endif
365
366 /*
367 * This appears to get called once for every pte entry that gets
368 * established => I don't think it's efficient to try refilling the
369 * TLBs with the pages - some may not get accessed even. Also, for
370 * executable pages, it is impossible to determine reliably here which
371 * TLB they should be mapped into (or both even).
372 *
373 * So, just do nothing here and handle faults on demand. In the
374 * TLBMISS handling case, the refill is now done anyway after the pte
375 * has been fixed up, so that deals with most useful cases.
376 */
377}
378
379static void __flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
380{
381 unsigned long long match, pteh=0, lpage;
382 unsigned long tlb;
383 struct mm_struct *mm;
384
385 mm = vma->vm_mm;
386
387 if (mm->context == NO_CONTEXT)
388 return;
389
390 /*
391 * Sign-extend based on neff.
392 */
393 lpage = (page & NEFF_SIGN) ? (page | NEFF_MASK) : page;
394 match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
395 match |= lpage;
396
397 /* Do ITLB : don't bother for pages in non-exectutable VMAs */
398 if (vma->vm_flags & VM_EXEC) {
399 for_each_itlb_entry(tlb) {
400 asm volatile ("getcfg %1, 0, %0"
401 : "=r" (pteh)
402 : "r" (tlb) );
403
404 if (pteh == match) {
405 __flush_tlb_slot(tlb);
406 break;
407 }
408
409 }
410 }
411
412 /* Do DTLB : any page could potentially be in here. */
413 for_each_dtlb_entry(tlb) {
414 asm volatile ("getcfg %1, 0, %0"
415 : "=r" (pteh)
416 : "r" (tlb) );
417
418 if (pteh == match) {
419 __flush_tlb_slot(tlb);
420 break;
421 }
422
423 }
424}
425
426void flush_tlb_page(struct vm_area_struct *vma, unsigned long page)
427{
428 unsigned long flags;
429
430#if defined(CONFIG_SH64_PROC_TLB)
431 ++calls_to_flush_tlb_page;
432#endif
433
434 if (vma->vm_mm) {
435 page &= PAGE_MASK;
436 local_irq_save(flags);
437 __flush_tlb_page(vma, page);
438 local_irq_restore(flags);
439 }
440}
441
442void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
443 unsigned long end)
444{
445 unsigned long flags;
446 unsigned long long match, pteh=0, pteh_epn, pteh_low;
447 unsigned long tlb;
448 struct mm_struct *mm;
449
450 mm = vma->vm_mm;
451
452#if defined(CONFIG_SH64_PROC_TLB)
453 ++calls_to_flush_tlb_range;
454
455 {
456 unsigned long size = (end - 1) - start;
457 size >>= 12; /* divide by PAGE_SIZE */
458 size++; /* end=start+4096 => 1 page */
459 switch (size) {
460 case 1 : flush_tlb_range_1++; break;
461 case 2 : flush_tlb_range_2++; break;
462 case 3 ... 4 : flush_tlb_range_3_4++; break;
463 case 5 ... 7 : flush_tlb_range_5_7++; break;
464 case 8 ... 11 : flush_tlb_range_8_11++; break;
465 case 12 ... 15 : flush_tlb_range_12_15++; break;
466 default : flush_tlb_range_16_up++; break;
467 }
468 }
469#endif
470
471 if (mm->context == NO_CONTEXT)
472 return;
473
474 local_irq_save(flags);
475
476 start &= PAGE_MASK;
477 end &= PAGE_MASK;
478
479 match = ((mm->context & MMU_CONTEXT_ASID_MASK) << PTEH_ASID_SHIFT) | PTEH_VALID;
480
481 /* Flush ITLB */
482 for_each_itlb_entry(tlb) {
483 asm volatile ("getcfg %1, 0, %0"
484 : "=r" (pteh)
485 : "r" (tlb) );
486
487 pteh_epn = pteh & PAGE_MASK;
488 pteh_low = pteh & ~PAGE_MASK;
489
490 if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
491 __flush_tlb_slot(tlb);
492 }
493
494 /* Flush DTLB */
495 for_each_dtlb_entry(tlb) {
496 asm volatile ("getcfg %1, 0, %0"
497 : "=r" (pteh)
498 : "r" (tlb) );
499
500 pteh_epn = pteh & PAGE_MASK;
501 pteh_low = pteh & ~PAGE_MASK;
502
503 if (pteh_low == match && pteh_epn >= start && pteh_epn <= end)
504 __flush_tlb_slot(tlb);
505 }
506
507 local_irq_restore(flags);
508}
509
510void flush_tlb_mm(struct mm_struct *mm)
511{
512 unsigned long flags;
513
514#if defined(CONFIG_SH64_PROC_TLB)
515 ++calls_to_flush_tlb_mm;
516#endif
517
518 if (mm->context == NO_CONTEXT)
519 return;
520
521 local_irq_save(flags);
522
523 mm->context=NO_CONTEXT;
524 if(mm==current->mm)
525 activate_context(mm);
526
527 local_irq_restore(flags);
528
529}
530
531void flush_tlb_all(void)
532{
533 /* Invalidate all, including shared pages, excluding fixed TLBs */
534
535 unsigned long flags, tlb;
536
537#if defined(CONFIG_SH64_PROC_TLB)
538 ++calls_to_flush_tlb_all;
539#endif
540
541 local_irq_save(flags);
542
543 /* Flush each ITLB entry */
544 for_each_itlb_entry(tlb) {
545 __flush_tlb_slot(tlb);
546 }
547
548 /* Flush each DTLB entry */
549 for_each_dtlb_entry(tlb) {
550 __flush_tlb_slot(tlb);
551 }
552
553 local_irq_restore(flags);
554}
555
/*
 * Flush kernel mappings in [start, end).  The bounds are currently ignored:
 * the whole TLB is flushed instead.
 */
void flush_tlb_kernel_range(unsigned long start, unsigned long end)
{
	/* FIXME: Optimize this later.. */
	flush_tlb_all();
}
561
562#if defined(CONFIG_SH64_PROC_TLB)
563/* Procfs interface to read the performance information */
564
565static int
566tlb_proc_info(char *buf, char **start, off_t fpos, int length, int *eof, void *data)
567{
568 int len=0;
569 len += sprintf(buf+len, "do_fast_page_fault called %12lld times\n", calls_to_do_fast_page_fault);
570 len += sprintf(buf+len, "do_slow_page_fault called %12lld times\n", calls_to_do_slow_page_fault);
571 len += sprintf(buf+len, "update_mmu_cache called %12lld times\n", calls_to_update_mmu_cache);
572 len += sprintf(buf+len, "flush_tlb_page called %12lld times\n", calls_to_flush_tlb_page);
573 len += sprintf(buf+len, "flush_tlb_range called %12lld times\n", calls_to_flush_tlb_range);
574 len += sprintf(buf+len, "flush_tlb_mm called %12lld times\n", calls_to_flush_tlb_mm);
575 len += sprintf(buf+len, "flush_tlb_all called %12lld times\n", calls_to_flush_tlb_all);
576 len += sprintf(buf+len, "flush_tlb_range_sizes\n"
577 " 1 : %12lld\n"
578 " 2 : %12lld\n"
579 " 3 - 4 : %12lld\n"
580 " 5 - 7 : %12lld\n"
581 " 8 - 11 : %12lld\n"
582 "12 - 15 : %12lld\n"
583 "16+ : %12lld\n",
584 flush_tlb_range_1, flush_tlb_range_2, flush_tlb_range_3_4,
585 flush_tlb_range_5_7, flush_tlb_range_8_11, flush_tlb_range_12_15,
586 flush_tlb_range_16_up);
587 len += sprintf(buf+len, "page not present %12lld times\n", page_not_present);
588 *eof = 1;
589 return len;
590}
591
592static int __init register_proc_tlb(void)
593{
594 create_proc_read_entry("tlb", 0, NULL, tlb_proc_info, NULL);
595 return 0;
596}
597
598__initcall(register_proc_tlb);
599
600#endif