/*
 * MMU fault handling support.
 *
 * Copyright (C) 1998-2002 Hewlett-Packard Co
 *	David Mosberger-Tang <davidm@hpl.hp.com>
 */
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp_lock.h>
#include <linux/interrupt.h>

#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/uaccess.h>

extern void die (char *, struct pt_regs *, long);
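/* die() is defined elsewhere in the ia64 arch code; it reports the oops state and terminates the current task. */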

/*
 * This routine is analogous to expand_stack() but instead grows the
 * register backing store (which grows towards higher addresses).
 * Since the register backing store is accessed sequentially, we
 * disallow growing the RBS by more than a page at a time.  Note that
 * the VM_GROWSUP flag can be set on any VM area but that's fine
 * because the total process size is still limited by RLIMIT_STACK and
 * RLIMIT_AS.
 */
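/* Called only from ia64_do_page_fault() below, with mm->mmap_sem held for reading. */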
static inline long
expand_backing_store (struct vm_area_struct *vma, unsigned long address)
{
	unsigned long grow;

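	/*
	 * grow is measured in pages: PAGE_SIZE >> PAGE_SHIFT evaluates to 1,
	 * matching the one-page-at-a-time growth policy described above.
	 */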
	grow = PAGE_SIZE >> PAGE_SHIFT;
	if (address - vma->vm_start > current->signal->rlim[RLIMIT_STACK].rlim_cur
	    || (((vma->vm_mm->total_vm + grow) << PAGE_SHIFT) > current->signal->rlim[RLIMIT_AS].rlim_cur))
		return -ENOMEM;
	vma->vm_end += PAGE_SIZE;
	vma->vm_mm->total_vm += grow;
	if (vma->vm_flags & VM_LOCKED)
		vma->vm_mm->locked_vm += grow;
	__vm_stat_account(vma->vm_mm, vma->vm_flags, vma->vm_file, grow);
	return 0;
}

/*
 * Return TRUE if ADDRESS points at a page in the kernel's mapped segment
 * (inside region 5, on ia64) and that page is present.
 */
static int
mapped_kernel_page_is_present (unsigned long address)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;
	pte_t *ptep, pte;

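	/*
	 * Walk the kernel page table top-down (pgd -> pud -> pmd -> pte),
	 * bailing out at the first level that is missing or corrupt.
	 */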
	pgd = pgd_offset_k(address);
	if (pgd_none(*pgd) || pgd_bad(*pgd))
		return 0;

	pud = pud_offset(pgd, address);
	if (pud_none(*pud) || pud_bad(*pud))
		return 0;

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd) || pmd_bad(*pmd))
		return 0;

	ptep = pte_offset_kernel(pmd, address);
	if (!ptep)
		return 0;

	pte = *ptep;
	return pte_present(pte);
}

void
ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *regs)
{
	int signal = SIGSEGV, code = SEGV_MAPERR;
	struct vm_area_struct *vma, *prev_vma;
	struct mm_struct *mm = current->mm;
	struct siginfo si;
	unsigned long mask;

	/*
	 * If we're in an interrupt or have no user context, we must not take the fault.
	 */
	if (in_atomic() || !mm)
		goto no_context;

#ifdef CONFIG_VIRTUAL_MEM_MAP
	/*
	 * If the fault is in region 5 and we are in the kernel, we may already
	 * hold the mmap_sem (the pfn_valid macro is called during mmap).  There
	 * is no vma for region 5 addresses anyway, so skip taking the semaphore
	 * and go directly to the exception handling code.
	 */

	if ((REGION_NUMBER(address) == 5) && !user_mode(regs))
		goto bad_area_no_up;
#endif

	down_read(&mm->mmap_sem);

	vma = find_vma_prev(mm, address, &prev_vma);
	if (!vma)
		goto bad_area;

	/* find_vma_prev() returns vma such that address < vma->vm_end or NULL */
	if (address < vma->vm_start)
		goto check_expansion;

  good_area:
	code = SEGV_ACCERR;

	/* OK, we've got a good vm_area for this memory area.  Check the access permissions: */

#	define VM_READ_BIT	0
#	define VM_WRITE_BIT	1
#	define VM_EXEC_BIT	2

#	if (((1 << VM_READ_BIT) != VM_READ || (1 << VM_WRITE_BIT) != VM_WRITE) \
	    || (1 << VM_EXEC_BIT) != VM_EXEC)
#		error File is out of sync with <linux/mm.h>.  Please update.
#	endif

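	/*
	 * isr encodes the faulting access: IA64_ISR_X_BIT is set for an
	 * execute fault, IA64_ISR_W_BIT for a write, IA64_ISR_R_BIT for a
	 * read.  Shift each into its VM_* bit position so the result can be
	 * compared against vma->vm_flags directly (e.g. a plain write fault
	 * yields mask == VM_WRITE).
	 */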
	mask = (  (((isr >> IA64_ISR_X_BIT) & 1UL) << VM_EXEC_BIT)
		| (((isr >> IA64_ISR_W_BIT) & 1UL) << VM_WRITE_BIT)
		| (((isr >> IA64_ISR_R_BIT) & 1UL) << VM_READ_BIT));

	if ((vma->vm_flags & mask) != mask)
		goto bad_area;

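	/* Re-entered from the out_of_memory path below after init (pid 1) has yielded. */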
  survive:
	/*
	 * If for any reason at all we couldn't handle the fault, make
	 * sure we exit gracefully rather than endlessly redo the
	 * fault.
	 */
	switch (handle_mm_fault(mm, vma, address, (mask & VM_WRITE) != 0)) {
	      case VM_FAULT_MINOR:
		++current->min_flt;
		break;
	      case VM_FAULT_MAJOR:
		++current->maj_flt;
		break;
	      case VM_FAULT_SIGBUS:
		/*
		 * We ran out of memory, or some other thing happened
		 * to us that made us unable to handle the page fault
		 * gracefully.
		 */
		signal = SIGBUS;
		goto bad_area;
	      case VM_FAULT_OOM:
		goto out_of_memory;
	      default:
		BUG();
	}
	up_read(&mm->mmap_sem);
	return;

  check_expansion:
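	/*
	 * The faulting address is below the vma found above.  It may be fixed
	 * up either by growing a VM_GROWSDOWN memory stack down to it, or by
	 * growing a VM_GROWSUP register backing store that currently ends
	 * exactly at it.
	 */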
	if (!(prev_vma && (prev_vma->vm_flags & VM_GROWSUP) && (address == prev_vma->vm_end))) {
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto bad_area;
		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
			goto bad_area;
		if (expand_stack(vma, address))
			goto bad_area;
	} else {
		vma = prev_vma;
		if (REGION_NUMBER(address) != REGION_NUMBER(vma->vm_start)
		    || REGION_OFFSET(address) >= RGN_MAP_LIMIT)
			goto bad_area;
		if (expand_backing_store(vma, address))
			goto bad_area;
	}
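	/* The vma now covers the faulting address; redo the permission check. */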
	goto good_area;

  bad_area:
	up_read(&mm->mmap_sem);
#ifdef CONFIG_VIRTUAL_MEM_MAP
  bad_area_no_up:
#endif
	if ((isr & IA64_ISR_SP)
	    || ((isr & IA64_ISR_NA) && (isr & IA64_ISR_CODE_MASK) == IA64_ISR_CODE_LFETCH))
	{
		/*
		 * This fault was due to a speculative load or lfetch.fault; set the "ed"
		 * bit in the psr to ensure forward progress.  (The target register will
		 * get a NaT for ld.s; the lfetch will be canceled.)
		 */
		ia64_psr(regs)->ed = 1;
		return;
	}
	if (user_mode(regs)) {
		si.si_signo = signal;
		si.si_errno = 0;
		si.si_code = code;
		si.si_addr = (void __user *) address;
		si.si_isr = isr;
		si.si_flags = __ISR_VALID;
		force_sig_info(signal, &si, current);
		return;
	}

  no_context:
	if (isr & IA64_ISR_SP) {
		/*
		 * This fault was due to a speculative load; set the "ed" bit in the psr to
		 * ensure forward progress (the target register will get a NaT).
		 */
		ia64_psr(regs)->ed = 1;
		return;
	}

	if (ia64_done_with_exception(regs))
		return;

	/*
	 * Since we have no vmas for region 5, we might get here even if the address is
	 * valid, due to the VHPT walker inserting a non-present translation that becomes
	 * stale.  If that happens, the non-present fault handler already purged the stale
	 * translation, which fixed the problem.  So, we check to see if the translation is
	 * valid, and return if it is.
	 */
	if (REGION_NUMBER(address) == 5 && mapped_kernel_page_is_present(address))
		return;

	/*
	 * Oops.  The kernel tried to access some bad page.  We'll have to terminate things
	 * with extreme prejudice.
	 */
	bust_spinlocks(1);

	if (address < PAGE_SIZE)
		printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference (address %016lx)\n", address);
	else
		printk(KERN_ALERT "Unable to handle kernel paging request at "
		       "virtual address %016lx\n", address);
	die("Oops", regs, isr);
	bust_spinlocks(0);
	do_exit(SIGKILL);
	return;

  out_of_memory:
	up_read(&mm->mmap_sem);
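	/* Never OOM-kill init: give up the CPU and retry the fault instead. */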
	if (current->pid == 1) {
		yield();
		down_read(&mm->mmap_sem);
		goto survive;
	}
	printk(KERN_CRIT "VM: killing process %s\n", current->comm);
	if (user_mode(regs))
		do_exit(SIGKILL);
	goto no_context;
}