powerpc/mm: Rework & cleanup page table freeing code path
[linux-2.6-block.git] arch/powerpc/mm/pgtable.c
/*
 * This file contains common routines for dealing with the freeing of page
 * tables, along with common page table handling code.
 *
 * Derived from arch/powerpc/mm/tlb_64.c:
 *  Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
 *
 * Modifications by Paul Mackerras (PowerMac) (paulus@cs.anu.edu.au)
 * and Cort Dougan (PReP) (cort@cs.nmt.edu)
 * Copyright (C) 1996 Paul Mackerras
 *
 * Derived from "arch/i386/mm/init.c"
 *  Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
 *
 * Dave Engebretsen <engebret@us.ibm.com>
 *	Rework for PPC64 port.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/init.h>
#include <linux/percpu.h>
#include <linux/hardirq.h>
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
#include <asm/tlb.h>

#ifdef CONFIG_SMP

/*
 * Handle batching of page table freeing on SMP. Page tables are
 * queued up and sent to be freed later by RCU in order to avoid
 * freeing a page table page that is being walked without locks
 */

static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
static unsigned long pte_freelist_forced_free;

struct pte_freelist_batch
{
	struct rcu_head rcu;
	unsigned int index;
	pgtable_free_t tables[0];
};

#define PTE_FREELIST_SIZE \
	((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \
	  / sizeof(pgtable_free_t))
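
/*
 * Sizing note (illustrative, assuming 4K pages and an 8-byte pgtable_free_t):
 * sizeof(struct pte_freelist_batch) is about 24 bytes, so PTE_FREELIST_SIZE
 * works out to roughly (4096 - 24) / 8 = 509 queued tables per batch page.
 */
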
static void pte_free_smp_sync(void *arg)
{
	/* Do nothing, just ensure we sync with all CPUs */
}

/* This is only called when we are critically out of memory
 * (and fail to get a page in pte_free_tlb).
 */
static void pgtable_free_now(pgtable_free_t pgf)
{
	pte_freelist_forced_free++;

	smp_call_function(pte_free_smp_sync, NULL, 1);

	pgtable_free(pgf);
}

static void pte_free_rcu_callback(struct rcu_head *head)
{
	struct pte_freelist_batch *batch =
		container_of(head, struct pte_freelist_batch, rcu);
	unsigned int i;

	for (i = 0; i < batch->index; i++)
		pgtable_free(batch->tables[i]);

	free_page((unsigned long)batch);
}

static void pte_free_submit(struct pte_freelist_batch *batch)
{
	INIT_RCU_HEAD(&batch->rcu);
	call_rcu(&batch->rcu, pte_free_rcu_callback);
}

void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf)
{
	/* This is safe since tlb_gather_mmu has disabled preemption */
	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);

	if (atomic_read(&tlb->mm->mm_users) < 2 ||
	    cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))) {
		pgtable_free(pgf);
		return;
	}

	if (*batchp == NULL) {
		*batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC);
		if (*batchp == NULL) {
			pgtable_free_now(pgf);
			return;
		}
		(*batchp)->index = 0;
	}
	(*batchp)->tables[(*batchp)->index++] = pgf;
	if ((*batchp)->index == PTE_FREELIST_SIZE) {
		pte_free_submit(*batchp);
		*batchp = NULL;
	}
}

void pte_free_finish(void)
{
	/* This is safe since tlb_gather_mmu has disabled preemption */
	struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);

	if (*batchp == NULL)
		return;
	pte_free_submit(*batchp);
	*batchp = NULL;
}
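
#if 0
/*
 * Illustrative sketch only (not part of this file): roughly how the two
 * entry points above are meant to be driven.  The real callers are the
 * page table freeing hooks in pgalloc.h and the arch TLB flush code; the
 * function names below are hypothetical.
 */
static void example_free_pte_page(struct mmu_gather *tlb, pgtable_free_t pgf)
{
	/* Queue the table page; it is freed via RCU once lockless walkers are done */
	pgtable_free_tlb(tlb, pgf);
}

static void example_tlb_flush_done(void)
{
	/* Once the TLB batch has been flushed, push any partial batch to RCU */
	pte_free_finish();
}
#endif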

#endif /* CONFIG_SMP */

/*
 * Handle i/d cache flushing, called from set_pte_at() or ptep_set_access_flags()
 */
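/*
 * Note: PG_arch_1 is used below as a per-page "icache is clean" marker;
 * once a page has been flushed we set the bit and skip the flush on
 * later calls for the same page.
 */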
static pte_t do_dcache_icache_coherency(pte_t pte)
{
	unsigned long pfn = pte_pfn(pte);
	struct page *page;

	if (unlikely(!pfn_valid(pfn)))
		return pte;
	page = pfn_to_page(pfn);

	if (!PageReserved(page) && !test_bit(PG_arch_1, &page->flags)) {
		pr_devel("do_dcache_icache_coherency... flushing\n");
		flush_dcache_icache_page(page);
		set_bit(PG_arch_1, &page->flags);
	} else
		pr_devel("do_dcache_icache_coherency... already clean\n");
	return __pte(pte_val(pte) | _PAGE_HWEXEC);
}

static inline int is_exec_fault(void)
{
	return current->thread.regs && TRAP(current->thread.regs) == 0x400;
}

/* We only try to do i/d cache coherency on PTEs that look reasonably
 * "normal". We currently require a PTE to be present and we avoid
 * _PAGE_SPECIAL and _PAGE_NO_CACHE
 */
static inline int pte_looks_normal(pte_t pte)
{
	return (pte_val(pte) &
	    (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE)) ==
	    (_PAGE_PRESENT);
}

#if defined(CONFIG_PPC_STD_MMU)
/* Server-style MMU handles coherency when hashing if HW exec permission
 * is supported per page (currently 64-bit only). Otherwise, we always
 * flush valid PTEs in set_pte.
 */
static inline int pte_need_exec_flush(pte_t pte, int set_pte)
{
	return set_pte && pte_looks_normal(pte) &&
		!(cpu_has_feature(CPU_FTR_COHERENT_ICACHE) ||
		  cpu_has_feature(CPU_FTR_NOEXECUTE));
}
#elif _PAGE_HWEXEC == 0
/* Embedded type MMU without HW exec support (8xx only so far), we flush
 * the cache for any present PTE
 */
static inline int pte_need_exec_flush(pte_t pte, int set_pte)
{
	return set_pte && pte_looks_normal(pte);
}
#else
/* Other embedded CPUs with HW exec support per-page, we flush on exec
 * fault if HWEXEC is not set
 */
static inline int pte_need_exec_flush(pte_t pte, int set_pte)
{
	return pte_looks_normal(pte) && is_exec_fault() &&
		!(pte_val(pte) & _PAGE_HWEXEC);
}
#endif
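
/*
 * Summary of the three cases above: hash-MMU CPUs flush at set_pte time
 * unless the icache is coherent or HW enforces execute permission;
 * embedded cores with no HW exec bit (8xx) also flush at set_pte time;
 * other embedded cores defer the flush to the first exec fault on a PTE
 * that doesn't yet have _PAGE_HWEXEC set.
 */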

/*
 * set_pte stores a linux PTE into the linux page table.
 */
void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
{
#ifdef CONFIG_DEBUG_VM
	WARN_ON(pte_present(*ptep));
#endif
	/* Note: mm->context.id might not yet have been assigned as
	 * this context might not have been activated yet when this
	 * is called.
	 */
	pte = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS);
	if (pte_need_exec_flush(pte, 1))
		pte = do_dcache_icache_coherency(pte);

	/* Perform the setting of the PTE */
	__set_pte_at(mm, addr, ptep, pte, 0);
}

/*
 * This is called when relaxing access to a PTE. It's also called in the page
 * fault path when we don't hit any of the major fault cases, ie, a minor
 * update of _PAGE_ACCESSED, _PAGE_DIRTY, etc... The generic code will have
 * handled those two for us; we additionally deal with missing execute
 * permission here on some processors.
 */
int ptep_set_access_flags(struct vm_area_struct *vma, unsigned long address,
			  pte_t *ptep, pte_t entry, int dirty)
{
	int changed;
	if (!dirty && pte_need_exec_flush(entry, 0))
		entry = do_dcache_icache_coherency(entry);
	changed = !pte_same(*(ptep), entry);
	if (changed) {
		if (!(vma->vm_flags & VM_HUGETLB))
			assert_pte_locked(vma->vm_mm, address);
		__ptep_set_access_flags(ptep, entry);
		flush_tlb_page_nohash(vma, address);
	}
	return changed;
}
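
#if 0
/*
 * Illustrative sketch only: roughly how the generic minor-fault path is
 * expected to use this hook (hypothetical function, not the actual
 * mm/memory.c code).
 */
static void example_minor_fault(struct vm_area_struct *vma, unsigned long addr,
				pte_t *ptep, int write_fault)
{
	pte_t entry = pte_mkyoung(*ptep);

	if (write_fault)
		entry = pte_mkdirty(entry);
	/* Returns non-zero if the PTE actually changed */
	if (ptep_set_access_flags(vma, addr, ptep, entry, write_fault))
		update_mmu_cache(vma, addr, entry);
}
#endif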

#ifdef CONFIG_DEBUG_VM
void assert_pte_locked(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd;
	pud_t *pud;
	pmd_t *pmd;

	if (mm == &init_mm)
		return;
	pgd = mm->pgd + pgd_index(addr);
	BUG_ON(pgd_none(*pgd));
	pud = pud_offset(pgd, addr);
	BUG_ON(pud_none(*pud));
	pmd = pmd_offset(pud, addr);
	BUG_ON(!pmd_present(*pmd));
	BUG_ON(!spin_is_locked(pte_lockptr(mm, pmd)));
}
#endif /* CONFIG_DEBUG_VM */