#ifndef _ASM_GENERIC_PGTABLE_H
#define _ASM_GENERIC_PGTABLE_H

#ifndef __ASSEMBLY__
#ifdef CONFIG_MMU

#ifndef __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
/*
 * Largely same as above, but only sets the access flags (dirty,
 * accessed, and writable). Furthermore, we know it always gets set
 * to a "more permissive" setting, which allows most architectures
 * to optimize this. We return whether the PTE actually changed, which
 * in turn instructs the caller to do things like update_mmu_cache.
 * This used to be done in the caller, but sparc needs minor faults to
 * force that call on sun4c so we changed this macro slightly.
 */
#define ptep_set_access_flags(__vma, __address, __ptep, __entry, __dirty) \
({ \
	int __changed = !pte_same(*(__ptep), __entry); \
	if (__changed) { \
		set_pte_at((__vma)->vm_mm, (__address), __ptep, __entry); \
		flush_tlb_page(__vma, __address); \
	} \
	__changed; \
})
#endif
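/*
 * Hypothetical usage sketch, not part of the original header: a fault
 * handler that already holds the page table lock might upgrade a PTE's
 * access flags like this. The function name and the pte_mkyoung()/
 * pte_mkdirty() combination are illustrative assumptions.
 */
static inline int example_upgrade_access_flags(struct vm_area_struct *vma,
					       unsigned long address,
					       pte_t *ptep)
{
	pte_t entry = pte_mkdirty(pte_mkyoung(*ptep));

	/* Non-zero means the PTE changed and the TLB entry was flushed. */
	return ptep_set_access_flags(vma, address, ptep, entry, 1);
}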

#ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young(__vma, __address, __ptep) \
({ \
	pte_t __pte = *(__ptep); \
	int r = 1; \
	if (!pte_young(__pte)) \
		r = 0; \
	else \
		set_pte_at((__vma)->vm_mm, (__address), \
			   (__ptep), pte_mkold(__pte)); \
	r; \
})
#endif

#ifndef __HAVE_ARCH_PTEP_CLEAR_YOUNG_FLUSH
#define ptep_clear_flush_young(__vma, __address, __ptep) \
({ \
	int __young; \
	__young = ptep_test_and_clear_young(__vma, __address, __ptep); \
	if (__young) \
		flush_tlb_page(__vma, __address); \
	__young; \
})
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR
#define ptep_get_and_clear(__mm, __address, __ptep) \
({ \
	pte_t __pte = *(__ptep); \
	pte_clear((__mm), (__address), (__ptep)); \
	__pte; \
})
#endif

#ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
#define ptep_get_and_clear_full(__mm, __address, __ptep, __full) \
({ \
	pte_t __pte; \
	__pte = ptep_get_and_clear((__mm), (__address), (__ptep)); \
	__pte; \
})
#endif

/*
 * Some architectures may be able to avoid expensive synchronization
 * primitives when modifications are made to PTEs which are already
 * not present, or in the process of an address space destruction.
 */
#ifndef __HAVE_ARCH_PTE_CLEAR_NOT_PRESENT_FULL
#define pte_clear_not_present_full(__mm, __address, __ptep, __full) \
do { \
	pte_clear((__mm), (__address), (__ptep)); \
} while (0)
#endif
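/*
 * Hypothetical sketch, not from the original file: during address space
 * teardown (__full != 0), a zap loop can use the cheaper helper for
 * entries that are not present. The function name and the pte_none()
 * short cut are illustrative assumptions.
 */
static inline void example_zap_pte(struct mm_struct *mm, unsigned long addr,
				   pte_t *ptep, int full)
{
	pte_t pte = *ptep;

	if (pte_none(pte)) {
		/* Nothing mapped: no TLB or cache work can be needed. */
		pte_clear_not_present_full(mm, addr, ptep, full);
		return;
	}
	/* Present entry: take the normal, fully synchronized path. */
	ptep_get_and_clear_full(mm, addr, ptep, full);
}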

#ifndef __HAVE_ARCH_PTEP_CLEAR_FLUSH
#define ptep_clear_flush(__vma, __address, __ptep) \
({ \
	pte_t __pte; \
	__pte = ptep_get_and_clear((__vma)->vm_mm, __address, __ptep); \
	flush_tlb_page(__vma, __address); \
	__pte; \
})
#endif

#ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT
struct mm_struct;
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long address, pte_t *ptep)
{
	pte_t old_pte = *ptep;
	set_pte_at(mm, address, ptep, pte_wrprotect(old_pte));
}
#endif

#ifndef __HAVE_ARCH_PTE_SAME
#define pte_same(A,B)	(pte_val(A) == pte_val(B))
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY
#define page_test_dirty(page)			(0)
#endif

#ifndef __HAVE_ARCH_PAGE_CLEAR_DIRTY
#define page_clear_dirty(page)			do { } while (0)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_DIRTY
#define pte_maybe_dirty(pte)			pte_dirty(pte)
#else
#define pte_maybe_dirty(pte)			(1)
#endif

#ifndef __HAVE_ARCH_PAGE_TEST_AND_CLEAR_YOUNG
#define page_test_and_clear_young(page)		(0)
#endif

#ifndef __HAVE_ARCH_PGD_OFFSET_GATE
#define pgd_offset_gate(mm, addr)		pgd_offset(mm, addr)
#endif

#ifndef __HAVE_ARCH_LAZY_MMU_PROT_UPDATE
#define lazy_mmu_prot_update(pte)		do { } while (0)
#endif

#ifndef __HAVE_ARCH_MOVE_PTE
#define move_pte(pte, prot, old_addr, new_addr)	(pte)
#endif

/*
 * When walking page tables, get the address of the next boundary,
 * or the end address of the range if that comes earlier. Although no
 * vma end wraps to 0, rounded up __boundary may wrap to 0 throughout.
 */

#define pgd_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PGDIR_SIZE) & PGDIR_MASK; \
	(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})

#ifndef pud_addr_end
#define pud_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PUD_SIZE) & PUD_MASK; \
	(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
#endif

#ifndef pmd_addr_end
#define pmd_addr_end(addr, end) \
({	unsigned long __boundary = ((addr) + PMD_SIZE) & PMD_MASK; \
	(__boundary - 1 < (end) - 1) ? __boundary : (end); \
})
#endif
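/*
 * Hypothetical illustration, not part of the original header: splitting a
 * range into per-pgd chunks with pgd_addr_end(). The function name is an
 * assumption for the example; real walkers also descend into each chunk
 * (see the lookup sketch after pmd_none_or_clear_bad() below).
 */
static inline int example_count_pgd_chunks(unsigned long addr,
					   unsigned long end)
{
	unsigned long next;
	int n = 0;

	do {
		/* Next pgd boundary, or "end" if that comes first. */
		next = pgd_addr_end(addr, end);
		n++;
	} while (addr = next, addr != end);

	return n;
}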

/*
 * When walking page tables, we usually want to skip any p?d_none entries;
 * and any p?d_bad entries - reporting the error before resetting to none.
 * Do the tests inline, but report and clear the bad entry in mm/memory.c.
 */
void pgd_clear_bad(pgd_t *);
void pud_clear_bad(pud_t *);
void pmd_clear_bad(pmd_t *);

static inline int pgd_none_or_clear_bad(pgd_t *pgd)
{
	if (pgd_none(*pgd))
		return 1;
	if (unlikely(pgd_bad(*pgd))) {
		pgd_clear_bad(pgd);
		return 1;
	}
	return 0;
}

static inline int pud_none_or_clear_bad(pud_t *pud)
{
	if (pud_none(*pud))
		return 1;
	if (unlikely(pud_bad(*pud))) {
		pud_clear_bad(pud);
		return 1;
	}
	return 0;
}

static inline int pmd_none_or_clear_bad(pmd_t *pmd)
{
	if (pmd_none(*pmd))
		return 1;
	if (unlikely(pmd_bad(*pmd))) {
		pmd_clear_bad(pmd);
		return 1;
	}
	return 0;
}
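
/*
 * Hypothetical sketch, not from the original file: a follow_page()-style
 * descent that uses the helpers above to skip empty or corrupted levels.
 * The function name is an illustrative assumption; the caller is assumed
 * to hold the appropriate page table lock.
 */
static inline pte_t *example_lookup_pte(struct mm_struct *mm,
					unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;
	pmd_t *pmd;

	if (pgd_none_or_clear_bad(pgd))
		return NULL;
	pud = pud_offset(pgd, addr);
	if (pud_none_or_clear_bad(pud))
		return NULL;
	pmd = pmd_offset(pud, addr);
	if (pmd_none_or_clear_bad(pmd))
		return NULL;
	return pte_offset_map(pmd, addr);
}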
#endif /* CONFIG_MMU */

/*
 * A facility to provide lazy MMU batching. This allows PTE updates and
 * page invalidations to be delayed until a call to leave lazy MMU mode
 * is issued. Some architectures may benefit from doing this, and it is
 * beneficial for both shadow and direct mode hypervisors, which may batch
 * the PTE updates which happen during this window. Note that using this
 * interface requires that read hazards be removed from the code. A read
 * hazard could result in the direct mode hypervisor case, since the actual
 * write to the page tables may not yet have taken place, so reads through
 * a raw PTE pointer after it has been modified are not guaranteed to be
 * up to date. This mode can only be entered and left under the protection of
 * the page table locks for all page tables which may be modified. In the UP
 * case, this is required so that preemption is disabled, and in the SMP case,
 * it must synchronize the delayed page table writes properly on other CPUs.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_MMU_MODE
#define arch_enter_lazy_mmu_mode()	do {} while (0)
#define arch_leave_lazy_mmu_mode()	do {} while (0)
#define arch_flush_lazy_mmu_mode()	do {} while (0)
#endif
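/*
 * Hypothetical sketch, not part of the original header: batching a run of
 * contiguous PTE updates under the page table lock, so a hypervisor can
 * queue them and apply them in one go at arch_leave_lazy_mmu_mode(). The
 * function name and parameters are illustrative assumptions; the same PTE
 * value is reused for every page purely to keep the example short.
 */
static inline void example_set_pte_range(struct mm_struct *mm,
					 unsigned long addr,
					 pte_t *ptep, pte_t pte, int nr)
{
	int i;

	arch_enter_lazy_mmu_mode();
	for (i = 0; i < nr; i++, ptep++, addr += PAGE_SIZE)
		set_pte_at(mm, addr, ptep, pte);
	arch_leave_lazy_mmu_mode();
}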

/*
 * A facility to provide batching of the reload of page tables with the
 * actual context switch code for paravirtualized guests. By convention,
 * only one of the lazy modes (CPU, MMU) should be active at any given
 * time, entry should never be nested, and entry and exits should always
 * be paired. This is for sanity of maintaining and reasoning about the
 * kernel code.
 */
#ifndef __HAVE_ARCH_ENTER_LAZY_CPU_MODE
#define arch_enter_lazy_cpu_mode()	do {} while (0)
#define arch_leave_lazy_cpu_mode()	do {} while (0)
#define arch_flush_lazy_cpu_mode()	do {} while (0)
#endif
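/*
 * Hypothetical sketch, not from the original file: how a context switch
 * path might bracket its per-task state reloads so a paravirtualized guest
 * can submit them to the hypervisor as one batch. The function name is an
 * illustrative assumption; note the enter/leave pair is never nested and
 * no lazy MMU section is active at the same time.
 */
static inline void example_lazy_cpu_section(void)
{
	arch_enter_lazy_cpu_mode();
	/* ... reload page tables, segments and other per-task CPU state ... */
	arch_leave_lazy_cpu_mode();
}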

#endif /* !__ASSEMBLY__ */

#endif /* _ASM_GENERIC_PGTABLE_H */