[linux-2.6-block.git] / arch / x86 / mm / hugetlbpage.c

/*
 * IA-32 Huge TLB Page Support for Kernel.
 *
 * Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/pagemap.h>
#include <linux/err.h>
#include <linux/sysctl.h>
#include <asm/mman.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
#include <asm/pgalloc.h>

/*
 * Check whether the pmd page that @svma uses for file page offset @idx may
 * be shared with @vma at @addr.
 *
 * Returns the address inside @svma that maps @idx when sharing is possible,
 * 0 otherwise.
 */
static unsigned long page_table_shareable(struct vm_area_struct *svma,
				struct vm_area_struct *vma,
				unsigned long addr, pgoff_t idx)
{
	/* Address at which svma maps page offset idx. */
	unsigned long peer_addr = svma->vm_start +
				((idx - svma->vm_pgoff) << PAGE_SHIFT);
	/* PUD-sized, PUD-aligned window around that address. */
	unsigned long pud_start = peer_addr & PUD_MASK;
	unsigned long pud_end = pud_start + PUD_SIZE;

	/* Both addresses must use the same slot within the pmd page. */
	if (pmd_index(addr) != pmd_index(peer_addr))
		return 0;

	/*
	 * Permissions must match; VM_LOCKED is masked out so segments can
	 * still share when only one of them is marked locked.
	 */
	if ((vma->vm_flags ^ svma->vm_flags) & ~VM_LOCKED)
		return 0;

	/* The whole PUD window has to lie inside svma. */
	if (pud_start < svma->vm_start)
		return 0;
	if (svma->vm_end < pud_end)
		return 0;

	return peer_addr;
}

/*
 * Returns 1 when @vma has VM_MAYSHARE set and fully covers the PUD-aligned,
 * PUD-sized range containing @addr; returns 0 otherwise.
 */
static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
{
	unsigned long pud_start = addr & PUD_MASK;
	unsigned long pud_end = pud_start + PUD_SIZE;

	/* Only mappings that may be shared qualify. */
	if (!(vma->vm_flags & VM_MAYSHARE))
		return 0;

	/* The vma must span the complete page-table-page range. */
	if (pud_start < vma->vm_start)
		return 0;
	if (vma->vm_end < pud_end)
		return 0;

	return 1;
}

/*
 * Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
 * and returns the corresponding pte. While this is not necessary for the
 * !shared pmd case because we can allocate the pmd later as well, it makes the
 * code much cleaner. pmd allocation is essential for the shared case because
 * pud has to be populated inside the same i_mmap_mutex section - otherwise
 * racing tasks could either miss the sharing (see huge_pte_offset) or select a
 * bad pmd for sharing.
 *
 * @mm:   address space in which the pmd is being set up
 * @addr: virtual address being mapped
 * @pud:  pud entry covering @addr in @mm
 *
 * Returns whatever pmd_alloc() returns for @addr, cast to pte_t *.
 */
static pte_t *
huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
{
	/*
	 * NOTE(review): the find_vma() result and vma->vm_file are dereferenced
	 * without a NULL check; presumably callers only get here for an
	 * existing file-backed hugetlb vma - confirm.
	 */
	struct vm_area_struct *vma = find_vma(mm, addr);
	struct address_space *mapping = vma->vm_file->f_mapping;
	/* File page offset that corresponds to addr within vma. */
	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
			vma->vm_pgoff;
	struct vm_area_struct *svma;
	unsigned long saddr;
	pte_t *spte = NULL;
	pte_t *pte;

	/* Not a sharing candidate: plain allocation, i_mmap_mutex is not taken. */
	if (!vma_shareable(vma, addr))
		return (pte_t *)pmd_alloc(mm, pud, addr);

	mutex_lock(&mapping->i_mmap_mutex);
	/* Walk every other vma of this file that maps page offset idx. */
	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
		if (svma == vma)
			continue;

		saddr = page_table_shareable(svma, vma, addr, idx);
		if (saddr) {
			spte = huge_pte_offset(svma->vm_mm, saddr);
			if (spte) {
				/* Take a reference on the page holding spte. */
				get_page(virt_to_page(spte));
				break;
			}
		}
	}

	/* No candidate found: fall through to a regular pmd_alloc(). */
	if (!spte)
		goto out;

	spin_lock(&mm->page_table_lock);
	if (pud_none(*pud))
		/* Install the page-aligned base of the found pmd page in our pud. */
		pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
	else
		/* pud is no longer empty: drop the reference taken above. */
		put_page(virt_to_page(spte));
	spin_unlock(&mm->page_table_lock);
out:
	/* Performed while i_mmap_mutex is still held (see the comment above). */
	pte = (pte_t *)pmd_alloc(mm, pud, addr);
	mutex_unlock(&mapping->i_mmap_mutex);
	return pte;
}

/*
 * Unmap a huge page that is backed by a shared pte (pmd) page.
 *
 * The hugetlb pte page is reference counted when it is mapped. A page count
 * above one means the page is shared; in that case the pud is cleared and
 * one reference is dropped. A count of exactly one means it is not shared
 * and nothing is done.
 *
 * Called with vma->vm_mm->page_table_lock held.
 *
 * On a successful unshare, *addr is moved to the last huge page of the
 * range covered by the pte page.
 *
 * Returns: 1 if a shared pte page was unmapped,
 *	    0 if the pte page is not shared, or this is its last user.
 */
int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
{
	/* Address range covered by one pte page full of huge pages. */
	const unsigned long span = HPAGE_SIZE * PTRS_PER_PTE;
	struct page *pt_page = virt_to_page(ptep);
	pgd_t *pgd = pgd_offset(mm, *addr);
	pud_t *pud = pud_offset(pgd, *addr);

	BUG_ON(page_count(pt_page) == 0);

	/* Sole user: nothing to unshare. */
	if (page_count(pt_page) == 1)
		return 0;

	pud_clear(pud);
	put_page(pt_page);
	*addr = ALIGN(*addr, span) - HPAGE_SIZE;

	return 1;
}

pte_t *huge_pte_alloc(struct mm_struct *mm,
			unsigned long addr, unsigned long sz)
{
	pgd_t *pgd;
	pud_t *pud;
	pte_t *pte = NULL;

	pgd = pgd_offset(mm, addr);
	pud = pud_alloc(mm, pgd, addr);
	if (pud) {
		if (sz == PUD_SIZE) {
			pte = (pte_t *)pud;
		} else {
			BUG_ON(sz != PMD_SIZE);
			if (pud_none(*pud))
				pte = huge_pmd_share(mm, addr, pud);
			else
				pte = (pte_t *)pmd_alloc(mm, pud, addr);
		}
	}
	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));

	return pte;
}

/*
 * Look up the existing huge page table entry for @addr in @mm without
 * allocating anything.
 *
 * Returns the pud entry (as pte_t *) when it is a large mapping, the pmd
 * entry when the pud is present but not large, and NULL when the pgd or pud
 * is not present.
 */
pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
{
	pgd_t *pgd = pgd_offset(mm, addr);
	pud_t *pud;

	if (!pgd_present(*pgd))
		return NULL;

	pud = pud_offset(pgd, addr);
	if (!pud_present(*pud))
		return NULL;

	if (pud_large(*pud))
		return (pte_t *)pud;

	return (pte_t *)pmd_offset(pud, addr);
}

#if 0	/* This is just for testing */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	unsigned long start = address;
	int length = 1;
	int nr;
	struct page *page;
	struct vm_area_struct *vma;

	vma = find_vma(mm, addr);
	if (!vma || !is_vm_hugetlb_page(vma))
		return ERR_PTR(-EINVAL);

	pte = huge_pte_offset(mm, address);

	/* hugetlb should be locked, and hence, prefaulted */
	WARN_ON(!pte || pte_none(*pte));

	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];

	WARN_ON(!PageHead(page));

	return page;
}

/*
 * Test-only stub, compiled out by the enclosing "#if 0": ignores @pmd and
 * always returns 0.
 */
int pmd_huge(pmd_t pmd)
{
	return 0;
}

/*
 * Test-only stub, compiled out by the enclosing "#if 0": ignores @pud and
 * always returns 0.
 */
int pud_huge(pud_t pud)
{
	return 0;
}

/*
 * Test-only stub, compiled out by the enclosing "#if 0": ignores all of its
 * arguments and always returns NULL.
 */
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	return NULL;
}

#else

/*
 * Address-based huge page lookup. This variant does no lookup at all: it
 * ignores its arguments and unconditionally returns ERR_PTR(-EINVAL).
 */
struct page *
follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
{
	return ERR_PTR(-EINVAL);
}

/* Returns 1 if @pmd has the _PAGE_PSE bit set, 0 otherwise. */
int pmd_huge(pmd_t pmd)
{
	return (pmd_val(pmd) & _PAGE_PSE) != 0;
}

/* Returns 1 if @pud has the _PAGE_PSE bit set, 0 otherwise. */
int pud_huge(pud_t pud)
{
	return (pud_val(pud) & _PAGE_PSE) != 0;
}

/*
 * Return the struct page that backs @address inside the huge page mapped by
 * @pmd: the page given by pte_page() for the entry, advanced by the page
 * offset of @address within a PMD_SIZE region. Returns NULL when pte_page()
 * yields NULL. @mm and @write are not used.
 */
struct page *
follow_huge_pmd(struct mm_struct *mm, unsigned long address,
		pmd_t *pmd, int write)
{
	struct page *head = pte_page(*(pte_t *)pmd);

	if (!head)
		return NULL;

	return head + ((address & ~PMD_MASK) >> PAGE_SHIFT);
}

/*
 * Return the struct page that backs @address inside the huge page mapped by
 * @pud: the page given by pte_page() for the entry, advanced by the page
 * offset of @address within a PUD_SIZE region. Returns NULL when pte_page()
 * yields NULL. @mm and @write are not used.
 */
struct page *
follow_huge_pud(struct mm_struct *mm, unsigned long address,
		pud_t *pud, int write)
{
	struct page *head = pte_page(*(pte_t *)pud);

	if (!head)
		return NULL;

	return head + ((address & ~PUD_MASK) >> PAGE_SHIFT);
}

#endif

/* x86_64 also uses this file */

#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
/*
 * Bottom-up search of the current task's address space for a free range of
 * @len bytes that starts on a huge page boundary for @file's hstate.
 *
 * The scan starts at mm->free_area_cache when @len is larger than
 * mm->cached_hole_size, and at TASK_UNMAPPED_BASE otherwise. If it runs past
 * TASK_SIZE - @len without having started at TASK_UNMAPPED_BASE, it is
 * restarted once from TASK_UNMAPPED_BASE.
 *
 * The incoming @addr value is overwritten before it is read; @pgoff and
 * @flags are not used.
 *
 * Returns the start address of the range found (also recording its end in
 * mm->free_area_cache), or -ENOMEM if no range fits.
 */
static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
		unsigned long addr, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long start_addr;

	/* Pick the starting point of the scan from the cached hints. */
	if (len > mm->cached_hole_size) {
	        start_addr = mm->free_area_cache;
	} else {
	        start_addr = TASK_UNMAPPED_BASE;
	        mm->cached_hole_size = 0;
	}

full_search:
	addr = ALIGN(start_addr, huge_page_size(h));

	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
		/* At this point:  (!vma || addr < vma->vm_end). */
		if (TASK_SIZE - len < addr) {
			/*
			 * Start a new search - just in case we missed
			 * some holes.
			 */
			if (start_addr != TASK_UNMAPPED_BASE) {
				start_addr = TASK_UNMAPPED_BASE;
				mm->cached_hole_size = 0;
				goto full_search;
			}
			return -ENOMEM;
		}
		/* No vma above addr, or the gap below this vma is big enough. */
		if (!vma || addr + len <= vma->vm_start) {
			mm->free_area_cache = addr + len;
			return addr;
		}
		/* Remember the largest gap that was skipped as too small. */
		if (addr + mm->cached_hole_size < vma->vm_start)
		        mm->cached_hole_size = vma->vm_start - addr;
		/* Continue at the first huge page boundary at or after this vma. */
		addr = ALIGN(vma->vm_end, huge_page_size(h));
	}
}

/*
 * Top-down search of the current task's address space for a free range of
 * @len bytes that starts on a huge page boundary for @file's hstate.
 *
 * The scan starts just below mm->free_area_cache (clamped to mm->mmap_base)
 * and moves downwards below each vma that is in the way. If a scan that did
 * not start at mm->mmap_base fails, it is retried once from mm->mmap_base.
 * If that also fails, the bottom-up search is used as a fallback, after
 * which mm->free_area_cache is reset to mm->mmap_base and
 * mm->cached_hole_size to ~0UL.
 *
 * @addr0 is only passed through to the bottom-up fallback, as are @pgoff
 * and @flags.
 *
 * Returns the start address of the range found, or the result of the
 * bottom-up fallback.
 */
static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
		unsigned long addr0, unsigned long len,
		unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;
	struct vm_area_struct *vma;
	unsigned long base = mm->mmap_base;
	unsigned long addr = addr0;
	unsigned long largest_hole = mm->cached_hole_size;
	unsigned long start_addr;

	/* don't allow allocations above current base */
	if (mm->free_area_cache > base)
		mm->free_area_cache = base;

	/* A cached hole may be big enough: rescan from the top to find it. */
	if (len <= largest_hole) {
	        largest_hole = 0;
		mm->free_area_cache  = base;
	}
try_again:
	/* Remember where this pass started so that "fail" can tell a retry apart. */
	start_addr = mm->free_area_cache;

	/* make sure it can fit in the remaining address space */
	if (mm->free_area_cache < len)
		goto fail;

	/* either no address requested or can't fit in requested address hole */
	addr = (mm->free_area_cache - len) & huge_page_mask(h);
	do {
		/*
		 * Lookup failure means no vma is above this address,
		 * i.e. return with success:
		 */
		vma = find_vma(mm, addr);
		if (!vma)
			return addr;

		if (addr + len <= vma->vm_start) {
			/* remember the address as a hint for next time */
		        mm->cached_hole_size = largest_hole;
		        return (mm->free_area_cache = addr);
		} else if (mm->free_area_cache == vma->vm_end) {
			/* pull free_area_cache down to the first hole */
			mm->free_area_cache = vma->vm_start;
			mm->cached_hole_size = largest_hole;
		}

		/* remember the largest hole we saw so far */
		if (addr + largest_hole < vma->vm_start)
		        largest_hole = vma->vm_start - addr;

		/* try just below the current vma->vm_start */
		addr = (vma->vm_start - len) & huge_page_mask(h);
	} while (len <= vma->vm_start);

fail:
	/*
	 * if hint left us with no space for the requested
	 * mapping then try again:
	 */
	if (start_addr != base) {
		mm->free_area_cache = base;
		largest_hole = 0;
		goto try_again;
	}
	/*
	 * A failed mmap() very likely causes application failure,
	 * so fall back to the bottom-up function here. This scenario
	 * can happen with large stack limits and large mmap()
	 * allocations.
	 */
	mm->free_area_cache = TASK_UNMAPPED_BASE;
	mm->cached_hole_size = ~0UL;
	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
			len, pgoff, flags);

	/*
	 * Restore the topdown base:
	 */
	mm->free_area_cache = base;
	mm->cached_hole_size = ~0UL;

	return addr;
}

/*
 * Choose the address at which a hugetlb mapping of @len bytes is placed.
 *
 * @len must be a multiple of the huge page size of @file's hstate (-EINVAL
 * otherwise) and must not exceed TASK_SIZE (-ENOMEM otherwise).
 *
 * With MAP_FIXED, @addr is returned as is once prepare_hugepage_range()
 * accepts it, and -EINVAL is returned if it does not. Otherwise a non-zero
 * @addr is treated as a hint: it is rounded up to a huge page boundary and
 * used if the range is free and below TASK_SIZE. Failing that, the
 * bottom-up or top-down search is run, depending on whether
 * mm->get_unmapped_area is arch_get_unmapped_area.
 */
unsigned long
hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
		unsigned long len, unsigned long pgoff, unsigned long flags)
{
	struct hstate *h = hstate_file(file);
	struct mm_struct *mm = current->mm;

	if (len & ~huge_page_mask(h))
		return -EINVAL;
	if (len > TASK_SIZE)
		return -ENOMEM;

	if (flags & MAP_FIXED) {
		if (prepare_hugepage_range(file, addr, len))
			return -EINVAL;
		return addr;
	}

	if (addr) {
		struct vm_area_struct *next;

		/* Try the caller's hint, rounded up to a huge page boundary. */
		addr = ALIGN(addr, huge_page_size(h));
		next = find_vma(mm, addr);
		if (TASK_SIZE - len >= addr &&
		    (!next || addr + len <= next->vm_start))
			return addr;
	}

	/* The hint was absent or unusable: search in the mm's layout direction. */
	if (mm->get_unmapped_area != arch_get_unmapped_area)
		return hugetlb_get_unmapped_area_topdown(file, addr, len,
				pgoff, flags);

	return hugetlb_get_unmapped_area_bottomup(file, addr, len,
			pgoff, flags);
}

#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/

#ifdef CONFIG_X86_64
/*
 * Handler for the "hugepagesz=" boot parameter.
 *
 * Registers a huge page state for the requested size when it equals
 * PMD_SIZE, or PUD_SIZE and cpu_has_gbpages is set. Any other size is
 * reported with an error message.
 *
 * Returns 1 when the size was accepted, 0 otherwise.
 */
static __init int setup_hugepagesz(char *opt)
{
	unsigned long ps = memparse(opt, &opt);
	unsigned int order;

	if (ps == PMD_SIZE) {
		order = PMD_SHIFT - PAGE_SHIFT;
	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
		order = PUD_SHIFT - PAGE_SHIFT;
	} else {
		printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
			ps >> 20);
		return 0;
	}

	hugetlb_add_hstate(order);
	return 1;
}
__setup("hugepagesz=", setup_hugepagesz);
#endif
Commit	Line	Data
1da177e4 LT	1	/*
	2	* IA-32 Huge TLB Page Support for Kernel.
	3	*
	4	* Copyright (C) 2002, Rohit Seth <rohit.seth@intel.com>
	5	*/
	6
1da177e4 LT	7	#include <linux/init.h>
	8	#include <linux/fs.h>
	9	#include <linux/mm.h>
	10	#include <linux/hugetlb.h>
	11	#include <linux/pagemap.h>
1da177e4 LT	12	#include <linux/err.h>
	13	#include <linux/sysctl.h>
	14	#include <asm/mman.h>
	15	#include <asm/tlb.h>
	16	#include <asm/tlbflush.h>
a5a19c63	17	#include <asm/pgalloc.h>
1da177e4	18
39dde65c CK	19	static unsigned long page_table_shareable(struct vm_area_struct *svma,
	20	struct vm_area_struct *vma,
	21	unsigned long addr, pgoff_t idx)
	22	{
	23	unsigned long saddr = ((idx - svma->vm_pgoff) << PAGE_SHIFT) +
	24	svma->vm_start;
	25	unsigned long sbase = saddr & PUD_MASK;
	26	unsigned long s_end = sbase + PUD_SIZE;
	27
32b154c0 MG	28	/* Allow segments to share if only one is marked locked */
	29	unsigned long vm_flags = vma->vm_flags & ~VM_LOCKED;
	30	unsigned long svm_flags = svma->vm_flags & ~VM_LOCKED;
	31
39dde65c CK	32	/*
	33	* match the virtual addresses, permission and the alignment of the
	34	* page table page.
	35	*/
	36	if (pmd_index(addr) != pmd_index(saddr) \|\|
32b154c0	37	vm_flags != svm_flags \|\|
39dde65c CK	38	sbase < svma->vm_start \|\| svma->vm_end < s_end)
	39	return 0;
	40
	41	return saddr;
	42	}
	43
	44	static int vma_shareable(struct vm_area_struct *vma, unsigned long addr)
	45	{
	46	unsigned long base = addr & PUD_MASK;
	47	unsigned long end = base + PUD_SIZE;
	48
	49	/*
	50	* check on proper vm_flags and page table alignment
	51	*/
	52	if (vma->vm_flags & VM_MAYSHARE &&
	53	vma->vm_start <= base && end <= vma->vm_end)
	54	return 1;
	55	return 0;
	56	}
	57
	58	/*
eb48c071 MH	59	* Search for a shareable pmd page for hugetlb. In any case calls pmd_alloc()
	60	* and returns the corresponding pte. While this is not necessary for the
	61	* !shared pmd case because we can allocate the pmd later as well, it makes the
	62	* code much cleaner. pmd allocation is essential for the shared case because
	63	* pud has to be populated inside the same i_mmap_mutex section - otherwise
	64	* racing tasks could either miss the sharing (see huge_pte_offset) or select a
	65	* bad pmd for sharing.
39dde65c	66	*/
eb48c071 MH	67	static pte_t *
eb48c071 MH	68	huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
39dde65c CK	69	{
	70	struct vm_area_struct *vma = find_vma(mm, addr);
	71	struct address_space *mapping = vma->vm_file->f_mapping;
	72	pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
	73	vma->vm_pgoff;
39dde65c CK	74	struct vm_area_struct *svma;
	75	unsigned long saddr;
	76	pte_t *spte = NULL;
eb48c071	77	pte_t *pte;
39dde65c CK	78
39dde65c CK	79	if (!vma_shareable(vma, addr))
eb48c071	80	return (pte_t *)pmd_alloc(mm, pud, addr);
39dde65c	81
3d48ae45	82	mutex_lock(&mapping->i_mmap_mutex);
6b2dbba8	83	vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
39dde65c CK	84	if (svma == vma)
	85	continue;
	86
	87	saddr = page_table_shareable(svma, vma, addr, idx);
	88	if (saddr) {
	89	spte = huge_pte_offset(svma->vm_mm, saddr);
	90	if (spte) {
	91	get_page(virt_to_page(spte));
	92	break;
	93	}
	94	}
	95	}
	96
	97	if (!spte)
	98	goto out;
	99
	100	spin_lock(&mm->page_table_lock);
	101	if (pud_none(*pud))
a5a19c63	102	pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK));
39dde65c CK	103	else
	104	put_page(virt_to_page(spte));
	105	spin_unlock(&mm->page_table_lock);
	106	out:
eb48c071	107	pte = (pte_t *)pmd_alloc(mm, pud, addr);
3d48ae45	108	mutex_unlock(&mapping->i_mmap_mutex);
eb48c071	109	return pte;
39dde65c CK	110	}
	111
	112	/*
	113	* unmap huge page backed by shared pte.
	114	*
	115	* Hugetlb pte page is ref counted at the time of mapping. If pte is shared
	116	* indicated by page_count > 1, unmap is achieved by clearing pud and
	117	* decrementing the ref count. If count == 1, the pte page is not shared.
	118	*
	119	* called with vma->vm_mm->page_table_lock held.
	120	*
	121	* returns: 1 successfully unmapped a shared pte page
	122	* 0 the underlying pte page is not shared, or it is the last user
	123	*/
	124	int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep)
	125	{
	126	pgd_t *pgd = pgd_offset(mm, *addr);
	127	pud_t *pud = pud_offset(pgd, *addr);
	128
	129	BUG_ON(page_count(virt_to_page(ptep)) == 0);
	130	if (page_count(virt_to_page(ptep)) == 1)
	131	return 0;
	132
	133	pud_clear(pud);
	134	put_page(virt_to_page(ptep));
	135	*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
	136	return 1;
	137	}
	138
a5516438 AK	139	pte_t *huge_pte_alloc(struct mm_struct *mm,
a5516438 AK	140	unsigned long addr, unsigned long sz)
1da177e4 LT	141	{
	142	pgd_t *pgd;
	143	pud_t *pud;
7bf07f3d	144	pte_t *pte = NULL;
1da177e4 LT	145
	146	pgd = pgd_offset(mm, addr);
	147	pud = pud_alloc(mm, pgd, addr);
39dde65c	148	if (pud) {
39c11e6c AK	149	if (sz == PUD_SIZE) {
	150	pte = (pte_t *)pud;
	151	} else {
	152	BUG_ON(sz != PMD_SIZE);
	153	if (pud_none(*pud))
eb48c071 MH	154	pte = huge_pmd_share(mm, addr, pud);
	155	else
	156	pte = (pte_t *)pmd_alloc(mm, pud, addr);
39c11e6c	157	}
39dde65c	158	}
0e5c9f39	159	BUG_ON(pte && !pte_none(*pte) && !pte_huge(*pte));
7bf07f3d	160
7bf07f3d	161	return pte;
1da177e4 LT	162	}
1da177e4 LT	163
63551ae0	164	pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr)
1da177e4 LT	165	{
	166	pgd_t *pgd;
	167	pud_t *pud;
	168	pmd_t *pmd = NULL;
	169
	170	pgd = pgd_offset(mm, addr);
02b0ccef AL	171	if (pgd_present(*pgd)) {
02b0ccef AL	172	pud = pud_offset(pgd, addr);
39c11e6c AK	173	if (pud_present(*pud)) {
	174	if (pud_large(*pud))
	175	return (pte_t *)pud;
02b0ccef	176	pmd = pmd_offset(pud, addr);
39c11e6c	177	}
02b0ccef	178	}
1da177e4 LT	179	return (pte_t *) pmd;
	180	}
	181
1da177e4 LT	182	#if 0 /* This is just for testing */
	183	struct page *
	184	follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
	185	{
	186	unsigned long start = address;
	187	int length = 1;
	188	int nr;
	189	struct page *page;
	190	struct vm_area_struct *vma;
	191
	192	vma = find_vma(mm, addr);
	193	if (!vma \|\| !is_vm_hugetlb_page(vma))
	194	return ERR_PTR(-EINVAL);
	195
	196	pte = huge_pte_offset(mm, address);
	197
	198	/* hugetlb should be locked, and hence, prefaulted */
	199	WARN_ON(!pte \|\| pte_none(*pte));
	200
	201	page = &pte_page(*pte)[vpfn % (HPAGE_SIZE/PAGE_SIZE)];
	202
25e59881	203	WARN_ON(!PageHead(page));
1da177e4 LT	204
	205	return page;
	206	}
	207
	208	int pmd_huge(pmd_t pmd)
	209	{
	210	return 0;
	211	}
	212
ceb86879 AK	213	int pud_huge(pud_t pud)
	214	{
	215	return 0;
	216	}
	217
1da177e4 LT	218	struct page *
	219	follow_huge_pmd(struct mm_struct *mm, unsigned long address,
	220	pmd_t *pmd, int write)
	221	{
	222	return NULL;
	223	}
	224
	225	#else
	226
	227	struct page *
	228	follow_huge_addr(struct mm_struct *mm, unsigned long address, int write)
	229	{
	230	return ERR_PTR(-EINVAL);
	231	}
	232
	233	int pmd_huge(pmd_t pmd)
	234	{
	235	return !!(pmd_val(pmd) & _PAGE_PSE);
	236	}
	237
ceb86879 AK	238	int pud_huge(pud_t pud)
ceb86879 AK	239	{
39c11e6c	240	return !!(pud_val(pud) & _PAGE_PSE);
ceb86879 AK	241	}
ceb86879 AK	242
1da177e4 LT	243	struct page *
	244	follow_huge_pmd(struct mm_struct *mm, unsigned long address,
	245	pmd_t *pmd, int write)
	246	{
	247	struct page *page;
	248
	249	page = pte_page(*(pte_t *)pmd);
	250	if (page)
ceb86879	251	page += ((address & ~PMD_MASK) >> PAGE_SHIFT);
1da177e4 LT	252	return page;
1da177e4 LT	253	}
ceb86879 AK	254
	255	struct page *
	256	follow_huge_pud(struct mm_struct *mm, unsigned long address,
	257	pud_t *pud, int write)
	258	{
	259	struct page *page;
	260
	261	page = pte_page(*(pte_t *)pud);
	262	if (page)
	263	page += ((address & ~PUD_MASK) >> PAGE_SHIFT);
	264	return page;
	265	}
	266
1da177e4 LT	267	#endif
1da177e4 LT	268
1da177e4 LT	269	/* x86_64 also uses this file */
	270
	271	#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
	272	static unsigned long hugetlb_get_unmapped_area_bottomup(struct file *file,
	273	unsigned long addr, unsigned long len,
	274	unsigned long pgoff, unsigned long flags)
	275	{
39c11e6c	276	struct hstate *h = hstate_file(file);
1da177e4 LT	277	struct mm_struct *mm = current->mm;
	278	struct vm_area_struct *vma;
	279	unsigned long start_addr;
	280
1363c3cd WW	281	if (len > mm->cached_hole_size) {
	282	start_addr = mm->free_area_cache;
	283	} else {
	284	start_addr = TASK_UNMAPPED_BASE;
	285	mm->cached_hole_size = 0;
	286	}
1da177e4 LT	287
1da177e4 LT	288	full_search:
39c11e6c	289	addr = ALIGN(start_addr, huge_page_size(h));
1da177e4 LT	290
	291	for (vma = find_vma(mm, addr); ; vma = vma->vm_next) {
	292	/* At this point: (!vma \|\| addr < vma->vm_end). */
	293	if (TASK_SIZE - len < addr) {
	294	/*
	295	* Start a new search - just in case we missed
	296	* some holes.
	297	*/
	298	if (start_addr != TASK_UNMAPPED_BASE) {
	299	start_addr = TASK_UNMAPPED_BASE;
1363c3cd	300	mm->cached_hole_size = 0;
1da177e4 LT	301	goto full_search;
	302	}
	303	return -ENOMEM;
	304	}
	305	if (!vma \|\| addr + len <= vma->vm_start) {
	306	mm->free_area_cache = addr + len;
	307	return addr;
	308	}
1363c3cd WW	309	if (addr + mm->cached_hole_size < vma->vm_start)
1363c3cd WW	310	mm->cached_hole_size = vma->vm_start - addr;
39c11e6c	311	addr = ALIGN(vma->vm_end, huge_page_size(h));
1da177e4 LT	312	}
	313	}
	314
	315	static unsigned long hugetlb_get_unmapped_area_topdown(struct file *file,
	316	unsigned long addr0, unsigned long len,
	317	unsigned long pgoff, unsigned long flags)
	318	{
39c11e6c	319	struct hstate *h = hstate_file(file);
1da177e4	320	struct mm_struct *mm = current->mm;
b69add21	321	struct vm_area_struct *vma;
cbde83e2 XG	322	unsigned long base = mm->mmap_base;
cbde83e2 XG	323	unsigned long addr = addr0;
1363c3cd	324	unsigned long largest_hole = mm->cached_hole_size;
cbde83e2	325	unsigned long start_addr;
1da177e4 LT	326
	327	/* don't allow allocations above current base */
	328	if (mm->free_area_cache > base)
	329	mm->free_area_cache = base;
	330
1363c3cd WW	331	if (len <= largest_hole) {
	332	largest_hole = 0;
	333	mm->free_area_cache = base;
	334	}
1da177e4	335	try_again:
cbde83e2 XG	336	start_addr = mm->free_area_cache;
cbde83e2 XG	337
1da177e4 LT	338	/* make sure it can fit in the remaining address space */
	339	if (mm->free_area_cache < len)
	340	goto fail;
	341
0d2eb44f	342	/* either no address requested or can't fit in requested address hole */
39c11e6c	343	addr = (mm->free_area_cache - len) & huge_page_mask(h);
1da177e4 LT	344	do {
	345	/*
	346	* Lookup failure means no vma is above this address,
	347	* i.e. return with success:
	348	*/
55062d06	349	vma = find_vma(mm, addr);
097d5910	350	if (!vma)
1da177e4 LT	351	return addr;
1da177e4 LT	352
b69add21	353	if (addr + len <= vma->vm_start) {
1da177e4	354	/* remember the address as a hint for next time */
1363c3cd WW	355	mm->cached_hole_size = largest_hole;
1363c3cd WW	356	return (mm->free_area_cache = addr);
b69add21	357	} else if (mm->free_area_cache == vma->vm_end) {
1da177e4	358	/* pull free_area_cache down to the first hole */
b69add21 XG	359	mm->free_area_cache = vma->vm_start;
b69add21 XG	360	mm->cached_hole_size = largest_hole;
1363c3cd WW	361	}
	362
	363	/* remember the largest hole we saw so far */
	364	if (addr + largest_hole < vma->vm_start)
	365	largest_hole = vma->vm_start - addr;
1da177e4 LT	366
1da177e4 LT	367	/* try just below the current vma->vm_start */
39c11e6c	368	addr = (vma->vm_start - len) & huge_page_mask(h);
1da177e4 LT	369	} while (len <= vma->vm_start);
	370
	371	fail:
	372	/*
	373	* if hint left us with no space for the requested
	374	* mapping then try again:
	375	*/
cbde83e2	376	if (start_addr != base) {
1da177e4	377	mm->free_area_cache = base;
1363c3cd	378	largest_hole = 0;
1da177e4 LT	379	goto try_again;
	380	}
	381	/*
	382	* A failed mmap() very likely causes application failure,
	383	* so fall back to the bottom-up function here. This scenario
	384	* can happen with large stack limits and large mmap()
	385	* allocations.
	386	*/
	387	mm->free_area_cache = TASK_UNMAPPED_BASE;
1363c3cd	388	mm->cached_hole_size = ~0UL;
1da177e4 LT	389	addr = hugetlb_get_unmapped_area_bottomup(file, addr0,
	390	len, pgoff, flags);
	391
	392	/*
	393	* Restore the topdown base:
	394	*/
	395	mm->free_area_cache = base;
1363c3cd	396	mm->cached_hole_size = ~0UL;
1da177e4 LT	397
	398	return addr;
	399	}
	400
	401	unsigned long
	402	hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
	403	unsigned long len, unsigned long pgoff, unsigned long flags)
	404	{
39c11e6c	405	struct hstate *h = hstate_file(file);
1da177e4 LT	406	struct mm_struct *mm = current->mm;
	407	struct vm_area_struct *vma;
	408
39c11e6c	409	if (len & ~huge_page_mask(h))
1da177e4 LT	410	return -EINVAL;
	411	if (len > TASK_SIZE)
	412	return -ENOMEM;
	413
5a8130f2	414	if (flags & MAP_FIXED) {
a5516438	415	if (prepare_hugepage_range(file, addr, len))
5a8130f2 BH	416	return -EINVAL;
	417	return addr;
	418	}
	419
1da177e4	420	if (addr) {
39c11e6c	421	addr = ALIGN(addr, huge_page_size(h));
1da177e4 LT	422	vma = find_vma(mm, addr);
	423	if (TASK_SIZE - len >= addr &&
	424	(!vma \|\| addr + len <= vma->vm_start))
	425	return addr;
	426	}
	427	if (mm->get_unmapped_area == arch_get_unmapped_area)
	428	return hugetlb_get_unmapped_area_bottomup(file, addr, len,
	429	pgoff, flags);
	430	else
	431	return hugetlb_get_unmapped_area_topdown(file, addr, len,
	432	pgoff, flags);
	433	}
	434
	435	#endif /*HAVE_ARCH_HUGETLB_UNMAPPED_AREA*/
	436
b4718e62 AK	437	#ifdef CONFIG_X86_64
	438	static __init int setup_hugepagesz(char *opt)
	439	{
	440	unsigned long ps = memparse(opt, &opt);
	441	if (ps == PMD_SIZE) {
	442	hugetlb_add_hstate(PMD_SHIFT - PAGE_SHIFT);
	443	} else if (ps == PUD_SIZE && cpu_has_gbpages) {
	444	hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT);
	445	} else {
	446	printk(KERN_ERR "hugepagesz: Unsupported page size %lu M\n",
	447	ps >> 20);
	448	return 0;
	449	}
	450	return 1;
	451	}
	452	__setup("hugepagesz=", setup_hugepagesz);
	453	#endif