[linux-2.6-block.git] / mm / mprotect.c

/*
 *  mm/mprotect.c
 *
 *  (C) Copyright 1994 Linus Torvalds
 *  (C) Copyright 2002 Christoph Hellwig
 *
 *  Address space accounting code	<alan@lxorguk.ukuu.org.uk>
 *  (C) Copyright 2002 Red Hat Inc, All Rights Reserved
 */

#include <linux/mm.h>
#include <linux/hugetlb.h>
#include <linux/shm.h>
#include <linux/mman.h>
#include <linux/fs.h>
#include <linux/highmem.h>
#include <linux/security.h>
#include <linux/mempolicy.h>
#include <linux/personality.h>
#include <linux/syscalls.h>
#include <linux/swap.h>
#include <linux/swapops.h>
#include <linux/mmu_notifier.h>
#include <linux/migrate.h>
#include <linux/perf_event.h>
#include <linux/ksm.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>

/*
 * Map and lock the pte table that covers @addr beneath @pmd.
 *
 * With !prot_numa the caller holds mmap_sem for write, so the pte table can
 * be mapped and locked directly.  A prot_numa update only holds mmap_sem for
 * read and can therefore race with a fault that finds the pmd temporarily
 * none and installs a transhuge pmd.  In that case the pmd is rechecked
 * under its own lock before the pte table is touched.
 *
 * Returns the mapped pte with *@ptl held, or NULL (with *@ptl untouched) when
 * the prot_numa recheck found a transhuge or empty pmd.
 */
static pte_t *lock_pte_protection(struct vm_area_struct *vma, pmd_t *pmd,
			unsigned long addr, int prot_numa, spinlock_t **ptl)
{
	struct mm_struct *mm = vma->vm_mm;
	spinlock_t *pmd_ptl;
	pte_t *ptep = NULL;

	if (!prot_numa)
		return pte_offset_map_lock(mm, pmd, addr, ptl);

	/* Recheck the pmd under its lock; only then is a pte table guaranteed */
	pmd_ptl = pmd_lock(mm, pmd);
	if (!unlikely(pmd_trans_huge(*pmd) || pmd_none(*pmd)))
		ptep = pte_offset_map_lock(mm, pmd, addr, ptl);
	spin_unlock(pmd_ptl);

	return ptep;
}

/*
 * Apply @newprot to the ptes mapping [@addr, @end) beneath @pmd.
 *
 * Present ptes are rewritten through the ptep_modify_prot_start/commit pair.
 * When CONFIG_MIGRATION is enabled, non-present entries that are write
 * migration entries are downgraded to read migration entries, preserving
 * the swap soft-dirty bit.
 *
 * @dirty_accountable: if set, a pte that is already dirty gets write
 *	permission back straight away, provided it is soft-dirty or the vma
 *	does not have VM_SOFTDIRTY set.
 * @prot_numa: if set, ptes with no normal page behind them and KSM pages
 *	are left alone.
 *
 * Returns the number of entries that were changed, or 0 if the pte table
 * could not be locked.
 */
static unsigned long change_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	unsigned long nr_updated = 0;
	spinlock_t *ptl;
	pte_t *ptep;

	ptep = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
	if (!ptep)
		return 0;

	arch_enter_lazy_mmu_mode();
	do {
		pte_t old = *ptep;

		if (pte_present(old)) {
			pte_t updated;

			/*
			 * Do not set up NUMA hinting faults on the zero page
			 * or on KSM pages (change_huge_pmd has a matching
			 * comment).
			 */
			if (prot_numa) {
				struct page *page = vm_normal_page(vma, addr, old);

				if (!page || PageKsm(page))
					continue;
			}

			updated = ptep_modify_prot_start(mm, addr, ptep);
			updated = pte_modify(updated, newprot);

			/* Known-dirty pages need not take a write fault */
			if (dirty_accountable && pte_dirty(updated)) {
				if (pte_soft_dirty(updated) ||
				    !(vma->vm_flags & VM_SOFTDIRTY))
					updated = pte_mkwrite(updated);
			}

			ptep_modify_prot_commit(mm, addr, ptep, updated);
			nr_updated++;
			continue;
		}

		if (IS_ENABLED(CONFIG_MIGRATION)) {
			swp_entry_t entry = pte_to_swp_entry(old);
			pte_t readonly;

			if (!is_write_migration_entry(entry))
				continue;

			/*
			 * Checking the protection here is awkward, so simply
			 * take the safe option and drop write access.
			 */
			make_migration_entry_read(&entry);
			readonly = swp_entry_to_pte(entry);
			if (pte_swp_soft_dirty(old))
				readonly = pte_swp_mksoft_dirty(readonly);
			set_pte_at(mm, addr, ptep, readonly);

			nr_updated++;
		}
	} while (ptep++, addr += PAGE_SIZE, addr != end);
	arch_leave_lazy_mmu_mode();

	/* ptep is one past the last entry visited; unmap via that last entry */
	pte_unmap_unlock(ptep - 1, ptl);

	return nr_updated;
}

/*
 * Apply @newprot to every populated pmd covering [@addr, @end) beneath @pud.
 *
 * Empty (or bad, after clearing) pmds are skipped.  A transparent huge pmd
 * that spans a full HPAGE_PMD_SIZE of the range is updated in place by
 * change_huge_pmd(); one that is only partially covered is split first and
 * then handled pte by pte.  Everything else goes through change_pte_range().
 *
 * The mmu notifier range is opened lazily at the first populated pmd and
 * closed once after the walk, so ranges with nothing mapped never notify.
 *
 * Returns the number of base pages whose protection was changed.  A huge pmd
 * contributes HPAGE_PMD_NR only when change_huge_pmd() reports exactly that
 * many entries updated.
 */
static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
		pud_t *pud, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pmd_t *pmd;
	struct mm_struct *mm = vma->vm_mm;
	unsigned long next;
	unsigned long pages = 0;
	unsigned long nr_huge_updates = 0;
	unsigned long mni_start = 0;
	/*
	 * Tracks whether invalidate_range_start has been issued.  This must
	 * not be inferred from mni_start being non-zero: addr may be 0, and
	 * then the start call would be repeated or never paired with the
	 * matching end call.
	 */
	int mni_active = 0;

	pmd = pmd_offset(pud, addr);
	do {
		next = pmd_addr_end(addr, end);
		if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
			continue;

		/* invoke the mmu notifier if the pmd is populated */
		if (!mni_active) {
			mni_active = 1;
			mni_start = addr;
			mmu_notifier_invalidate_range_start(mm, mni_start, end);
		}

		if (pmd_trans_huge(*pmd)) {
			if (next - addr != HPAGE_PMD_SIZE)
				split_huge_page_pmd(vma, addr, pmd);
			else {
				int nr_ptes = change_huge_pmd(vma, pmd, addr,
						newprot, prot_numa);

				if (nr_ptes) {
					if (nr_ptes == HPAGE_PMD_NR) {
						pages += HPAGE_PMD_NR;
						nr_huge_updates++;
					}

					/* huge pmd was handled */
					continue;
				}
			}
			/* fall through, the trans huge pmd just split */
		}
		pages += change_pte_range(vma, pmd, addr, next, newprot,
				 dirty_accountable, prot_numa);
	} while (pmd++, addr = next, addr != end);

	/* Close the notifier range exactly once if it was opened */
	if (mni_active)
		mmu_notifier_invalidate_range_end(mm, mni_start, end);

	if (nr_huge_updates)
		count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	return pages;
}

/*
 * Walk the puds covering [@addr, @end) beneath @pgd and hand every populated
 * one to change_pmd_range().  Puds that are none (or bad, in which case
 * they are cleared) are skipped.
 *
 * Returns the sum of the counts reported by change_pmd_range().
 */
static inline unsigned long change_pud_range(struct vm_area_struct *vma,
		pgd_t *pgd, unsigned long addr, unsigned long end,
		pgprot_t newprot, int dirty_accountable, int prot_numa)
{
	pud_t *pud = pud_offset(pgd, addr);
	unsigned long nr_updated = 0;
	unsigned long next;

	do {
		next = pud_addr_end(addr, end);
		if (!pud_none_or_clear_bad(pud))
			nr_updated += change_pmd_range(vma, pud, addr, next,
					newprot, dirty_accountable, prot_numa);
	} while (pud++, addr = next, addr != end);

	return nr_updated;
}

/*
 * Change the protection of the page-table entries mapping [@addr, @end) in
 * @vma to @newprot, starting the walk at the pgd level.
 *
 * The cache is flushed for the range up front and the mm is marked as having
 * a TLB flush pending for the duration of the walk.  The TLB itself is
 * flushed afterwards only if at least one entry was changed.
 *
 * @addr must be below @end (enforced with BUG_ON).
 *
 * Returns the number of pages whose entries were updated.
 */
static unsigned long change_protection_range(struct vm_area_struct *vma,
		unsigned long addr, unsigned long end, pgprot_t newprot,
		int dirty_accountable, int prot_numa)
{
	struct mm_struct *mm = vma->vm_mm;
	const unsigned long range_start = addr;
	unsigned long nr_updated = 0;
	unsigned long next;
	pgd_t *pgd;

	BUG_ON(addr >= end);
	pgd = pgd_offset(mm, addr);
	flush_cache_range(vma, addr, end);
	set_tlb_flush_pending(mm);
	do {
		next = pgd_addr_end(addr, end);
		if (!pgd_none_or_clear_bad(pgd))
			nr_updated += change_pud_range(vma, pgd, addr, next,
					newprot, dirty_accountable, prot_numa);
	} while (pgd++, addr = next, addr != end);

	/* Nothing modified means nothing stale in the TLB */
	if (nr_updated)
		flush_tlb_range(vma, range_start, end);
	clear_tlb_flush_pending(mm);

	return nr_updated;
}

/*
 * Change the protection of [@start, @end) in @vma to @newprot.
 *
 * hugetlb vmas are dispatched to hugetlb_change_protection(), which is not
 * passed @dirty_accountable or @prot_numa.  All other vmas go through the
 * regular page-table walk.
 *
 * Returns the count reported by whichever helper was used.
 */
unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
		       unsigned long end, pgprot_t newprot,
		       int dirty_accountable, int prot_numa)
{
	if (is_vm_hugetlb_page(vma))
		return hugetlb_change_protection(vma, start, end, newprot);

	return change_protection_range(vma, start, end, newprot,
				       dirty_accountable, prot_numa);
}

/*
 * Give [@start, @end) of @vma the flags @newflags and update its page-table
 * entries to match.
 *
 * The range is first offered to vma_merge(); if no merge happens, @vma is
 * split at @start and/or @end as needed so that it covers exactly the range.
 * On success *@pprev is the merged vma, or @vma itself when no merge took
 * place.
 *
 * Returns 0 on success (including the no-op case where the flags are already
 * @newflags), -ENOMEM if the extra commit charge is refused, or the error
 * from split_vma(), in which case any commit charge taken here is released.
 */
int
mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
	unsigned long start, unsigned long end, unsigned long newflags)
{
	struct mm_struct *mm = vma->vm_mm;
	const unsigned long oldflags = vma->vm_flags;
	long nrpages = (end - start) >> PAGE_SHIFT;
	unsigned long charged = 0;
	struct vm_area_struct *merged;
	int dirty_accountable;
	pgoff_t pgoff;
	int error;

	if (newflags == oldflags) {
		*pprev = vma;
		return 0;
	}

	/*
	 * Making a private mapping writable raises our commit charge.
	 * Without finer-grained accounting the charge cannot be dropped
	 * again if the mapping later becomes unwritable.  hugetlb mappings
	 * were already accounted even while read-only, so they are excluded.
	 */
	if ((newflags & VM_WRITE) &&
	    !(oldflags & (VM_ACCOUNT|VM_WRITE|VM_HUGETLB|
			  VM_SHARED|VM_NORESERVE))) {
		charged = nrpages;
		if (security_vm_enough_memory_mm(mm, charged))
			return -ENOMEM;
		newflags |= VM_ACCOUNT;
	}

	/* Prefer merging with a neighbouring vma over splitting this one */
	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	merged = vma_merge(mm, *pprev, start, end, newflags,
			vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	if (merged) {
		*pprev = merged;
		vma = merged;
	} else {
		*pprev = vma;

		if (start != vma->vm_start) {
			error = split_vma(mm, vma, start, 1);
			if (error)
				goto fail;
		}

		if (end != vma->vm_end) {
			error = split_vma(mm, vma, end, 0);
			if (error)
				goto fail;
		}
	}

	/*
	 * mmap_sem is held for write, which is what protects vm_flags and
	 * vm_page_prot while they are updated here.
	 */
	vma->vm_flags = newflags;
	dirty_accountable = vma_wants_writenotify(vma);
	vma_set_page_prot(vma);

	change_protection(vma, start, end, vma->vm_page_prot,
			  dirty_accountable, 0);

	/* Move the pages from the old flags' statistics to the new ones */
	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	perf_event_mmap(vma);
	return 0;

fail:
	vm_unacct_memory(charged);
	return error;
}

/*
 * mprotect system call: change the access protection of the address range
 * [start, start + len) of the calling process to @prot.
 *
 * @start must be page aligned; @len is rounded up to a page multiple.
 * PROT_GROWSDOWN / PROT_GROWSUP (mutually exclusive) extend the range to the
 * start / end of the vma found for @start, which must then carry the
 * matching VM_GROWSDOWN / VM_GROWSUP flag.
 *
 * Returns 0 on success (also when @len is 0), otherwise:
 *   -EINVAL  unaligned @start, both grow flags given, @prot rejected by
 *            arch_validate_prot(), or a grow flag used on a vma without the
 *            corresponding VM_GROWS* flag;
 *   -ENOMEM  the range wraps, or part of it is not covered by a vma;
 *   -EACCES  a requested permission is not allowed by the vma's VM_MAY* bits;
 *   or the error returned by security_file_mprotect() / mprotect_fixup().
 */
SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
		unsigned long, prot)
{
	unsigned long vm_flags, nstart, end, tmp, reqprot;
	struct vm_area_struct *vma, *prev;
	int error = -EINVAL;
	/* Pull the grow modifiers out of prot; they are not protection bits */
	const int grows = prot & (PROT_GROWSDOWN|PROT_GROWSUP);
	prot &= ~(PROT_GROWSDOWN|PROT_GROWSUP);
	if (grows == (PROT_GROWSDOWN|PROT_GROWSUP)) /* can't be both */
		return -EINVAL;

	if (start & ~PAGE_MASK)
		return -EINVAL;
	if (!len)
		return 0;
	len = PAGE_ALIGN(len);
	end = start + len;
	/* Catches address wrap-around of start + len */
	if (end <= start)
		return -ENOMEM;
	if (!arch_validate_prot(prot))
		return -EINVAL;

	/* Remember what was asked for before READ_IMPLIES_EXEC may widen it */
	reqprot = prot;
	/*
	 * Does the application expect PROT_READ to imply PROT_EXEC:
	 */
	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
		prot |= PROT_EXEC;

	vm_flags = calc_vm_prot_bits(prot);

	down_write(&current->mm->mmap_sem);

	vma = find_vma(current->mm, start);
	error = -ENOMEM;
	if (!vma)
		goto out;
	prev = vma->vm_prev;
	if (unlikely(grows & PROT_GROWSDOWN)) {
		/* The found vma must overlap the requested range */
		if (vma->vm_start >= end)
			goto out;
		/* Extend the range down to the start of this vma */
		start = vma->vm_start;
		error = -EINVAL;
		if (!(vma->vm_flags & VM_GROWSDOWN))
			goto out;
	} else {
		/* start itself must lie inside the found vma */
		if (vma->vm_start > start)
			goto out;
		if (unlikely(grows & PROT_GROWSUP)) {
			/* Extend the range up to the end of this vma */
			end = vma->vm_end;
			error = -EINVAL;
			if (!(vma->vm_flags & VM_GROWSUP))
				goto out;
		}
	}
	/*
	 * If the range begins part-way into vma, vma itself is passed to
	 * mprotect_fixup() as the previous vma.
	 */
	if (start > vma->vm_start)
		prev = vma;

	/* Process the range one vma at a time; every exit is via "out" */
	for (nstart = start ; ; ) {
		unsigned long newflags;

		/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

		/* Requested access bits plus all non-access flags of the vma */
		newflags = vm_flags;
		newflags |= (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

		/* newflags >> 4 shift VM_MAY% in place of VM_% */
		if ((newflags & ~(newflags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC)) {
			error = -EACCES;
			goto out;
		}

		error = security_file_mprotect(vma, reqprot, prot);
		if (error)
			goto out;

		/* Clamp this step to the end of the current vma or of the range */
		tmp = vma->vm_end;
		if (tmp > end)
			tmp = end;
		error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
		if (error)
			goto out;
		nstart = tmp;

		/*
		 * mprotect_fixup() leaves the vma it ended up with (possibly
		 * a merged one) in prev; skip past whatever it covers.
		 */
		if (nstart < prev->vm_end)
			nstart = prev->vm_end;
		/* Done: error is 0 from the successful mprotect_fixup() above */
		if (nstart >= end)
			goto out;

		/* The next vma must begin exactly at nstart, i.e. no hole */
		vma = prev->vm_next;
		if (!vma || vma->vm_start != nstart) {
			error = -ENOMEM;
			goto out;
		}
	}
out:
	up_write(&current->mm->mmap_sem);
	return error;
}
Commit	Line	Data
	1	/*
	2	* mm/mprotect.c
	3	*
	4	* (C) Copyright 1994 Linus Torvalds
	5	* (C) Copyright 2002 Christoph Hellwig
	6	*
	7	* Address space accounting code <alan@lxorguk.ukuu.org.uk>
	8	* (C) Copyright 2002 Red Hat Inc, All Rights Reserved
	9	*/
	10
	11	#include <linux/mm.h>
	12	#include <linux/hugetlb.h>
	13	#include <linux/shm.h>
	14	#include <linux/mman.h>
	15	#include <linux/fs.h>
	16	#include <linux/highmem.h>
	17	#include <linux/security.h>
	18	#include <linux/mempolicy.h>
	19	#include <linux/personality.h>
	20	#include <linux/syscalls.h>
	21	#include <linux/swap.h>
	22	#include <linux/swapops.h>
	23	#include <linux/mmu_notifier.h>
	24	#include <linux/migrate.h>
	25	#include <linux/perf_event.h>
	26	#include <linux/ksm.h>
	27	#include <asm/uaccess.h>
	28	#include <asm/pgtable.h>
	29	#include <asm/cacheflush.h>
	30	#include <asm/tlbflush.h>
	31
	32	/*
	33	* For a prot_numa update we only hold mmap_sem for read so there is a
	34	* potential race with faulting where a pmd was temporarily none. This
	35	* function checks for a transhuge pmd under the appropriate lock. It
	36	* returns a pte if it was successfully locked or NULL if it raced with
	37	* a transhuge insertion.
	38	*/
	39	static pte_t lock_pte_protection(struct vm_area_struct vma, pmd_t *pmd,
	40	unsigned long addr, int prot_numa, spinlock_t **ptl)
	41	{
	42	pte_t *pte;
	43	spinlock_t *pmdl;
	44
	45	/* !prot_numa is protected by mmap_sem held for write */
	46	if (!prot_numa)
	47	return pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
	48
	49	pmdl = pmd_lock(vma->vm_mm, pmd);
	50	if (unlikely(pmd_trans_huge(pmd) \|\| pmd_none(pmd))) {
	51	spin_unlock(pmdl);
	52	return NULL;
	53	}
	54
	55	pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, ptl);
	56	spin_unlock(pmdl);
	57	return pte;
	58	}
	59
	60	static unsigned long change_pte_range(struct vm_area_struct vma, pmd_t pmd,
	61	unsigned long addr, unsigned long end, pgprot_t newprot,
	62	int dirty_accountable, int prot_numa)
	63	{
	64	struct mm_struct *mm = vma->vm_mm;
	65	pte_t *pte, oldpte;
	66	spinlock_t *ptl;
	67	unsigned long pages = 0;
	68
	69	pte = lock_pte_protection(vma, pmd, addr, prot_numa, &ptl);
	70	if (!pte)
	71	return 0;
	72
	73	arch_enter_lazy_mmu_mode();
	74	do {
	75	oldpte = *pte;
	76	if (pte_present(oldpte)) {
	77	pte_t ptent;
	78
	79	/*
	80	* Avoid trapping faults against the zero or KSM
	81	* pages. See similar comment in change_huge_pmd.
	82	*/
	83	if (prot_numa) {
	84	struct page *page;
	85
	86	page = vm_normal_page(vma, addr, oldpte);
	87	if (!page \|\| PageKsm(page))
	88	continue;
	89	}
	90
	91	ptent = ptep_modify_prot_start(mm, addr, pte);
	92	ptent = pte_modify(ptent, newprot);
	93
	94	/* Avoid taking write faults for known dirty pages */
	95	if (dirty_accountable && pte_dirty(ptent) &&
	96	(pte_soft_dirty(ptent) \|\|
	97	!(vma->vm_flags & VM_SOFTDIRTY))) {
	98	ptent = pte_mkwrite(ptent);
	99	}
	100	ptep_modify_prot_commit(mm, addr, pte, ptent);
	101	pages++;
	102	} else if (IS_ENABLED(CONFIG_MIGRATION)) {
	103	swp_entry_t entry = pte_to_swp_entry(oldpte);
	104
	105	if (is_write_migration_entry(entry)) {
	106	pte_t newpte;
	107	/*
	108	* A protection check is difficult so
	109	* just be safe and disable write
	110	*/
	111	make_migration_entry_read(&entry);
	112	newpte = swp_entry_to_pte(entry);
	113	if (pte_swp_soft_dirty(oldpte))
	114	newpte = pte_swp_mksoft_dirty(newpte);
	115	set_pte_at(mm, addr, pte, newpte);
	116
	117	pages++;
	118	}
	119	}
	120	} while (pte++, addr += PAGE_SIZE, addr != end);
	121	arch_leave_lazy_mmu_mode();
	122	pte_unmap_unlock(pte - 1, ptl);
	123
	124	return pages;
	125	}
	126
	127	static inline unsigned long change_pmd_range(struct vm_area_struct *vma,
	128	pud_t *pud, unsigned long addr, unsigned long end,
	129	pgprot_t newprot, int dirty_accountable, int prot_numa)
	130	{
	131	pmd_t *pmd;
	132	struct mm_struct *mm = vma->vm_mm;
	133	unsigned long next;
	134	unsigned long pages = 0;
	135	unsigned long nr_huge_updates = 0;
	136	unsigned long mni_start = 0;
	137
	138	pmd = pmd_offset(pud, addr);
	139	do {
	140	unsigned long this_pages;
	141
	142	next = pmd_addr_end(addr, end);
	143	if (!pmd_trans_huge(*pmd) && pmd_none_or_clear_bad(pmd))
	144	continue;
	145
	146	/* invoke the mmu notifier if the pmd is populated */
	147	if (!mni_start) {
	148	mni_start = addr;
	149	mmu_notifier_invalidate_range_start(mm, mni_start, end);
	150	}
	151
	152	if (pmd_trans_huge(*pmd)) {
	153	if (next - addr != HPAGE_PMD_SIZE)
	154	split_huge_page_pmd(vma, addr, pmd);
	155	else {
	156	int nr_ptes = change_huge_pmd(vma, pmd, addr,
	157	newprot, prot_numa);
	158
	159	if (nr_ptes) {
	160	if (nr_ptes == HPAGE_PMD_NR) {
	161	pages += HPAGE_PMD_NR;
	162	nr_huge_updates++;
	163	}
	164
	165	/* huge pmd was handled */
	166	continue;
	167	}
	168	}
	169	/* fall through, the trans huge pmd just split */
	170	}
	171	this_pages = change_pte_range(vma, pmd, addr, next, newprot,
	172	dirty_accountable, prot_numa);
	173	pages += this_pages;
	174	} while (pmd++, addr = next, addr != end);
	175
	176	if (mni_start)
	177	mmu_notifier_invalidate_range_end(mm, mni_start, end);
	178
	179	if (nr_huge_updates)
	180	count_vm_numa_events(NUMA_HUGE_PTE_UPDATES, nr_huge_updates);
	181	return pages;
	182	}
	183
	184	static inline unsigned long change_pud_range(struct vm_area_struct *vma,
	185	pgd_t *pgd, unsigned long addr, unsigned long end,
	186	pgprot_t newprot, int dirty_accountable, int prot_numa)
	187	{
	188	pud_t *pud;
	189	unsigned long next;
	190	unsigned long pages = 0;
	191
	192	pud = pud_offset(pgd, addr);
	193	do {
	194	next = pud_addr_end(addr, end);
	195	if (pud_none_or_clear_bad(pud))
	196	continue;
	197	pages += change_pmd_range(vma, pud, addr, next, newprot,
	198	dirty_accountable, prot_numa);
	199	} while (pud++, addr = next, addr != end);
	200
	201	return pages;
	202	}
	203
	204	static unsigned long change_protection_range(struct vm_area_struct *vma,
	205	unsigned long addr, unsigned long end, pgprot_t newprot,
	206	int dirty_accountable, int prot_numa)
	207	{
	208	struct mm_struct *mm = vma->vm_mm;
	209	pgd_t *pgd;
	210	unsigned long next;
	211	unsigned long start = addr;
	212	unsigned long pages = 0;
	213
	214	BUG_ON(addr >= end);
	215	pgd = pgd_offset(mm, addr);
	216	flush_cache_range(vma, addr, end);
	217	set_tlb_flush_pending(mm);
	218	do {
	219	next = pgd_addr_end(addr, end);
	220	if (pgd_none_or_clear_bad(pgd))
	221	continue;
	222	pages += change_pud_range(vma, pgd, addr, next, newprot,
	223	dirty_accountable, prot_numa);
	224	} while (pgd++, addr = next, addr != end);
	225
	226	/* Only flush the TLB if we actually modified any entries: */
	227	if (pages)
	228	flush_tlb_range(vma, start, end);
	229	clear_tlb_flush_pending(mm);
	230
	231	return pages;
	232	}
	233
	234	unsigned long change_protection(struct vm_area_struct *vma, unsigned long start,
	235	unsigned long end, pgprot_t newprot,
	236	int dirty_accountable, int prot_numa)
	237	{
	238	unsigned long pages;
	239
	240	if (is_vm_hugetlb_page(vma))
	241	pages = hugetlb_change_protection(vma, start, end, newprot);
	242	else
	243	pages = change_protection_range(vma, start, end, newprot, dirty_accountable, prot_numa);
	244
	245	return pages;
	246	}
	247
	248	int
	249	mprotect_fixup(struct vm_area_struct vma, struct vm_area_struct *pprev,
	250	unsigned long start, unsigned long end, unsigned long newflags)
	251	{
	252	struct mm_struct *mm = vma->vm_mm;
	253	unsigned long oldflags = vma->vm_flags;
	254	long nrpages = (end - start) >> PAGE_SHIFT;
	255	unsigned long charged = 0;
	256	pgoff_t pgoff;
	257	int error;
	258	int dirty_accountable = 0;
	259
	260	if (newflags == oldflags) {
	261	*pprev = vma;
	262	return 0;
	263	}
	264
	265	/*
	266	* If we make a private mapping writable we increase our commit;
	267	* but (without finer accounting) cannot reduce our commit if we
	268	* make it unwritable again. hugetlb mapping were accounted for
	269	* even if read-only so there is no need to account for them here
	270	*/
	271	if (newflags & VM_WRITE) {
	272	if (!(oldflags & (VM_ACCOUNT\|VM_WRITE\|VM_HUGETLB\|
	273	VM_SHARED\|VM_NORESERVE))) {
	274	charged = nrpages;
	275	if (security_vm_enough_memory_mm(mm, charged))
	276	return -ENOMEM;
	277	newflags \|= VM_ACCOUNT;
	278	}
	279	}
	280
	281	/*
	282	* First try to merge with previous and/or next vma.
	283	*/
	284	pgoff = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
	285	pprev = vma_merge(mm, pprev, start, end, newflags,
	286	vma->anon_vma, vma->vm_file, pgoff, vma_policy(vma));
	287	if (*pprev) {
	288	vma = *pprev;
	289	goto success;
	290	}
	291
	292	*pprev = vma;
	293
	294	if (start != vma->vm_start) {
	295	error = split_vma(mm, vma, start, 1);
	296	if (error)
	297	goto fail;
	298	}
	299
	300	if (end != vma->vm_end) {
	301	error = split_vma(mm, vma, end, 0);
	302	if (error)
	303	goto fail;
	304	}
	305
	306	success:
	307	/*
	308	* vm_flags and vm_page_prot are protected by the mmap_sem
	309	* held in write mode.
	310	*/
	311	vma->vm_flags = newflags;
	312	dirty_accountable = vma_wants_writenotify(vma);
	313	vma_set_page_prot(vma);
	314
	315	change_protection(vma, start, end, vma->vm_page_prot,
	316	dirty_accountable, 0);
	317
	318	vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
	319	vm_stat_account(mm, newflags, vma->vm_file, nrpages);
	320	perf_event_mmap(vma);
	321	return 0;
	322
	323	fail:
	324	vm_unacct_memory(charged);
	325	return error;
	326	}
	327
	328	SYSCALL_DEFINE3(mprotect, unsigned long, start, size_t, len,
	329	unsigned long, prot)
	330	{
	331	unsigned long vm_flags, nstart, end, tmp, reqprot;
	332	struct vm_area_struct vma, prev;
	333	int error = -EINVAL;
	334	const int grows = prot & (PROT_GROWSDOWN\|PROT_GROWSUP);
	335	prot &= ~(PROT_GROWSDOWN\|PROT_GROWSUP);
	336	if (grows == (PROT_GROWSDOWN\|PROT_GROWSUP)) /* can't be both */
	337	return -EINVAL;
	338
	339	if (start & ~PAGE_MASK)
	340	return -EINVAL;
	341	if (!len)
	342	return 0;
	343	len = PAGE_ALIGN(len);
	344	end = start + len;
	345	if (end <= start)
	346	return -ENOMEM;
	347	if (!arch_validate_prot(prot))
	348	return -EINVAL;
	349
	350	reqprot = prot;
	351	/*
	352	* Does the application expect PROT_READ to imply PROT_EXEC:
	353	*/
	354	if ((prot & PROT_READ) && (current->personality & READ_IMPLIES_EXEC))
	355	prot \|= PROT_EXEC;
	356
	357	vm_flags = calc_vm_prot_bits(prot);
	358
	359	down_write(&current->mm->mmap_sem);
	360
	361	vma = find_vma(current->mm, start);
	362	error = -ENOMEM;
	363	if (!vma)
	364	goto out;
	365	prev = vma->vm_prev;
	366	if (unlikely(grows & PROT_GROWSDOWN)) {
	367	if (vma->vm_start >= end)
	368	goto out;
	369	start = vma->vm_start;
	370	error = -EINVAL;
	371	if (!(vma->vm_flags & VM_GROWSDOWN))
	372	goto out;
	373	} else {
	374	if (vma->vm_start > start)
	375	goto out;
	376	if (unlikely(grows & PROT_GROWSUP)) {
	377	end = vma->vm_end;
	378	error = -EINVAL;
	379	if (!(vma->vm_flags & VM_GROWSUP))
	380	goto out;
	381	}
	382	}
	383	if (start > vma->vm_start)
	384	prev = vma;
	385
	386	for (nstart = start ; ; ) {
	387	unsigned long newflags;
	388
	389	/* Here we know that vma->vm_start <= nstart < vma->vm_end. */
	390
	391	newflags = vm_flags;
	392	newflags \|= (vma->vm_flags & ~(VM_READ \| VM_WRITE \| VM_EXEC));
	393
	394	/* newflags >> 4 shift VM_MAY% in place of VM_% */
	395	if ((newflags & ~(newflags >> 4)) & (VM_READ \| VM_WRITE \| VM_EXEC)) {
	396	error = -EACCES;
	397	goto out;
	398	}
	399
	400	error = security_file_mprotect(vma, reqprot, prot);
	401	if (error)
	402	goto out;
	403
	404	tmp = vma->vm_end;
	405	if (tmp > end)
	406	tmp = end;
	407	error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
	408	if (error)
	409	goto out;
	410	nstart = tmp;
	411
	412	if (nstart < prev->vm_end)
	413	nstart = prev->vm_end;
	414	if (nstart >= end)
	415	goto out;
	416
	417	vma = prev->vm_next;
	418	if (!vma \|\| vma->vm_start != nstart) {
	419	error = -ENOMEM;
	420	goto out;
	421	}
	422	}
	423	out:
	424	up_write(&current->mm->mmap_sem);
	425	return error;
	426	}