return -EEXIST;
}
+/*
+ * FOLL_FORCE can write to even unwritable pte's, but only
+ * after we've gone through a COW cycle and they are dirty.
+ */
+static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
+{
+ return pte_write(pte) ||
+ ((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
+}
+
static struct page *follow_page_pte(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmd, unsigned int flags)
{
}
if ((flags & FOLL_NUMA) && pte_protnone(pte))
goto no_page;
- if ((flags & FOLL_WRITE) && !pte_write(pte)) {
+ if ((flags & FOLL_WRITE) && !can_follow_write_pte(pte, flags)) {
pte_unmap_unlock(ptep, ptl);
return NULL;
}
* reCOWed by userspace write).
*/
if ((ret & VM_FAULT_WRITE) && !(vma->vm_flags & VM_WRITE))
- *flags &= ~FOLL_WRITE;
+ *flags |= FOLL_COW;
return 0;
}
* instead of __get_user_pages. __get_user_pages should be used only if
* you need some special @gup_flags.
*/
-long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+static long __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas, int *nonblocking)
} while (nr_pages);
return i;
}
-EXPORT_SYMBOL(__get_user_pages);
-bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags)
+static bool vma_permits_fault(struct vm_area_struct *vma,
+ unsigned int fault_flags)
{
bool write = !!(fault_flags & FAULT_FLAG_WRITE);
bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE);
struct mm_struct *mm,
unsigned long start,
unsigned long nr_pages,
- int write, int force,
struct page **pages,
struct vm_area_struct **vmas,
int *locked, bool notify_drop,
if (pages)
flags |= FOLL_GET;
- if (write)
- flags |= FOLL_WRITE;
- if (force)
- flags |= FOLL_FORCE;
pages_done = 0;
lock_dropped = false;
* up_read(&mm->mmap_sem);
*/
long get_user_pages_locked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
int *locked)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- write, force, pages, NULL, locked, true,
- FOLL_TOUCH);
+ pages, NULL, locked, true,
+ gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_locked);
/*
- * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows to
- * pass additional gup_flags as last parameter (like FOLL_HWPOISON).
+ * Same as get_user_pages_unlocked(...., FOLL_TOUCH) but it allows for
+ * tsk, mm to be specified.
*
* NOTE: here FOLL_TOUCH is not set implicitly and must be set by the
- * caller if required (just like with __get_user_pages). "FOLL_GET",
- * "FOLL_WRITE" and "FOLL_FORCE" are set implicitly as needed
- * according to the parameters "pages", "write", "force"
- * respectively.
+ * caller if required (just like with __get_user_pages). "FOLL_GET"
+ * is set implicitly if "pages" is non-NULL.
*/
-__always_inline long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
- unsigned int gup_flags)
+static __always_inline long __get_user_pages_unlocked(struct task_struct *tsk,
+ struct mm_struct *mm, unsigned long start,
+ unsigned long nr_pages, struct page **pages,
+ unsigned int gup_flags)
{
long ret;
int locked = 1;
+
down_read(&mm->mmap_sem);
- ret = __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
- pages, NULL, &locked, false, gup_flags);
+ ret = __get_user_pages_locked(tsk, mm, start, nr_pages, pages, NULL,
+ &locked, false, gup_flags);
if (locked)
up_read(&mm->mmap_sem);
return ret;
}
-EXPORT_SYMBOL(__get_user_pages_unlocked);
/*
* get_user_pages_unlocked() is suitable to replace the form:
* get_user_pages_unlocked(tsk, mm, ..., pages);
*
* It is functionally equivalent to get_user_pages_fast so
- * get_user_pages_fast should be used instead, if the two parameters
- * "tsk" and "mm" are respectively equal to current and current->mm,
- * or if "force" shall be set to 1 (get_user_pages_fast misses the
- * "force" parameter).
+ * get_user_pages_fast should be used instead if specific gup_flags
+ * (e.g. FOLL_FORCE) are not required.
*/
long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages)
+ struct page **pages, unsigned int gup_flags)
{
return __get_user_pages_unlocked(current, current->mm, start, nr_pages,
- write, force, pages, FOLL_TOUCH);
+ pages, gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages_unlocked);
* @mm: mm_struct of target mm
* @start: starting user address
* @nr_pages: number of pages from start to pin
- * @write: whether pages will be written to by the caller
- * @force: whether to force access even when user mapping is currently
- * protected (but never forces write access to shared mapping).
+ * @gup_flags: flags modifying lookup behaviour
* @pages: array that receives pointers to the pages pinned.
* Should be at least nr_pages long. Or NULL, if caller
* only intends to ensure the pages are faulted in.
* @vmas: array of pointers to vmas corresponding to each page.
* Or NULL if the caller does not require them.
+ * @locked: pointer to lock flag indicating whether lock is held and
+ * subsequently whether VM_FAULT_RETRY functionality can be
+ * utilised. Lock must initially be held.
*
* Returns number of pages pinned. This may be fewer than the number
* requested. If nr_pages is 0 or negative, returns 0. If no pages
* or similar operation cannot guarantee anything stronger anyway because
* locks can't be held over the syscall boundary.
*
- * If write=0, the page must not be written to. If the page is written to,
- * set_page_dirty (or set_page_dirty_lock, as appropriate) must be called
- * after the page is finished with, and before put_page is called.
+ * If gup_flags & FOLL_WRITE == 0, the page must not be written to. If the page
+ * is written to, set_page_dirty (or set_page_dirty_lock, as appropriate) must
+ * be called after the page is finished with, and before put_page is called.
*
* get_user_pages is typically used for fewer-copy IO operations, to get a
* handle on the memory by some means other than accesses via the user virtual
*/
long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
- struct vm_area_struct **vmas)
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas, int *locked)
{
- return __get_user_pages_locked(tsk, mm, start, nr_pages, write, force,
- pages, vmas, NULL, false,
- FOLL_TOUCH | FOLL_REMOTE);
+ return __get_user_pages_locked(tsk, mm, start, nr_pages, pages, vmas,
+ locked, true,
+ gup_flags | FOLL_TOUCH | FOLL_REMOTE);
}
EXPORT_SYMBOL(get_user_pages_remote);
/*
* This is the same as get_user_pages_remote(), just with a
* less-flexible calling convention where we assume that the task
- * and mm being operated on are the current task's. We also
- * obviously don't pass FOLL_REMOTE in here.
+ * and mm being operated on are the current task's and don't allow
+ * passing of a locked parameter. We also obviously don't pass
+ * FOLL_REMOTE in here.
*/
long get_user_pages(unsigned long start, unsigned long nr_pages,
- int write, int force, struct page **pages,
+ unsigned int gup_flags, struct page **pages,
struct vm_area_struct **vmas)
{
return __get_user_pages_locked(current, current->mm, start, nr_pages,
- write, force, pages, vmas, NULL, false,
- FOLL_TOUCH);
+ pages, vmas, NULL, false,
+ gup_flags | FOLL_TOUCH);
}
EXPORT_SYMBOL(get_user_pages);
start += nr << PAGE_SHIFT;
pages += nr;
- ret = get_user_pages_unlocked(start, nr_pages - nr, write, 0, pages);
+ ret = get_user_pages_unlocked(start, nr_pages - nr, pages,
+ write ? FOLL_WRITE : 0);
/* Have to be a bit careful with return values */
if (nr > 0) {