*/
static inline bool can_follow_write_pte(pte_t pte, unsigned int flags)
{
- return pte_write(pte) ||
+ return pte_access_permitted(pte, WRITE) ||
((flags & FOLL_FORCE) && (flags & FOLL_COW) && pte_dirty(pte));
}
}
EXPORT_SYMBOL(get_user_pages);
+#ifdef CONFIG_FS_DAX
+/*
+ * This is the same as get_user_pages() in that it assumes we are
+ * operating on the current task's mm, but it goes further to validate
+ * that the vmas associated with the address range are suitable for
+ * longterm elevated page reference counts. For example, filesystem-dax
+ * mappings are subject to the lifetime enforced by the filesystem and
+ * we need guarantees that longterm users like RDMA and V4L2 only
+ * establish mappings that have a kernel enforced revocation mechanism.
+ *
+ * "longterm" == userspace controlled elevated page count lifetime.
+ * Contrast this to iov_iter_get_pages() usages which are transient.
+ */
+long get_user_pages_longterm(unsigned long start, unsigned long nr_pages,
+ unsigned int gup_flags, struct page **pages,
+ struct vm_area_struct **vmas_arg)
+{
+ struct vm_area_struct **vmas = vmas_arg;
+ struct vm_area_struct *vma_prev = NULL;
+ long rc, i;
+
+ if (!pages)
+ return -EINVAL;
+
+ if (!vmas) {
+ vmas = kcalloc(nr_pages, sizeof(struct vm_area_struct *),
+ GFP_KERNEL);
+ if (!vmas)
+ return -ENOMEM;
+ }
+
+ rc = get_user_pages(start, nr_pages, gup_flags, pages, vmas);
+
+ for (i = 0; i < rc; i++) {
+ struct vm_area_struct *vma = vmas[i];
+
+ if (vma == vma_prev)
+ continue;
+
+ vma_prev = vma;
+
+ if (vma_is_fsdax(vma))
+ break;
+ }
+
+ /*
+ * Either get_user_pages() failed, or the vma validation
+ * succeeded, in either case we don't need to put_page() before
+ * returning.
+ */
+ if (i >= rc)
+ goto out;
+
+ for (i = 0; i < rc; i++)
+ put_page(pages[i]);
+ rc = -EOPNOTSUPP;
+out:
+ if (vmas != vmas_arg)
+ kfree(vmas);
+ return rc;
+}
+EXPORT_SYMBOL(get_user_pages_longterm);
+#endif /* CONFIG_FS_DAX */
+
/**
* populate_vma_page_range() - populate a range of pages in the vma.
* @vma: target vma
return 1;
}
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset(current->mm, addr);
+ do {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ return;
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+ pages, nr))
+ return;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, write, pages, nr))
+ return;
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+ return;
+ } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
+
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp;
+ unsigned long flags;
int nr = 0;
start &= PAGE_MASK;
* block IPIs that come from THPs splitting.
*/
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = READ_ONCE(*pgdp);
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
- pages, &nr))
- break;
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, write, pages, &nr))
- break;
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ local_irq_save(flags);
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_restore(flags);
+ }
return nr;
}
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- return end >= start;
-}
-#endif
-
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
+ unsigned long addr, len, end;
int nr = 0, ret = 0;
start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
if (gup_fast_permitted(start, nr_pages, write)) {
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ local_irq_disable();
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_enable();
ret = nr;
}