#include <linux/mmu_notifier.h>
#include <linux/uaccess.h>
#include <linux/mm-arch-hooks.h>
+#include <linux/userfaultfd_k.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
unsigned long old_addr, unsigned long old_end,
struct vm_area_struct *new_vma, pmd_t *new_pmd,
- unsigned long new_addr, bool need_rmap_locks)
+ unsigned long new_addr, bool need_rmap_locks, bool *need_flush)
{
struct mm_struct *mm = vma->vm_mm;
pte_t *old_pte, *new_pte, pte;
spinlock_t *old_ptl, *new_ptl;
+ bool force_flush = false;
+ unsigned long len = old_end - old_addr;
/*
* When need_rmap_locks is true, we take the i_mmap_rwsem and anon_vma
new_pte++, new_addr += PAGE_SIZE) {
if (pte_none(*old_pte))
continue;
+
pte = ptep_get_and_clear(mm, old_addr, old_pte);
+ /*
+ * If we are remapping a dirty PTE, make sure
+ * to flush TLB before we drop the PTL for the
+ * old PTE or we may race with page_mkclean().
+ *
+ * This check has to be done after we removed the
+ * old PTE from page tables or another thread may
+ * dirty it after the check and before the removal.
+ */
+ if (pte_present(pte) && pte_dirty(pte))
+ force_flush = true;
pte = move_pte(pte, new_vma->vm_page_prot, old_addr, new_addr);
pte = move_soft_dirty_pte(pte);
set_pte_at(mm, new_addr, new_pte, pte);
if (new_ptl != old_ptl)
spin_unlock(new_ptl);
pte_unmap(new_pte - 1);
+ if (force_flush)
+ flush_tlb_range(vma, old_end - len, old_end);
+ else
+ *need_flush = true;
pte_unmap_unlock(old_pte - 1, old_ptl);
if (need_rmap_locks)
drop_rmap_locks(vma);
if (need_rmap_locks)
take_rmap_locks(vma);
moved = move_huge_pmd(vma, old_addr, new_addr,
- old_end, old_pmd, new_pmd);
+ old_end, old_pmd, new_pmd,
+ &need_flush);
if (need_rmap_locks)
drop_rmap_locks(vma);
- if (moved) {
- need_flush = true;
+ if (moved)
continue;
- }
}
split_huge_pmd(vma, old_pmd, old_addr);
if (pmd_trans_unstable(old_pmd))
extent = next - new_addr;
if (extent > LATENCY_LIMIT)
extent = LATENCY_LIMIT;
- move_ptes(vma, old_pmd, old_addr, old_addr + extent,
- new_vma, new_pmd, new_addr, need_rmap_locks);
- need_flush = true;
+ move_ptes(vma, old_pmd, old_addr, old_addr + extent, new_vma,
+ new_pmd, new_addr, need_rmap_locks, &need_flush);
}
- if (likely(need_flush))
+ if (need_flush)
flush_tlb_range(vma, old_end-len, old_addr);
mmu_notifier_invalidate_range_end(vma->vm_mm, mmun_start, mmun_end);
static unsigned long move_vma(struct vm_area_struct *vma,
unsigned long old_addr, unsigned long old_len,
- unsigned long new_len, unsigned long new_addr, bool *locked)
+ unsigned long new_len, unsigned long new_addr,
+ bool *locked, struct vm_userfaultfd_ctx *uf,
+ struct list_head *uf_unmap)
{
struct mm_struct *mm = vma->vm_mm;
struct vm_area_struct *new_vma;
old_addr = new_addr;
new_addr = err;
} else {
+ mremap_userfaultfd_prep(new_vma, uf);
arch_remap(mm, old_addr, old_addr + old_len,
new_addr, new_addr + new_len);
}
if (unlikely(vma->vm_flags & VM_PFNMAP))
untrack_pfn_moved(vma);
- if (do_munmap(mm, old_addr, old_len) < 0) {
+ if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) {
/* OOM: unable to split vma, just get accounts right */
vm_unacct_memory(excess >> PAGE_SHIFT);
excess = 0;
}
static unsigned long mremap_to(unsigned long addr, unsigned long old_len,
- unsigned long new_addr, unsigned long new_len, bool *locked)
+ unsigned long new_addr, unsigned long new_len, bool *locked,
+ struct vm_userfaultfd_ctx *uf,
+ struct list_head *uf_unmap)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
if (addr + old_len > new_addr && new_addr + new_len > addr)
goto out;
- ret = do_munmap(mm, new_addr, new_len);
+ ret = do_munmap(mm, new_addr, new_len, NULL);
if (ret)
goto out;
if (old_len >= new_len) {
- ret = do_munmap(mm, addr+new_len, old_len - new_len);
+ ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap);
if (ret && old_len != new_len)
goto out;
old_len = new_len;
if (offset_in_page(ret))
goto out1;
- ret = move_vma(vma, addr, old_len, new_len, new_addr, locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf,
+ uf_unmap);
if (!(offset_in_page(ret)))
goto out;
out1:
unsigned long ret = -EINVAL;
unsigned long charged = 0;
bool locked = false;
+ struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX;
+ LIST_HEAD(uf_unmap);
if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE))
return ret;
if (flags & MREMAP_FIXED) {
ret = mremap_to(addr, old_len, new_addr, new_len,
- &locked);
+ &locked, &uf, &uf_unmap);
goto out;
}
* do_munmap does all the needed commit accounting
*/
if (old_len >= new_len) {
- ret = do_munmap(mm, addr+new_len, old_len - new_len);
+ ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap);
if (ret && old_len != new_len)
goto out;
ret = addr;
goto out;
}
- ret = move_vma(vma, addr, old_len, new_len, new_addr, &locked);
+ ret = move_vma(vma, addr, old_len, new_len, new_addr,
+ &locked, &uf, &uf_unmap);
}
out:
if (offset_in_page(ret)) {
up_write(&current->mm->mmap_sem);
if (locked && new_len > old_len)
mm_populate(new_addr + old_len, new_len - old_len);
+ mremap_userfaultfd_complete(&uf, addr, new_addr, old_len);
+ userfaultfd_unmap_complete(mm, &uf_unmap);
return ret;
}