T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvms390/linux.git
F: Documentation/virt/kvm/s390*
F: arch/s390/include/asm/gmap.h
+F: arch/s390/include/asm/gmap_helpers.h
F: arch/s390/include/asm/kvm*
F: arch/s390/include/uapi/asm/kvm*
F: arch/s390/include/uapi/asm/uvdevice.h
F: arch/s390/kernel/uv.c
F: arch/s390/kvm/
F: arch/s390/mm/gmap.c
+F: arch/s390/mm/gmap_helpers.c
F: drivers/s390/char/uvdevice.c
F: tools/testing/selftests/drivers/s390x/uvdevice/
F: tools/testing/selftests/kvm/*/s390/
int gmap_unmap_segment(struct gmap *gmap, unsigned long to, unsigned long len);
unsigned long __gmap_translate(struct gmap *, unsigned long gaddr);
int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr);
-void gmap_discard(struct gmap *, unsigned long from, unsigned long to);
void __gmap_zap(struct gmap *, unsigned long gaddr);
void gmap_unlink(struct mm_struct *, unsigned long *table, unsigned long vmaddr);
void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long dirty_bitmap[4],
unsigned long gaddr, unsigned long vmaddr);
-int s390_disable_cow_sharing(void);
int s390_replace_asce(struct gmap *gmap);
void s390_uv_destroy_pfns(unsigned long count, unsigned long *pfns);
int __s390_uv_destroy_range(struct mm_struct *mm, unsigned long start,
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2025
+ */
+
+#ifndef _ASM_S390_GMAP_HELPERS_H
+#define _ASM_S390_GMAP_HELPERS_H
+
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr);
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end);
+int gmap_helper_disable_cow_sharing(void);
+
+#endif /* _ASM_S390_GMAP_HELPERS_H */
#include <linux/kvm.h>
#include <linux/kvm_host.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/virtio-ccw.h>
#include "kvm-s390.h"
#include "trace.h"
#include "trace-s390.h"
#include "gaccess.h"
+static void do_discard_gfn_range(struct kvm_vcpu *vcpu, gfn_t gfn_start, gfn_t gfn_end)
+{
+ struct kvm_memslot_iter iter;
+ struct kvm_memory_slot *slot;
+ struct kvm_memslots *slots;
+ unsigned long start, end;
+
+ slots = kvm_vcpu_memslots(vcpu);
+
+ kvm_for_each_memslot_in_gfn_range(&iter, slots, gfn_start, gfn_end) {
+ slot = iter.slot;
+ start = __gfn_to_hva_memslot(slot, max(gfn_start, slot->base_gfn));
+ end = __gfn_to_hva_memslot(slot, min(gfn_end, slot->base_gfn + slot->npages));
+ gmap_helper_discard(vcpu->kvm->mm, start, end);
+ }
+}
+
static int diag_release_pages(struct kvm_vcpu *vcpu)
{
unsigned long start, end;
VCPU_EVENT(vcpu, 5, "diag release pages %lX %lX", start, end);
+ mmap_read_lock(vcpu->kvm->mm);
/*
* We checked for start >= end above, so lets check for the
* fast path (no prefix swap page involved)
*/
if (end <= prefix || start >= prefix + 2 * PAGE_SIZE) {
- gmap_discard(vcpu->arch.gmap, start, end);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(end));
} else {
/*
* This is slow path. gmap_discard will check for start
* prefix and let gmap_discard make some of these calls
* NOPs.
*/
- gmap_discard(vcpu->arch.gmap, start, prefix);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(start), gpa_to_gfn(prefix));
if (start <= prefix)
- gmap_discard(vcpu->arch.gmap, 0, PAGE_SIZE);
+ do_discard_gfn_range(vcpu, 0, 1);
if (end > prefix + PAGE_SIZE)
- gmap_discard(vcpu->arch.gmap, PAGE_SIZE, 2 * PAGE_SIZE);
- gmap_discard(vcpu->arch.gmap, prefix + 2 * PAGE_SIZE, end);
+ do_discard_gfn_range(vcpu, 1, 2);
+ do_discard_gfn_range(vcpu, gpa_to_gfn(prefix) + 2, gpa_to_gfn(end));
}
+ mmap_read_unlock(vcpu->kvm->mm);
return 0;
}
#include <asm/machine.h>
#include <asm/stp.h>
#include <asm/gmap.h>
+#include <asm/gmap_helpers.h>
#include <asm/nmi.h>
#include <asm/isc.h>
#include <asm/sclp.h>
if (r)
break;
- r = s390_disable_cow_sharing();
+ mmap_write_lock(kvm->mm);
+ r = gmap_helper_disable_cow_sharing();
+ mmap_write_unlock(kvm->mm);
if (r)
break;
obj-$(CONFIG_PTDUMP) += dump_pagetables.o
obj-$(CONFIG_PGSTE) += gmap.o
obj-$(CONFIG_PFAULT) += pfault.o
+
+obj-$(subst m,y,$(CONFIG_KVM)) += gmap_helpers.o
#include <asm/page-states.h>
#include <asm/pgalloc.h>
#include <asm/machine.h>
+#include <asm/gmap_helpers.h>
#include <asm/gmap.h>
#include <asm/page.h>
*/
void __gmap_zap(struct gmap *gmap, unsigned long gaddr)
{
- struct vm_area_struct *vma;
unsigned long vmaddr;
- spinlock_t *ptl;
- pte_t *ptep;
+
+ mmap_assert_locked(gmap->mm);
/* Find the vm address for the guest address */
vmaddr = (unsigned long) radix_tree_lookup(&gmap->guest_to_host,
gaddr >> PMD_SHIFT);
if (vmaddr) {
vmaddr |= gaddr & ~PMD_MASK;
-
- vma = vma_lookup(gmap->mm, vmaddr);
- if (!vma || is_vm_hugetlb_page(vma))
- return;
-
- /* Get pointer to the page table entry */
- ptep = get_locked_pte(gmap->mm, vmaddr, &ptl);
- if (likely(ptep)) {
- ptep_zap_unused(gmap->mm, vmaddr, ptep, 0);
- pte_unmap_unlock(ptep, ptl);
- }
+ gmap_helper_zap_one_page(gmap->mm, vmaddr);
}
}
EXPORT_SYMBOL_GPL(__gmap_zap);
-void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to)
-{
- unsigned long gaddr, vmaddr, size;
- struct vm_area_struct *vma;
-
- mmap_read_lock(gmap->mm);
- for (gaddr = from; gaddr < to;
- gaddr = (gaddr + PMD_SIZE) & PMD_MASK) {
- /* Find the vm address for the guest address */
- vmaddr = (unsigned long)
- radix_tree_lookup(&gmap->guest_to_host,
- gaddr >> PMD_SHIFT);
- if (!vmaddr)
- continue;
- vmaddr |= gaddr & ~PMD_MASK;
- /* Find vma in the parent mm */
- vma = find_vma(gmap->mm, vmaddr);
- if (!vma)
- continue;
- /*
- * We do not discard pages that are backed by
- * hugetlbfs, so we don't have to refault them.
- */
- if (is_vm_hugetlb_page(vma))
- continue;
- size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK));
- zap_page_range_single(vma, vmaddr, size, NULL);
- }
- mmap_read_unlock(gmap->mm);
-}
-EXPORT_SYMBOL_GPL(gmap_discard);
-
static LIST_HEAD(gmap_notifier_list);
static DEFINE_SPINLOCK(gmap_notifier_lock);
}
EXPORT_SYMBOL_GPL(s390_enable_sie);
-static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
- unsigned long end, struct mm_walk *walk)
-{
- unsigned long *found_addr = walk->private;
-
- /* Return 1 of the page is a zeropage. */
- if (is_zero_pfn(pte_pfn(*pte))) {
- /*
- * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
- * right thing and likely don't care: FAULT_FLAG_UNSHARE
- * currently only works in COW mappings, which is also where
- * mm_forbids_zeropage() is checked.
- */
- if (!is_cow_mapping(walk->vma->vm_flags))
- return -EFAULT;
-
- *found_addr = addr;
- return 1;
- }
- return 0;
-}
-
-static const struct mm_walk_ops find_zeropage_ops = {
- .pte_entry = find_zeropage_pte_entry,
- .walk_lock = PGWALK_WRLOCK,
-};
-
-/*
- * Unshare all shared zeropages, replacing them by anonymous pages. Note that
- * we cannot simply zap all shared zeropages, because this could later
- * trigger unexpected userfaultfd missing events.
- *
- * This must be called after mm->context.allow_cow_sharing was
- * set to 0, to avoid future mappings of shared zeropages.
- *
- * mm contracts with s390, that even if mm were to remove a page table,
- * and racing with walk_page_range_vma() calling pte_offset_map_lock()
- * would fail, it will never insert a page table containing empty zero
- * pages once mm_forbids_zeropage(mm) i.e.
- * mm->context.allow_cow_sharing is set to 0.
- */
-static int __s390_unshare_zeropages(struct mm_struct *mm)
-{
- struct vm_area_struct *vma;
- VMA_ITERATOR(vmi, mm, 0);
- unsigned long addr;
- vm_fault_t fault;
- int rc;
-
- for_each_vma(vmi, vma) {
- /*
- * We could only look at COW mappings, but it's more future
- * proof to catch unexpected zeropages in other mappings and
- * fail.
- */
- if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
- continue;
- addr = vma->vm_start;
-
-retry:
- rc = walk_page_range_vma(vma, addr, vma->vm_end,
- &find_zeropage_ops, &addr);
- if (rc < 0)
- return rc;
- else if (!rc)
- continue;
-
- /* addr was updated by find_zeropage_pte_entry() */
- fault = handle_mm_fault(vma, addr,
- FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
- NULL);
- if (fault & VM_FAULT_OOM)
- return -ENOMEM;
- /*
- * See break_ksm(): even after handle_mm_fault() returned 0, we
- * must start the lookup from the current address, because
- * handle_mm_fault() may back out if there's any difficulty.
- *
- * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
- * maybe they could trigger in the future on concurrent
- * truncation. In that case, the shared zeropage would be gone
- * and we can simply retry and make progress.
- */
- cond_resched();
- goto retry;
- }
-
- return 0;
-}
-
-static int __s390_disable_cow_sharing(struct mm_struct *mm)
-{
- int rc;
-
- if (!mm->context.allow_cow_sharing)
- return 0;
-
- mm->context.allow_cow_sharing = 0;
-
- /* Replace all shared zeropages by anonymous pages. */
- rc = __s390_unshare_zeropages(mm);
- /*
- * Make sure to disable KSM (if enabled for the whole process or
- * individual VMAs). Note that nothing currently hinders user space
- * from re-enabling it.
- */
- if (!rc)
- rc = ksm_disable(mm);
- if (rc)
- mm->context.allow_cow_sharing = 1;
- return rc;
-}
-
-/*
- * Disable most COW-sharing of memory pages for the whole process:
- * (1) Disable KSM and unmerge/unshare any KSM pages.
- * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
- *
- * Not that we currently don't bother with COW-shared pages that are shared
- * with parent/child processes due to fork().
- */
-int s390_disable_cow_sharing(void)
-{
- int rc;
-
- mmap_write_lock(current->mm);
- rc = __s390_disable_cow_sharing(current->mm);
- mmap_write_unlock(current->mm);
- return rc;
-}
-EXPORT_SYMBOL_GPL(s390_disable_cow_sharing);
-
/*
* Enable storage key handling from now on and initialize the storage
* keys with the default key.
goto out_up;
mm->context.uses_skeys = 1;
- rc = __s390_disable_cow_sharing(mm);
+ rc = gmap_helper_disable_cow_sharing();
if (rc) {
mm->context.uses_skeys = 0;
goto out_up;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Helper functions for KVM guest address space mapping code
+ *
+ * Copyright IBM Corp. 2007, 2025
+ */
+#include <linux/mm_types.h>
+#include <linux/mmap_lock.h>
+#include <linux/mm.h>
+#include <linux/hugetlb.h>
+#include <linux/swap.h>
+#include <linux/swapops.h>
+#include <linux/pagewalk.h>
+#include <linux/ksm.h>
+#include <asm/gmap_helpers.h>
+
+/**
+ * ptep_zap_swap_entry() - discard a swap entry.
+ * @mm: the mm
+ * @entry: the swap entry that needs to be zapped
+ *
+ * Discards the given swap entry. If the swap entry was an actual swap
+ * entry (and not a migration entry, for example), the actual swapped
+ * page is also discarded from swap.
+ */
+static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry)
+{
+ if (!non_swap_entry(entry))
+ dec_mm_counter(mm, MM_SWAPENTS);
+ else if (is_migration_entry(entry))
+ dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry)));
+ free_swap_and_cache(entry);
+}
+
+/**
+ * gmap_helper_zap_one_page() - discard a page if it was swapped.
+ * @mm: the mm
+ * @vmaddr: the userspace virtual address that needs to be discarded
+ *
+ * If the given address maps to a swap entry, discard it.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr)
+{
+ struct vm_area_struct *vma;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ mmap_assert_locked(mm);
+
+ /* Find the vm address for the guest address */
+ vma = vma_lookup(mm, vmaddr);
+ if (!vma || is_vm_hugetlb_page(vma))
+ return;
+
+ /* Get pointer to the page table entry */
+ ptep = get_locked_pte(mm, vmaddr, &ptl);
+ if (unlikely(!ptep))
+ return;
+ if (pte_swap(*ptep))
+ ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep));
+ pte_unmap_unlock(ptep, ptl);
+}
+EXPORT_SYMBOL_GPL(gmap_helper_zap_one_page);
+
+/**
+ * gmap_helper_discard() - discard user pages in the given range
+ * @mm: the mm
+ * @vmaddr: starting userspace address
+ * @end: end address (first address outside the range)
+ *
+ * All userpace pages in the range [@vamddr, @end) are discarded and unmapped.
+ *
+ * Context: needs to be called while holding the mmap lock.
+ */
+void gmap_helper_discard(struct mm_struct *mm, unsigned long vmaddr, unsigned long end)
+{
+ struct vm_area_struct *vma;
+
+ mmap_assert_locked(mm);
+
+ while (vmaddr < end) {
+ vma = find_vma_intersection(mm, vmaddr, end);
+ if (!vma)
+ return;
+ if (!is_vm_hugetlb_page(vma))
+ zap_page_range_single(vma, vmaddr, min(end, vma->vm_end) - vmaddr, NULL);
+ vmaddr = vma->vm_end;
+ }
+}
+EXPORT_SYMBOL_GPL(gmap_helper_discard);
+
+static int find_zeropage_pte_entry(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long *found_addr = walk->private;
+
+ /* Return 1 of the page is a zeropage. */
+ if (is_zero_pfn(pte_pfn(*pte))) {
+ /*
+ * Shared zeropage in e.g., a FS DAX mapping? We cannot do the
+ * right thing and likely don't care: FAULT_FLAG_UNSHARE
+ * currently only works in COW mappings, which is also where
+ * mm_forbids_zeropage() is checked.
+ */
+ if (!is_cow_mapping(walk->vma->vm_flags))
+ return -EFAULT;
+
+ *found_addr = addr;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct mm_walk_ops find_zeropage_ops = {
+ .pte_entry = find_zeropage_pte_entry,
+ .walk_lock = PGWALK_WRLOCK,
+};
+
+/** __gmap_helper_unshare_zeropages() - unshare all shared zeropages
+ * @mm: the mm whose zero pages are to be unshared
+ *
+ * Unshare all shared zeropages, replacing them by anonymous pages. Note that
+ * we cannot simply zap all shared zeropages, because this could later
+ * trigger unexpected userfaultfd missing events.
+ *
+ * This must be called after mm->context.allow_cow_sharing was
+ * set to 0, to avoid future mappings of shared zeropages.
+ *
+ * mm contracts with s390, that even if mm were to remove a page table,
+ * and racing with walk_page_range_vma() calling pte_offset_map_lock()
+ * would fail, it will never insert a page table containing empty zero
+ * pages once mm_forbids_zeropage(mm) i.e.
+ * mm->context.allow_cow_sharing is set to 0.
+ */
+static int __gmap_helper_unshare_zeropages(struct mm_struct *mm)
+{
+ struct vm_area_struct *vma;
+ VMA_ITERATOR(vmi, mm, 0);
+ unsigned long addr;
+ vm_fault_t fault;
+ int rc;
+
+ for_each_vma(vmi, vma) {
+ /*
+ * We could only look at COW mappings, but it's more future
+ * proof to catch unexpected zeropages in other mappings and
+ * fail.
+ */
+ if ((vma->vm_flags & VM_PFNMAP) || is_vm_hugetlb_page(vma))
+ continue;
+ addr = vma->vm_start;
+
+retry:
+ rc = walk_page_range_vma(vma, addr, vma->vm_end,
+ &find_zeropage_ops, &addr);
+ if (rc < 0)
+ return rc;
+ else if (!rc)
+ continue;
+
+ /* addr was updated by find_zeropage_pte_entry() */
+ fault = handle_mm_fault(vma, addr,
+ FAULT_FLAG_UNSHARE | FAULT_FLAG_REMOTE,
+ NULL);
+ if (fault & VM_FAULT_OOM)
+ return -ENOMEM;
+ /*
+ * See break_ksm(): even after handle_mm_fault() returned 0, we
+ * must start the lookup from the current address, because
+ * handle_mm_fault() may back out if there's any difficulty.
+ *
+ * VM_FAULT_SIGBUS and VM_FAULT_SIGSEGV are unexpected but
+ * maybe they could trigger in the future on concurrent
+ * truncation. In that case, the shared zeropage would be gone
+ * and we can simply retry and make progress.
+ */
+ cond_resched();
+ goto retry;
+ }
+
+ return 0;
+}
+
+/**
+ * gmap_helper_disable_cow_sharing() - disable all COW sharing
+ *
+ * Disable most COW-sharing of memory pages for the whole process:
+ * (1) Disable KSM and unmerge/unshare any KSM pages.
+ * (2) Disallow shared zeropages and unshare any zerpages that are mapped.
+ *
+ * Not that we currently don't bother with COW-shared pages that are shared
+ * with parent/child processes due to fork().
+ */
+int gmap_helper_disable_cow_sharing(void)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ mmap_assert_write_locked(mm);
+
+ if (!mm->context.allow_cow_sharing)
+ return 0;
+
+ mm->context.allow_cow_sharing = 0;
+
+ /* Replace all shared zeropages by anonymous pages. */
+ rc = __gmap_helper_unshare_zeropages(mm);
+ /*
+ * Make sure to disable KSM (if enabled for the whole process or
+ * individual VMAs). Note that nothing currently hinders user space
+ * from re-enabling it.
+ */
+ if (!rc)
+ rc = ksm_disable(mm);
+ if (rc)
+ mm->context.allow_cow_sharing = 1;
+ return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_helper_disable_cow_sharing);