gmap = kzalloc(sizeof(struct gmap), GFP_KERNEL_ACCOUNT);
if (!gmap)
goto out;
- INIT_LIST_HEAD(&gmap->crst_list);
INIT_LIST_HEAD(&gmap->children);
- INIT_LIST_HEAD(&gmap->pt_list);
INIT_RADIX_TREE(&gmap->guest_to_host, GFP_KERNEL_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_guest, GFP_ATOMIC | __GFP_ACCOUNT);
INIT_RADIX_TREE(&gmap->host_to_rmap, GFP_ATOMIC | __GFP_ACCOUNT);
page = gmap_alloc_crst();
if (!page)
goto out_free;
- list_add(&page->lru, &gmap->crst_list);
table = page_to_virt(page);
crst_table_init(table, etype);
gmap->table = table;
} while (nr > 0);
}
+static void gmap_free_crst(unsigned long *table, bool free_ptes)
+{
+ bool is_segment = (table[0] & _SEGMENT_ENTRY_TYPE_MASK) == 0;
+ int i;
+
+ if (is_segment) {
+ if (!free_ptes)
+ goto out;
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _SEGMENT_ENTRY_INVALID))
+ page_table_free_pgste(page_ptdesc(phys_to_page(table[i])));
+ } else {
+ for (i = 0; i < _CRST_ENTRIES; i++)
+ if (!(table[i] & _REGION_ENTRY_INVALID))
+ gmap_free_crst(__va(table[i] & PAGE_MASK), free_ptes);
+ }
+
+out:
+ free_pages((unsigned long)table, CRST_ALLOC_ORDER);
+}
+
/**
* gmap_free - free a guest address space
* @gmap: pointer to the guest address space structure
*/
void gmap_free(struct gmap *gmap)
{
- struct page *page, *next;
-
/* Flush tlb of all gmaps (if not already done for shadows) */
if (!(gmap_is_shadow(gmap) && gmap->removed))
gmap_flush_tlb(gmap);
/* Free all segment & region tables. */
- list_for_each_entry_safe(page, next, &gmap->crst_list, lru)
- __free_pages(page, CRST_ALLOC_ORDER);
+ gmap_free_crst(gmap->table, gmap_is_shadow(gmap));
+
gmap_radix_tree_free(&gmap->guest_to_host);
gmap_radix_tree_free(&gmap->host_to_guest);
/* Free additional data for a shadow gmap */
if (gmap_is_shadow(gmap)) {
- struct ptdesc *ptdesc, *n;
-
- /* Free all page tables. */
- list_for_each_entry_safe(ptdesc, n, &gmap->pt_list, pt_list)
- page_table_free_pgste(ptdesc);
gmap_rmap_radix_tree_free(&gmap->host_to_rmap);
/* Release reference to the parent */
gmap_put(gmap->parent);
crst_table_init(new, init);
spin_lock(&gmap->guest_table_lock);
if (*table & _REGION_ENTRY_INVALID) {
- list_add(&page->lru, &gmap->crst_list);
*table = __pa(new) | _REGION_ENTRY_LENGTH |
(*table & _REGION_ENTRY_TYPE_MASK);
page = NULL;
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
__gmap_unshadow_pgt(sg, raddr, __va(pgt));
/* Free page table */
ptdesc = page_ptdesc(phys_to_page(pgt));
- list_del(&ptdesc->pt_list);
page_table_free_pgste(ptdesc);
}
}
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
__gmap_unshadow_sgt(sg, raddr, __va(sgt));
/* Free segment table */
page = phys_to_page(sgt);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
__gmap_unshadow_r3t(sg, raddr, __va(r3t));
/* Free region 3 table */
page = phys_to_page(r3t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
__gmap_unshadow_r2t(sg, raddr, __va(r2t));
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
r1t[i] = _REGION1_ENTRY_EMPTY;
/* Free region 2 table */
page = phys_to_page(r2t);
- list_del(&page->lru);
__free_pages(page, CRST_ALLOC_ORDER);
}
}
_REGION_ENTRY_TYPE_R1 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r2t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
_REGION_ENTRY_TYPE_R2 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= (r3t & _REGION_ENTRY_PROTECT);
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
_REGION_ENTRY_TYPE_R3 | _REGION_ENTRY_INVALID;
if (sg->edat_level >= 1)
*table |= sgt & _REGION_ENTRY_PROTECT;
- list_add(&page->lru, &sg->crst_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_REGION_ENTRY_INVALID;
/* mark as invalid as long as the parent table is not protected */
*table = (unsigned long) s_pgt | _SEGMENT_ENTRY |
(pgt & _SEGMENT_ENTRY_PROTECT) | _SEGMENT_ENTRY_INVALID;
- list_add(&ptdesc->pt_list, &sg->pt_list);
if (fake) {
/* nothing to protect for fake tables */
*table &= ~_SEGMENT_ENTRY_INVALID;
}
EXPORT_SYMBOL_GPL(__s390_uv_destroy_range);
-/**
- * s390_unlist_old_asce - Remove the topmost level of page tables from the
- * list of page tables of the gmap.
- * @gmap: the gmap whose table is to be removed
- *
- * On s390x, KVM keeps a list of all pages containing the page tables of the
- * gmap (the CRST list). This list is used at tear down time to free all
- * pages that are now not needed anymore.
- *
- * This function removes the topmost page of the tree (the one pointed to by
- * the ASCE) from the CRST list.
- *
- * This means that it will not be freed when the VM is torn down, and needs
- * to be handled separately by the caller, unless a leak is actually
- * intended. Notice that this function will only remove the page from the
- * list, the page will still be used as a top level page table (and ASCE).
- */
-void s390_unlist_old_asce(struct gmap *gmap)
-{
- struct page *old;
-
- old = virt_to_page(gmap->table);
- spin_lock(&gmap->guest_table_lock);
- list_del(&old->lru);
- /*
- * Sometimes the topmost page might need to be "removed" multiple
- * times, for example if the VM is rebooted into secure mode several
- * times concurrently, or if s390_replace_asce fails after calling
- * s390_remove_old_asce and is attempted again later. In that case
- * the old asce has been removed from the list, and therefore it
- * will not be freed when the VM terminates, but the ASCE is still
- * in use and still pointed to.
- * A subsequent call to replace_asce will follow the pointer and try
- * to remove the same page from the list again.
- * Therefore it's necessary that the page of the ASCE has valid
- * pointers, so list_del can work (and do nothing) without
- * dereferencing stale or invalid pointers.
- */
- INIT_LIST_HEAD(&old->lru);
- spin_unlock(&gmap->guest_table_lock);
-}
-EXPORT_SYMBOL_GPL(s390_unlist_old_asce);
-
/**
* s390_replace_asce - Try to replace the current ASCE of a gmap with a copy
* @gmap: the gmap whose ASCE needs to be replaced
struct page *page;
void *table;
- s390_unlist_old_asce(gmap);
-
/* Replacing segment type ASCEs would cause serious issues */
if ((gmap->asce & _ASCE_TYPE_MASK) == _ASCE_TYPE_SEGMENT)
return -EINVAL;
table = page_to_virt(page);
memcpy(table, gmap->table, 1UL << (CRST_ALLOC_ORDER + PAGE_SHIFT));
- /*
- * The caller has to deal with the old ASCE, but here we make sure
- * the new one is properly added to the CRST list, so that
- * it will be freed when the VM is torn down.
- */
- spin_lock(&gmap->guest_table_lock);
- list_add(&page->lru, &gmap->crst_list);
- spin_unlock(&gmap->guest_table_lock);
-
/* Set new table origin while preserving existing ASCE control bits */
asce = (gmap->asce & ~_ASCE_ORIGIN) | __pa(table);
WRITE_ONCE(gmap->asce, asce);