powerpc/64s: Trim offlined CPUs from mm_cpumasks
arch/powerpc/mm/book3s64/mmu_context.c [linux-2.6-block.git]
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *  MMU context allocation for 64-bit kernels.
 *
 *  Copyright (C) 2004 Anton Blanchard, IBM Corp. <anton@samba.org>
 */

#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/string.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/pkeys.h>
#include <linux/spinlock.h>
#include <linux/idr.h>
#include <linux/export.h>
#include <linux/gfp.h>
#include <linux/slab.h>
#include <linux/cpu.h>

#include <asm/mmu_context.h>
#include <asm/pgalloc.h>

#include "internal.h"

static DEFINE_IDA(mmu_context_ida);

static int alloc_context_id(int min_id, int max_id)
{
	return ida_alloc_range(&mmu_context_ida, min_id, max_id, GFP_KERNEL);
}

void hash__reserve_context_id(int id)
{
	int result = ida_alloc_range(&mmu_context_ida, id, id, GFP_KERNEL);

	WARN(result != id, "mmu: Failed to reserve context id %d (rc %d)\n", id, result);
}

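/*
 * The highest user context id we can hand out depends on the virtual
 * address range the MMU supports: with 68-bit VA the full MAX_USER_CONTEXT
 * range is usable, otherwise allocation is capped at MAX_USER_CONTEXT_65BIT_VA.
 */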
int hash__alloc_context_id(void)
{
	unsigned long max;

	if (mmu_has_feature(MMU_FTR_68_BIT_VA))
		max = MAX_USER_CONTEXT;
	else
		max = MAX_USER_CONTEXT_65BIT_VA;

	return alloc_context_id(MIN_USER_CONTEXT, max);
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);

static int realloc_context_ids(mm_context_t *ctx)
{
	int i, id;

	/*
	 * id 0 (aka. ctx->id) is special: we always allocate a new one, even if
	 * there wasn't one allocated previously (which happens in the exec
	 * case where ctx is newly allocated).
	 *
	 * We have to be a bit careful here. We must keep the existing ids in
	 * the array, so that we can test if they're non-zero to decide if we
	 * need to allocate a new one. However, in case of error we must free the
	 * ids we've allocated but *not* any of the existing ones (or risk a
	 * UAF). That's why we decrement i at the start of the error handling
	 * loop, to skip the id that we just tested but couldn't reallocate.
	 */
	for (i = 0; i < ARRAY_SIZE(ctx->extended_id); i++) {
		if (i == 0 || ctx->extended_id[i]) {
			id = hash__alloc_context_id();
			if (id < 0)
				goto error;

			ctx->extended_id[i] = id;
		}
	}

	/* The caller expects us to return the context id, i.e. ctx->id */
	return ctx->id;

error:
	for (i--; i >= 0; i--) {
		if (ctx->extended_id[i])
			ida_free(&mmu_context_ida, ctx->extended_id[i]);
	}

	return id;
}

static int hash__init_new_context(struct mm_struct *mm)
{
	int index;

	mm->context.hash_context = kmalloc(sizeof(struct hash_mm_context),
					   GFP_KERNEL);
	if (!mm->context.hash_context)
		return -ENOMEM;

	/*
	 * The old code would re-promote on fork, we don't do that when using
	 * slices as it could cause problems promoting slices that have been
	 * forced down to 4K.
	 *
	 * For book3s we have MMU_NO_CONTEXT set to be ~0. Hence check
	 * explicitly against context.id == 0. This ensures that we properly
	 * initialize context slice details for newly allocated mm's (which will
	 * have id == 0) and don't alter context slices inherited via fork (which
	 * will have id != 0).
	 *
	 * We should not be calling init_new_context() on init_mm. Hence a
	 * check against 0 is OK.
	 */
	if (mm->context.id == 0) {
		memset(mm->context.hash_context, 0, sizeof(struct hash_mm_context));
		slice_init_new_context_exec(mm);
	} else {
		/* This is fork. Copy hash_context details from current->mm */
		memcpy(mm->context.hash_context, current->mm->context.hash_context, sizeof(struct hash_mm_context));
#ifdef CONFIG_PPC_SUBPAGE_PROT
		/* Inherit subpage prot details if we have them. */
		if (current->mm->context.hash_context->spt) {
			mm->context.hash_context->spt = kmalloc(sizeof(struct subpage_prot_table),
								GFP_KERNEL);
			if (!mm->context.hash_context->spt) {
				kfree(mm->context.hash_context);
				return -ENOMEM;
			}
		}
#endif
	}

	index = realloc_context_ids(&mm->context);
	if (index < 0) {
#ifdef CONFIG_PPC_SUBPAGE_PROT
		kfree(mm->context.hash_context->spt);
#endif
		kfree(mm->context.hash_context);
		return index;
	}

	pkey_mm_init(mm);
	return index;
}

void hash__setup_new_exec(void)
{
	slice_setup_new_exec();

	slb_setup_new_exec();
}

static int radix__init_new_context(struct mm_struct *mm)
{
	unsigned long rts_field;
	int index, max_id;

	max_id = (1 << mmu_pid_bits) - 1;
	index = alloc_context_id(mmu_base_pid, max_id);
	if (index < 0)
		return index;

	/*
	 * Set the process table entry.
	 */
	rts_field = radix__get_tree_size();
	process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE);

	/*
	 * Order the above store with the subsequent update of the PID
	 * register (at which point HW can start loading/caching
	 * the entry) and the corresponding load by the MMU from
	 * the L2 cache.
	 */
	asm volatile("ptesync;isync" : : : "memory");

	mm->context.hash_context = NULL;

	return index;
}

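/*
 * Common entry point for setting up a new mm: allocate a radix PID or a
 * hash context id depending on the active MMU mode, then initialize the
 * page table fragment caches and per-mm accounting.
 */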
int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
{
	int index;

	if (radix_enabled())
		index = radix__init_new_context(mm);
	else
		index = hash__init_new_context(mm);

	if (index < 0)
		return index;

	mm->context.id = index;

	mm->context.pte_frag = NULL;
	mm->context.pmd_frag = NULL;
#ifdef CONFIG_SPAPR_TCE_IOMMU
	mm_iommu_init(mm);
#endif
	atomic_set(&mm->context.active_cpus, 0);
	atomic_set(&mm->context.copros, 0);

	return 0;
}

void __destroy_context(int context_id)
{
	ida_free(&mmu_context_ida, context_id);
}
EXPORT_SYMBOL_GPL(__destroy_context);

static void destroy_contexts(mm_context_t *ctx)
{
	int index, context_id;

	for (index = 0; index < ARRAY_SIZE(ctx->extended_id); index++) {
		context_id = ctx->extended_id[index];
		if (context_id)
			ida_free(&mmu_context_ida, context_id);
	}
	kfree(ctx->hash_context);
}

static void pmd_frag_destroy(void *pmd_frag)
{
	int count;
	struct page *page;

	page = virt_to_page(pmd_frag);
	/* drop all the pending references */
	count = ((unsigned long)pmd_frag & ~PAGE_MASK) >> PMD_FRAG_SIZE_SHIFT;
	/* We allow PMD_FRAG_NR fragments from a PMD page */
	if (atomic_sub_and_test(PMD_FRAG_NR - count, &page->pt_frag_refcount)) {
		pgtable_pmd_page_dtor(page);
		__free_page(page);
	}
}

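/*
 * Release any partially used PTE/PMD fragment pages still cached in the mm
 * context; dropping the remaining references frees the backing page once
 * nothing else holds a fragment from it.
 */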
static void destroy_pagetable_cache(struct mm_struct *mm)
{
	void *frag;

	frag = mm->context.pte_frag;
	if (frag)
		pte_frag_destroy(frag);

	frag = mm->context.pmd_frag;
	if (frag)
		pmd_frag_destroy(frag);
}

void destroy_context(struct mm_struct *mm)
{
#ifdef CONFIG_SPAPR_TCE_IOMMU
	WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list));
#endif
	/*
	 * For tasks which were successfully initialized we end up calling
	 * arch_exit_mmap(), which clears the process table entry. And
	 * arch_exit_mmap() is called before the required fullmm TLB flush,
	 * which does a RIC=2 flush. Hence for an initialized task, any cached
	 * process table entries have already been cleared by this point.
	 *
	 * The condition below handles the error case during task init. We have
	 * set the process table entry early and if we fail task
	 * initialization, we need to ensure the process table entry is zeroed.
	 * We need not worry about process table entry caches because the task
	 * never ran with this PID value.
	 */
	if (radix_enabled())
		process_tb[mm->context.id].prtb0 = 0;
	else
		subpage_prot_free(mm);
	destroy_contexts(&mm->context);
	mm->context.id = MMU_NO_CONTEXT;
}

void arch_exit_mmap(struct mm_struct *mm)
{
	destroy_pagetable_cache(mm);

	if (radix_enabled()) {
		/*
		 * Radix doesn't have a valid bit in the process table
		 * entries. However, we know that at least the P9
		 * implementation will avoid caching an entry with an
		 * invalid RTS field, and 0 is invalid. So this will do.
		 *
		 * This runs before the "fullmm" tlb flush in exit_mmap,
		 * which does a RIC=2 tlbie to clear the process table
		 * entry. See the "fullmm" comments in tlb-radix.c.
		 *
		 * No barrier required here after the store because
		 * this process will do the invalidate, which starts with
		 * ptesync.
		 */
		process_tb[mm->context.id].prtb0 = 0;
	}
}

#ifdef CONFIG_PPC_RADIX_MMU
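/*
 * Point the MMU at the next mm's process table entry by writing its context
 * id into the PID SPR; the isync provides the context-synchronizing
 * instruction required before translations under the new PID are used.
 */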
void radix__switch_mmu_context(struct mm_struct *prev, struct mm_struct *next)
{
	mtspr(SPRN_PID, next->context.id);
	isync();
}
#endif

/**
 * cleanup_cpu_mmu_context - Clean up MMU details for this CPU (newly offlined)
 *
 * This clears the CPU from mm_cpumask for all processes, and then flushes the
 * local TLB to ensure TLB coherency in case the CPU is onlined again.
 *
 * KVM guest translations are not necessarily flushed here. If KVM started
 * using mm_cpumask or the Linux APIs which do, this would have to be resolved.
 */
#ifdef CONFIG_HOTPLUG_CPU
void cleanup_cpu_mmu_context(void)
{
	int cpu = smp_processor_id();

	clear_tasks_mm_cpumask(cpu);
	tlbiel_all();
}
#endif