#include <linux/gfp.h>
#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/kmsan-checks.h>
#include <linux/mmdebug.h>
#include <linux/mm_types.h>
#include <linux/mm_inline.h>
#include <linux/pagemap.h>
#include <linux/rcupdate.h>
#include <linux/smp.h>
#include <linux/swap.h>

#include <asm/pgalloc.h>
#include <asm/tlb.h>

#ifndef CONFIG_MMU_GATHER_NO_GATHER

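/*
 * Advance tlb->active to the next page batch, allocating a new one if
 * needed. Returns false when no further batch can be used (allocation
 * failed or MAX_GATHER_BATCH_COUNT was reached), in which case the
 * caller has to flush.
 */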
static bool tlb_next_batch(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

        batch = tlb->active;
        if (batch->next) {
                tlb->active = batch->next;
                return true;
        }

        if (tlb->batch_count == MAX_GATHER_BATCH_COUNT)
                return false;

        batch = (void *)__get_free_pages(GFP_NOWAIT | __GFP_NOWARN, 0);
        if (!batch)
                return false;

        tlb->batch_count++;
        batch->next = NULL;
        batch->nr   = 0;
        batch->max  = MAX_GATHER_BATCH;

        tlb->active->next = batch;
        tlb->active = batch;

        return true;
}

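/*
 * Release all pages queued in the gather batches, dropping their swap
 * cache entries as well, and reset tlb->active to the local batch.
 */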
static void tlb_batch_pages_flush(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch;

        for (batch = &tlb->local; batch && batch->nr; batch = batch->next) {
                struct page **pages = batch->pages;

                do {
                        /*
                         * Free at most 512 pages at a time so that
                         * cond_resched() runs regularly; the cap only
                         * matters when PAGE_SIZE > 4K and a batch can
                         * hold more entries than that.
                         */
                        unsigned int nr = min(512U, batch->nr);

                        free_pages_and_swap_cache(pages, nr);
                        pages += nr;
                        batch->nr -= nr;

                        cond_resched();
                } while (batch->nr);
        }
        tlb->active = &tlb->local;
}

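/*
 * Free the dynamically allocated batch pages themselves; the local batch
 * embedded in struct mmu_gather stays.
 */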
static void tlb_batch_list_free(struct mmu_gather *tlb)
{
        struct mmu_gather_batch *batch, *next;

        for (batch = tlb->local.next; batch; batch = next) {
                next = batch->next;
                free_pages((unsigned long)batch, 0);
        }
        tlb->local.next = NULL;
}

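/*
 * Queue a page for deferred freeing. Returns true when the gather is full
 * and the caller needs to flush (tlb_flush_mmu()) before queueing more.
 */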
bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_size)
{
        struct mmu_gather_batch *batch;

        VM_BUG_ON(!tlb->end);

#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
        VM_WARN_ON(tlb->page_size != page_size);
#endif

        batch = tlb->active;
        /*
         * Add the page and check if we are full. If so, move on to the
         * next batch; if no further batch is available, return true so
         * the caller flushes.
         */
        batch->pages[batch->nr++] = page;
        if (batch->nr == batch->max) {
                if (!tlb_next_batch(tlb))
                        return true;
                batch = tlb->active;
        }
        VM_BUG_ON_PAGE(batch->nr > batch->max, page);

        return false;
}

#endif /* MMU_GATHER_NO_GATHER */

#ifdef CONFIG_MMU_GATHER_TABLE_FREE

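/*
 * Free every page-table page in @batch via the architecture's
 * __tlb_remove_table(), then free the batch page itself.
 */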
static void __tlb_remove_table_free(struct mmu_table_batch *batch)
{
        int i;

        for (i = 0; i < batch->nr; i++)
                __tlb_remove_table(batch->tables[i]);

        free_page((unsigned long)batch);
}

#ifdef CONFIG_MMU_GATHER_RCU_TABLE_FREE

/*
 * Semi RCU freeing of the page directories.
 *
 * This is needed by some architectures to implement software pagetable walkers.
 *
 * gup_fast() and other software pagetable walkers do a lockless page-table
 * walk and therefore need some synchronization with the freeing of the page
 * directories. The chosen means to accomplish that is by disabling IRQs over
 * the walk.
 *
 * Architectures that use IPIs to flush TLBs will then automagically DTRT,
 * since we unlink the page, flush TLBs, free the page. Since the disabling of
 * IRQs delays the completion of the TLB flush we can never observe an already
 * freed page.
 *
 * Architectures that do not have this (PPC) need to delay the freeing by some
 * other means; this is that means.
 *
 * What we do is batch the freed directory pages (tables) and RCU free them.
 * We use the sched RCU variant, as that guarantees that IRQ/preempt disabling
 * holds off grace periods.
 *
 * However, in order to batch these pages we need to allocate storage; this
 * allocation is deep inside the MM code and can thus easily fail on memory
 * pressure. To guarantee progress we fall back to single table freeing, see
 * the implementation of tlb_remove_table_one().
 */

static void tlb_remove_table_smp_sync(void *arg)
{
        /* Simply deliver the interrupt */
}

static void tlb_remove_table_sync_one(void)
{
        /*
         * This isn't an RCU grace period and hence the page-tables cannot be
         * assumed to be actually RCU-freed.
         *
         * It is however sufficient for software page-table walkers that rely
         * on IRQ disabling.
         */
        smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
}

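/*
 * RCU callback: by the time this runs a grace period has elapsed, so no
 * lockless walker can still hold a reference to the tables in the batch.
 */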
static void tlb_remove_table_rcu(struct rcu_head *head)
{
        __tlb_remove_table_free(container_of(head, struct mmu_table_batch, rcu));
}

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
        call_rcu(&batch->rcu, tlb_remove_table_rcu);
}

#else /* !CONFIG_MMU_GATHER_RCU_TABLE_FREE */

static void tlb_remove_table_sync_one(void) { }

static void tlb_remove_table_free(struct mmu_table_batch *batch)
{
        __tlb_remove_table_free(batch);
}

#endif /* CONFIG_MMU_GATHER_RCU_TABLE_FREE */

/*
 * If we want tlb_remove_table() to imply TLB invalidates, this is where
 * it happens.
 */
static inline void tlb_table_invalidate(struct mmu_gather *tlb)
{
        if (tlb_needs_table_invalidate()) {
                /*
                 * Invalidate page-table caches used by hardware walkers. Then
                 * we still need to RCU-sched wait while freeing the pages
                 * because software walkers can still be in-flight.
                 */
                tlb_flush_mmu_tlbonly(tlb);
        }
}

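/*
 * Slow path used when no batch storage could be allocated: synchronize
 * against concurrent lockless walkers, then free the single table
 * immediately.
 */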
static void tlb_remove_table_one(void *table)
{
        tlb_remove_table_sync_one();
        __tlb_remove_table(table);
}

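/*
 * Hand off the current table batch (if any) for freeing, invalidating
 * page-table caches first where required.
 */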
static void tlb_table_flush(struct mmu_gather *tlb)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch) {
                tlb_table_invalidate(tlb);
                tlb_remove_table_free(*batch);
                *batch = NULL;
        }
}

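/*
 * Queue a page-table page for deferred freeing. If no batch storage can be
 * allocated, fall back to freeing the table one at a time via
 * tlb_remove_table_one(), which synchronizes against lockless walkers.
 */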
void tlb_remove_table(struct mmu_gather *tlb, void *table)
{
        struct mmu_table_batch **batch = &tlb->batch;

        if (*batch == NULL) {
                *batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
                if (*batch == NULL) {
                        tlb_table_invalidate(tlb);
                        tlb_remove_table_one(table);
                        return;
                }
                (*batch)->nr = 0;
        }

        (*batch)->tables[(*batch)->nr++] = table;
        if ((*batch)->nr == MAX_TABLE_BATCH)
                tlb_table_flush(tlb);
}

static inline void tlb_table_init(struct mmu_gather *tlb)
{
        tlb->batch = NULL;
}

#else /* !CONFIG_MMU_GATHER_TABLE_FREE */

static inline void tlb_table_flush(struct mmu_gather *tlb) { }
static inline void tlb_table_init(struct mmu_gather *tlb) { }

#endif /* CONFIG_MMU_GATHER_TABLE_FREE */

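/*
 * Free everything queued in the gather: page-table batches first, then the
 * page batches (when page gathering is configured in).
 */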
static void tlb_flush_mmu_free(struct mmu_gather *tlb)
{
        tlb_table_flush(tlb);
#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb_batch_pages_flush(tlb);
#endif
}

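/*
 * Flush the TLB for the gathered range and then free all queued pages and
 * page tables.
 */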
void tlb_flush_mmu(struct mmu_gather *tlb)
{
        tlb_flush_mmu_tlbonly(tlb);
        tlb_flush_mmu_free(tlb);
}

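/*
 * Common initialization for tlb_gather_mmu() and tlb_gather_mmu_fullmm().
 */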
static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm,
                             bool fullmm)
{
        /*
         * struct mmu_gather contains 7 1-bit fields packed into a 32-bit
         * unsigned int value. The remaining 25 bits remain uninitialized
         * and are never used, but KMSAN updates the origin for them in
         * zap_pXX_range() in mm/memory.c, thus creating very long origin
         * chains. This is technically correct, but consumes too much memory.
         * Unpoisoning the whole structure will prevent creating such chains.
         */
        kmsan_unpoison_memory(tlb, sizeof(*tlb));
        tlb->mm = mm;
        tlb->fullmm = fullmm;

#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb->need_flush_all = 0;
        tlb->local.next = NULL;
        tlb->local.nr   = 0;
        tlb->local.max  = ARRAY_SIZE(tlb->__pages);
        tlb->active     = &tlb->local;
        tlb->batch_count = 0;
#endif

        tlb_table_init(tlb);
#ifdef CONFIG_MMU_GATHER_PAGE_SIZE
        tlb->page_size = 0;
#endif

        __tlb_reset_range(tlb);
        inc_tlb_flush_pending(tlb->mm);
}

/**
 * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm)
{
        __tlb_gather_mmu(tlb, mm, false);
}

/**
 * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down
 * @tlb: the mmu_gather structure to initialize
 * @mm: the mm_struct of the target address space
 *
 * In this case, @mm is without users and we're going to destroy the
 * full address space (exit/execve).
 *
 * Called to initialize an (on-stack) mmu_gather structure for page-table
 * tear-down from @mm.
 */
void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm)
{
        __tlb_gather_mmu(tlb, mm, true);
}

/**
 * tlb_finish_mmu - finish an mmu_gather structure
 * @tlb: the mmu_gather structure to finish
 *
 * Called at the end of the shootdown operation to free up any resources that
 * were required.
 */
void tlb_finish_mmu(struct mmu_gather *tlb)
{
        /*
         * If parallel threads are doing PTE changes on the same range under a
         * non-exclusive lock (e.g., mmap_lock read-side) but defer the TLB
         * flush by batching, one thread may end up seeing inconsistent PTEs
         * and stale TLB entries, so flush the TLB forcefully if we detect
         * parallel PTE batching threads.
         *
         * However, some syscalls, e.g. munmap(), may free page tables; this
         * needs to force-flush everything in the given range. Otherwise we
         * may be left with stale TLB entries on architectures, e.g. aarch64,
         * that can flush individual TLB levels.
         */
        if (mm_tlb_flush_nested(tlb->mm)) {
                /*
                 * aarch64 yields better performance with fullmm by avoiding
                 * multiple CPUs spamming TLBI messages at the same time.
                 *
                 * On x86, non-fullmm doesn't yield a significant difference
                 * from fullmm.
                 */
                tlb->fullmm = 1;
                __tlb_reset_range(tlb);
                tlb->freed_tables = 1;
        }

        tlb_flush_mmu(tlb);

#ifndef CONFIG_MMU_GATHER_NO_GATHER
        tlb_batch_list_free(tlb);
#endif
        dec_tlb_flush_pending(tlb->mm);
}
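
/*
 * Illustrative sketch (not part of this file's code): how callers typically
 * drive the mmu_gather API when tearing down mappings. The real call sites
 * live in the unmap paths of mm/; unmap_example_range() below is a
 * hypothetical stand-in for those paths.
 *
 *      struct mmu_gather tlb;
 *
 *      tlb_gather_mmu(&tlb, mm);
 *      unmap_example_range(&tlb, vma, start, end);     // clears PTEs, calls
 *                                                      // tlb_remove_page()/tlb_remove_table()
 *      tlb_finish_mmu(&tlb);                           // flush TLB, free pages and tables
 */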